From 10f8cac7f4b7accd7a647e1096c85241f0687805 Mon Sep 17 00:00:00 2001 From: Andreas Skaar Date: Mon, 8 Oct 2018 22:05:05 +0200 Subject: [PATCH 001/246] Update runtime/CSharp/runtime/CSharp/Antlr4.Runtime/Atn/SingletonPredictionContext.cs NullReferenceException in CSharp target #2304 --- .../CSharp/Antlr4.Runtime/Atn/SingletonPredictionContext.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/CSharp/runtime/CSharp/Antlr4.Runtime/Atn/SingletonPredictionContext.cs b/runtime/CSharp/runtime/CSharp/Antlr4.Runtime/Atn/SingletonPredictionContext.cs index 2f8f1bb5d..e162dcaca 100644 --- a/runtime/CSharp/runtime/CSharp/Antlr4.Runtime/Atn/SingletonPredictionContext.cs +++ b/runtime/CSharp/runtime/CSharp/Antlr4.Runtime/Atn/SingletonPredictionContext.cs @@ -79,7 +79,7 @@ namespace Antlr4.Runtime.Atn return false; } Antlr4.Runtime.Atn.SingletonPredictionContext other = (Antlr4.Runtime.Atn.SingletonPredictionContext)o; - return returnState == other.returnState && parent.Equals(other.parent); + return returnState == other.returnState && (parent != null && parent.Equals(other.parent)); } public override string ToString() From 8b458f85c02fa93e9e216c7899a28776f00185c9 Mon Sep 17 00:00:00 2001 From: Andreas Skaar Date: Mon, 8 Oct 2018 22:15:05 +0200 Subject: [PATCH 002/246] Update contributors.txt sign of contributors.txt --- contributors.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contributors.txt b/contributors.txt index 3a223169c..8afdd597d 100644 --- a/contributors.txt +++ b/contributors.txt @@ -192,4 +192,5 @@ YYYY/MM/DD, github id, Full name, email 2018/06/16, EternalPhane, Zongyuan Zuo, eternalphane@gmail.com 2018/07/03, jgoppert, James Goppert, james.goppert@gmail.com 2018/07/27, Maksim Novikov, mnovikov.work@gmail.com -2018/08/03, ENDOH takanao, djmchl@gmail.com \ No newline at end of file +2018/08/03, ENDOH takanao, djmchl@gmail.com +2018/10/08, xsIceman, Andreas Skaar, andreas.skaar@gmail.com \ No newline at end of file From 3cd2878b28cae0a04b3fa251d1af3c7155bbbd33 Mon Sep 17 00:00:00 2001 From: Wail Khemir Date: Thu, 3 Jan 2019 15:26:47 -0500 Subject: [PATCH 003/246] Fix code generation for catch statements in python --- contributors.txt | 3 ++- .../org/antlr/v4/tool/templates/codegen/Python2/Python2.stg | 3 +-- .../org/antlr/v4/tool/templates/codegen/Python3/Python3.stg | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/contributors.txt b/contributors.txt index f48052823..ebda19f64 100644 --- a/contributors.txt +++ b/contributors.txt @@ -210,4 +210,5 @@ YYYY/MM/DD, github id, Full name, email 2018/11/14, nxtstep, Adriaan (Arjan) Duz, codewithadriaan[et]gmail[dot]com 2018/11/15, amykyta3, Alex Mykyta, amykyta3@users.noreply.github.com 2018/11/29, hannemann-tamas, Ralf Hannemann-Tamas, ralf.ht@gmail.com -2018/12/20, WalterCouto, Walter Couto, WalterCouto@users.noreply.github.com \ No newline at end of file +2018/12/20, WalterCouto, Walter Couto, WalterCouto@users.noreply.github.com +2019/01/02, wkhemir, Wail Khemir, khemirwail@gmail.com diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg index b56b8cbdb..b3aadfd55 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg @@ -517,9 +517,8 @@ if not : >> ExceptionClause(e, catchArg, catchAction) ::= << -catch () { +except : -} >> // lexer actions are not associated with 
model objects diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Python3/Python3.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Python3/Python3.stg index 650d0002e..b2b23a19d 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Python3/Python3.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Python3/Python3.stg @@ -529,9 +529,8 @@ if not : >> ExceptionClause(e, catchArg, catchAction) ::= << -catch () { +except : -} >> // lexer actions are not associated with model objects From 46bd9e55690e5607d5cbe94df988d97c384ed4f3 Mon Sep 17 00:00:00 2001 From: Larry Li Date: Sun, 19 Jan 2020 18:22:06 +1100 Subject: [PATCH 004/246] add dart target --- .travis.yml | 4 + .travis/before-install-linux-dart.sh | 10 + .travis/run-tests-dart.sh | 4 + contributors.txt | 3 +- runtime-testsuite/pom.xml | 1 + .../v4/test/runtime/templates/Dart.test.stg | 318 ++ .../v4/test/runtime/BaseRuntimeTest.java | 13 +- .../v4/test/runtime/dart/BaseDartTest.java | 1149 +++++++ .../runtime/dart/TestCompositeLexers.java | 26 + .../runtime/dart/TestCompositeParsers.java | 26 + .../runtime/dart/TestFullContextParsing.java | 26 + .../test/runtime/dart/TestLeftRecursion.java | 26 + .../v4/test/runtime/dart/TestLexerErrors.java | 26 + .../v4/test/runtime/dart/TestLexerExec.java | 26 + .../v4/test/runtime/dart/TestListeners.java | 26 + .../v4/test/runtime/dart/TestParseTrees.java | 26 + .../test/runtime/dart/TestParserErrors.java | 26 + .../v4/test/runtime/dart/TestParserExec.java | 26 + .../v4/test/runtime/dart/TestPerformance.java | 26 + .../runtime/dart/TestSemPredEvalLexer.java | 26 + .../runtime/dart/TestSemPredEvalParser.java | 26 + .../antlr/v4/test/runtime/dart/TestSets.java | 26 + .../descriptors/PerformanceDescriptors.java | 2 +- runtime/Dart/.gitignore | 23 + runtime/Dart/LICENSE.txt | 52 + runtime/Dart/README.md | 11 + runtime/Dart/lib/antlr4.dart | 21 + runtime/Dart/lib/src/atn/atn.dart | 18 + runtime/Dart/lib/src/atn/src/atn.dart | 189 ++ runtime/Dart/lib/src/atn/src/atn_config.dart | 248 ++ .../Dart/lib/src/atn/src/atn_config_set.dart | 281 ++ .../lib/src/atn/src/atn_deserializer.dart | 823 +++++ .../Dart/lib/src/atn/src/atn_simulator.dart | 101 + runtime/Dart/lib/src/atn/src/atn_state.dart | 298 ++ runtime/Dart/lib/src/atn/src/atn_type.dart | 18 + runtime/Dart/lib/src/atn/src/info.dart | 666 ++++ .../Dart/lib/src/atn/src/lexer_action.dart | 678 +++++ .../src/atn/src/lexer_action_executor.dart | 181 ++ .../lib/src/atn/src/lexer_atn_simulator.dart | 747 +++++ .../lib/src/atn/src/parser_atn_simulator.dart | 2701 +++++++++++++++++ .../src/atn/src/profiling_atn_simulator.dart | 228 ++ .../lib/src/atn/src/semantic_context.dart | 399 +++ runtime/Dart/lib/src/atn/src/transition.dart | 283 ++ runtime/Dart/lib/src/dfa/dfa.dart | 8 + runtime/Dart/lib/src/dfa/src/dfa.dart | 148 + .../Dart/lib/src/dfa/src/dfa_serializer.dart | 74 + runtime/Dart/lib/src/dfa/src/dfa_state.dart | 156 + runtime/Dart/lib/src/error/error.dart | 10 + .../error/src/diagnostic_error_listener.dart | 121 + .../lib/src/error/src/error_listener.dart | 250 ++ .../lib/src/error/src/error_strategy.dart | 955 ++++++ runtime/Dart/lib/src/error/src/errors.dart | 219 ++ runtime/Dart/lib/src/input_stream.dart | 350 +++ runtime/Dart/lib/src/interval_set.dart | 750 +++++ runtime/Dart/lib/src/lexer.dart | 342 +++ runtime/Dart/lib/src/ll1_analyzer.dart | 211 ++ runtime/Dart/lib/src/misc/multi_map.dart | 34 + runtime/Dart/lib/src/misc/pair.dart | 26 + runtime/Dart/lib/src/parser.dart | 839 +++++ 
runtime/Dart/lib/src/parser_interpreter.dart | 396 +++ runtime/Dart/lib/src/parser_rule_context.dart | 288 ++ runtime/Dart/lib/src/prediction_context.dart | 874 ++++++ runtime/Dart/lib/src/recognizer.dart | 197 ++ runtime/Dart/lib/src/rule_context.dart | 185 ++ runtime/Dart/lib/src/runtime_meta_data.dart | 199 ++ runtime/Dart/lib/src/token.dart | 479 +++ runtime/Dart/lib/src/token_factory.dart | 97 + runtime/Dart/lib/src/token_source.dart | 273 ++ runtime/Dart/lib/src/token_stream.dart | 650 ++++ .../Dart/lib/src/tree/src/pattern/chunk.dart | 108 + .../tree/src/pattern/parse_tree_match.dart | 701 +++++ runtime/Dart/lib/src/tree/src/tree.dart | 365 +++ runtime/Dart/lib/src/tree/src/trees.dart | 238 ++ runtime/Dart/lib/src/tree/tree.dart | 10 + runtime/Dart/lib/src/util/bit_set.dart | 301 ++ runtime/Dart/lib/src/util/murmur_hash.dart | 85 + runtime/Dart/lib/src/util/utils.dart | 33 + runtime/Dart/lib/src/vocabulary.dart | 268 ++ runtime/Dart/pubspec.yaml | 10 + .../v4/tool/templates/codegen/Dart/Dart.stg | 875 ++++++ .../antlr/v4/codegen/target/DartTarget.java | 112 + 81 files changed, 21068 insertions(+), 3 deletions(-) create mode 100755 .travis/before-install-linux-dart.sh create mode 100755 .travis/run-tests-dart.sh create mode 100644 runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Dart.test.stg create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/BaseDartTest.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeLexers.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeParsers.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestFullContextParsing.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLeftRecursion.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerErrors.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerExec.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestListeners.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParseTrees.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserErrors.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserExec.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestPerformance.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalLexer.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalParser.java create mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSets.java create mode 100644 runtime/Dart/.gitignore create mode 100644 runtime/Dart/LICENSE.txt create mode 100644 runtime/Dart/README.md create mode 100644 runtime/Dart/lib/antlr4.dart create mode 100644 runtime/Dart/lib/src/atn/atn.dart create mode 100644 runtime/Dart/lib/src/atn/src/atn.dart create mode 100644 runtime/Dart/lib/src/atn/src/atn_config.dart create mode 100644 runtime/Dart/lib/src/atn/src/atn_config_set.dart create mode 100644 runtime/Dart/lib/src/atn/src/atn_deserializer.dart create mode 100644 runtime/Dart/lib/src/atn/src/atn_simulator.dart create mode 100644 runtime/Dart/lib/src/atn/src/atn_state.dart create mode 100644 runtime/Dart/lib/src/atn/src/atn_type.dart create mode 100644 runtime/Dart/lib/src/atn/src/info.dart create mode 100644 
runtime/Dart/lib/src/atn/src/lexer_action.dart create mode 100644 runtime/Dart/lib/src/atn/src/lexer_action_executor.dart create mode 100644 runtime/Dart/lib/src/atn/src/lexer_atn_simulator.dart create mode 100644 runtime/Dart/lib/src/atn/src/parser_atn_simulator.dart create mode 100644 runtime/Dart/lib/src/atn/src/profiling_atn_simulator.dart create mode 100644 runtime/Dart/lib/src/atn/src/semantic_context.dart create mode 100644 runtime/Dart/lib/src/atn/src/transition.dart create mode 100644 runtime/Dart/lib/src/dfa/dfa.dart create mode 100644 runtime/Dart/lib/src/dfa/src/dfa.dart create mode 100644 runtime/Dart/lib/src/dfa/src/dfa_serializer.dart create mode 100644 runtime/Dart/lib/src/dfa/src/dfa_state.dart create mode 100644 runtime/Dart/lib/src/error/error.dart create mode 100644 runtime/Dart/lib/src/error/src/diagnostic_error_listener.dart create mode 100644 runtime/Dart/lib/src/error/src/error_listener.dart create mode 100644 runtime/Dart/lib/src/error/src/error_strategy.dart create mode 100644 runtime/Dart/lib/src/error/src/errors.dart create mode 100644 runtime/Dart/lib/src/input_stream.dart create mode 100644 runtime/Dart/lib/src/interval_set.dart create mode 100644 runtime/Dart/lib/src/lexer.dart create mode 100644 runtime/Dart/lib/src/ll1_analyzer.dart create mode 100644 runtime/Dart/lib/src/misc/multi_map.dart create mode 100644 runtime/Dart/lib/src/misc/pair.dart create mode 100644 runtime/Dart/lib/src/parser.dart create mode 100644 runtime/Dart/lib/src/parser_interpreter.dart create mode 100644 runtime/Dart/lib/src/parser_rule_context.dart create mode 100644 runtime/Dart/lib/src/prediction_context.dart create mode 100644 runtime/Dart/lib/src/recognizer.dart create mode 100644 runtime/Dart/lib/src/rule_context.dart create mode 100644 runtime/Dart/lib/src/runtime_meta_data.dart create mode 100644 runtime/Dart/lib/src/token.dart create mode 100644 runtime/Dart/lib/src/token_factory.dart create mode 100644 runtime/Dart/lib/src/token_source.dart create mode 100644 runtime/Dart/lib/src/token_stream.dart create mode 100644 runtime/Dart/lib/src/tree/src/pattern/chunk.dart create mode 100644 runtime/Dart/lib/src/tree/src/pattern/parse_tree_match.dart create mode 100644 runtime/Dart/lib/src/tree/src/tree.dart create mode 100644 runtime/Dart/lib/src/tree/src/trees.dart create mode 100644 runtime/Dart/lib/src/tree/tree.dart create mode 100644 runtime/Dart/lib/src/util/bit_set.dart create mode 100644 runtime/Dart/lib/src/util/murmur_hash.dart create mode 100644 runtime/Dart/lib/src/util/utils.dart create mode 100644 runtime/Dart/lib/src/vocabulary.dart create mode 100644 runtime/Dart/pubspec.yaml create mode 100644 tool/resources/org/antlr/v4/tool/templates/codegen/Dart/Dart.stg create mode 100644 tool/src/org/antlr/v4/codegen/target/DartTarget.java diff --git a/.travis.yml b/.travis.yml index cae5dae3d..16665093c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -152,6 +152,10 @@ matrix: jdk: openjdk8 env: TARGET=csharp stage: main-test + - os: linux + jdk: openjdk8 + env: TARGET=dart + stage: main-test - os: linux language: php php: diff --git a/.travis/before-install-linux-dart.sh b/.travis/before-install-linux-dart.sh new file mode 100755 index 000000000..f9f8266ff --- /dev/null +++ b/.travis/before-install-linux-dart.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -euo pipefail + +sudo apt-get -y install apt-transport-https +sudo sh -c 'wget -qO- https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -' +sudo sh -c 'wget -qO- 
https://storage.googleapis.com/download.dartlang.org/linux/debian/dart_stable.list > /etc/apt/sources.list.d/dart_stable.list' +sudo apt-get -q update +sudo apt-get -y install dart +export PATH="$PATH:/usr/lib/dart/bin" diff --git a/.travis/run-tests-dart.sh b/.travis/run-tests-dart.sh new file mode 100755 index 000000000..8053a90d7 --- /dev/null +++ b/.travis/run-tests-dart.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +set -euo pipefail +mvn -q -Dparallel=classes -DthreadCount=4 -Dtest=dart.* test diff --git a/contributors.txt b/contributors.txt index 1fe97ab8b..fdc7a19ef 100644 --- a/contributors.txt +++ b/contributors.txt @@ -236,4 +236,5 @@ YYYY/MM/DD, github id, Full name, email 2019/10/31, a-square, Alexei Averchenko, lex.aver@gmail.com 2019/11/11, foxeverl, Liu Xinfeng, liuxf1986[at]gmail[dot]com 2019/11/17, felixn, Felix Nieuwenhuizhen, felix@tdlrali.com -2019/11/18, mlilback, Mark Lilback, mark@lilback.com \ No newline at end of file +2019/11/18, mlilback, Mark Lilback, mark@lilback.com +2020/01/19, lingyv-li, Lingyu Li, lingyv.li@gmail.com \ No newline at end of file diff --git a/runtime-testsuite/pom.xml b/runtime-testsuite/pom.xml index 33ea5e9f1..2bb36dc58 100644 --- a/runtime-testsuite/pom.xml +++ b/runtime-testsuite/pom.xml @@ -116,6 +116,7 @@ **/python2/Test*.java **/python3/Test*.java **/php/Test*.java + **/dart/Test*.java ${antlr.tests.swift} diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Dart.test.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Dart.test.stg new file mode 100644 index 000000000..73bcb9523 --- /dev/null +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Dart.test.stg @@ -0,0 +1,318 @@ +writeln(s) ::= <);>> +write(s) ::= <);>> +writeList(s) ::= <);>> + +False() ::= "false" + +True() ::= "true" + +Not(v) ::= "!" + +Assert(s) ::= <);>> + +Cast(t,v) ::= "( as )" + +Append(a,b) ::= ".toString() + .toString()" + +AppendStr(a,b) ::= <%%> + +Concat(a,b) ::= "" + +AssertIsList(v) ::= "assert ( is List);" // just use static type system + +AssignLocal(s,v) ::= " = ;" + +InitIntMember(n,v) ::= <%int = ;%> + +InitBooleanMember(n,v) ::= <%bool = ;%> + +InitIntVar(n,v) ::= <%%> + +IntArg(n) ::= "int " + +VarRef(n) ::= "" + +GetMember(n) ::= <%this.%> + +SetMember(n,v) ::= <%this. = ;%> + +AddMember(n,v) ::= <%this. += ;%> + +MemberEquals(n,v) ::= <%this. == %> + +ModMemberEquals(n,m,v) ::= <%this. % == %> + +ModMemberNotEquals(n,m,v) ::= <%this. % != %> + +DumpDFA() ::= "this.dumpDFA();" + +Pass() ::= "" + +StringList() ::= "List\" + +BuildParseTrees() ::= "buildParseTree = true;" + +BailErrorStrategy() ::= <%errorHandler = new BailErrorStrategy();%> + +ToStringTree(s) ::= <%.toStringTree(parser: this)%> + +Column() ::= "this.charPositionInLine" + +Text() ::= "this.text" + +ValEquals(a,b) ::= <%==%> + +TextEquals(a) ::= <%this.text == ""%> + +PlusText(a) ::= <%"" + this.text%> + +InputText() ::= "this.tokenStream.text" + +LTEquals(i, v) ::= <%this.tokenStream.LT().text == %> + +LANotEquals(i, v) ::= <%this.tokenStream.LA()!=%> + +TokenStartColumnEquals(i) ::= <%this.tokenStartCharPositionInLine==%> + +ImportListener(X) ::= "" + +GetExpectedTokenNames() ::= "this.expectedTokens.toString(vocabulary: this.vocabulary)" + +RuleInvocationStack() ::= "ruleInvocationStack" + +LL_EXACT_AMBIG_DETECTION() ::= <> + +ParserToken(parser, token) ::= <%.TOKEN_%> + +Production(p) ::= <%
<p>
%>

Result(r) ::= <%<r>%>

ParserPropertyMember() ::= <<
@members {
bool Property() {
  return true;
}
}
>>

ParserPropertyCall(p, call) ::= "<p>
." + +PositionAdjustingLexerDef() ::= << +class PositionAdjustingLexerATNSimulator extends LexerATNSimulator { + PositionAdjustingLexerATNSimulator(Lexer recog, ATN atn, + List decisionToDFA, PredictionContextCache sharedContextCache) + : super(atn, decisionToDFA, sharedContextCache, recog: recog); + + void resetAcceptPosition(CharStream input, int index, int line, + int charPositionInLine) { + input.seek(index); + this.line = line; + this.charPositionInLine = charPositionInLine; + consume(input); + } +} +>> + +PositionAdjustingLexer() ::= << +@override +Token nextToken() { + if (!(super.interpreter is PositionAdjustingLexerATNSimulator)) { + interpreter = new PositionAdjustingLexerATNSimulator( + this, _ATN, _decisionToDFA, _sharedContextCache); + } + + return super.nextToken(); +} + +@override +Token emit() { + switch (type) { + case TOKEN_TOKENS: + handleAcceptPositionForKeyword("tokens"); + break; + + case TOKEN_LABEL: + handleAcceptPositionForIdentifier(); + break; + + default: + break; + } + + return super.emit(); +} + +bool handleAcceptPositionForIdentifier() { + String tokenText = text; + int identifierLength = 0; + while (identifierLength \< tokenText.length && + isIdentifierChar(tokenText[identifierLength])) { + identifierLength++; + } + + if (inputStream.index > tokenStartCharIndex + identifierLength) { + int offset = identifierLength - 1; + interpreter.resetAcceptPosition(inputStream, tokenStartCharIndex + offset, + tokenStartLine, tokenStartCharPositionInLine + offset); + return true; + } + + return false; +} + +bool handleAcceptPositionForKeyword(String keyword) { + if (inputStream.index > tokenStartCharIndex + keyword.length) { + int offset = keyword.length - 1; + interpreter.resetAcceptPosition(inputStream, tokenStartCharIndex + offset, + tokenStartLine, tokenStartCharPositionInLine + offset); + return true; + } + + return false; +} + +@override +PositionAdjustingLexerATNSimulator get interpreter { + return super.interpreter as PositionAdjustingLexerATNSimulator; +} + +static bool isIdentifierChar(String c) { + return isLetterOrDigit(c) || c == '_'; +} + +static const ZERO = 48; +static const LOWER_A = 97; +static const LOWER_Z = 122; +static const UPPER_A = 65; +static const UPPER_Z = 90; + +static bool isLetterOrDigit(String char) => isLetter(char) || isDigit(char); + +// Note: this is intentially ASCII only +static bool isLetter(String char) { + if (char == null) return false; + var cc = char.codeUnitAt(0); + return cc >= LOWER_A && cc \<= LOWER_Z || cc >= UPPER_A && cc \<= UPPER_Z; +} + +static bool isDigit(String char) { + if (char == null) return false; + var cc = char.codeUnitAt(0); + return cc >= ZERO && cc \< ZERO + 10; +} +>> + +BasicListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void visitTerminal(TerminalNode node) { + print(node.symbol.text); + } +} +} +>> + +WalkListener(s) ::= << +ParseTreeWalker walker = new ParseTreeWalker(); +walker.walk(new LeafListener(), ); +>> + +TreeNodeWithAltNumField(X) ::= << +@parser::definitions { +class MyRuleNode extends ParserRuleContext { + int altNum; + + MyRuleNode(ParserRuleContext parent, int invokingStateNumber) + : super(parent, invokingStateNumber); + + @override int get altNumber { + return altNum; + } + + @override void set altNumber(int altNum) { + this.altNum = altNum; + } +} +} +>> + +TokenGetterListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void exitA(AContext ctx) { + if (ctx.childCount==2) + 
stdout.write("${ctx.INT(0).symbol.text} ${ctx.INT(1).symbol.text} ${ctx.INTs()}"); + else + print(ctx.ID().symbol); + } +} +} +>> + +RuleGetterListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void exitA(AContext ctx) { + if (ctx.childCount==2) { + stdout.write("${ctx.b(0).start.text} ${ctx.b(1).start.text} ${ctx.bs()[0].start.text}"); + } else + print(ctx.b(0).start.text); + } +} +} +>> + + +LRListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void exitE(EContext ctx) { + if (ctx.childCount==3) { + stdout.write("${ctx.e(0).start.text} ${ctx.e(1).start.text} ${ctx.es()[0].start.text}\n"); + } else + print(ctx.INT().symbol.text); + } +} +} +>> + +LRWithLabelsListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void exitCall(CallContext ctx) { + stdout.write("${ctx.e().start.text} ${ctx.eList()}"); + } + void exitInt(IntContext ctx) { + print(ctx.INT().symbol.text); + } +} +} +>> + +DeclareContextListGettersFunction() ::= << +void foo() { + SContext s = null; + List\ a = s.as(); + List\ b = s.bs(); +} +>> + +Declare_foo() ::= << + void foo() {print("foo");} +>> + +Invoke_foo() ::= "foo();" + +Declare_pred() ::= <> + +Invoke_pred(v) ::= <)>> + +ParserTokenType(t) ::= "Parser." +ContextRuleFunction(ctx, rule) ::= "." +StringType() ::= "String" +ContextMember(ctx, subctx, member) ::= ".." diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java index f7874d671..94acf1956 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java @@ -51,7 +51,8 @@ public abstract class BaseRuntimeTest { "CSharp", "Python2", "Python3", "PHP", - "Node", "Safari", "Firefox", "Explorer", "Chrome" + "Node", "Safari", "Firefox", "Explorer", "Chrome", + "Dart" }; public final static String[] JavaScriptTargets = { "Node", "Safari", "Firefox", "Explorer", "Chrome" @@ -302,6 +303,16 @@ public abstract class BaseRuntimeTest { } } + public static String readFile(String dir, String fileName) { + try { + return String.copyValueOf(Utils.readFile(dir+"/"+fileName, "UTF-8")); + } + catch (IOException ioe) { + System.err.println("can't read file"); + ioe.printStackTrace(System.err); + } + return null; + } protected static void assertCorrectOutput(RuntimeTestDescriptor descriptor, RuntimeTestSupport delegate, String actualOutput) { String actualParseErrors = delegate.getParseErrors(); diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/BaseDartTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/BaseDartTest.java new file mode 100644 index 000000000..9d9df1f8a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/BaseDartTest.java @@ -0,0 +1,1149 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.Tool; +import org.antlr.v4.analysis.AnalysisPipeline; +import org.antlr.v4.automata.ATNFactory; +import org.antlr.v4.automata.ATNPrinter; +import org.antlr.v4.automata.LexerATNFactory; +import org.antlr.v4.automata.ParserATNFactory; +import org.antlr.v4.codegen.CodeGenerator; +import org.antlr.v4.misc.Utils; +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.atn.*; +import org.antlr.v4.runtime.dfa.DFA; +import org.antlr.v4.runtime.misc.IntegerList; +import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.Pair; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.semantics.SemanticPipeline; +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.ErrorQueue; +import org.antlr.v4.test.runtime.RuntimeTestSupport; +import org.antlr.v4.test.runtime.StreamVacuum; +import org.antlr.v4.tool.*; +import org.stringtemplate.v4.ST; +import org.stringtemplate.v4.STGroup; +import org.stringtemplate.v4.STGroupString; + +import java.io.*; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.*; + +import static junit.framework.TestCase.*; +import static org.antlr.v4.test.runtime.BaseRuntimeTest.readFile; +import static org.antlr.v4.test.runtime.BaseRuntimeTest.writeFile; +import static org.junit.Assert.assertArrayEquals; + + +public class BaseDartTest implements RuntimeTestSupport { + public static final String newline = System.getProperty("line.separator"); + public static final String pathSep = System.getProperty("path.separator"); + + + /** + * When the {@code antlr.preserve-test-dir} runtime property is set to + * {@code true}, the temporary directories created by the test run will not + * be removed at the end of the test run, even for tests that completed + * successfully. + *
+ * <p>
+ * The default behavior (used in all other cases) is removing the temporary
+ * directories for all tests which completed successfully, and preserving
+ * the directories for tests which failed.</p>
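+ *
+ * <p>For example, running the suite with {@code -Dantlr.preserve-test-dir=true}
+ * keeps every generated test directory around for inspection.</p>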
+ */ + public static final boolean PRESERVE_TEST_DIR = Boolean.parseBoolean(System.getProperty("antlr.preserve-test-dir", "false")); + + /** + * The base test directory is the directory where generated files get placed + * during unit test execution. + *
+ * <p>
+ * The default value for this property is the {@code java.io.tmpdir} system
+ * property, and can be overridden by setting the
+ * {@code antlr.java-test-dir} property to a custom location. Note that the
+ * {@code antlr.java-test-dir} property directly affects the
+ * {@link #CREATE_PER_TEST_DIRECTORIES} value as well.</p>
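+ *
+ * <p>(Note: the static initializer below actually consults
+ * {@code antlr.dart-test-dir}, so for this Dart test base the override is
+ * e.g. {@code -Dantlr.dart-test-dir=/tmp/antlr-tests}.)</p>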
+ */ + public static final String BASE_TEST_DIR; + + /** + * When {@code true}, a temporary directory will be created for each test + * executed during the test run. + *
+ * <p>
+ * This value is {@code true} when the {@code antlr.java-test-dir} system
+ * property is set, and otherwise {@code false}.</p>
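+ *
+ * <p>(Here too the property the static initializer reads is
+ * {@code antlr.dart-test-dir}; when it is left unset, the tests fall back to
+ * {@code java.io.tmpdir} with one fresh directory per test.)</p>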
+ */ + public static final boolean CREATE_PER_TEST_DIRECTORIES; + + static { + String baseTestDir = System.getProperty("antlr.dart-test-dir"); + boolean perTestDirectories = false; + if (baseTestDir == null || baseTestDir.isEmpty()) { + baseTestDir = System.getProperty("java.io.tmpdir"); + perTestDirectories = true; + } + + if (!new File(baseTestDir).isDirectory()) { + throw new UnsupportedOperationException("The specified base test directory does not exist: " + baseTestDir); + } + + BASE_TEST_DIR = baseTestDir; + CREATE_PER_TEST_DIRECTORIES = perTestDirectories; + } + + /** + * Build up the full classpath we need, including the surefire path (if present) + */ + public static final String CLASSPATH = System.getProperty("java.class.path"); + + public String tmpdir = null; + + /** + * If error during parser execution, store stderr here; can't return + * stdout and stderr. This doesn't trap errors from running antlr. + */ + protected String stderrDuringParse; + + /** + * Errors found while running antlr + */ + protected StringBuilder antlrToolErrors; + + private static String cacheDartPackages; + + private String getPropertyPrefix() { + return "antlr-php"; + } + + @Override + public void testSetUp() throws Exception { + if (CREATE_PER_TEST_DIRECTORIES) { + // new output dir for each test + String threadName = Thread.currentThread().getName(); + String testDirectory = getClass().getSimpleName() + "-" + threadName + "-" + System.nanoTime(); + tmpdir = new File(BASE_TEST_DIR, testDirectory).getAbsolutePath(); + } else { + tmpdir = new File(BASE_TEST_DIR).getAbsolutePath(); + if (!PRESERVE_TEST_DIR && new File(tmpdir).exists()) { + eraseFiles(); + } + } + antlrToolErrors = new StringBuilder(); + } + + @Override + public void testTearDown() throws Exception { + } + + @Override + public String getTmpDir() { + return tmpdir; + } + + @Override + public String getStdout() { + return null; + } + + @Override + public String getParseErrors() { + return stderrDuringParse; + } + + @Override + public String getANTLRToolErrors() { + if (antlrToolErrors.length() == 0) { + return null; + } + return antlrToolErrors.toString(); + } + + protected Tool newTool(String[] args) { + Tool tool = new Tool(args); + return tool; + } + + protected ATN createATN(Grammar g, boolean useSerializer) { + if (g.atn == null) { + semanticProcess(g); + assertEquals(0, g.tool.getNumErrors()); + + ParserATNFactory f; + if (g.isLexer()) { + f = new LexerATNFactory((LexerGrammar) g); + } else { + f = new ParserATNFactory(g); + } + + g.atn = f.createATN(); + assertEquals(0, g.tool.getNumErrors()); + } + + ATN atn = g.atn; + if (useSerializer) { + char[] serialized = ATNSerializer.getSerializedAsChars(atn); + return new ATNDeserializer().deserialize(serialized); + } + + return atn; + } + + protected void semanticProcess(Grammar g) { + if (g.ast != null && !g.ast.hasErrors) { +// System.out.println(g.ast.toStringTree()); + Tool antlr = new Tool(); + SemanticPipeline sem = new SemanticPipeline(g); + sem.process(); + if (g.getImportedGrammars() != null) { // process imported grammars (if any) + for (Grammar imp : g.getImportedGrammars()) { + antlr.processNonCombinedGrammar(imp, false); + } + } + } + } + + public DFA createDFA(Grammar g, DecisionState s) { +// PredictionDFAFactory conv = new PredictionDFAFactory(g, s); +// DFA dfa = conv.createDFA(); +// conv.issueAmbiguityWarnings(); +// System.out.print("DFA="+dfa); +// return dfa; + return null; + } + +// public void minimizeDFA(DFA dfa) { +// DFAMinimizer dmin = new DFAMinimizer(dfa); +// 
dfa.minimized = dmin.minimize(); +// } + + IntegerList getTypesFromString(Grammar g, String expecting) { + IntegerList expectingTokenTypes = new IntegerList(); + if (expecting != null && !expecting.trim().isEmpty()) { + for (String tname : expecting.replace(" ", "").split(",")) { + int ttype = g.getTokenType(tname); + expectingTokenTypes.add(ttype); + } + } + return expectingTokenTypes; + } + + public IntegerList getTokenTypesViaATN(String input, LexerATNSimulator lexerATN) { + ANTLRInputStream in = new ANTLRInputStream(input); + IntegerList tokenTypes = new IntegerList(); + int ttype; + do { + ttype = lexerATN.match(in, Lexer.DEFAULT_MODE); + tokenTypes.add(ttype); + } while (ttype != Token.EOF); + return tokenTypes; + } + + public List getTokenTypes(LexerGrammar lg, + ATN atn, + CharStream input) { + LexerATNSimulator interp = new LexerATNSimulator(atn, new DFA[]{new DFA(atn.modeToStartState.get(Lexer.DEFAULT_MODE))}, null); + List tokenTypes = new ArrayList(); + int ttype; + boolean hitEOF = false; + do { + if (hitEOF) { + tokenTypes.add("EOF"); + break; + } + int t = input.LA(1); + ttype = interp.match(input, Lexer.DEFAULT_MODE); + if (ttype == Token.EOF) { + tokenTypes.add("EOF"); + } else { + tokenTypes.add(lg.typeToTokenList.get(ttype)); + } + + if (t == IntStream.EOF) { + hitEOF = true; + } + } while (ttype != Token.EOF); + return tokenTypes; + } + + List checkRuleDFA(String gtext, String ruleName, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + Grammar g = new Grammar(gtext, equeue); + ATN atn = createATN(g, false); + ATNState s = atn.ruleToStartState[g.getRule(ruleName).index]; + if (s == null) { + System.err.println("no such rule: " + ruleName); + return null; + } + ATNState t = s.transition(0).target; + if (!(t instanceof DecisionState)) { + System.out.println(ruleName + " has no decision"); + return null; + } + DecisionState blk = (DecisionState) t; + checkRuleDFA(g, blk, expecting); + return equeue.all; + } + + List checkRuleDFA(String gtext, int decision, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + Grammar g = new Grammar(gtext, equeue); + ATN atn = createATN(g, false); + DecisionState blk = atn.decisionToState.get(decision); + checkRuleDFA(g, blk, expecting); + return equeue.all; + } + + void checkRuleDFA(Grammar g, DecisionState blk, String expecting) + throws Exception { + DFA dfa = createDFA(g, blk); + String result = null; + if (dfa != null) result = dfa.toString(); + assertEquals(expecting, result); + } + + List checkLexerDFA(String gtext, String expecting) + throws Exception { + return checkLexerDFA(gtext, LexerGrammar.DEFAULT_MODE_NAME, expecting); + } + + List checkLexerDFA(String gtext, String modeName, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + LexerGrammar g = new LexerGrammar(gtext, equeue); + g.atn = createATN(g, false); +// LexerATNToDFAConverter conv = new LexerATNToDFAConverter(g); +// DFA dfa = conv.createDFA(modeName); +// g.setLookaheadDFA(0, dfa); // only one decision to worry about +// +// String result = null; +// if ( dfa!=null ) result = dfa.toString(); +// assertEquals(expecting, result); +// +// return equeue.all; + return null; + } + + protected String load(String fileName, String encoding) + throws IOException { + if (fileName == null) { + return null; + } + + String fullFileName = getClass().getPackage().getName().replace('.', '/') + '/' + fileName; + int size = 65000; + InputStreamReader isr; + InputStream fis = 
getClass().getClassLoader().getResourceAsStream(fullFileName); + if (encoding != null) { + isr = new InputStreamReader(fis, encoding); + } else { + isr = new InputStreamReader(fis); + } + try { + char[] data = new char[size]; + int n = isr.read(data); + return new String(data, 0, n); + } finally { + isr.close(); + } + } + + protected String execLexer(String grammarFileName, + String grammarStr, + String lexerName, + String input) { + return execLexer(grammarFileName, grammarStr, lexerName, input, false); + } + + @Override + public String execLexer(String grammarFileName, + String grammarStr, + String lexerName, + String input, + boolean showDFA) { + boolean success = rawGenerateAndBuildRecognizer(grammarFileName, + grammarStr, + null, + lexerName); + assertTrue(success); + writeFile(tmpdir, "input", input); + writeLexerTestFile(lexerName, showDFA); + String output = execClass("Test.dart"); + return output; + } + + public ParseTree execParser(String startRuleName, String input, + String parserName, String lexerName) + throws Exception { + Pair pl = getParserAndLexer(input, parserName, lexerName); + Parser parser = pl.a; + return execStartRule(startRuleName, parser); + } + + public ParseTree execStartRule(String startRuleName, Parser parser) + throws IllegalAccessException, InvocationTargetException, + NoSuchMethodException { + Method startRule = null; + Object[] args = null; + try { + startRule = parser.getClass().getMethod(startRuleName); + } catch (NoSuchMethodException nsme) { + // try with int _p arg for recursive func + startRule = parser.getClass().getMethod(startRuleName, int.class); + args = new Integer[]{0}; + } + ParseTree result = (ParseTree) startRule.invoke(parser, args); +// System.out.println("parse tree = "+result.toStringTree(parser)); + return result; + } + + public Pair getParserAndLexer(String input, + String parserName, String lexerName) + throws Exception { + final Class lexerClass = loadLexerClassFromTempDir(lexerName); + final Class parserClass = loadParserClassFromTempDir(parserName); + + ANTLRInputStream in = new ANTLRInputStream(new StringReader(input)); + + Class c = lexerClass.asSubclass(Lexer.class); + Constructor ctor = c.getConstructor(CharStream.class); + Lexer lexer = ctor.newInstance(in); + + Class pc = parserClass.asSubclass(Parser.class); + Constructor pctor = pc.getConstructor(TokenStream.class); + CommonTokenStream tokens = new CommonTokenStream(lexer); + Parser parser = pctor.newInstance(tokens); + return new Pair(parser, lexer); + } + + public Class loadClassFromTempDir(String name) throws Exception { + ClassLoader loader = + new URLClassLoader(new URL[]{new File(tmpdir).toURI().toURL()}, + ClassLoader.getSystemClassLoader()); + return loader.loadClass(name); + } + + public Class loadLexerClassFromTempDir(String name) throws Exception { + return loadClassFromTempDir(name).asSubclass(Lexer.class); + } + + public Class loadParserClassFromTempDir(String name) throws Exception { + return loadClassFromTempDir(name).asSubclass(Parser.class); + } + + @Override + public String execParser(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + String listenerName, + String visitorName, + String startRuleName, + String input, + boolean showDiagnosticErrors) { + return execParser(grammarFileName, grammarStr, parserName, lexerName, + listenerName, visitorName, startRuleName, input, showDiagnosticErrors, false); + } + + public String execParser(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + 
String listenerName, + String visitorName, + String startRuleName, + String input, + boolean showDiagnosticErrors, + boolean profile) { + boolean success = rawGenerateAndBuildRecognizer(grammarFileName, + grammarStr, + parserName, + lexerName, + "-visitor"); + assertTrue(success); + writeFile(tmpdir, "input", input); + return rawExecRecognizer(parserName, + lexerName, + startRuleName, + showDiagnosticErrors, + profile); + } + + /** + * Return true if all is well + */ + protected boolean rawGenerateAndBuildRecognizer(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + String... extraOptions) { + return rawGenerateAndBuildRecognizer(grammarFileName, grammarStr, parserName, lexerName, false, extraOptions); + } + + /** + * Return true if all is well + */ + protected boolean rawGenerateAndBuildRecognizer(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + boolean defaultListener, + String... extraOptions) { + ErrorQueue equeue = + BaseRuntimeTest.antlrOnString(getTmpDir(), "Dart", grammarFileName, grammarStr, defaultListener, extraOptions); + if (!equeue.errors.isEmpty()) { + return false; + } + + List files = new ArrayList(); + if (lexerName != null) { + files.add(lexerName + ".dart"); + } + if (parserName != null) { + files.add(parserName + ".dart"); + Set optionsSet = new HashSet(Arrays.asList(extraOptions)); + String grammarName = grammarFileName.substring(0, grammarFileName.lastIndexOf('.')); + if (!optionsSet.contains("-no-listener")) { + files.add(grammarName + "Listener.dart"); + files.add(grammarName + "BaseListener.dart"); + } + if (optionsSet.contains("-visitor")) { + files.add(grammarName + "Visitor.dart"); + files.add(grammarName + "BaseVisitor.dart"); + } + } + + String runtime = locateRuntime(); + writeFile(tmpdir, "pubspec.yaml", + "name: \"test\"\n" + + "dependencies:\n" + + " antlr4:\n" + + " path: " + runtime + "\n"); + if (cacheDartPackages == null) { + System.out.println("Not skipping" + tmpdir); + try { + Process process = Runtime.getRuntime().exec(new String[]{locatePub(), "get"}, null, new File(tmpdir)); + StreamVacuum stderrVacuum = new StreamVacuum(process.getErrorStream()); + stderrVacuum.start(); + process.waitFor(); + stderrVacuum.join(); + System.out.println(stderrVacuum.toString()); + } catch (IOException | InterruptedException e) { + e.printStackTrace(); + return false; + } + cacheDartPackages = readFile(tmpdir, ".packages"); + } else { + writeFile(tmpdir, ".packages", cacheDartPackages); + } + return true; // allIsWell: no compile + } + + protected String rawExecRecognizer(String parserName, + String lexerName, + String parserStartRuleName, + boolean debug, + boolean profile) { + this.stderrDuringParse = null; + if (parserName == null) { + writeLexerTestFile(lexerName, false); + } else { + writeTestFile(parserName, + lexerName, + parserStartRuleName, + debug, + profile); + } + + return execClass("Test.dart"); + } + + public String execRecognizer() { + return execClass("Test.dart"); + } + + public String execClass(String className) { + try { + String[] args = new String[]{ + locateDart(), + className, new File(tmpdir, "input").getAbsolutePath() + }; + String cmdLine = Utils.join(args, " "); + System.err.println("execParser: " + cmdLine); + Process process = + Runtime.getRuntime().exec(args, null, new File(tmpdir)); + StreamVacuum stdoutVacuum = new StreamVacuum(process.getInputStream()); + StreamVacuum stderrVacuum = new StreamVacuum(process.getErrorStream()); + stdoutVacuum.start(); + 
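            // stderr must be drained on its own thread as well; otherwise the
            // child Dart process can block once the OS pipe buffer fills up.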
stderrVacuum.start(); + process.waitFor(); + stdoutVacuum.join(); + stderrVacuum.join(); + String output = stdoutVacuum.toString(); + if (output.length() == 0) { + output = null; + } + if (stderrVacuum.toString().length() > 0) { + this.stderrDuringParse = stderrVacuum.toString(); + } + return output; + } catch (Exception e) { + System.err.println("can't exec recognizer"); + e.printStackTrace(System.err); + } + return null; + } + + private String locateTool(String tool) { + final String phpPath = System.getProperty("DART_PATH"); + + if (phpPath != null && new File(phpPath).exists()) { + return phpPath; + } + + String[] roots = {"/usr/local/bin/", "/opt/local/bin/", "/usr/bin/", "/usr/lib/dart/bin/"}; + + for (String root : roots) { + if (new File(root + tool).exists()) { + return root + tool; + } + } + + throw new RuntimeException("Could not locate " + tool); + } + + protected String locatePub() { + String propName = getPropertyPrefix() + "-pub"; + String prop = System.getProperty(propName); + + if (prop == null || prop.length() == 0) { + prop = locateTool("pub"); + } + + File file = new File(prop); + + if (!file.exists()) { + throw new RuntimeException("Missing system property:" + propName); + } + + return file.getAbsolutePath(); + } + + protected String locateDart() { + String propName = getPropertyPrefix() + "-dart"; + String prop = System.getProperty(propName); + + if (prop == null || prop.length() == 0) { + prop = locateTool("dart"); + } + + File file = new File(prop); + + if (!file.exists()) { + throw new RuntimeException("Missing system property:" + propName); + } + + return file.getAbsolutePath(); + } + + private String locateRuntime() { + final ClassLoader loader = Thread.currentThread().getContextClassLoader(); + final URL runtimeSrc = loader.getResource("Dart"); + if (runtimeSrc == null) { + throw new RuntimeException("Cannot find Dart runtime"); + } + if (isWindows()) { + return runtimeSrc.getPath().replaceFirst("/", ""); + } + return runtimeSrc.getPath(); + } + + private boolean isWindows() { + return System.getProperty("os.name").toLowerCase().contains("windows"); + } + +// void ambig(List msgs, int[] expectedAmbigAlts, String expectedAmbigInput) +// throws Exception +// { +// ambig(msgs, 0, expectedAmbigAlts, expectedAmbigInput); +// } + +// void ambig(List msgs, int i, int[] expectedAmbigAlts, String expectedAmbigInput) +// throws Exception +// { +// List amsgs = getMessagesOfType(msgs, AmbiguityMessage.class); +// AmbiguityMessage a = (AmbiguityMessage)amsgs.get(i); +// if ( a==null ) assertNull(expectedAmbigAlts); +// else { +// assertEquals(a.conflictingAlts.toString(), Arrays.toString(expectedAmbigAlts)); +// } +// assertEquals(expectedAmbigInput, a.input); +// } + +// void unreachable(List msgs, int[] expectedUnreachableAlts) +// throws Exception +// { +// unreachable(msgs, 0, expectedUnreachableAlts); +// } + +// void unreachable(List msgs, int i, int[] expectedUnreachableAlts) +// throws Exception +// { +// List amsgs = getMessagesOfType(msgs, UnreachableAltsMessage.class); +// UnreachableAltsMessage u = (UnreachableAltsMessage)amsgs.get(i); +// if ( u==null ) assertNull(expectedUnreachableAlts); +// else { +// assertEquals(u.conflictingAlts.toString(), Arrays.toString(expectedUnreachableAlts)); +// } +// } + + List getMessagesOfType(List msgs, Class c) { + List filtered = new ArrayList(); + for (ANTLRMessage m : msgs) { + if (m.getClass() == c) filtered.add(m); + } + return filtered; + } + + public void checkRuleATN(Grammar g, String ruleName, String expecting) { +// 
DOTGenerator dot = new DOTGenerator(g); +// System.out.println(dot.getDOT(g.atn.ruleToStartState[g.getRule(ruleName).index])); + + Rule r = g.getRule(ruleName); + ATNState startState = g.getATN().ruleToStartState[r.index]; + ATNPrinter serializer = new ATNPrinter(g, startState); + String result = serializer.asString(); + + //System.out.print(result); + assertEquals(expecting, result); + } + + public void testActions(String templates, String actionName, String action, String expected) throws org.antlr.runtime.RecognitionException { + int lp = templates.indexOf('('); + String name = templates.substring(0, lp); + STGroup group = new STGroupString(templates); + ST st = group.getInstanceOf(name); + st.add(actionName, action); + String grammar = st.render(); + ErrorQueue equeue = new ErrorQueue(); + Grammar g = new Grammar(grammar, equeue); + if (g.ast != null && !g.ast.hasErrors) { + SemanticPipeline sem = new SemanticPipeline(g); + sem.process(); + + ATNFactory factory = new ParserATNFactory(g); + if (g.isLexer()) factory = new LexerATNFactory((LexerGrammar) g); + g.atn = factory.createATN(); + + AnalysisPipeline anal = new AnalysisPipeline(g); + anal.process(); + + CodeGenerator gen = new CodeGenerator(g); + ST outputFileST = gen.generateParser(false); + String output = outputFileST.render(); + //System.out.println(output); + String b = "#" + actionName + "#"; + int start = output.indexOf(b); + String e = "#end-" + actionName + "#"; + int end = output.indexOf(e); + String snippet = output.substring(start + b.length(), end); + assertEquals(expected, snippet); + } + if (equeue.size() > 0) { +// System.err.println(equeue.toString()); + } + } + + protected void checkGrammarSemanticsError(ErrorQueue equeue, + GrammarSemanticsMessage expectedMessage) + throws Exception { + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.errors.size(); i++) { + ANTLRMessage m = equeue.errors.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + assertNotNull("no error; " + expectedMessage.getErrorType() + " expected", foundMsg); + assertTrue("error is not a GrammarSemanticsMessage", + foundMsg instanceof GrammarSemanticsMessage); + assertEquals(Arrays.toString(expectedMessage.getArgs()), Arrays.toString(foundMsg.getArgs())); + if (equeue.size() != 1) { + System.err.println(equeue); + } + } + + protected void checkGrammarSemanticsWarning(ErrorQueue equeue, + GrammarSemanticsMessage expectedMessage) + throws Exception { + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.warnings.size(); i++) { + ANTLRMessage m = equeue.warnings.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + assertNotNull("no error; " + expectedMessage.getErrorType() + " expected", foundMsg); + assertTrue("error is not a GrammarSemanticsMessage", + foundMsg instanceof GrammarSemanticsMessage); + assertEquals(Arrays.toString(expectedMessage.getArgs()), Arrays.toString(foundMsg.getArgs())); + if (equeue.size() != 1) { + System.err.println(equeue); + } + } + + protected void checkError(ErrorQueue equeue, + ANTLRMessage expectedMessage) + throws Exception { + //System.out.println("errors="+equeue); + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.errors.size(); i++) { + ANTLRMessage m = equeue.errors.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + assertTrue("no error; " + expectedMessage.getErrorType() + " expected", !equeue.errors.isEmpty()); + assertTrue("too many errors; " + equeue.errors, 
equeue.errors.size() <= 1); + assertNotNull("couldn't find expected error: " + expectedMessage.getErrorType(), foundMsg); + /* + * assertTrue("error is not a GrammarSemanticsMessage", foundMsg + * instanceof GrammarSemanticsMessage); + */ + assertArrayEquals(expectedMessage.getArgs(), foundMsg.getArgs()); + } + + public static class FilteringTokenStream extends CommonTokenStream { + public FilteringTokenStream(TokenSource src) { + super(src); + } + + Set hide = new HashSet(); + + @Override + protected boolean sync(int i) { + if (!super.sync(i)) { + return false; + } + + Token t = get(i); + if (hide.contains(t.getType())) { + ((WritableToken) t).setChannel(Token.HIDDEN_CHANNEL); + } + + return true; + } + + public void setTokenTypeChannel(int ttype, int channel) { + hide.add(ttype); + } + } + + protected void writeTestFile(String parserName, + String lexerName, + String parserStartRuleName, + boolean debug, + boolean profile) { + ST outputFileST = new ST( + "import 'package:antlr4/antlr4.dart';\n" + + "\n" + + "import '.dart';\n" + + "import '.dart';\n" + + "\n" + + "void main(List args) async {\n" + + " CharStream input = await InputStream.fromPath(args[0]);\n" + + " lex = new (input);\n" + + " CommonTokenStream tokens = new CommonTokenStream(lex);\n" + + " \n" + + " parser.buildParseTree = true;\n" + + " \n" + + " ParserRuleContext tree = parser.();\n" + + " print('[${profiler.getDecisionInfo().join(', ')}]');\n" + + " ParseTreeWalker.DEFAULT.walk(new TreeShapeListener(), tree);\n" + + "}\n" + + "\n" + + "class TreeShapeListener implements ParseTreeListener {\n" + + " @override void visitTerminal(TerminalNode node) {}\n" + + "\n" + + " @override void visitErrorNode(ErrorNode node) {}\n" + + "\n" + + " @override void exitEveryRule(ParserRuleContext ctx) {}\n" + + "\n" + + " @override\n" + + " void enterEveryRule(ParserRuleContext ctx) {\n" + + " for (int i = 0; i \\< ctx.childCount; i++) {\n" + + " ParseTree parent = ctx.getChild(i).parent;\n" + + " if (!(parent is RuleNode) || (parent as RuleNode).ruleContext != ctx) {\n" + + " throw new StateError(\"Invalid parse tree shape detected.\");\n" + + " }\n" + + " }\n" + + " }\n" + + "}\n" + ); + ST createParserST = new ST(" parser = new (tokens);\n"); + if (debug) { + createParserST = + new ST( + " parser = new (tokens);\n" + + " parser.addErrorListener(new DiagnosticErrorListener());\n"); + } + if (profile) { + outputFileST.add("profile", + "ProfilingATNSimulator profiler = new ProfilingATNSimulator(parser);\n" + + "parser.setInterpreter(profiler);"); + } else { + outputFileST.add("profile", new ArrayList()); + } + outputFileST.add("createParser", createParserST); + outputFileST.add("parserName", parserName); + outputFileST.add("lexerName", lexerName); + outputFileST.add("parserStartRuleName", parserStartRuleName); + writeFile(tmpdir, "Test.dart", outputFileST.render()); + } + + protected void writeLexerTestFile(String lexerName, boolean showDFA) { + ST outputFileST = new ST( + "import 'dart:io';\n" + + "\n" + + "import 'package:antlr4/antlr4.dart';\n" + + "\n" + + "import '.dart';\n" + + "\n" + + "void main(List args) async {\n" + + " CharStream input = await InputStream.fromPath(args[0]);\n" + + " lex = new (input);\n" + + " CommonTokenStream tokens = new CommonTokenStream(lex);\n" + + " tokens.fill();\n" + + " for (Object t in tokens.getTokens())\n" + + " print(t);\n" + + "\n" + + (showDFA ? 
"stdout.write(lex.interpreter.getDFA(Lexer.DEFAULT_MODE).toLexerString());\n" : "") + + "}\n" + ); + + outputFileST.add("lexerName", lexerName); + writeFile(tmpdir, "Test.dart", outputFileST.render()); + } + + public void writeRecognizerAndCompile(String parserName, String lexerName, + String parserStartRuleName, + boolean debug, + boolean profile) { + if (parserName == null) { + writeLexerTestFile(lexerName, debug); + } else { + writeTestFile(parserName, + lexerName, + parserStartRuleName, + debug, + profile); + } + } + + protected void eraseFiles(final String filesEndingWith) { + File tmpdirF = new File(tmpdir); + String[] files = tmpdirF.list(); + for (int i = 0; files != null && i < files.length; i++) { + if (files[i].endsWith(filesEndingWith)) { + new File(tmpdir + "/" + files[i]).delete(); + } + } + } + + protected void eraseFiles() { + if (tmpdir == null) { + return; + } + + File tmpdirF = new File(tmpdir); + String[] files = tmpdirF.list(); + for (int i = 0; files != null && i < files.length; i++) { + new File(tmpdir + "/" + files[i]).delete(); + } + } + + @Override + public void eraseTempDir() { + File tmpdirF = new File(tmpdir); + if (tmpdirF.exists()) { + eraseFiles(); + tmpdirF.delete(); + } + } + + public String getFirstLineOfException() { + if (this.stderrDuringParse == null) { + return null; + } + String[] lines = this.stderrDuringParse.split("\n"); + String prefix = "Exception in thread \"main\" "; + return lines[0].substring(prefix.length(), lines[0].length()); + } + + /** + * When looking at a result set that consists of a Map/HashTable + * we cannot rely on the output order, as the hashing algorithm or other aspects + * of the implementation may be different on differnt JDKs or platforms. Hence + * we take the Map, convert the keys to a List, sort them and Stringify the Map, which is a + * bit of a hack, but guarantees that we get the same order on all systems. We assume that + * the keys are strings. + * + * @param m The Map that contains keys we wish to return in sorted order + * @return A string that represents all the keys in sorted order. 
+ */ + public String sortMapToString(Map m) { + // Pass in crap, and get nothing back + // + if (m == null) { + return null; + } + + System.out.println("Map toString looks like: " + m.toString()); + + // Sort the keys in the Map + // + TreeMap nset = new TreeMap(m); + + System.out.println("Tree map looks like: " + nset.toString()); + return nset.toString(); + } + + public List realElements(List elements) { + return elements.subList(Token.MIN_USER_TOKEN_TYPE, elements.size()); + } + + public void assertNotNullOrEmpty(String message, String text) { + assertNotNull(message, text); + assertFalse(message, text.isEmpty()); + } + + public void assertNotNullOrEmpty(String text) { + assertNotNull(text); + assertFalse(text.isEmpty()); + } + + public static class IntTokenStream implements TokenStream { + public IntegerList types; + int p = 0; + + public IntTokenStream(IntegerList types) { + this.types = types; + } + + @Override + public void consume() { + p++; + } + + @Override + public int LA(int i) { + return LT(i).getType(); + } + + @Override + public int mark() { + return index(); + } + + @Override + public int index() { + return p; + } + + @Override + public void release(int marker) { + seek(marker); + } + + @Override + public void seek(int index) { + p = index; + } + + @Override + public int size() { + return types.size(); + } + + @Override + public String getSourceName() { + return UNKNOWN_SOURCE_NAME; + } + + @Override + public Token LT(int i) { + CommonToken t; + int rawIndex = p + i - 1; + if (rawIndex >= types.size()) t = new CommonToken(Token.EOF); + else t = new CommonToken(types.get(rawIndex)); + t.setTokenIndex(rawIndex); + return t; + } + + @Override + public Token get(int i) { + return new CommonToken(types.get(i)); + } + + @Override + public TokenSource getTokenSource() { + return null; + } + + @Override + public String getText() { + throw new UnsupportedOperationException("can't give strings"); + } + + @Override + public String getText(Interval interval) { + throw new UnsupportedOperationException("can't give strings"); + } + + @Override + public String getText(RuleContext ctx) { + throw new UnsupportedOperationException("can't give strings"); + } + + @Override + public String getText(Token start, Token stop) { + throw new UnsupportedOperationException("can't give strings"); + } + } + + /** + * Sort a list + */ + public > List sort(List data) { + List dup = new ArrayList(); + dup.addAll(data); + Collections.sort(dup); + return dup; + } + + /** + * Return map sorted by key + */ + public , V> LinkedHashMap sort(Map data) { + LinkedHashMap dup = new LinkedHashMap(); + List keys = new ArrayList(); + keys.addAll(data.keySet()); + Collections.sort(keys); + for (K k : keys) { + dup.put(k, data.get(k)); + } + return dup; + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeLexers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeLexers.java new file mode 100644 index 000000000..60aa4a35a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeLexers.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.CompositeLexersDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompositeLexers extends BaseRuntimeTest { + public TestCompositeLexers(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(CompositeLexersDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeParsers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeParsers.java new file mode 100644 index 000000000..638413f9a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeParsers.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.CompositeParsersDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompositeParsers extends BaseRuntimeTest { + public TestCompositeParsers(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(CompositeParsersDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestFullContextParsing.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestFullContextParsing.java new file mode 100644 index 000000000..a0d7f9c1a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestFullContextParsing.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.FullContextParsingDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestFullContextParsing extends BaseRuntimeTest { + public TestFullContextParsing(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(FullContextParsingDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLeftRecursion.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLeftRecursion.java new file mode 100644 index 000000000..e92f1b306 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLeftRecursion.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LeftRecursionDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLeftRecursion extends BaseRuntimeTest { + public TestLeftRecursion(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LeftRecursionDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerErrors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerErrors.java new file mode 100644 index 000000000..b95cd59bf --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerErrors.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LexerErrorsDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLexerErrors extends BaseRuntimeTest { + public TestLexerErrors(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LexerErrorsDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerExec.java new file mode 100644 index 000000000..1ed1d84ca --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerExec.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LexerExecDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLexerExec extends BaseRuntimeTest { + public TestLexerExec(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LexerExecDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestListeners.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestListeners.java new file mode 100644 index 000000000..e15dee9ae --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestListeners.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ListenersDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestListeners extends BaseRuntimeTest { + public TestListeners(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ListenersDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParseTrees.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParseTrees.java new file mode 100644 index 000000000..0115e384d --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParseTrees.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParseTreesDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParseTrees extends BaseRuntimeTest { + public TestParseTrees(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParseTreesDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserErrors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserErrors.java new file mode 100644 index 000000000..87b850fb9 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserErrors.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParserErrorsDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParserErrors extends BaseRuntimeTest { + public TestParserErrors(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParserErrorsDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserExec.java new file mode 100644 index 000000000..c22aa8ceb --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserExec.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParserExecDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParserExec extends BaseRuntimeTest { + public TestParserExec(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParserExecDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestPerformance.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestPerformance.java new file mode 100644 index 000000000..78e6942ac --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestPerformance.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.PerformanceDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestPerformance extends BaseRuntimeTest { + public TestPerformance(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(PerformanceDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalLexer.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalLexer.java new file mode 100644 index 000000000..8825042cf --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalLexer.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SemPredEvalLexerDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSemPredEvalLexer extends BaseRuntimeTest { + public TestSemPredEvalLexer(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SemPredEvalLexerDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalParser.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalParser.java new file mode 100644 index 000000000..87d6a9dea --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalParser.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SemPredEvalParserDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSemPredEvalParser extends BaseRuntimeTest { + public TestSemPredEvalParser(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SemPredEvalParserDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSets.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSets.java new file mode 100644 index 000000000..4fe603fd4 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSets.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SetsDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSets extends BaseRuntimeTest { + public TestSets(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SetsDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java index 2c2702fe5..01e0bd4a1 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java @@ -113,7 +113,7 @@ public class PerformanceDescriptors { @Override public boolean ignore(String targetName) { - return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp", "Swift").contains(targetName); + return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp", "Swift", "Dart").contains(targetName); } } diff --git a/runtime/Dart/.gitignore b/runtime/Dart/.gitignore new file mode 100644 index 000000000..a45b12ec7 --- /dev/null +++ b/runtime/Dart/.gitignore @@ -0,0 +1,23 @@ +!lib + +# See https://www.dartlang.org/guides/libraries/private-files + +# Files and directories created by pub +.dart_tool/ +.packages +build/ +# If you're building an application, you may want to check-in your pubspec.lock +pubspec.lock + +# Directory created by dartdoc +# If you don't generate documentation locally you can remove this line. +doc/api/ + +# Avoid committing generated Javascript files: +*.dart.js +*.info.json # Produced by the --dump-info flag. +*.js # When generated by dart2js. Don't specify *.js if your + # project includes source files written in JavaScript. 
+*.js_ +*.js.deps +*.js.map \ No newline at end of file diff --git a/runtime/Dart/LICENSE.txt b/runtime/Dart/LICENSE.txt new file mode 100644 index 000000000..2042d1bda --- /dev/null +++ b/runtime/Dart/LICENSE.txt @@ -0,0 +1,52 @@ +[The "BSD 3-clause license"] +Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +===== + +MIT License for codepointat.js from https://git.io/codepointat +MIT License for fromcodepoint.js from https://git.io/vDW1m + +Copyright Mathias Bynens + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/runtime/Dart/README.md b/runtime/Dart/README.md new file mode 100644 index 000000000..b386f0ba1 --- /dev/null +++ b/runtime/Dart/README.md @@ -0,0 +1,11 @@ +# Dart target for ANTLR 4 + +Dart runtime libraries for ANTLR 4 + +This runtime is available through [pub](https://pub.dev). The package name is 'antlr4'. + +See www.antlr.org for more information on ANTLR. + +See https://github.com/antlr/antlr4/blob/master/doc/Dart-target.md for more information on using ANTLR in Dart. 
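+
+To depend on the runtime from your own project, declare it in `pubspec.yaml`
+(the version below is illustrative only; check pub.dev for the current
+release):
+
+```yaml
+dependencies:
+  antlr4: ^4.8.0
+```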
+ + diff --git a/runtime/Dart/lib/antlr4.dart b/runtime/Dart/lib/antlr4.dart new file mode 100644 index 000000000..0a4b4f60e --- /dev/null +++ b/runtime/Dart/lib/antlr4.dart @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +library antlr4; + +export 'src/atn/atn.dart'; +export 'src/dfa/dfa.dart'; +export 'src/tree/tree.dart'; +export 'src/error/error.dart'; +export 'src/rule_context.dart'; +export 'src/input_stream.dart'; +export 'src/token_stream.dart'; +export 'src/lexer.dart'; +export 'src/parser.dart'; +export 'src/parser_rule_context.dart'; +export 'src/vocabulary.dart'; +export 'src/runtime_meta_data.dart'; +export 'src/token.dart'; diff --git a/runtime/Dart/lib/src/atn/atn.dart b/runtime/Dart/lib/src/atn/atn.dart new file mode 100644 index 000000000..a0400f41d --- /dev/null +++ b/runtime/Dart/lib/src/atn/atn.dart @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +export 'src/atn.dart'; +export 'src/atn_config.dart'; +export 'src/atn_config_set.dart'; +export 'src/atn_deserializer.dart'; +export 'src/atn_simulator.dart'; +export 'src/atn_state.dart'; +export 'src/info.dart'; +export 'src/lexer_action_executor.dart'; +export 'src/lexer_atn_simulator.dart'; +export 'src/parser_atn_simulator.dart'; +export 'src/profiling_atn_simulator.dart'; +export 'src/transition.dart'; diff --git a/runtime/Dart/lib/src/atn/src/atn.dart b/runtime/Dart/lib/src/atn/src/atn.dart new file mode 100644 index 000000000..fd881ce4f --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn.dart @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:collection'; + +import '../../interval_set.dart'; +import '../../ll1_analyzer.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import 'atn_state.dart'; +import 'atn_type.dart'; +import 'lexer_action.dart'; +import 'transition.dart'; + +class ATN { + static final INVALID_ALT_NUMBER = 0; + + List states = []; + + /** Each subrule/rule is a decision point and we must track them so we + * can go back later and build DFA predictors for them. This includes + * all the rules, subrules, optional blocks, ()+, ()* etc... + */ + List decisionToState = []; + + /** + * Maps from rule index to starting state number. + */ + List ruleToStartState; + + /** + * Maps from rule index to stop state number. + */ + List ruleToStopState; + + Map modeNameToStartState = LinkedHashMap(); + + /** + * The type of the ATN. + */ + final ATNType grammarType; + + /** + * The maximum value for any symbol recognized by a transition in the ATN. + */ + final int maxTokenType; + + /** + * For lexer ATNs, this maps the rule index to the resulting token type. + * For parser ATNs, this maps the rule index to the generated bypass token + * type if the + * {@link ATNDeserializationOptions#isGenerateRuleBypassTransitions} + * deserialization option was specified; otherwise, this is null. + */ + List ruleToTokenType; + + /** + * For lexer ATNs, this is an array of [LexerAction] objects which may + * be referenced by action transitions in the ATN. 
+ */ + List lexerActions; + + List modeToStartState = []; + + /** Used for runtime deserialization of ATNs from strings */ + ATN(this.grammarType, this.maxTokenType); + + /** + * TODO merge doc comment + * Compute the set of valid tokens that can occur starting in state [s]. + * If [ctx] is null, the set of tokens will not include what can follow + * the rule surrounding [s]. In other words, the set will be + * restricted to tokens reachable staying within [s]'s rule. + * + * Compute the set of valid tokens that can occur starting in [s] and + * staying in same rule. {@link Token#EPSILON} is in set if we reach end of + * rule. + */ + IntervalSet nextTokens(ATNState s, [RuleContext ctx]) { + if (ctx != null) { + return LL1Analyzer(this).LOOK(s, ctx); + } + if (s.nextTokenWithinRule != null) return s.nextTokenWithinRule; + s.nextTokenWithinRule = LL1Analyzer(this).LOOK(s, null); + s.nextTokenWithinRule.setReadonly(true); + return s.nextTokenWithinRule; + } + + void addState(ATNState state) { + if (state != null) { + state.atn = this; + state.stateNumber = states.length; + } + + states.add(state); + } + + void removeState(ATNState state) { + states[state.stateNumber] = + null; // just free mem, don't shift states in list + } + + int defineDecisionState(DecisionState s) { + decisionToState.add(s); + s.decision = decisionToState.length - 1; + return s.decision; + } + + DecisionState getDecisionState(int decision) { + if (!decisionToState.isEmpty) { + return decisionToState[decision]; + } + return null; + } + + int get numberOfDecisions { + return decisionToState.length; + } + + /** + * Computes the set of input symbols which could follow ATN state number + * [stateNumber] in the specified full [context]. This method + * considers the complete parser context, but does not evaluate semantic + * predicates (i.e. all predicates encountered during the calculation are + * assumed true). If a path in the ATN exists from the starting state to the + * [RuleStopState] of the outermost context without matching any + * symbols, {@link Token#EOF} is added to the returned set. + * + *
+   * If [context] is null, it is treated as {@link ParserRuleContext#EMPTY}.
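+   *
+   * A sketch of typical use from error-handling code (the parser member
+   * names here are illustrative, not part of this API):
+   *
+   *     final expected =
+   *         atn.getExpectedTokens(parser.state, parser.context);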
+ * + * Note that this does NOT give you the set of all tokens that could + * appear at a given token position in the input phrase. In other words, + * it does not answer: + * + * "Given a specific partial input phrase, return the set of all tokens + * that can follow the last token in the input phrase." + * + * The big difference is that with just the input, the parser could + * land right in the middle of a lookahead decision. Getting + * all *possible* tokens given a partial input stream is a separate + * computation. See https://github.com/antlr/antlr4/issues/1428 + * + * For this function, we are specifying an ATN state and call stack to compute + * what token(s) can come next and specifically: outside of a lookahead decision. + * That is what you want for error reporting and recovery upon parse error. + * + * @param stateNumber the ATN state number + * @param context the full parse context + * @return The set of potentially valid input symbols which could follow the + * specified state in the specified context. + * @throws IllegalArgumentException if the ATN does not contain a state with + * number [stateNumber] + */ + IntervalSet getExpectedTokens(int stateNumber, RuleContext context) { + if (stateNumber < 0 || stateNumber >= states.length) { + throw new RangeError.index(stateNumber, states, "stateNumber"); + } + + RuleContext ctx = context; + ATNState s = states[stateNumber]; + IntervalSet following = nextTokens(s); + if (!following.contains(Token.EPSILON)) { + return following; + } + + IntervalSet expected = new IntervalSet(); + expected.addAll(following); + expected.remove(Token.EPSILON); + while (ctx != null && + ctx.invokingState >= 0 && + following.contains(Token.EPSILON)) { + ATNState invokingState = states[ctx.invokingState]; + RuleTransition rt = invokingState.transition(0); + following = nextTokens(rt.followState); + expected.addAll(following); + expected.remove(Token.EPSILON); + ctx = ctx.parent; + } + + if (following.contains(Token.EPSILON)) { + expected.addOne(Token.EOF); + } + + return expected; + } +} diff --git a/runtime/Dart/lib/src/atn/src/atn_config.dart b/runtime/Dart/lib/src/atn/src/atn_config.dart new file mode 100644 index 000000000..70baa25c9 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_config.dart @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../prediction_context.dart'; +import '../../recognizer.dart'; +import '../../util/murmur_hash.dart'; +import 'atn_state.dart'; +import 'lexer_action_executor.dart'; +import 'semantic_context.dart'; + +checkParams(params, isCfg) { + if (params == null) { + Map result = { + "state": null, + "alt": null, + "context": null, + "semanticContext": null + }; + if (isCfg) { + result["reachesIntoOuterContext"] = 0; + } + return result; + } else { + Map props = {}; + props["state"] = params.state ?? null; + props["alt"] = (params.alt == null) ? null : params.alt; + props["context"] = params.context ?? null; + props["semanticContext"] = params.semanticContext ?? null; + if (isCfg) { + props["reachesIntoOuterContext"] = params.reachesIntoOuterContext ?? 0; + props["precedenceFilterSuppressed"] = + params.precedenceFilterSuppressed ?? false; + } + return props; + } +} + +/** A tuple: (ATN state, predicted alt, syntactic, semantic context). 
+ * The syntactic context is a graph-structured stack node whose + * path(s) to the root is the rule invocation(s) + * chain used to arrive at the state. The semantic context is + * the tree of semantic predicates encountered before reaching + * an ATN state. + */ +class ATNConfig { + /** + * This field stores the bit mask for implementing the + * {@link #isPrecedenceFilterSuppressed} property as a bit within the + * existing {@link #reachesIntoOuterContext} field. + */ + static final int SUPPRESS_PRECEDENCE_FILTER = 0x40000000; + + /** The ATN state associated with this configuration */ + ATNState state; + + /** What alt (or lexer rule) is predicted by this configuration */ + int alt; + + /** The stack of invoking states leading to the rule/states associated + * with this config. We track only those contexts pushed during + * execution of the ATN simulator. + */ + PredictionContext context; + + /** + * We cannot execute predicates dependent upon local context unless + * we know for sure we are in the correct context. Because there is + * no way to do this efficiently, we simply cannot evaluate + * dependent predicates unless we are in the rule that initially + * invokes the ATN simulator. + * + *
+   * closure() tracks the depth of how far we dip into the outer context:
+   * depth > 0. Note that it may not be totally accurate depth since I
+   * don't ever decrement. TODO: make it a bool then
+   *
+   * For memory efficiency, the {@link #isPrecedenceFilterSuppressed} method
+   * is also backed by this field. Since the field is publicly accessible, the
+   * highest bit which would not cause the value to become negative is used to
+   * store this field. This choice minimizes the risk that code which only
+   * compares this value to 0 would be affected by the new purpose of the
+   * flag. It also ensures the performance of the existing [ATNConfig]
+   * constructors as well as certain operations like
+   * {@link ATNConfigSet#add(ATNConfig, DoubleKeyMap)} method are
+   * completely unaffected by the change.
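+   *
+   * For example (illustrative values): a configuration two rule invocations
+   * deep into the outer context with the precedence filter suppressed stores
+   * 2 | SUPPRESS_PRECEDENCE_FILTER = 0x40000002, and outerContextDepth
+   * masks the flag bit off again to yield 2.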
+ */ + int reachesIntoOuterContext = 0; + + SemanticContext semanticContext; + + ATNConfig(this.state, this.alt, this.context, + [this.semanticContext = SemanticContext.NONE]); + + ATNConfig.dup(ATNConfig c, + {this.state, this.alt, this.context, this.semanticContext}) { + this.state = state ?? c.state; + this.alt = alt ?? c.alt; + this.context = context ?? c.context; + this.semanticContext = semanticContext ?? c.semanticContext; + this.reachesIntoOuterContext = + c.reachesIntoOuterContext ?? reachesIntoOuterContext; + } + + /** + * This method gets the value of the {@link #reachesIntoOuterContext} field + * as it existed prior to the introduction of the + * {@link #isPrecedenceFilterSuppressed} method. + */ + int get outerContextDepth { + return reachesIntoOuterContext & ~SUPPRESS_PRECEDENCE_FILTER; + } + + bool isPrecedenceFilterSuppressed() { + return (reachesIntoOuterContext & SUPPRESS_PRECEDENCE_FILTER) != 0; + } + + void setPrecedenceFilterSuppressed(bool value) { + if (value) { + this.reachesIntoOuterContext |= 0x40000000; + } else { + this.reachesIntoOuterContext &= ~SUPPRESS_PRECEDENCE_FILTER; + } + } + + /** An ATN configuration is equal to another if both have + * the same state, they predict the same alternative, and + * syntactic/semantic contexts are the same. + */ + operator ==(Object other) { + if (other is ATNConfig && other != null) { + return this.state.stateNumber == other.state.stateNumber && + this.alt == other.alt && + (this.context == other.context || + (this.context != null && this.context == other.context)) && + this.semanticContext == other.semanticContext && + this.isPrecedenceFilterSuppressed() == + other.isPrecedenceFilterSuppressed(); + } + return false; + } + + get hashCode { + int hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, state.stateNumber); + hashCode = MurmurHash.update(hashCode, alt); + hashCode = MurmurHash.update(hashCode, context); + hashCode = MurmurHash.update(hashCode, semanticContext); + hashCode = MurmurHash.finish(hashCode, 4); + return hashCode; + } + + String toString([Recognizer recog, bool showAlt = true]) { + StringBuffer buf = new StringBuffer(); + // if ( state.ruleIndex>=0 ) { + // if ( recog!=null ) buf.write(recog.ruleNames[state.ruleIndex]+":"); + // else buf.write(state.ruleIndex+":"); + // } + buf.write('('); + buf.write(state); + if (showAlt) { + buf.write(","); + buf.write(alt); + } + if (context != null) { + buf.write(",["); + buf.write(context.toString()); + buf.write("]"); + } + if (semanticContext != null && semanticContext != SemanticContext.NONE) { + buf.write(","); + buf.write(semanticContext); + } + if (outerContextDepth > 0) { + buf.write(",up="); + buf.write(outerContextDepth); + } + buf.write(')'); + return buf.toString(); + } +} + +class LexerATNConfig extends ATNConfig { + /** + * Gets the [LexerActionExecutor] capable of executing the embedded + * action(s) for the current configuration. + */ + LexerActionExecutor lexerActionExecutor; + + bool passedThroughNonGreedyDecision = false; + + LexerATNConfig(ATNState state, int alt, PredictionContext context, + [this.lexerActionExecutor]) + : super(state, alt, context, SemanticContext.NONE) { + this.passedThroughNonGreedyDecision = false; + } + + LexerATNConfig.dup(LexerATNConfig c, ATNState state, + {this.lexerActionExecutor, PredictionContext context}) + : super.dup(c, state: state, context: context) { + this.lexerActionExecutor = lexerActionExecutor ?? 
c.lexerActionExecutor; + this.passedThroughNonGreedyDecision = checkNonGreedyDecision(c, state); + } + + bool hasPassedThroughNonGreedyDecision() { + return passedThroughNonGreedyDecision; + } + + int get hashCode { + int hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, state.stateNumber); + hashCode = MurmurHash.update(hashCode, alt); + hashCode = MurmurHash.update(hashCode, context); + hashCode = MurmurHash.update(hashCode, semanticContext); + hashCode = + MurmurHash.update(hashCode, passedThroughNonGreedyDecision ? 1 : 0); + hashCode = MurmurHash.update(hashCode, lexerActionExecutor); + hashCode = MurmurHash.finish(hashCode, 6); + return hashCode; + } + + bool operator ==(Object other) { + if (identical(this, other)) { + return true; + } else if (other is LexerATNConfig) { + LexerATNConfig lexerOther = other; + if (passedThroughNonGreedyDecision != + lexerOther.passedThroughNonGreedyDecision) { + return false; + } + + if (lexerActionExecutor != lexerOther.lexerActionExecutor) { + return false; + } + + return super == other; + } + return false; + } + + static bool checkNonGreedyDecision(LexerATNConfig source, ATNState target) { + return source.passedThroughNonGreedyDecision || + target is DecisionState && target.nonGreedy; + } +} diff --git a/runtime/Dart/lib/src/atn/src/atn_config_set.dart b/runtime/Dart/lib/src/atn/src/atn_config_set.dart new file mode 100644 index 000000000..58385543f --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_config_set.dart @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:collection'; +import 'dart:math'; + +import 'package:collection/collection.dart'; + +import '../../misc/pair.dart'; +import '../../prediction_context.dart'; +import '../../util/bit_set.dart'; +import '../../util/utils.dart'; +import 'atn.dart'; +import 'atn_config.dart'; +import 'atn_state.dart'; +import 'semantic_context.dart'; + +class ATNConfigSet extends Iterable { + /** + * Indicates that the set of configurations is read-only. Do not + * allow any code to manipulate the set; DFA states will point at + * the sets and they must not change. This does not protect the other + * fields; in particular, conflictingAlts is set after + * we've made this readonly. + */ + bool _readOnly = false; + + bool get readOnly => _readOnly; + + set readOnly(bool readOnly) { + this._readOnly = readOnly; + if (readOnly) { + this.configLookup = null; // can't mod, no need for lookup cache + } + } + + /// The reason that we need this is because we don't want the hash map to use + /// the standard hash code and equals. We need all configurations with the same + /// {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles + /// the number of objects associated with ATNConfigs. The other solution is to + /// use a hash table that lets us specify the equals/hashcode operation. + /// + /// All configs but hashed by (s, i, _, pi) not including context. Wiped out + /// when we go readonly as this set becomes a DFA state. 
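+  ///
+  /// For example, (s, i, ctx1, pi) and (s, i, ctx2, pi) count as the same
+  /// key here, which is what allows add() below to merge ctx1 and ctx2
+  /// rather than store two configurations.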
+ Set configLookup = new HashSet(equals: (a, b) { + if (a == null || b == null) return false; + return a.state.stateNumber == b.state.stateNumber && + a.alt == b.alt && + a.semanticContext == b.semanticContext; + }, hashCode: (ATNConfig o) { + int hashCode = 7; + hashCode = 31 * hashCode + o.state.stateNumber; + hashCode = 31 * hashCode + o.alt; + hashCode = 31 * hashCode + o.semanticContext.hashCode; + return hashCode; + }); + + /** Track the elements as they are added to the set; supports get(i) */ + final List configs = []; + + // TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation + // TODO: can we track conflicts as they are added to save scanning configs later? + int uniqueAlt = 0; + + /** + * Currently this is only used when we detect SLL conflict; this does + * not necessarily represent the ambiguous alternatives. In fact, + * I should also point out that this seems to include predicated alternatives + * that have predicates that evaluate to false. Computed in computeTargetState(). + */ + BitSet conflictingAlts; + + // Used in parser and lexer. In lexer, it indicates we hit a pred + // while computing a closure operation. Don't make a DFA state from this. + bool hasSemanticContext = false; + bool dipsIntoOuterContext = false; + + /** Indicates that this configuration set is part of a full context + * LL prediction. It will be used to determine how to merge $. With SLL + * it's a wildcard whereas it is not for LL context merge. + */ + bool fullCtx; + + int cachedHashCode = -1; + + ATNConfigSet([this.fullCtx = true]); + + ATNConfigSet.dup(ATNConfigSet old) { + this.fullCtx = old.fullCtx; + addAll(old); + this.uniqueAlt = old.uniqueAlt; + this.conflictingAlts = old.conflictingAlts; + this.hasSemanticContext = old.hasSemanticContext; + this.dipsIntoOuterContext = old.dipsIntoOuterContext; + } + + /** + * Adding a new config means merging contexts with existing configs for + * {@code (s, i, pi, _)}, where [s] is the + * {@link ATNConfig#state}, [i] is the {@link ATNConfig#alt}, and + * [pi] is the {@link ATNConfig#semanticContext}. We use + * {@code (s,i,pi)} as key. + * + *
+   * This method updates {@link #dipsIntoOuterContext} and
+   * {@link #hasSemanticContext} when necessary.
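+   *
+   * For example, adding (s, i, pi, ctx2) when (s, i, pi, ctx1) is already
+   * present leaves the set's size unchanged; the existing configuration's
+   * context simply becomes merge(ctx1, ctx2).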
+   */
+  bool add(ATNConfig config,
+      [Map<Pair<PredictionContext, PredictionContext>, PredictionContext>
+          mergeCache = null]) {
+    if (readOnly) throw new StateError("This set is readonly");
+    if (config.semanticContext != SemanticContext.NONE) {
+      hasSemanticContext = true;
+    }
+    if (config.outerContextDepth > 0) {
+      dipsIntoOuterContext = true;
+    }
+    final existing = configLookup.lookup(config) ?? config;
+    if (identical(existing, config)) {
+      // we added this new one
+      cachedHashCode = -1;
+      configLookup.add(config);
+      configs.add(config); // track order here
+      return true;
+    }
+    // a previous (s,i,pi,_), merge with it and save result
+    bool rootIsWildcard = !fullCtx;
+    PredictionContext merged = PredictionContext.merge(
+        existing.context, config.context, rootIsWildcard, mergeCache);
+    // no need to check for existing.context, config.context in cache
+    // since only way to create new graphs is "call rule" and here. We
+    // cache at both places.
+    existing.reachesIntoOuterContext =
+        max(existing.reachesIntoOuterContext, config.reachesIntoOuterContext);
+
+    // make sure to preserve the precedence filter suppression during the merge
+    if (config.isPrecedenceFilterSuppressed()) {
+      existing.setPrecedenceFilterSuppressed(true);
+    }
+
+    existing.context = merged; // replace context; no need to alt mapping
+    return true;
+  }
+
+  /** Return a List holding list of configs */
+  List<ATNConfig> get elements {
+    return configs;
+  }
+
+  Set<ATNState> get states {
+    var states = new Set<ATNState>();
+    for (var i = 0; i < this.configs.length; i++) {
+      states.add(this.configs[i].state);
+    }
+    return states;
+  }
+
+  /**
+   * Gets the complete set of represented alternatives for the configuration
+   * set.
+   *
+   * @return the set of represented alternatives in this configuration set
+   *
+   * @since 4.3
+   */
+  BitSet get alts {
+    BitSet alts = new BitSet();
+    for (ATNConfig config in configs) {
+      alts.set(config.alt);
+    }
+    return alts;
+  }
+
+  List<SemanticContext> get predicates {
+    List<SemanticContext> preds = [];
+    for (ATNConfig c in configs) {
+      if (c.semanticContext != SemanticContext.NONE) {
+        preds.add(c.semanticContext);
+      }
+    }
+    return preds;
+  }
+
+  ATNConfig get(int i) {
+    return configs[i];
+  }
+
+  optimizeConfigs(interpreter) {
+    if (this.readOnly) throw StateError("This set is readonly");
+
+    if (this.configLookup.isEmpty) return;
+
+    for (ATNConfig config in configs) {
+//      int before = PredictionContext.getAllContextNodes(config.context).length;
+      config.context = interpreter.getCachedContext(config.context);
+//      int after = PredictionContext.getAllContextNodes(config.context).length;
+//      System.out.println("configs "+before+"->"+after);
+    }
+  }
+
+  addAll(coll) {
+    for (ATNConfig c in coll) add(c);
+    return false;
+  }
+
+  bool operator ==(other) {
+    return identical(this, other) ||
+        (other is ATNConfigSet &&
+            other != null &&
+            ListEquality().equals(this.configs, other.configs) &&
+            this.fullCtx == other.fullCtx &&
+            this.uniqueAlt == other.uniqueAlt &&
+            this.conflictingAlts == other.conflictingAlts &&
+            this.hasSemanticContext == other.hasSemanticContext &&
+            this.dipsIntoOuterContext == other.dipsIntoOuterContext);
+  }
+
+  int get hashCode {
+    if (readOnly) {
+      if (cachedHashCode == -1) {
+        cachedHashCode = ListEquality().hash(configs);
+      }
+
+      return cachedHashCode;
+    }
+
+    return ListEquality().hash(configs);
+  }
+
+  int get length {
+    return configs.length;
+  }
+
+  bool get isEmpty => configs.isEmpty;
+
+  updateHashCode(hash) {
+    if (this.readOnly) {
+      if (this.cachedHashCode == -1) {
+        this.cachedHashCode = this.hashCode;
+      }
+      hash.update(this.cachedHashCode);
+    } else {
+
hash.update(this.hashCode); + } + } + + bool contains(Object o) { + if (configLookup == null) { + throw new UnsupportedError( + "This method is not implemented for readonly sets."); + } + + return configLookup.contains(o); + } + + Iterator get iterator => configs.iterator; + + clear() { + if (readOnly) throw StateError("This set is readonly"); + configs.clear(); + cachedHashCode = -1; + configLookup.clear(); + } + + String toString() { + final buf = new StringBuffer(); + buf.write(arrayToString(elements)); + if (hasSemanticContext) + buf.write(",hasSemanticContext=$hasSemanticContext"); + if (uniqueAlt != ATN.INVALID_ALT_NUMBER) buf.write(",uniqueAlt=$uniqueAlt"); + if (conflictingAlts != null) buf.write(",conflictingAlts=$conflictingAlts"); + if (dipsIntoOuterContext) buf.write(",dipsIntoOuterContext"); + return buf.toString(); + } +} + +class OrderedATNConfigSet extends ATNConfigSet { + final configLookup = Set(); +} diff --git a/runtime/Dart/lib/src/atn/src/atn_deserializer.dart b/runtime/Dart/lib/src/atn/src/atn_deserializer.dart new file mode 100644 index 000000000..9d48780d1 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_deserializer.dart @@ -0,0 +1,823 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../interval_set.dart'; +import '../../misc/pair.dart'; +import '../../token.dart'; +import 'atn.dart'; +import 'atn_state.dart'; +import 'atn_type.dart'; +import 'lexer_action.dart'; +import 'transition.dart'; + +class ATNDeserializationOptions { + static final ATNDeserializationOptions defaultOptions = + ATNDeserializationOptions()..makeReadOnly(); + + bool readOnly; + bool verifyATN; + bool generateRuleBypassTransitions; + + ATNDeserializationOptions([ATNDeserializationOptions options]) { + if (options == null) { + this.verifyATN = true; + this.generateRuleBypassTransitions = false; + } else { + this.verifyATN = options.verifyATN; + this.generateRuleBypassTransitions = + options.generateRuleBypassTransitions; + } + } + + bool isReadOnly() { + return readOnly; + } + + void makeReadOnly() { + readOnly = true; + } + + bool isVerifyATN() { + return verifyATN; + } + + void setVerifyATN(bool verifyATN) { + throwIfReadOnly(); + this.verifyATN = verifyATN; + } + + bool isGenerateRuleBypassTransitions() { + return generateRuleBypassTransitions; + } + + void setGenerateRuleBypassTransitions(bool generateRuleBypassTransitions) { + throwIfReadOnly(); + this.generateRuleBypassTransitions = generateRuleBypassTransitions; + } + + void throwIfReadOnly() { + if (isReadOnly()) { + throw new StateError("The object is read only."); + } + } +} + +class ATNDeserializer { + /** This value should never change. Updates following this version are + * reflected as change in the unique ID SERIALIZED_UUID. + */ + static final SERIALIZED_VERSION = 3; + + /** WARNING: DO NOT MERGE THESE LINES. If UUIDs differ during a merge, + * resolve the conflict by generating a new ID! + */ + /** + * This is the earliest supported serialized UUID. + */ + static final BASE_SERIALIZED_UUID = "33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3"; + + /** + * This UUID indicates an extension of {@link BASE_SERIALIZED_UUID} for the + * addition of precedence predicates. 
+ */ + static final ADDED_PRECEDENCE_TRANSITIONS = + "1DA0C57D-6C06-438A-9B27-10BCB3CE0F61"; + + /** + * This UUID indicates an extension of {@link #ADDED_PRECEDENCE_TRANSITIONS} + * for the addition of lexer actions encoded as a sequence of + * [LexerAction] instances. + */ + static final ADDED_LEXER_ACTIONS = "AADB8D7E-AEEF-4415-AD2B-8204D6CF042E"; + + /** + * This UUID indicates the serialized ATN contains two sets of + * IntervalSets, where the second set's values are encoded as + * 32-bit integers to support the full Unicode SMP range up to U+10FFFF. + */ + static final ADDED_UNICODE_SMP = "59627784-3BE5-417A-B9EB-8131A7286089"; + + /** + * This list contains all of the currently supported UUIDs, ordered by when + * the feature first appeared in this branch. + */ + static final SUPPORTED_UUIDS = [ + BASE_SERIALIZED_UUID, + ADDED_PRECEDENCE_TRANSITIONS, + ADDED_LEXER_ACTIONS, + ADDED_UNICODE_SMP + ]; + + /** + * This is the current serialized UUID. + */ + static final SERIALIZED_UUID = ADDED_UNICODE_SMP; + + ATNDeserializationOptions deserializationOptions; + List data; + var pos; + String uuid; + + ATNDeserializer([options = null]) { + this.deserializationOptions = + options ?? ATNDeserializationOptions.defaultOptions; + } + +// Determines if a particular serialized representation of an ATN supports +// a particular feature, identified by the [UUID] used for serializing +// the ATN at the time the feature was first introduced. +// +// @param feature The [UUID] marking the first time the feature was +// supported in the serialized ATN. +// @param actualUuid The [UUID] of the actual serialized ATN which is +// currently being deserialized. +// @return [true] if the [actualUuid] value represents a +// serialized ATN at or after the feature identified by [feature] was +// introduced; otherwise, [false]. + + isFeatureSupported(feature, actualUuid) { + var idx1 = SUPPORTED_UUIDS.indexOf(feature); + if (idx1 < 0) { + return false; + } + var idx2 = SUPPORTED_UUIDS.indexOf(actualUuid); + return idx2 >= idx1; + } + + deserialize(List data) { + this.reset(data); + this.checkVersion(); + this.checkUUID(); + final ATN atn = this.readATN(); + this.readStates(atn); + this.readRules(atn); + this.readModes(atn); + var sets = List(); + // First, deserialize sets with 16-bit arguments <= U+FFFF. + this.readSets(atn, sets, () => this.readInt()); + // Next, if the ATN was serialized with the Unicode SMP feature, + // deserialize sets with 32-bit arguments <= U+10FFFF. + if (this.isFeatureSupported(ADDED_UNICODE_SMP, this.uuid)) { + this.readSets(atn, sets, () => this.readInt32()); + } + this.readEdges(atn, sets); + this.readDecisions(atn); + this.readLexerActions(atn); + this.markPrecedenceDecisions(atn); + this.verifyATN(atn); + if (this.deserializationOptions.generateRuleBypassTransitions && + atn.grammarType == ATNType.PARSER) { + this.generateRuleBypassTransitions(atn); + // re-verify after modification + this.verifyATN(atn); + } + return atn; + } + + /// Each char value in data is shifted by +2 at the entry to this method. + /// This is an encoding optimization targeting the serialized values 0 + /// and -1 (serialized to 0xFFFF), each of which are very common in the + /// serialized form of the ATN. In the modified UTF-8 that Java uses for + /// compiled string literals, these two character values have multi-byte + /// forms. By shifting each value by +2, they become characters 2 and 1 + /// prior to writing the string, each of which have single-byte + /// representations. 
Since the shift occurs in the tool during ATN
+  /// serialization, each target is responsible for adjusting the values
+  /// during deserialization.
+  ///
+  /// As a special case, note that the first element of data is not
+  /// adjusted because it contains the major version number of the
+  /// serialized ATN, which was fixed at 3 at the time the value shifting
+  /// was implemented. (For example, a serialized character value of 2
+  /// decodes back to 0, and a value of 1 decodes to 1 + 65534 = 0xFFFF,
+  /// the serialized form of -1.)
+  reset(List data) {
+    var adjust = (int c) {
+      var v = c;
+      return v > 1 ? v - 2 : v + 65534;
+    };
+    final temp = data.map(adjust).toList();
+    // don't adjust the first value since that's the version number
+    temp[0] = data[0];
+    this.data = temp;
+    this.pos = 0;
+  }
+
+  checkVersion() {
+    var version = this.readInt();
+    if (version != SERIALIZED_VERSION) {
+      throw ("Could not deserialize ATN with version $version (expected $SERIALIZED_VERSION).");
+    }
+  }
+
+  checkUUID() {
+    var uuid = this.readUUID();
+    if (SUPPORTED_UUIDS.indexOf(uuid) < 0) {
+      throw ("Could not deserialize ATN with UUID: $uuid (expected $SERIALIZED_UUID or a legacy UUID).");
+    }
+    this.uuid = uuid;
+  }
+
+  ATN readATN() {
+    var grammarType = this.readInt();
+    var maxTokenType = this.readInt();
+    return new ATN(ATNType.values[grammarType], maxTokenType);
+  }
+
+  readStates(ATN atn) {
+    List<Pair<LoopEndState, int>> loopBackStateNumbers = [];
+    List<Pair<BlockStartState, int>> endStateNumbers = [];
+    int nstates = this.readInt();
+    for (int i = 0; i < nstates; i++) {
+      StateType stype = StateType.values[readInt()];
+      // ignore bad type of states
+      if (stype == StateType.INVALID_TYPE) {
+        atn.addState(null);
+        continue;
+      }
+
+      int ruleIndex = readInt();
+      if (ruleIndex == 0xFFFF) {
+        ruleIndex = -1;
+      }
+
+      ATNState s = stateFactory(stype, ruleIndex);
+      if (s is LoopEndState) {
+        // special case
+        int loopBackStateNumber = readInt();
+        loopBackStateNumbers.add(Pair(s, loopBackStateNumber));
+      } else if (s is BlockStartState) {
+        int endStateNumber = readInt();
+        endStateNumbers.add(new Pair(s, endStateNumber));
+      }
+      atn.addState(s);
+    }
+
+    // delay the assignment of loop back and end states until we know all the state instances have been initialized
+    for (final pair in loopBackStateNumbers) {
+      pair.a.loopBackState = atn.states[pair.b];
+    }
+
+    for (final pair in endStateNumbers) {
+      pair.a.endState = atn.states[pair.b] as BlockEndState;
+    }
+
+    int numNonGreedyStates = readInt();
+    for (int i = 0; i < numNonGreedyStates; i++) {
+      int stateNumber = readInt();
+      (atn.states[stateNumber] as DecisionState).nonGreedy = true;
+    }
+    if (this.isFeatureSupported(ADDED_PRECEDENCE_TRANSITIONS, this.uuid)) {
+      int numPrecedenceStates = readInt();
+      for (int i = 0; i < numPrecedenceStates; i++) {
+        int stateNumber = readInt();
+        (atn.states[stateNumber] as RuleStartState).isLeftRecursiveRule = true;
+      }
+    }
+  }
+
+  readRules(ATN atn) {
+    int nrules = readInt();
+    if (atn.grammarType == ATNType.LEXER) {
+      atn.ruleToTokenType = new List<int>(nrules);
+    }
+
+    atn.ruleToStartState = new List<RuleStartState>(nrules);
+    for (int i = 0; i < nrules; i++) {
+      int s = readInt();
+      RuleStartState startState = atn.states[s];
+      atn.ruleToStartState[i] = startState;
+      if (atn.grammarType == ATNType.LEXER) {
+        int tokenType = readInt();
+        if (tokenType == 0xFFFF) {
+          tokenType = Token.EOF;
+        }
+
+        atn.ruleToTokenType[i] = tokenType;
+
+        if (!isFeatureSupported(ADDED_LEXER_ACTIONS, uuid)) {
+          // this piece of unused metadata was serialized prior to the
+          // addition of LexerAction
+          int actionIndexIgnored = readInt();
+        }
+      }
+    }
+
+    atn.ruleToStopState = new List<RuleStopState>(nrules);
+    for (ATNState state in atn.states) {
+      if (!(state is
RuleStopState)) { + continue; + } + + RuleStopState stopState = state; + atn.ruleToStopState[state.ruleIndex] = stopState; + atn.ruleToStartState[state.ruleIndex].stopState = stopState; + } + } + + readModes(ATN atn) { + int nmodes = readInt(); + for (int i = 0; i < nmodes; i++) { + int s = readInt(); + atn.modeToStartState.add(atn.states[s] as TokensStartState); + } + } + + readSets(ATN atn, List sets, readUnicode) { + int nsets = readInt(); + for (int i = 0; i < nsets; i++) { + int nintervals = readInt(); + IntervalSet set = new IntervalSet(); + sets.add(set); + + bool containsEof = readInt() != 0; + if (containsEof) { + set.addOne(-1); + } + + for (int j = 0; j < nintervals; j++) { + int a = readUnicode(); + int b = readUnicode(); + set.addRange(a, b); + } + } + } + + readEdges(ATN atn, sets) { + int nedges = readInt(); + for (int i = 0; i < nedges; i++) { + int src = readInt(); + int trg = readInt(); + TransitionType ttype = TransitionType.values[readInt()]; + int arg1 = readInt(); + int arg2 = readInt(); + int arg3 = readInt(); + Transition trans = + edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets); +// System.out.println("EDGE "+trans.getClass().getSimpleName()+" "+ +// src+"->"+trg+ +// " "+Transition.serializationNames[ttype]+ +// " "+arg1+","+arg2+","+arg3); + ATNState srcState = atn.states[src]; + srcState.addTransition(trans); + } + + // edges for rule stop states can be derived, so they aren't serialized + for (ATNState state in atn.states) { + for (int i = 0; i < state.numberOfTransitions; i++) { + Transition t = state.transition(i); + if (t is RuleTransition) { + final ruleTransition = t; + int outermostPrecedenceReturn = -1; + if (atn.ruleToStartState[ruleTransition.target.ruleIndex] + .isLeftRecursiveRule) { + if (ruleTransition.precedence == 0) { + outermostPrecedenceReturn = ruleTransition.target.ruleIndex; + } + } + + EpsilonTransition returnTransition = new EpsilonTransition( + ruleTransition.followState, outermostPrecedenceReturn); + atn.ruleToStopState[ruleTransition.target.ruleIndex] + .addTransition(returnTransition); + } + } + } + + for (ATNState state in atn.states) { + if (state is BlockStartState) { + // we need to know the end state to set its start state + if (state.endState == null) { + throw new StateError(""); + } + + // block end states can only be associated to a single block start state + if (state.endState.startState != null) { + throw new StateError(""); + } + + state.endState.startState = state; + } + + if (state is PlusLoopbackState) { + PlusLoopbackState loopbackState = state; + for (int i = 0; i < loopbackState.numberOfTransitions; i++) { + ATNState target = loopbackState.transition(i).target; + if (target is PlusBlockStartState) { + target.loopBackState = loopbackState; + } + } + } else if (state is StarLoopbackState) { + StarLoopbackState loopbackState = state; + for (int i = 0; i < loopbackState.numberOfTransitions; i++) { + ATNState target = loopbackState.transition(i).target; + if (target is StarLoopEntryState) { + target.loopBackState = loopbackState; + } + } + } + } + } + + readDecisions(ATN atn) { + int ndecisions = this.readInt(); + for (int i = 1; i <= ndecisions; i++) { + int s = this.readInt(); + DecisionState decState = atn.states[s]; + atn.decisionToState.add(decState); + decState.decision = i - 1; + } + } + + readLexerActions(ATN atn) { + if (atn.grammarType == ATNType.LEXER) { + if (isFeatureSupported(ADDED_LEXER_ACTIONS, this.uuid)) { + atn.lexerActions = new List(readInt()); + for (int i = 0; i < atn.lexerActions.length; 
i++) { + LexerActionType actionType = LexerActionType.values[readInt()]; + int data1 = readInt(); + if (data1 == 0xFFFF) { + data1 = -1; + } + + int data2 = readInt(); + if (data2 == 0xFFFF) { + data2 = -1; + } + LexerAction lexerAction = + lexerActionFactory(actionType, data1, data2); + + atn.lexerActions[i] = lexerAction; + } + } else { + // for compatibility with older serialized ATNs, convert the old + // serialized action index for action transitions to the new + // form, which is the index of a LexerCustomAction + List legacyLexerActions = []; + for (ATNState state in atn.states) { + for (int i = 0; i < state.numberOfTransitions; i++) { + Transition transition = state.transition(i); + if (transition is ActionTransition) { + int ruleIndex = transition.ruleIndex; + int actionIndex = transition.actionIndex; + LexerCustomAction lexerAction = + new LexerCustomAction(ruleIndex, actionIndex); + state.setTransition( + i, + new ActionTransition(transition.target, ruleIndex, + legacyLexerActions.length, false)); + legacyLexerActions.add(lexerAction); + } + } + } + + atn.lexerActions = legacyLexerActions; + } + } + } + + generateRuleBypassTransitions(ATN atn) { + for (int i = 0; i < atn.ruleToStartState.length; i++) { + atn.ruleToTokenType[i] = atn.maxTokenType + i + 1; + } + for (int i = 0; i < atn.ruleToStartState.length; i++) { + this.generateRuleBypassTransition(atn, i); + } + } + + generateRuleBypassTransition(ATN atn, int idx) { + BasicBlockStartState bypassStart = new BasicBlockStartState(); + bypassStart.ruleIndex = idx; + atn.addState(bypassStart); + + BlockEndState bypassStop = new BlockEndState(); + bypassStop.ruleIndex = idx; + atn.addState(bypassStop); + + bypassStart.endState = bypassStop; + atn.defineDecisionState(bypassStart); + + bypassStop.startState = bypassStart; + + ATNState endState; + Transition excludeTransition = null; + if (atn.ruleToStartState[idx].isLeftRecursiveRule) { + // wrap from the beginning of the rule to the StarLoopEntryState + endState = null; + for (ATNState state in atn.states) { + if (state.ruleIndex != idx) { + continue; + } + + if (!(state is StarLoopEntryState)) { + continue; + } + + ATNState maybeLoopEndState = + state.transition(state.numberOfTransitions - 1).target; + if (!(maybeLoopEndState is LoopEndState)) { + continue; + } + + if (maybeLoopEndState.epsilonOnlyTransitions && + maybeLoopEndState.transition(0).target is RuleStopState) { + endState = state; + break; + } + } + + if (endState == null) { + throw new UnsupportedError( + "Couldn't identify final state of the precedence rule prefix section."); + } + + excludeTransition = + (endState as StarLoopEntryState).loopBackState.transition(0); + } else { + endState = atn.ruleToStopState[idx]; + } + + // all non-excluded transitions that currently target end state need to target blockEnd instead + for (ATNState state in atn.states) { + for (Transition transition in state.transitions) { + if (transition == excludeTransition) { + continue; + } + + if (transition.target == endState) { + transition.target = bypassStop; + } + } + } + + // all transitions leaving the rule start state need to leave blockStart instead + while (atn.ruleToStartState[idx].numberOfTransitions > 0) { + Transition transition = atn.ruleToStartState[idx].removeTransition( + atn.ruleToStartState[idx].numberOfTransitions - 1); + bypassStart.addTransition(transition); + } + + // link the new states + atn.ruleToStartState[idx].addTransition(new EpsilonTransition(bypassStart)); + bypassStop.addTransition(new 
EpsilonTransition(endState)); + + ATNState matchState = new BasicState(); + atn.addState(matchState); + matchState.addTransition( + new AtomTransition(bypassStop, atn.ruleToTokenType[idx])); + bypassStart.addTransition(new EpsilonTransition(matchState)); + } + + /** + * Analyze the [StarLoopEntryState] states in the specified ATN to set + * the {@link StarLoopEntryState#isPrecedenceDecision} field to the + * correct value. + * + * @param atn The ATN. + */ + markPrecedenceDecisions(ATN atn) { + for (ATNState state in atn.states) { + if (state is StarLoopEntryState) { + /* We analyze the ATN to determine if this ATN decision state is the + * decision for the closure block that determines whether a + * precedence rule should continue or complete. + */ + if (atn.ruleToStartState[state.ruleIndex].isLeftRecursiveRule) { + ATNState maybeLoopEndState = + state.transition(state.numberOfTransitions - 1).target; + if (maybeLoopEndState is LoopEndState) { + if (maybeLoopEndState.epsilonOnlyTransitions && + maybeLoopEndState.transition(0).target is RuleStopState) { + state.isPrecedenceDecision = true; + } + } + } + } + } + } + + void verifyATN(ATN atn) { + // verify assumptions + for (ATNState state in atn.states) { + if (state == null) { + continue; + } + + checkCondition(state.onlyHasEpsilonTransitions() || + state.numberOfTransitions <= 1); + + if (state is PlusBlockStartState) { + checkCondition(state.loopBackState != null); + } + + if (state is StarLoopEntryState) { + StarLoopEntryState starLoopEntryState = state; + checkCondition(starLoopEntryState.loopBackState != null); + checkCondition(starLoopEntryState.numberOfTransitions == 2); + + if (starLoopEntryState.transition(0).target is StarBlockStartState) { + checkCondition( + starLoopEntryState.transition(1).target is LoopEndState); + checkCondition(!starLoopEntryState.nonGreedy); + } else if (starLoopEntryState.transition(0).target is LoopEndState) { + checkCondition( + starLoopEntryState.transition(1).target is StarBlockStartState); + checkCondition(starLoopEntryState.nonGreedy); + } else { + throw new StateError(""); + } + } + + if (state is StarLoopbackState) { + checkCondition(state.numberOfTransitions == 1); + checkCondition(state.transition(0).target is StarLoopEntryState); + } + + if (state is LoopEndState) { + checkCondition(state.loopBackState != null); + } + + if (state is RuleStartState) { + checkCondition(state.stopState != null); + } + + if (state is BlockStartState) { + checkCondition(state.endState != null); + } + + if (state is BlockEndState) { + checkCondition(state.startState != null); + } + + if (state is DecisionState) { + DecisionState decisionState = state; + checkCondition(decisionState.numberOfTransitions <= 1 || + decisionState.decision >= 0); + } else { + checkCondition( + state.numberOfTransitions <= 1 || state is RuleStopState); + } + } + } + + void checkCondition(bool condition, [String message = ""]) { + if (!condition) { + throw new StateError(message); + } + } + + int readInt() { + return this.data[this.pos++]; + } + + readInt32() { + var low = this.readInt(); + var high = this.readInt(); + return low | (high << 16); + } + + readLong() { + var low = this.readInt32(); + var high = this.readInt32(); + return (low & 0x00000000FFFFFFFF) | (high << 32); + } + + static final byteToHex = List.generate(256, (i) => i.toRadixString(16).padLeft(2, '0').toUpperCase()); + + readUUID() { + final bb = List(16); + for (var i = 7; i >= 0; i--) { + var int = this.readInt(); + /* jshint bitwise: false */ + bb[(2 * i) + 1] = int & 
0xFF; + bb[2 * i] = (int >> 8) & 0xFF; + } + return byteToHex[bb[0]] + byteToHex[bb[1]] + + byteToHex[bb[2]] + byteToHex[bb[3]] + '-' + + byteToHex[bb[4]] + byteToHex[bb[5]] + '-' + + byteToHex[bb[6]] + byteToHex[bb[7]] + '-' + + byteToHex[bb[8]] + byteToHex[bb[9]] + '-' + + byteToHex[bb[10]] + byteToHex[bb[11]] + + byteToHex[bb[12]] + byteToHex[bb[13]] + + byteToHex[bb[14]] + byteToHex[bb[15]];; + } + + Transition edgeFactory(ATN atn, TransitionType type, int src, int trg, + int arg1, int arg2, int arg3, List sets) { + ATNState target = atn.states[trg]; + switch (type) { + case TransitionType.EPSILON: + return EpsilonTransition(target); + case TransitionType.RANGE: + return arg3 != 0 + ? RangeTransition(target, Token.EOF, arg2) + : RangeTransition(target, arg1, arg2); + case TransitionType.RULE: + RuleTransition rt = + new RuleTransition(atn.states[arg1], arg2, arg3, target); + return rt; + case TransitionType.PREDICATE: + PredicateTransition pt = + new PredicateTransition(target, arg1, arg2, arg3 != 0); + return pt; + case TransitionType.PRECEDENCE: + return new PrecedencePredicateTransition(target, arg1); + case TransitionType.ATOM: + return arg3 != 0 + ? AtomTransition(target, Token.EOF) + : new AtomTransition(target, arg1); + case TransitionType.ACTION: + ActionTransition a = + new ActionTransition(target, arg1, arg2, arg3 != 0); + return a; + case TransitionType.SET: + return new SetTransition(target, sets[arg1]); + case TransitionType.NOT_SET: + return new NotSetTransition(target, sets[arg1]); + case TransitionType.WILDCARD: + return new WildcardTransition(target); + case TransitionType.INVALID: + throw ArgumentError.value(type, "transition type", "not valid."); + } + } + + ATNState stateFactory(StateType type, int ruleIndex) { + ATNState s; + switch (type) { + case StateType.INVALID_TYPE: + return null; + case StateType.BASIC: + s = new BasicState(); + break; + case StateType.RULE_START: + s = new RuleStartState(); + break; + case StateType.BLOCK_START: + s = new BasicBlockStartState(); + break; + case StateType.PLUS_BLOCK_START: + s = new PlusBlockStartState(); + break; + case StateType.STAR_BLOCK_START: + s = new StarBlockStartState(); + break; + case StateType.TOKEN_START: + s = new TokensStartState(); + break; + case StateType.RULE_STOP: + s = new RuleStopState(); + break; + case StateType.BLOCK_END: + s = new BlockEndState(); + break; + case StateType.STAR_LOOP_BACK: + s = new StarLoopbackState(); + break; + case StateType.STAR_LOOP_ENTRY: + s = new StarLoopEntryState(); + break; + case StateType.PLUS_LOOP_BACK: + s = new PlusLoopbackState(); + break; + case StateType.LOOP_END: + s = new LoopEndState(); + break; + default: + throw ArgumentError.value(type, "state type", "not valid."); + } + + s.ruleIndex = ruleIndex; + return s; + } + + LexerAction lexerActionFactory(LexerActionType type, int data1, int data2) { + switch (type) { + case LexerActionType.CHANNEL: + return new LexerChannelAction(data1); + + case LexerActionType.CUSTOM: + return new LexerCustomAction(data1, data2); + + case LexerActionType.MODE: + return new LexerModeAction(data1); + + case LexerActionType.MORE: + return LexerMoreAction.INSTANCE; + + case LexerActionType.POP_MODE: + return LexerPopModeAction.INSTANCE; + + case LexerActionType.PUSH_MODE: + return new LexerPushModeAction(data1); + + case LexerActionType.SKIP: + return LexerSkipAction.INSTANCE; + + case LexerActionType.TYPE: + return new LexerTypeAction(data1); + default: + throw ArgumentError.value(type, "lexer action type", "not valid."); + } + } 
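+
+  // Implementation note: a serialized ATN is a stream of 16-bit values.
+  // readInt32 and readLong above reassemble wider integers from consecutive
+  // 16-bit words (least-significant word first), and readUUID consumes eight
+  // words to rebuild the 128-bit UUID.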
+} diff --git a/runtime/Dart/lib/src/atn/src/atn_simulator.dart b/runtime/Dart/lib/src/atn/src/atn_simulator.dart new file mode 100644 index 000000000..f24abf121 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_simulator.dart @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../dfa/dfa.dart'; +import '../../prediction_context.dart'; +import 'atn.dart'; +import 'atn_config_set.dart'; + +abstract class ATNSimulator { + /** Must distinguish between missing edge and edge we know leads nowhere */ + + static final DFAState ERROR = + DFAState(stateNumber: 0x7FFFFFFF, configs: new ATNConfigSet()); + + final ATN atn; + + /** The context cache maps all PredictionContext objects that are equals() + * to a single cached copy. This cache is shared across all contexts + * in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet + * to use only cached nodes/graphs in addDFAState(). We don't want to + * fill this during closure() since there are lots of contexts that + * pop up but are not used ever again. It also greatly slows down closure(). + * + *
+ * This cache makes a huge difference in memory and a little bit in speed.
+ * For the Java grammar on java.*, it dropped the memory requirements
+ * at the end from 25M to 16M. We don't store any of the full context
+ * graphs in the DFA because they are limited to local context only,
+ * but apparently there's a lot of repetition there as well. We optimize
+ * the config contexts before storing the config set in the DFA states
+ * by literally rebuilding them with cached subgraphs only.
+ *
+ * I tried a cache for use during closure operations, that was
+ * whacked after each adaptivePredict(). It cost a little bit
+ * more time I think and doesn't save on the overall footprint
+ * so it's not worth the complexity.
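+ *
+ * The cache contract itself is small (a sketch; `ctx` stands for any
+ * [PredictionContext] you want canonicalized, see [PredictionContextCache]
+ * below):
+ *
+ *     final cache = PredictionContextCache();
+ *     final canonical = cache.add(ctx);             // first add stores ctx
+ *     assert(identical(canonical, cache.add(ctx))); // later adds reuse it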
+ */ + final PredictionContextCache sharedContextCache; + + ATNSimulator(this.atn, this.sharedContextCache); + + void reset(); + + /** + * Clear the DFA cache used by the current instance. Since the DFA cache may + * be shared by multiple ATN simulators, this method may affect the + * performance (but not accuracy) of other parsers which are being used + * concurrently. + * + * @throws UnsupportedOperationException if the current instance does not + * support clearing the DFA. + * + * @since 4.3 + */ + void clearDFA() { + throw new UnsupportedError( + "This ATN simulator does not support clearing the DFA."); + } + + PredictionContext getCachedContext(PredictionContext context) { + if (sharedContextCache == null) return context; + + Map visited = + new Map(); + return PredictionContext.getCachedContext( + context, sharedContextCache, visited); + } +} + +/** Used to cache [PredictionContext] objects. Its used for the shared + * context cash associated with contexts in DFA states. This cache + * can be used for both lexers and parsers. + */ +class PredictionContextCache { + final cache = new Map(); + + /** Add a context to the cache and return it. If the context already exists, + * return that one instead and do not add a new context to the cache. + * Protect shared cache from unsafe thread access. + */ + PredictionContext add(PredictionContext ctx) { + if (ctx == PredictionContext.EMPTY) return PredictionContext.EMPTY; + PredictionContext existing = cache[ctx]; + if (existing != null) { +// System.out.println(name+" reuses "+existing); + return existing; + } + cache[ctx] = ctx; + return ctx; + } + + PredictionContext operator [](PredictionContext ctx) { + return cache[ctx]; + } + + int get length { + return cache.length; + } +} diff --git a/runtime/Dart/lib/src/atn/src/atn_state.dart b/runtime/Dart/lib/src/atn/src/atn_state.dart new file mode 100644 index 000000000..98f10f4ed --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_state.dart @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; + +import 'package:logging/logging.dart'; + +import '../../interval_set.dart'; +import 'atn.dart'; +import 'transition.dart'; + +var INITIAL_NUM_TRANSITIONS = 4; + +enum StateType { + INVALID_TYPE, + BASIC, + RULE_START, + BLOCK_START, + PLUS_BLOCK_START, + STAR_BLOCK_START, + TOKEN_START, + RULE_STOP, + BLOCK_END, + STAR_LOOP_BACK, + STAR_LOOP_ENTRY, + PLUS_LOOP_BACK, + LOOP_END, +} + +/** + * The following images show the relation of states and + * {@link ATNState#transitions} for various grammar constructs. + * + *
+ * - Solid edges marked with an ε indicate a required
+ *   [EpsilonTransition].
+ * - Dashed edges indicate locations where any transition derived from
+ *   [Transition] might appear.
+ * - Dashed nodes are place holders for either a sequence of linked
+ *   [BasicState] states or the inclusion of a block representing a nested
+ *   construct in one of the forms below.
+ * - Nodes showing multiple outgoing alternatives with a {@code ...} support
+ *   any number of alternatives (one or more). Nodes without the {@code ...}
+ *   only support the exact number of alternatives shown in the diagram.
+ *
+ * The constructs illustrated are: Basic Blocks (Rule; Block of 1 or more
+ * alternatives); Greedy Loops (Greedy Closure: {@code (...)*}, Greedy
+ * Positive Closure: {@code (...)+}, Greedy Optional: {@code (...)?}); and
+ * Non-Greedy Loops (Non-Greedy Closure: {@code (...)*?}, Non-Greedy Positive
+ * Closure: {@code (...)+?}, Non-Greedy Optional: {@code (...)??}).
+ * The accompanying SVG diagrams from the reference documentation are
+ * omitted here.
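+ *
+ * A small sketch of how such a state is typically inspected (any
+ * [ATNState] `state` works here):
+ *
+ *     for (int i = 0; i < state.numberOfTransitions; i++) {
+ *       final t = state.transition(i);
+ *       print('${state.stateNumber} -> ${t.target.stateNumber}');
+ *     }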
+ * + * + */ +abstract class ATNState { + static final int INITIAL_NUM_TRANSITIONS = 4; + + static final int INVALID_STATE_NUMBER = -1; + + /** Which ATN are we in? */ + ATN atn = null; + + int stateNumber = INVALID_STATE_NUMBER; + + int ruleIndex; // at runtime, we don't have Rule objects + + bool epsilonOnlyTransitions = false; + + /** Track the transitions emanating from this ATN state. */ + List transitions = []; + + /** Used to cache lookahead during parsing, not used during construction */ + IntervalSet nextTokenWithinRule; + + int get hashCode { + return stateNumber; + } + + bool operator ==(Object o) { + // are these states same object? + if (o is ATNState) return stateNumber == o.stateNumber; + return false; + } + + bool isNonGreedyExitState() { + return false; + } + + String toString() { + return stateNumber.toString(); + } + + int get numberOfTransitions { + return transitions.length; + } + + void addTransition(Transition e) { + addTransitionAt(transitions.length, e); + } + + void addTransitionAt(int index, Transition e) { + if (transitions.isEmpty) { + epsilonOnlyTransitions = e.isEpsilon; + } else if (epsilonOnlyTransitions != e.isEpsilon) { + log("ATN state $stateNumber has both epsilon and non-epsilon transitions.\n", + level: Level.SEVERE.value); + epsilonOnlyTransitions = false; + } + + bool alreadyPresent = false; + for (Transition t in transitions) { + if (t.target.stateNumber == e.target.stateNumber) { + if (t.label != null && e.label != null && t.label == e.label) { +// System.err.println("Repeated transition upon "+e.label()+" from "+stateNumber+"->"+t.target.stateNumber); + alreadyPresent = true; + break; + } else if (t.isEpsilon && e.isEpsilon) { +// System.err.println("Repeated epsilon transition from "+stateNumber+"->"+t.target.stateNumber); + alreadyPresent = true; + break; + } + } + } + if (!alreadyPresent) { + transitions.insert(index, e); + } + } + + Transition transition(int i) { + return transitions[i]; + } + + void setTransition(int i, Transition e) { + transitions[i] = e; + } + + Transition removeTransition(int index) { + return transitions.removeAt(index); + } + + StateType get stateType; + + bool onlyHasEpsilonTransitions() => epsilonOnlyTransitions; + + void setRuleIndex(int ruleIndex) { + this.ruleIndex = ruleIndex; + } +} + +class BasicState extends ATNState { + @override + StateType get stateType => StateType.BASIC; +} + +class RuleStartState extends ATNState { + var stopState = null; + var isLeftRecursiveRule = false; + + @override + StateType get stateType => StateType.RULE_START; +} + +abstract class DecisionState extends ATNState { + int decision = 0; + bool nonGreedy = false; +} + +// The start of a regular {@code (...)} block. +abstract class BlockStartState extends DecisionState { + BlockEndState endState; +} + +class BasicBlockStartState extends BlockStartState { + @override + StateType get stateType => StateType.BLOCK_START; +} + +/** Start of {@code (A|B|...)+} loop. Technically a decision state, but + * we don't use for code generation; somebody might need it, so I'm defining + * it for completeness. In reality, the [PlusLoopbackState] node is the + * real decision-making note for {@code A+}. + */ +class PlusBlockStartState extends BlockStartState { + PlusLoopbackState loopBackState; + + @override + StateType get stateType => StateType.PLUS_BLOCK_START; +} + +/// The block that begins a closure loop. 
+class StarBlockStartState extends BlockStartState {
+  @override
+  StateType get stateType => StateType.STAR_BLOCK_START;
+}
+
+/// The Tokens rule start state linking to each lexer rule start state.
+class TokensStartState extends DecisionState {
+  @override
+  StateType get stateType => StateType.TOKEN_START;
+}
+
+/// The last node in the ATN for a rule, unless that rule is the start symbol.
+/// In that case, there is one transition to EOF. Later, we might encode
+/// references to all calls to this rule to compute FOLLOW sets for
+/// error handling.
+class RuleStopState extends ATNState {
+  @override
+  StateType get stateType => StateType.RULE_STOP;
+}
+
+/// Terminal node of a simple {@code (a|b|c)} block.
+class BlockEndState extends ATNState {
+  BlockStartState startState = null;
+
+  @override
+  StateType get stateType => StateType.BLOCK_END;
+}
+
+class StarLoopbackState extends ATNState {
+  StarLoopEntryState get loopEntryState {
+    return transition(0).target;
+  }
+
+  @override
+  StateType get stateType => StateType.STAR_LOOP_BACK;
+}
+
+class StarLoopEntryState extends DecisionState {
+  StarLoopbackState loopBackState;
+
+  /**
+   * Indicates whether this state can benefit from a precedence DFA during SLL
+   * decision making.
+   *
+   * This is a computed property that is calculated during ATN deserialization
+   * and stored for use in [ParserATNSimulator] and
+   * [ParserInterpreter].
+ * + * @see DFA#isPrecedenceDfa() + */ + bool isPrecedenceDecision = false; + + @override + StateType get stateType => StateType.STAR_LOOP_ENTRY; +} + +/// Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: +/// one to the loop back to start of the block and one to exit. +class PlusLoopbackState extends DecisionState { + @override + StateType get stateType => StateType.PLUS_LOOP_BACK; +} + +/// Mark the end of a * or + loop. +class LoopEndState extends ATNState { + ATNState loopBackState; + + @override + StateType get stateType => StateType.LOOP_END; +} diff --git a/runtime/Dart/lib/src/atn/src/atn_type.dart b/runtime/Dart/lib/src/atn/src/atn_type.dart new file mode 100644 index 000000000..1b7eb8f2e --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_type.dart @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/// Represents the type of recognizer an ATN applies to. +enum ATNType { + /** + * A lexer grammar. + */ + LEXER, + + /** + * A parser grammar. + */ + PARSER +} diff --git a/runtime/Dart/lib/src/atn/src/info.dart b/runtime/Dart/lib/src/atn/src/info.dart new file mode 100644 index 000000000..50fba1e8e --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/info.dart @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../dfa/dfa.dart'; +import '../../token_stream.dart'; +import '../../util/bit_set.dart'; +import 'atn_config_set.dart'; +import 'profiling_atn_simulator.dart'; +import 'semantic_context.dart'; + +/** + * This class represents profiling event information for a context sensitivity. + * Context sensitivities are decisions where a particular input resulted in an + * SLL conflict, but LL prediction produced a single unique alternative. + * + *
+ * In some cases, the unique alternative identified by LL prediction is not
+ * equal to the minimum represented alternative in the conflicting SLL
+ * configuration set. Grammars and inputs which result in this scenario are
+ * unable to use {@link PredictionMode#SLL}, which in turn means they cannot use
+ * the two-stage parsing strategy to improve parsing performance for that
+ * input.
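+ *
+ * The two-stage strategy mentioned above is, roughly (a sketch only; it
+ * assumes this runtime mirrors the Java API, and `startRule`/`input` are
+ * placeholders for your entry rule and token stream):
+ *
+ *     parser.interpreter.predictionMode = PredictionMode.SLL;
+ *     parser.errorHandler = BailErrorStrategy();
+ *     try {
+ *       parser.startRule();
+ *     } on ParseCancellationException {
+ *       input.seek(0);
+ *       parser.reset();
+ *       parser.errorHandler = DefaultErrorStrategy();
+ *       parser.interpreter.predictionMode = PredictionMode.LL;
+ *       parser.startRule();
+ *     }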
+ * + * @see ParserATNSimulator#reportContextSensitivity + * @see ANTLRErrorListener#reportContextSensitivity + * + * @since 4.3 + */ +class ContextSensitivityInfo extends DecisionEventInfo { + /** + * Constructs a new instance of the [ContextSensitivityInfo] class + * with the specified detailed context sensitivity information. + * + * @param decision The decision number + * @param configs The final configuration set containing the unique + * alternative identified by full-context prediction + * @param input The input token stream + * @param startIndex The start index for the current prediction + * @param stopIndex The index at which the context sensitivity was + * identified during full-context prediction + */ + ContextSensitivityInfo(int decision, ATNConfigSet configs, TokenStream input, + int startIndex, int stopIndex) + : super(decision, configs, input, startIndex, stopIndex, true); +} + +/** + * This is the base class for gathering detailed information about prediction + * events which occur during parsing. + * + * Note that we could record the parser call stack at the time this event + * occurred but in the presence of left recursive rules, the stack is kind of + * meaningless. It's better to look at the individual configurations for their + * individual stacks. Of course that is a [PredictionContext] object + * not a parse tree node and so it does not have information about the extent + * (start...stop) of the various subtrees. Examining the stack tops of all + * configurations provide the return states for the rule invocations. + * From there you can get the enclosing rule. + * + * @since 4.3 + */ +class DecisionEventInfo { + /** + * The invoked decision number which this event is related to. + * + * @see ATN#decisionToState + */ + final int decision; + + /** + * The configuration set containing additional information relevant to the + * prediction state when the current event occurred, or null if no + * additional information is relevant or available. + */ + final ATNConfigSet configs; + + /** + * The input token stream which is being parsed. + */ + final TokenStream input; + + /** + * The token index in the input stream at which the current prediction was + * originally invoked. + */ + final int startIndex; + + /** + * The token index in the input stream at which the current event occurred. + */ + final int stopIndex; + + /** + * [true] if the current event occurred during LL prediction; + * otherwise, [false] if the input occurred during SLL prediction. + */ + final bool fullCtx; + + DecisionEventInfo(this.decision, this.configs, this.input, this.startIndex, + this.stopIndex, this.fullCtx); +} + +/** + * This class contains profiling gathered for a particular decision. + * + *
+ * Parsing performance in ANTLR 4 is heavily influenced by both static factors
+ * (e.g. the form of the rules in the grammar) and dynamic factors (e.g. the
+ * choice of input and the state of the DFA cache at the time profiling
+ * operations are started). For best results, gather and use aggregate
+ * statistics from a large sample of inputs representing the inputs expected in
+ * production before using the results to make changes in the grammar.
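+ *
+ * A profiling session might look like this (a sketch, assuming the
+ * profiling API mirrors the Java runtime; `expr` stands in for one of
+ * your parser's rules):
+ *
+ *     parser.setProfile(true);
+ *     parser.expr();
+ *     for (final d in parser.parseInfo.decisionInfo) {
+ *       if (d.invocations > 0) print(d);
+ *     }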
+ * + * @since 4.3 + */ +class DecisionInfo { + /** + * The decision number, which is an index into {@link ATN#decisionToState}. + */ + final int decision; + + /** + * The total number of times {@link ParserATNSimulator#adaptivePredict} was + * invoked for this decision. + */ + int invocations; + + /** + * The total time spent in {@link ParserATNSimulator#adaptivePredict} for + * this decision, in nanoseconds. + * + *
+ * The value of this field contains the sum of differential results obtained
+ * by {@link System#nanoTime()}, and is not adjusted to compensate for JIT
+ * and/or garbage collection overhead. For best accuracy, use a modern JVM
+ * implementation that provides precise results from
+ * {@link System#nanoTime()}, and perform profiling in a separate process
+ * which is warmed up by parsing the input prior to profiling. If desired,
+ * call {@link ATNSimulator#clearDFA} to reset the DFA cache to its initial
+ * state before starting the profiling measurement pass.
+ */ + int timeInPrediction; + + /** + * The sum of the lookahead required for SLL prediction for this decision. + * Note that SLL prediction is used before LL prediction for performance + * reasons even when {@link PredictionMode#LL} or + * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used. + */ + int SLL_TotalLook; + + /** + * Gets the minimum lookahead required for any single SLL prediction to + * complete for this decision, by reaching a unique prediction, reaching an + * SLL conflict state, or encountering a syntax error. + */ + int SLL_MinLook; + + /** + * Gets the maximum lookahead required for any single SLL prediction to + * complete for this decision, by reaching a unique prediction, reaching an + * SLL conflict state, or encountering a syntax error. + */ + int SLL_MaxLook; + + /** + * Gets the [LookaheadEventInfo] associated with the event where the + * {@link #SLL_MaxLook} value was set. + */ + LookaheadEventInfo SLL_MaxLookEvent; + + /** + * The sum of the lookahead required for LL prediction for this decision. + * Note that LL prediction is only used when SLL prediction reaches a + * conflict state. + */ + int LL_TotalLook; + + /** + * Gets the minimum lookahead required for any single LL prediction to + * complete for this decision. An LL prediction completes when the algorithm + * reaches a unique prediction, a conflict state (for + * {@link PredictionMode#LL}, an ambiguity state (for + * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION}, or a syntax error. + */ + int LL_MinLook; + + /** + * Gets the maximum lookahead required for any single LL prediction to + * complete for this decision. An LL prediction completes when the algorithm + * reaches a unique prediction, a conflict state (for + * {@link PredictionMode#LL}, an ambiguity state (for + * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION}, or a syntax error. + */ + int LL_MaxLook; + + /** + * Gets the [LookaheadEventInfo] associated with the event where the + * {@link #LL_MaxLook} value was set. + */ + LookaheadEventInfo LL_MaxLookEvent; + + /** + * A collection of [ContextSensitivityInfo] instances describing the + * context sensitivities encountered during LL prediction for this decision. + * + * @see ContextSensitivityInfo + */ + final List contextSensitivities = []; + + /** + * A collection of [ErrorInfo] instances describing the parse errors + * identified during calls to {@link ParserATNSimulator#adaptivePredict} for + * this decision. + * + * @see ErrorInfo + */ + final List errors = []; + + /** + * A collection of [AmbiguityInfo] instances describing the + * ambiguities encountered during LL prediction for this decision. + * + * @see AmbiguityInfo + */ + final List ambiguities = []; + + /** + * A collection of [PredicateEvalInfo] instances describing the + * results of evaluating individual predicates during prediction for this + * decision. + * + * @see PredicateEvalInfo + */ + final List predicateEvals = []; + + /** + * The total number of ATN transitions required during SLL prediction for + * this decision. An ATN transition is determined by the number of times the + * DFA does not contain an edge that is required for prediction, resulting + * in on-the-fly computation of that edge. + * + *
+ * If DFA caching of SLL transitions is employed by the implementation, ATN
+ * computation may cache the computed edge for efficient lookup during
+ * future parsing of this decision. Otherwise, the SLL parsing algorithm
+ * will use ATN transitions exclusively.
+ * + * @see #SLL_ATNTransitions + * @see ParserATNSimulator#computeTargetState + * @see LexerATNSimulator#computeTargetState + */ + int SLL_ATNTransitions; + + /** + * The total number of DFA transitions required during SLL prediction for + * this decision. + * + *
+ * If the ATN simulator implementation does not use DFA caching for SLL
+ * transitions, this value will be 0.
+ * + * @see ParserATNSimulator#getExistingTargetState + * @see LexerATNSimulator#getExistingTargetState + */ + int SLL_DFATransitions; + + /** + * Gets the total number of times SLL prediction completed in a conflict + * state, resulting in fallback to LL prediction. + * + *
+ * Note that this value is not related to whether or not
+ * {@link PredictionMode#SLL} may be used successfully with a particular
+ * grammar. If the ambiguity resolution algorithm applied to the SLL
+ * conflicts for this decision produces the same result as LL prediction for
+ * this decision, {@link PredictionMode#SLL} would produce the same overall
+ * parsing result as {@link PredictionMode#LL}.
+ */ + int LL_Fallback; + + /** + * The total number of ATN transitions required during LL prediction for + * this decision. An ATN transition is determined by the number of times the + * DFA does not contain an edge that is required for prediction, resulting + * in on-the-fly computation of that edge. + * + *
+ * If DFA caching of LL transitions is employed by the implementation, ATN
+ * computation may cache the computed edge for efficient lookup during
+ * future parsing of this decision. Otherwise, the LL parsing algorithm will
+ * use ATN transitions exclusively.
+ * + * @see #LL_DFATransitions + * @see ParserATNSimulator#computeTargetState + * @see LexerATNSimulator#computeTargetState + */ + int LL_ATNTransitions; + + /** + * The total number of DFA transitions required during LL prediction for + * this decision. + * + *
+ * If the ATN simulator implementation does not use DFA caching for LL
+ * transitions, this value will be 0.
+ * + * @see ParserATNSimulator#getExistingTargetState + * @see LexerATNSimulator#getExistingTargetState + */ + int LL_DFATransitions; + + /** + * Constructs a new instance of the [DecisionInfo] class to contain + * statistics for a particular decision. + * + * @param decision The decision number + */ + DecisionInfo(this.decision); + + String toString() { + return "{" + + "decision=$decision" + + ", contextSensitivities=${contextSensitivities.length}" + + ", errors=${errors.length}" + + ", ambiguities=${ambiguities.length}" + + ", SLL_lookahead=$SLL_TotalLook" + + ", SLL_ATNTransitions=$SLL_ATNTransitions" + + ", SLL_DFATransitions=$SLL_DFATransitions" + + ", LL_Fallback=$LL_Fallback" + + ", LL_lookahead=$LL_TotalLook" + + ", LL_ATNTransitions=$LL_ATNTransitions" + + '}'; + } +} + +/** + * This class represents profiling event information for an ambiguity. + * Ambiguities are decisions where a particular input resulted in an SLL + * conflict, followed by LL prediction also reaching a conflict state + * (indicating a true ambiguity in the grammar). + * + *
+ * This event may be reported during SLL prediction in cases where the
+ * conflicting SLL configuration set provides sufficient information to
+ * determine that the SLL conflict is truly an ambiguity. For example, if none
+ * of the ATN configurations in the conflicting SLL configuration set have
+ * traversed a global follow transition (i.e.
+ * {@link ATNConfig#reachesIntoOuterContext} is 0 for all configurations), then
+ * the result of SLL prediction for that input is known to be equivalent to the
+ * result of LL prediction for that input.
+ *
+ * In some cases, the minimum represented alternative in the conflicting LL
+ * configuration set is not equal to the minimum represented alternative in the
+ * conflicting SLL configuration set. Grammars and inputs which result in this
+ * scenario are unable to use {@link PredictionMode#SLL}, which in turn means
+ * they cannot use the two-stage parsing strategy to improve parsing performance
+ * for that input.
+ * + * @see ParserATNSimulator#reportAmbiguity + * @see ANTLRErrorListener#reportAmbiguity + * + * @since 4.3 + */ +class AmbiguityInfo extends DecisionEventInfo { + /** The set of alternative numbers for this decision event that lead to a valid parse. */ + BitSet ambigAlts; + + /** + * Constructs a new instance of the [AmbiguityInfo] class with the + * specified detailed ambiguity information. + * + * @param decision The decision number + * @param configs The final configuration set identifying the ambiguous + * alternatives for the current input + * @param ambigAlts The set of alternatives in the decision that lead to a valid parse. + * The predicted alt is the min(ambigAlts) + * @param input The input token stream + * @param startIndex The start index for the current prediction + * @param stopIndex The index at which the ambiguity was identified during + * prediction + * @param fullCtx [true] if the ambiguity was identified during LL + * prediction; otherwise, [false] if the ambiguity was identified + * during SLL prediction + */ + AmbiguityInfo(int decision, ATNConfigSet configs, BitSet this.ambigAlts, + TokenStream input, int startIndex, int stopIndex, bool fullCtx) + : super(decision, configs, input, startIndex, stopIndex, fullCtx); +} + +/** + * This class represents profiling event information for a syntax error + * identified during prediction. Syntax errors occur when the prediction + * algorithm is unable to identify an alternative which would lead to a + * successful parse. + * + * @see Parser#notifyErrorListeners(Token, String, RecognitionException) + * @see ANTLRErrorListener#syntaxError + * + * @since 4.3 + */ +class ErrorInfo extends DecisionEventInfo { + /** + * Constructs a new instance of the [ErrorInfo] class with the + * specified detailed syntax error information. + * + * @param decision The decision number + * @param configs The final configuration set reached during prediction + * prior to reaching the {@link ATNSimulator#ERROR} state + * @param input The input token stream + * @param startIndex The start index for the current prediction + * @param stopIndex The index at which the syntax error was identified + * @param fullCtx [true] if the syntax error was identified during LL + * prediction; otherwise, [false] if the syntax error was identified + * during SLL prediction + */ + ErrorInfo(int decision, ATNConfigSet configs, TokenStream input, + int startIndex, int stopIndex, bool fullCtx) + : super(decision, configs, input, startIndex, stopIndex, fullCtx); +} + +/** + * This class represents profiling event information for tracking the lookahead + * depth required in order to make a prediction. + * + * @since 4.3 + */ +class LookaheadEventInfo extends DecisionEventInfo { + /** The alternative chosen by adaptivePredict(), not necessarily + * the outermost alt shown for a rule; left-recursive rules have + * user-level alts that differ from the rewritten rule with a (...) block + * and a (..)* loop. + */ + int predictedAlt; + + /** + * Constructs a new instance of the [LookaheadEventInfo] class with + * the specified detailed lookahead information. 
+ * + * @param decision The decision number + * @param configs The final configuration set containing the necessary + * information to determine the result of a prediction, or null if + * the final configuration set is not available + * @param input The input token stream + * @param startIndex The start index for the current prediction + * @param stopIndex The index at which the prediction was finally made + * @param fullCtx [true] if the current lookahead is part of an LL + * prediction; otherwise, [false] if the current lookahead is part of + * an SLL prediction + */ + LookaheadEventInfo(int decision, ATNConfigSet configs, int this.predictedAlt, + TokenStream input, int startIndex, int stopIndex, bool fullCtx) + : super(decision, configs, input, startIndex, stopIndex, fullCtx); +} + +/** + * This class represents profiling event information for semantic predicate + * evaluations which occur during prediction. + * + * @see ParserATNSimulator#evalSemanticContext + * + * @since 4.3 + */ +class PredicateEvalInfo extends DecisionEventInfo { + /** + * The semantic context which was evaluated. + */ + final SemanticContext semctx; + + /** + * The alternative number for the decision which is guarded by the semantic + * context {@link #semctx}. Note that other ATN + * configurations may predict the same alternative which are guarded by + * other semantic contexts and/or {@link SemanticContext#NONE}. + */ + final int predictedAlt; + + /** + * The result of evaluating the semantic context {@link #semctx}. + */ + final bool evalResult; + + /** + * Constructs a new instance of the [PredicateEvalInfo] class with the + * specified detailed predicate evaluation information. + * + * @param decision The decision number + * @param input The input token stream + * @param startIndex The start index for the current prediction + * @param stopIndex The index at which the predicate evaluation was + * triggered. Note that the input stream may be reset to other positions for + * the actual evaluation of individual predicates. + * @param semctx The semantic context which was evaluated + * @param evalResult The results of evaluating the semantic context + * @param predictedAlt The alternative number for the decision which is + * guarded by the semantic context [semctx]. See {@link #predictedAlt} + * for more information. + * @param fullCtx [true] if the semantic context was + * evaluated during LL prediction; otherwise, [false] if the semantic + * context was evaluated during SLL prediction + * + * @see ParserATNSimulator#evalSemanticContext(SemanticContext, ParserRuleContext, int, boolean) + * @see SemanticContext#eval(Recognizer, RuleContext) + */ + PredicateEvalInfo( + int decision, + TokenStream input, + int startIndex, + int stopIndex, + this.semctx, + this.evalResult, + this.predictedAlt, + bool fullCtx) + : super(decision, new ATNConfigSet(), input, startIndex, stopIndex, + fullCtx); +} + +/** + * This class provides access to specific and aggregate statistics gathered + * during profiling of a parser. + * + * @since 4.3 + */ +class ParseInfo { + final ProfilingATNSimulator atnSimulator; + + ParseInfo(this.atnSimulator); + + /** + * Gets an array of [DecisionInfo] instances containing the profiling + * information gathered for each decision in the ATN. + * + * @return An array of [DecisionInfo] instances, indexed by decision + * number. 
+ */ + List get decisionInfo { + return atnSimulator.decisionInfo; + } + + /** + * Gets the decision numbers for decisions that required one or more + * full-context predictions during parsing. These are decisions for which + * {@link DecisionInfo#LL_Fallback} is non-zero. + * + * @return A list of decision numbers which required one or more + * full-context predictions during parsing. + */ + List get llDecisions { + List decisions = atnSimulator.decisionInfo; + List LL = []; + for (int i = 0; i < decisions.length; i++) { + int fallBack = decisions[i].LL_Fallback; + if (fallBack > 0) LL.add(i); + } + return LL; + } + + /** + * Gets the total time spent during prediction across all decisions made + * during parsing. This value is the sum of + * {@link DecisionInfo#timeInPrediction} for all decisions. + */ + int get totalTimeInPrediction { + List decisions = atnSimulator.decisionInfo; + int t = 0; + for (int i = 0; i < decisions.length; i++) { + t += decisions[i].timeInPrediction; + } + return t; + } + + /** + * Gets the total number of SLL lookahead operations across all decisions + * made during parsing. This value is the sum of + * {@link DecisionInfo#SLL_TotalLook} for all decisions. + */ + int get totalSLLLookaheadOps { + List decisions = atnSimulator.decisionInfo; + int k = 0; + for (int i = 0; i < decisions.length; i++) { + k += decisions[i].SLL_TotalLook; + } + return k; + } + + /** + * Gets the total number of LL lookahead operations across all decisions + * made during parsing. This value is the sum of + * {@link DecisionInfo#LL_TotalLook} for all decisions. + */ + int get totalLLLookaheadOps { + List decisions = atnSimulator.decisionInfo; + int k = 0; + for (int i = 0; i < decisions.length; i++) { + k += decisions[i].LL_TotalLook; + } + return k; + } + + /** + * Gets the total number of ATN lookahead operations for SLL prediction + * across all decisions made during parsing. + */ + int get totalSLLATNLookaheadOps { + List decisions = atnSimulator.decisionInfo; + int k = 0; + for (int i = 0; i < decisions.length; i++) { + k += decisions[i].SLL_ATNTransitions; + } + return k; + } + + /** + * Gets the total number of ATN lookahead operations for LL prediction + * across all decisions made during parsing. + */ + int get totalLLATNLookaheadOps { + List decisions = atnSimulator.decisionInfo; + int k = 0; + for (int i = 0; i < decisions.length; i++) { + k += decisions[i].LL_ATNTransitions; + } + return k; + } + + /** + * Gets the total number of ATN lookahead operations for SLL and LL + * prediction across all decisions made during parsing. + * + *
+ * This value is the sum of {@link #getTotalSLLATNLookaheadOps} and
+ * {@link #getTotalLLATNLookaheadOps}.
+ */ + int get totalATNLookaheadOps { + List decisions = atnSimulator.decisionInfo; + int k = 0; + for (int i = 0; i < decisions.length; i++) { + k += decisions[i].SLL_ATNTransitions; + k += decisions[i].LL_ATNTransitions; + } + return k; + } + + /** + * Gets the total number of DFA states stored in the DFA cache for all + * decisions in the ATN. + */ + int get dfaSize { + int n = 0; + List decisionToDFA = atnSimulator.decisionToDFA; + for (int i = 0; i < decisionToDFA.length; i++) { + n += getDFASizeAt(i); + } + return n; + } + + /** + * Gets the total number of DFA states stored in the DFA cache for a + * particular decision. + */ + int getDFASizeAt(int decision) { + DFA decisionToDFA = atnSimulator.decisionToDFA[decision]; + return decisionToDFA.states.length; + } +} diff --git a/runtime/Dart/lib/src/atn/src/lexer_action.dart b/runtime/Dart/lib/src/atn/src/lexer_action.dart new file mode 100644 index 000000000..2c89e623b --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/lexer_action.dart @@ -0,0 +1,678 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../lexer.dart'; +import '../../util/murmur_hash.dart'; + +/** + * Represents the serialization type of a [LexerAction]. + * + * @since 4.2 + */ +enum LexerActionType { + /** + * The type of a [LexerChannelAction] action. + */ + CHANNEL, + /** + * The type of a [LexerCustomAction] action. + */ + CUSTOM, + /** + * The type of a [LexerModeAction] action. + */ + MODE, + /** + * The type of a [LexerMoreAction] action. + */ + MORE, + /** + * The type of a [LexerPopModeAction] action. + */ + POP_MODE, + /** + * The type of a [LexerPushModeAction] action. + */ + PUSH_MODE, + /** + * The type of a [LexerSkipAction] action. + */ + SKIP, + /** + * The type of a [LexerTypeAction] action. + */ + TYPE, +} + +/** + * Represents a single action which can be executed following the successful + * match of a lexer rule. Lexer actions are used for both embedded action syntax + * and ANTLR 4's new lexer command syntax. + * + * @since 4.2 + */ +abstract class LexerAction { + /** + * Gets the serialization type of the lexer action. + * + * @return The serialization type of the lexer action. + */ + LexerActionType get actionType; + + /** + * Gets whether the lexer action is position-dependent. Position-dependent + * actions may have different semantics depending on the [CharStream] + * index at the time the action is executed. + * + *
+ * Many lexer commands, including [type], [skip], and
+ * [more], do not check the input index during their execution.
+ * Actions like this are position-independent, and may be stored more
+ * efficiently as part of the {@link LexerATNConfig#lexerActionExecutor}.
+ * + * @return [true] if the lexer action semantics can be affected by the + * position of the input [CharStream] at the time it is executed; + * otherwise, [false]. + */ + bool get isPositionDependent; + + /** + * Execute the lexer action in the context of the specified [Lexer]. + * + *
+ * For position-dependent actions, the input stream must already be
+ * positioned correctly prior to calling this method.
+ * + * @param lexer The lexer instance. + */ + void execute(Lexer lexer); +} + +/** + * Implements the [channel] lexer action by calling + * {@link Lexer#setChannel} with the assigned channel. + * + * @since 4.2 + */ +class LexerChannelAction implements LexerAction { + /** + * Gets the channel to use for the [Token] created by the lexer. + * + * @return The channel to use for the [Token] created by the lexer. + */ + final int channel; + + /** + * Constructs a new [channel] action with the specified channel value. + * @param channel The channel value to pass to {@link Lexer#setChannel}. + */ + LexerChannelAction(this.channel); + + @override + LexerActionType get actionType => LexerActionType.CHANNEL; + + bool get isPositionDependent => false; + + /** + * {@inheritDoc} + * + *
+ * This action is implemented by calling {@link Lexer#setChannel} with the
+ * value provided by {@link #getChannel}.
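+ *
+ * In a grammar this action is typically produced by a {@code channel}
+ * command, e.g. {@code WS : [ \t\r\n]+ -> channel(HIDDEN) ;}.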
+ */ + void execute(Lexer lexer) { + lexer.channel = channel; + } + + get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, channel); + return MurmurHash.finish(hash, 2); + } + + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerChannelAction) { + return channel == obj.channel; + } + + return false; + } + + String toString() { + return "channel($channel)"; + } +} + +/** + * Executes a custom lexer action by calling {@link Recognizer#action} with the + * rule and action indexes assigned to the custom action. The implementation of + * a custom action is added to the generated code for the lexer in an override + * of {@link Recognizer#action} when the grammar is compiled. + * + *
+ * This class may represent embedded actions created with the {...}
+ * syntax in ANTLR 4, as well as actions created for lexer commands where the
+ * command argument could not be evaluated when the grammar was compiled.
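+ *
+ * For example, a rule with an embedded action such as
+ * {@code ID : [a-z]+ {handleId();} ;} (where {@code handleId} is an
+ * illustrative member method of your own) is executed through this class.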
+ * + * @since 4.2 + */ +class LexerCustomAction implements LexerAction { + /** + * Gets the rule index to use for calls to {@link Recognizer#action}. + * + * @return The rule index for the custom action. + */ + final int ruleIndex; + + /** + * Gets the action index to use for calls to {@link Recognizer#action}. + * + * @return The action index for the custom action. + */ + final int actionIndex; + + /** + * Constructs a custom lexer action with the specified rule and action + * indexes. + * + * @param ruleIndex The rule index to use for calls to + * {@link Recognizer#action}. + * @param actionIndex The action index to use for calls to + * {@link Recognizer#action}. + */ + LexerCustomAction(this.ruleIndex, this.actionIndex); + + /** + * {@inheritDoc} + * + * @return This method returns {@link LexerActionType#CUSTOM}. + */ + + get actionType => LexerActionType.CUSTOM; + + /** + * Gets whether the lexer action is position-dependent. Position-dependent + * actions may have different semantics depending on the [CharStream] + * index at the time the action is executed. + * + *
+ * Custom actions are position-dependent since they may represent a
+ * user-defined embedded action which makes calls to methods like
+ * {@link Lexer#getText}.
+ * + * @return This method returns [true]. + */ + + bool get isPositionDependent => true; + + /** + * {@inheritDoc} + * + *
+ * Custom actions are implemented by calling {@link Lexer#action} with the
+ * appropriate rule and action indexes.
+ */ + + void execute(Lexer lexer) { + lexer.action(null, ruleIndex, actionIndex); + } + + get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, ruleIndex); + hash = MurmurHash.update(hash, actionIndex); + return MurmurHash.finish(hash, 3); + } + + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerCustomAction) { + return ruleIndex == obj.ruleIndex && actionIndex == obj.actionIndex; + } + return false; + } +} + +/** + * Implements the [mode] lexer action by calling {@link Lexer#mode} with + * the assigned mode. + * + * @since 4.2 + */ +class LexerModeAction implements LexerAction { + /** + * Get the lexer mode this action should transition the lexer to. + * + * @return The lexer mode for this [mode] command. + */ + final int mode; + + /** + * Constructs a new [mode] action with the specified mode value. + * @param mode The mode value to pass to {@link Lexer#mode}. + */ + LexerModeAction(this.mode); + + /** + * {@inheritDoc} + * @return This method returns {@link LexerActionType#MODE}. + */ + + get actionType => LexerActionType.MODE; + + /** + * {@inheritDoc} + * @return This method returns [false]. + */ + + bool get isPositionDependent => false; + + /** + * {@inheritDoc} + * + *
+ * This action is implemented by calling {@link Lexer#mode} with the
+ * value provided by {@link #getMode}.
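+ *
+ * In a grammar this corresponds to a command such as {@code -> mode(ISLAND)},
+ * where {@code ISLAND} is an illustrative mode name.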
+ */ + + void execute(Lexer lexer) { + lexer.mode(mode); + } + + get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, mode); + return MurmurHash.finish(hash, 2); + } + + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerModeAction) { + return mode == obj.mode; + } + return false; + } + + String toString() { + return "mode($mode)"; + } +} + +/** + * Implements the [more] lexer action by calling {@link Lexer#more}. + * + *
+ * The [more] command does not have any parameters, so this action is
+ * implemented as a singleton instance exposed by {@link #INSTANCE}.
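+ *
+ * A typical use in a grammar: {@code LQUOTE : '"' -> more, mode(STR) ;},
+ * which keeps the matched text and continues the token in mode {@code STR}.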
+ * + * @since 4.2 + */ +class LexerMoreAction implements LexerAction { + /** + * Provides a singleton instance of this parameterless lexer action. + */ + static final LexerMoreAction INSTANCE = new LexerMoreAction(); + + /** + * {@inheritDoc} + * @return This method returns {@link LexerActionType#MORE}. + */ + get actionType => LexerActionType.MORE; + + /** + * {@inheritDoc} + * @return This method returns [false]. + */ + + bool get isPositionDependent => false; + + /** + * {@inheritDoc} + * + *
+ * This action is implemented by calling {@link Lexer#more}.
+ */ + + void execute(Lexer lexer) { + lexer.more(); + } + + get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + return MurmurHash.finish(hash, 1); + } + + bool operator ==(Object obj) { + return identical(obj, this); + } + + String toString() { + return "more"; + } +} + +/** + * Implements the [popMode] lexer action by calling {@link Lexer#popMode}. + * + *
+ * The [popMode] command does not have any parameters, so this action is
+ * implemented as a singleton instance exposed by {@link #INSTANCE}.
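+ *
+ * A typical use in a grammar: {@code STR_END : '"' -> popMode ;} to leave
+ * an island mode entered earlier with {@code pushMode}.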
+ * + * @since 4.2 + */ +class LexerPopModeAction implements LexerAction { + /** + * Provides a singleton instance of this parameterless lexer action. + */ + static final LexerPopModeAction INSTANCE = new LexerPopModeAction(); + + /** + * {@inheritDoc} + * @return This method returns {@link LexerActionType#POP_MODE}. + */ + + get actionType => LexerActionType.POP_MODE; + + /** + * {@inheritDoc} + * @return This method returns [false]. + */ + + bool get isPositionDependent => false; + + /** + * {@inheritDoc} + * + *
+ * This action is implemented by calling {@link Lexer#popMode}.
+ */ + + void execute(Lexer lexer) { + lexer.popMode(); + } + + get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + return MurmurHash.finish(hash, 1); + } + + bool operator ==(Object obj) { + return identical(obj, this); + } + + String toString() { + return "popMode"; + } +} + +/** + * Implements the [pushMode] lexer action by calling + * {@link Lexer#pushMode} with the assigned mode. + * + * @since 4.2 + */ +class LexerPushModeAction implements LexerAction { + /** + * Get the lexer mode this action should transition the lexer to. + * + * @return The lexer mode for this [pushMode] command. + */ + final int mode; + + /** + * Constructs a new [pushMode] action with the specified mode value. + * @param mode The mode value to pass to {@link Lexer#pushMode}. + */ + LexerPushModeAction(this.mode); + + /** + * {@inheritDoc} + * @return This method returns {@link LexerActionType#PUSH_MODE}. + */ + + get actionType => LexerActionType.PUSH_MODE; + + /** + * {@inheritDoc} + * @return This method returns [false]. + */ + + bool get isPositionDependent => false; + + /** + * {@inheritDoc} + * + *
+ * This action is implemented by calling {@link Lexer#pushMode} with the
+ * value provided by {@link #getMode}.
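+ *
+ * In a grammar this corresponds to a command such as
+ * {@code LBRACE : '{' -> pushMode(NESTED) ;}, where {@code NESTED} is an
+ * illustrative mode name.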
+ */ + + void execute(Lexer lexer) { + lexer.pushMode(mode); + } + + get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, mode); + return MurmurHash.finish(hash, 2); + } + + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerPushModeAction) { + return mode == obj.mode; + } + return false; + } + + String toString() { + return "pushMode($mode)"; + } +} + +/** + * Implements the [skip] lexer action by calling {@link Lexer#skip}. + * + *
+ * The [skip] command does not have any parameters, so this action is
+ * implemented as a singleton instance exposed by {@link #INSTANCE}.
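+ *
+ * For example, a whitespace rule in a hypothetical grammar,
+ *
+ *   WS : [ \t\r\n]+ -> skip ;
+ *
+ * compiles to a reference to this singleton.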
+ * + * @since 4.2 + */ +class LexerSkipAction implements LexerAction { + /** + * Provides a singleton instance of this parameterless lexer action. + */ + static final LexerSkipAction INSTANCE = new LexerSkipAction(); + + /** + * {@inheritDoc} + * @return This method returns {@link LexerActionType#SKIP}. + */ + + get actionType => LexerActionType.SKIP; + + /** + * {@inheritDoc} + * @return This method returns [false]. + */ + + bool get isPositionDependent => false; + + /** + * {@inheritDoc} + * + *
+ * This action is implemented by calling {@link Lexer#skip}.
+ */ + void execute(Lexer lexer) { + lexer.skip(); + } + + get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + return MurmurHash.finish(hash, 1); + } + + bool operator ==(Object obj) { + return identical(obj, this); + } + + String toString() { + return "skip"; + } +} + +/** + * Implements the [type] lexer action by calling {@link Lexer#setType} + * with the assigned type. + * + * @since 4.2 + */ +class LexerTypeAction implements LexerAction { + /** + * Gets the type to assign to a token created by the lexer. + * @return The type to assign to a token created by the lexer. + */ + final int type; + + /** + * Constructs a new [type] action with the specified token type value. + * @param type The type to assign to the token using {@link Lexer#setType}. + */ + LexerTypeAction(this.type); + + /** + * {@inheritDoc} + * @return This method returns {@link LexerActionType#TYPE}. + */ + get actionType => LexerActionType.TYPE; + + /** + * {@inheritDoc} + * @return This method returns [false]. + */ + + bool get isPositionDependent => false; + + /** + * {@inheritDoc} + * + *
+ * This action is implemented by calling {@link Lexer#setType} with the
+ * value provided by {@link #getType}.
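+ *
+ * For example, a rule in a hypothetical lexer grammar such as
+ *
+ *   STAR : '*' -> type(MUL) ;
+ *
+ * compiles to a [LexerTypeAction] carrying the token type assigned to MUL.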
+ */ + + void execute(Lexer lexer) { + lexer.type = type; + } + + get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, type); + return MurmurHash.finish(hash, 2); + } + + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerTypeAction) { + return type == obj.type; + } + return false; + } + + String toString() { + return "type($type)"; + } +} + +/** + * This implementation of [LexerAction] is used for tracking input offsets + * for position-dependent actions within a [LexerActionExecutor]. + * + *
+ * This action is not serialized as part of the ATN, and is only required for
+ * position-dependent lexer actions which appear at a location other than the
+ * end of a rule. For more information about DFA optimizations employed for
+ * lexer actions, see {@link LexerActionExecutor#append} and
+ * {@link LexerActionExecutor#fixOffsetBeforeMatch}.
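+ *
+ * For example, given a hypothetical rule with a mid-rule action,
+ *
+ *   X : 'ab' {doSomething();} 'cd' ;
+ *
+ * the embedded action is position dependent, so before traversing the next
+ * match transition the simulator wraps it in a [LexerIndexedCustomAction]
+ * with offset 2 (the two characters of 'ab' already matched).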
+ * + * @since 4.2 + */ +class LexerIndexedCustomAction implements LexerAction { + /** + * Gets the location in the input [CharStream] at which the lexer + * action should be executed. The value is interpreted as an offset relative + * to the token start index. + * + * @return The location in the input [CharStream] at which the lexer + * action should be executed. + */ + final int offset; + + /** + * Gets the lexer action to execute. + * + * @return A [LexerAction] object which executes the lexer action. + */ + final LexerAction action; + + /** + * Constructs a new indexed custom action by associating a character offset + * with a [LexerAction]. + * + *
+ * Note: This class is only required for lexer actions for which
+ * {@link LexerAction#isPositionDependent} returns [true].
+ * + * @param offset The offset into the input [CharStream], relative to + * the token start index, at which the specified lexer action should be + * executed. + * @param action The lexer action to execute at a particular offset in the + * input [CharStream]. + */ + LexerIndexedCustomAction(this.offset, this.action); + + /** + * {@inheritDoc} + * + * @return This method returns the result of calling {@link #getActionType} + * on the [LexerAction] returned by {@link #getAction}. + */ + LexerActionType get actionType => action.actionType; + + /** + * {@inheritDoc} + * @return This method returns [true]. + */ + + bool get isPositionDependent => true; + + /** + * {@inheritDoc} + * + *
+ * This method calls {@link #execute} on the result of {@link #getAction}
+ * using the provided [lexer].
+ */ + + void execute(Lexer lexer) { +// assume the input stream position was properly set by the calling code + action.execute(lexer); + } + + int get hashCode { + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, offset); + hash = MurmurHash.update(hash, action); + return MurmurHash.finish(hash, 2); + } + + bool operator ==(Object obj) { + if (obj == this) { + return true; + } else if (obj is LexerIndexedCustomAction) { + return offset == obj.offset && action == obj.action; + } + return false; + } +} diff --git a/runtime/Dart/lib/src/atn/src/lexer_action_executor.dart b/runtime/Dart/lib/src/atn/src/lexer_action_executor.dart new file mode 100644 index 000000000..08a0ac00d --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/lexer_action_executor.dart @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'package:collection/collection.dart'; + +import '../../input_stream.dart'; +import '../../lexer.dart'; +import '../../util/murmur_hash.dart'; +import 'lexer_action.dart'; + +/** + * Represents an executor for a sequence of lexer actions which traversed during + * the matching operation of a lexer rule (token). + * + *
+ * The executor tracks position information for position-dependent lexer actions
+ * efficiently, ensuring that actions appearing only at the end of the rule do
+ * not cause bloating of the [DFA] created for the lexer.
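+ *
+ * A minimal sketch of how the simulator builds up an executor while
+ * traversing action transitions (the action values are illustrative):
+ *
+ *   var exec = LexerActionExecutor.append(null, LexerSkipAction.INSTANCE);
+ *   exec = LexerActionExecutor.append(exec, new LexerPushModeAction(1));
+ *   // exec.lexerActions is now [skip, pushMode(1)]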
+ * + * @since 4.2 + */ +class LexerActionExecutor { + /** + * Gets the lexer actions to be executed by this executor. + * @return The lexer actions to be executed by this executor. + */ + final List lexerActions; + + /** + * Caches the result of {@link #hashCode} since the hash code is an element + * of the performance-critical {@link LexerATNConfig#hashCode} operation. + */ + int get hashCode { + int hash = MurmurHash.initialize(); + for (LexerAction lexerAction in lexerActions) { + hash = MurmurHash.update(hash, lexerAction); + } + + return MurmurHash.finish(hash, lexerActions.length); + } + + /** + * Constructs an executor for a sequence of [LexerAction] actions. + * @param lexerActions The lexer actions to execute. + */ + LexerActionExecutor(List this.lexerActions) {} + + /** + * Creates a [LexerActionExecutor] which executes the actions for + * the input [lexerActionExecutor] followed by a specified + * [lexerAction]. + * + * @param lexerActionExecutor The executor for actions already traversed by + * the lexer while matching a token within a particular + * [LexerATNConfig]. If this is null, the method behaves as + * though it were an empty executor. + * @param lexerAction The lexer action to execute after the actions + * specified in [lexerActionExecutor]. + * + * @return A [LexerActionExecutor] for executing the combine actions + * of [lexerActionExecutor] and [lexerAction]. + */ + static LexerActionExecutor append( + LexerActionExecutor lexerActionExecutor, LexerAction lexerAction) { + if (lexerActionExecutor == null) { + return new LexerActionExecutor([lexerAction]); + } + + List lexerActions = + List.from(lexerActionExecutor.lexerActions); + lexerActions.add(lexerAction); + return new LexerActionExecutor(lexerActions); + } + + /** + * Creates a [LexerActionExecutor] which encodes the current offset + * for position-dependent lexer actions. + * + *
+ * Normally, when the executor encounters lexer actions where
+ * {@link LexerAction#isPositionDependent} returns [true], it calls
+ * {@link IntStream#seek} on the input [CharStream] to set the input
+ * position to the end of the current token. This behavior provides
+ * for efficient DFA representation of lexer actions which appear at the end
+ * of a lexer rule, even when the lexer rule matches a variable number of
+ * characters.
+ * + *
+ * Prior to traversing a match transition in the ATN, the current offset
+ * from the token start index is assigned to all position-dependent lexer
+ * actions which have not already been assigned a fixed offset. By storing
+ * the offsets relative to the token start index, the DFA representation of
+ * lexer actions which appear in the middle of tokens remains efficient due
+ * to sharing among tokens of the same length, regardless of their absolute
+ * position in the input stream.
+ * + *
+ * If the current executor already has offsets assigned to all
+ * position-dependent lexer actions, the method returns [this].
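+ *
+ * Illustrative sketch: if this executor holds one position-dependent custom
+ * action and the lexer has matched three characters of the current token,
+ * then
+ *
+ *   var fixed = executor.fixOffsetBeforeMatch(3);
+ *
+ * returns an executor whose action is wrapped as a
+ * [LexerIndexedCustomAction] with offset 3; actions which already carry a
+ * fixed offset are left untouched.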
+ * + * @param offset The current offset to assign to all position-dependent + * lexer actions which do not already have offsets assigned. + * + * @return A [LexerActionExecutor] which stores input stream offsets + * for all position-dependent lexer actions. + */ + LexerActionExecutor fixOffsetBeforeMatch(int offset) { + List updatedLexerActions = null; + for (int i = 0; i < lexerActions.length; i++) { + if (lexerActions[i].isPositionDependent && + !(lexerActions[i] is LexerIndexedCustomAction)) { + if (updatedLexerActions == null) { + updatedLexerActions = List.from(lexerActions); + } + + updatedLexerActions[i] = + new LexerIndexedCustomAction(offset, lexerActions[i]); + } + } + + if (updatedLexerActions == null) { + return this; + } + + return new LexerActionExecutor(updatedLexerActions); + } + + /** + * Execute the actions encapsulated by this executor within the context of a + * particular [Lexer]. + * + *
+ * This method calls {@link IntStream#seek} to set the position of the
+ * [input] [CharStream] prior to calling
+ * {@link LexerAction#execute} on a position-dependent action. Before the
+ * method returns, the input position will be restored to the same position
+ * it was in when the method was invoked.
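+ *
+ * A sketch of the contract (names are illustrative):
+ *
+ *   int before = input.index; // 1 past the end of the current token
+ *   executor.execute(lexer, input, tokenStartIndex);
+ *   assert(input.index == before); // position restored before returning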
+ * + * @param lexer The lexer instance. + * @param input The input stream which is the source for the current token. + * When this method is called, the current {@link IntStream#index} for + * [input] should be the start of the following token, i.e. 1 + * character past the end of the current token. + * @param startIndex The token start index. This value may be passed to + * {@link IntStream#seek} to set the [input] position to the beginning + * of the token. + */ + void execute(Lexer lexer, CharStream input, int startIndex) { + bool requiresSeek = false; + int stopIndex = input.index; + try { + for (LexerAction lexerAction in lexerActions) { + if (lexerAction is LexerIndexedCustomAction) { + int offset = (lexerAction as LexerIndexedCustomAction).offset; + input.seek(startIndex + offset); + lexerAction = (lexerAction as LexerIndexedCustomAction).action; + requiresSeek = (startIndex + offset) != stopIndex; + } else if (lexerAction.isPositionDependent) { + input.seek(stopIndex); + requiresSeek = false; + } + + lexerAction.execute(lexer); + } + } finally { + if (requiresSeek) { + input.seek(stopIndex); + } + } + } + + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (!(obj is LexerActionExecutor)) { + return false; + } + + LexerActionExecutor other = obj; + return hashCode == other.hashCode && + ListEquality().equals(lexerActions, other.lexerActions); + } +} diff --git a/runtime/Dart/lib/src/atn/src/lexer_atn_simulator.dart b/runtime/Dart/lib/src/atn/src/lexer_atn_simulator.dart new file mode 100644 index 000000000..9c407f84c --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/lexer_atn_simulator.dart @@ -0,0 +1,747 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; + +import 'package:logging/logging.dart'; + +import '../../dfa/dfa.dart'; +import '../../error/error.dart'; +import '../../input_stream.dart'; +import '../../interval_set.dart'; +import '../../lexer.dart'; +import '../../prediction_context.dart'; +import '../../token.dart'; +import 'atn.dart'; +import 'atn_config.dart'; +import 'atn_config_set.dart'; +import 'atn_simulator.dart'; +import 'atn_state.dart'; +import 'lexer_action_executor.dart'; +import 'transition.dart'; + +/// When we hit an accept state in either the DFA or the ATN, we +/// have to notify the character stream to start buffering characters +/// via {@link IntStream#mark} and record the current state. The current sim state +/// includes the current index into the input, the current line, +/// and current character position in that line. Note that the Lexer is +/// tracking the starting line and characterization of the token. These +/// variables track the "state" of the simulator when it hits an accept state. +/// +///
+/// We track these variables separately for the DFA and ATN simulation
+/// because the DFA simulation often has to fail over to the ATN
+/// simulation. If the ATN simulation fails, we need the DFA to fall
+/// back to its previously accepted state, if any. If the ATN succeeds,
+/// then the ATN does the accept and the DFA simulator that invoked it
+/// can simply return the predicted token type.
+class SimState { + int index = -1; + int line = 0; + int charPos = -1; + + DFAState dfaState = null; + + reset() { + this.index = -1; + this.line = 0; + this.charPos = -1; + this.dfaState = null; + } +} + +/** "dup" of ParserInterpreter */ +class LexerATNSimulator extends ATNSimulator { + static final bool debug = true; + static final bool dfa_debug = true; + + static final int MIN_DFA_EDGE = 0; + static final int MAX_DFA_EDGE = 127; // forces unicode to stay in ATN + + final Lexer recog; + + /** The current token's starting index into the character stream. + * Shared across DFA to ATN simulation in case the ATN fails and the + * DFA did not have a previous accept state. In this case, we use the + * ATN-generated exception object. + */ + int startIndex = -1; + + /** line number 1..n within the input */ + int line = 1; + + /** The index of the character relative to the beginning of the line 0..n-1 */ + int charPositionInLine = 0; + + List decisionToDFA; + int mode = Lexer.DEFAULT_MODE; + + /** Used during DFA/ATN exec to record the most recent accept configuration info */ + + final SimState prevAccept = new SimState(); + + LexerATNSimulator(ATN atn, List this.decisionToDFA, + PredictionContextCache sharedContextCache, + {Lexer this.recog = null}) + : super(atn, sharedContextCache); + + void copyState(LexerATNSimulator simulator) { + this.charPositionInLine = simulator.charPositionInLine; + this.line = simulator.line; + this.mode = simulator.mode; + this.startIndex = simulator.startIndex; + } + + int match(CharStream input, int mode) { + this.mode = mode; + int mark = input.mark(); + try { + this.startIndex = input.index; + this.prevAccept.reset(); + DFA dfa = decisionToDFA[mode]; + if (dfa.s0 == null) { + return matchATN(input); + } else { + return execATN(input, dfa.s0); + } + } finally { + input.release(mark); + } + } + + void reset() { + prevAccept.reset(); + startIndex = -1; + line = 1; + charPositionInLine = 0; + mode = Lexer.DEFAULT_MODE; + } + + void clearDFA() { + for (int d = 0; d < decisionToDFA.length; d++) { + decisionToDFA[d] = new DFA(atn.getDecisionState(d), d); + } + } + + int matchATN(CharStream input) { + ATNState startState = atn.modeToStartState[mode]; + + if (debug) { + log("matchATN mode $mode start: $startState\n", level: Level.FINE.value); + } + + int old_mode = mode; + + ATNConfigSet s0_closure = computeStartState(input, startState); + bool suppressEdge = s0_closure.hasSemanticContext; + s0_closure.hasSemanticContext = false; + + DFAState next = addDFAState(s0_closure); + if (!suppressEdge) { + decisionToDFA[mode].s0 = next; + } + + int predict = execATN(input, next); + + if (debug) { + log("DFA after matchATN: ${decisionToDFA[old_mode].toLexerString()}\n", + level: Level.FINE.value); + } + + return predict; + } + + int execATN(CharStream input, DFAState ds0) { + //log("enter exec index "+input.index()+" from "+ds0.configs, level: Level.FINE.value); + if (debug) { + log("start state closure=${ds0.configs}\n", level: Level.FINE.value); + } + + if (ds0.isAcceptState) { + // allow zero-length tokens + captureSimState(prevAccept, input, ds0); + } + + int t = input.LA(1); + + DFAState s = ds0; // s is current/from DFA state + + while (true) { + // while more work + if (debug) { + log("execATN loop starting closure: ${s.configs}\n", + level: Level.FINE.value); + } + + // As we move src->trg, src->trg, we keep track of the previous trg to + // avoid looking up the DFA state again, which is expensive. 
+ // If the previous target was already part of the DFA, we might + // be able to avoid doing a reach operation upon t. If s!=null, + // it means that semantic predicates didn't prevent us from + // creating a DFA state. Once we know s!=null, we check to see if + // the DFA state has an edge already for t. If so, we can just reuse + // it's configuration set; there's no point in re-computing it. + // This is kind of like doing DFA simulation within the ATN + // simulation because DFA simulation is really just a way to avoid + // computing reach/closure sets. Technically, once we know that + // we have a previously added DFA state, we could jump over to + // the DFA simulator. But, that would mean popping back and forth + // a lot and making things more complicated algorithmically. + // This optimization makes a lot of sense for loops within DFA. + // A character will take us back to an existing DFA state + // that already has lots of edges out of it. e.g., .* in comments. + DFAState target = getExistingTargetState(s, t); + if (target == null) { + target = computeTargetState(input, s, t); + } + + if (target == ATNSimulator.ERROR) { + break; + } + + // If this is a consumable input element, make sure to consume before + // capturing the accept state so the input index, line, and char + // position accurately reflect the state of the interpreter at the + // end of the token. + if (t != IntStream.EOF) { + consume(input); + } + + if (target.isAcceptState) { + captureSimState(prevAccept, input, target); + if (t == IntStream.EOF) { + break; + } + } + + t = input.LA(1); + s = target; // flip; current DFA target becomes new src/from state + } + + return failOrAccept(prevAccept, input, s.configs, t); + } + + /** + * Get an existing target state for an edge in the DFA. If the target state + * for the edge has not yet been computed or is otherwise not available, + * this method returns null. + * + * @param s The current DFA state + * @param t The next input symbol + * @return The existing target DFA state for the given input symbol + * [t], or null if the target state for this edge is not + * already cached + */ + + DFAState getExistingTargetState(DFAState s, int t) { + if (s.edges == null || t < MIN_DFA_EDGE || t > MAX_DFA_EDGE) { + return null; + } + + DFAState target = s.edges[t - MIN_DFA_EDGE]; + if (debug && target != null) { + log("reuse state ${s.stateNumber} edge to ${target.stateNumber}", + level: Level.FINE.value); + } + + return target; + } + + /** + * Compute a target state for an edge in the DFA, and attempt to add the + * computed state and corresponding edge to the DFA. + * + * @param input The input stream + * @param s The current DFA state + * @param t The next input symbol + * + * @return The computed target DFA state for the given input symbol + * [t]. If [t] does not lead to a valid DFA state, this method + * returns {@link #ERROR}. + */ + + DFAState computeTargetState(CharStream input, DFAState s, int t) { + ATNConfigSet reach = new OrderedATNConfigSet(); + + // if we don't find an existing DFA state + // Fill reach starting from closure, following t transitions + getReachableConfigSet(input, s.configs, reach, t); + + if (reach.isEmpty) { + // we got nowhere on t from s + if (!reach.hasSemanticContext) { + // we got nowhere on t, don't throw out this knowledge; it'd + // cause a failover from DFA later. 
+ addDFAEdge(s, t, ATNSimulator.ERROR); + } + + // stop when we can't match any more char + return ATNSimulator.ERROR; + } + + // Add an edge from s to target DFA found/created for reach + return addDFAEdgeByConfig(s, t, reach); + } + + int failOrAccept( + SimState prevAccept, CharStream input, ATNConfigSet reach, int t) { + if (prevAccept.dfaState != null) { + LexerActionExecutor lexerActionExecutor = + prevAccept.dfaState.lexerActionExecutor; + accept(input, lexerActionExecutor, startIndex, prevAccept.index, + prevAccept.line, prevAccept.charPos); + return prevAccept.dfaState.prediction; + } else { + // if no accept and EOF is first char, return EOF + if (t == IntStream.EOF && input.index == startIndex) { + return Token.EOF; + } + + throw new LexerNoViableAltException(recog, input, startIndex, reach); + } + } + + /** Given a starting configuration set, figure out all ATN configurations + * we can reach upon input [t]. Parameter [reach] is a return + * parameter. + */ + void getReachableConfigSet( + CharStream input, ATNConfigSet configs, ATNConfigSet reach, int t) { + // this is used to skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + int skipAlt = ATN.INVALID_ALT_NUMBER; + for (ATNConfig c in configs) { + bool currentAltReachedAcceptState = c.alt == skipAlt; + if (currentAltReachedAcceptState && + (c as LexerATNConfig).hasPassedThroughNonGreedyDecision()) { + continue; + } + + if (debug) { + log("testing ${getTokenName(t)} at ${c.toString(recog, true)}\n", + level: Level.FINE.value); + } + + int n = c.state.numberOfTransitions; + for (int ti = 0; ti < n; ti++) { + // for each transition + Transition trans = c.state.transition(ti); + ATNState target = getReachableTarget(trans, t); + if (target != null) { + LexerActionExecutor lexerActionExecutor = + (c as LexerATNConfig).lexerActionExecutor; + if (lexerActionExecutor != null) { + lexerActionExecutor = lexerActionExecutor + .fixOffsetBeforeMatch(input.index - startIndex); + } + + bool treatEofAsEpsilon = t == IntStream.EOF; + if (closure( + input, + new LexerATNConfig.dup(c, target, + lexerActionExecutor: lexerActionExecutor), + reach, + currentAltReachedAcceptState, + true, + treatEofAsEpsilon)) { + // any remaining configs for this alt have a lower priority than + // the one that just reached an accept state. 
+ skipAlt = c.alt; + break; + } + } + } + } + } + + void accept(CharStream input, LexerActionExecutor lexerActionExecutor, + int startIndex, int index, int line, int charPos) { + if (debug) { + log("ACTION $lexerActionExecutor\n", level: Level.FINE.value); + } + + // seek to after last char in token + input.seek(index); + this.line = line; + this.charPositionInLine = charPos; + + if (lexerActionExecutor != null && recog != null) { + lexerActionExecutor.execute(recog, input, startIndex); + } + } + + ATNState getReachableTarget(Transition trans, int t) { + if (trans.matches(t, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) { + return trans.target; + } + + return null; + } + + ATNConfigSet computeStartState(CharStream input, ATNState p) { + PredictionContext initialContext = PredictionContext.EMPTY; + ATNConfigSet configs = new OrderedATNConfigSet(); + for (int i = 0; i < p.numberOfTransitions; i++) { + ATNState target = p.transition(i).target; + LexerATNConfig c = new LexerATNConfig(target, i + 1, initialContext); + closure(input, c, configs, false, false, false); + } + return configs; + } + + /** + * Since the alternatives within any lexer decision are ordered by + * preference, this method stops pursuing the closure as soon as an accept + * state is reached. After the first accept state is reached by depth-first + * search from [config], all other (potentially reachable) states for + * this rule would have a lower priority. + * + * @return [true] if an accept state is reached, otherwise + * [false]. + */ + bool closure( + CharStream input, + LexerATNConfig config, + ATNConfigSet configs, + bool currentAltReachedAcceptState, + bool speculative, + bool treatEofAsEpsilon) { + if (debug) { + log("closure(" + config.toString(recog, true) + ")", + level: Level.FINE.value); + } + + if (config.state is RuleStopState) { + if (debug) { + if (recog != null) { + log("closure at ${recog.ruleNames[config.state.ruleIndex]} rule stop $config\n", + level: Level.FINE.value); + } else { + log("closure at rule stop $config\n", level: Level.FINE.value); + } + } + + if (config.context == null || config.context.hasEmptyPath()) { + if (config.context == null || config.context.isEmpty) { + configs.add(config); + return true; + } else { + configs.add(new LexerATNConfig.dup(config, config.state, + context: PredictionContext.EMPTY)); + currentAltReachedAcceptState = true; + } + } + + if (config.context != null && !config.context.isEmpty) { + for (int i = 0; i < config.context.length; i++) { + if (config.context.getReturnState(i) != + PredictionContext.EMPTY_RETURN_STATE) { + PredictionContext newContext = + config.context.getParent(i); // "pop" return state + ATNState returnState = atn.states[config.context.getReturnState(i)]; + LexerATNConfig c = new LexerATNConfig.dup(config, returnState, + context: newContext); + currentAltReachedAcceptState = closure(input, c, configs, + currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + } + + return currentAltReachedAcceptState; + } + + // optimization + if (!config.state.onlyHasEpsilonTransitions()) { + if (!currentAltReachedAcceptState || + !config.hasPassedThroughNonGreedyDecision()) { + configs.add(config); + } + } + + ATNState p = config.state; + for (int i = 0; i < p.numberOfTransitions; i++) { + Transition t = p.transition(i); + LexerATNConfig c = getEpsilonTarget( + input, config, t, configs, speculative, treatEofAsEpsilon); + if (c != null) { + currentAltReachedAcceptState = closure(input, c, configs, + currentAltReachedAcceptState, speculative, 
treatEofAsEpsilon); + } + } + + return currentAltReachedAcceptState; + } + + // side-effect: can alter configs.hasSemanticContext + + LexerATNConfig getEpsilonTarget( + CharStream input, + LexerATNConfig config, + Transition t, + ATNConfigSet configs, + bool speculative, + bool treatEofAsEpsilon) { + LexerATNConfig c = null; + switch (t.type) { + case TransitionType.RULE: + RuleTransition ruleTransition = t; + PredictionContext newContext = SingletonPredictionContext.create( + config.context, ruleTransition.followState.stateNumber); + c = new LexerATNConfig.dup(config, t.target, context: newContext); + break; + + case TransitionType.PRECEDENCE: + throw new UnsupportedError( + "Precedence predicates are not supported in lexers."); + case TransitionType.PREDICATE: + /* Track traversing semantic predicates. If we traverse, + we cannot add a DFA state for this "reach" computation + because the DFA would not test the predicate again in the + future. Rather than creating collections of semantic predicates + like v3 and testing them on prediction, v4 will test them on the + fly all the time using the ATN not the DFA. This is slower but + semantically it's not used that often. One of the key elements to + this predicate mechanism is not adding DFA states that see + predicates immediately afterwards in the ATN. For example, + + a : ID {p1}? | ID {p2}? ; + + should create the start state for rule 'a' (to save start state + competition), but should not create target of ID state. The + collection of ATN states the following ID references includes + states reached by traversing predicates. Since this is when we + test them, we cannot cash the DFA state target of ID. + */ + PredicateTransition pt = t; + if (debug) { + log("EVAL rule ${pt.ruleIndex}:${pt.predIndex}", + level: Level.FINE.value); + } + configs.hasSemanticContext = true; + if (evaluatePredicate(input, pt.ruleIndex, pt.predIndex, speculative)) { + c = new LexerATNConfig.dup(config, t.target); + } + break; + case TransitionType.ACTION: + if (config.context == null || config.context.hasEmptyPath()) { + // execute actions anywhere in the start rule for a token. + // + // TODO: if the entry rule is invoked recursively, some + // actions may be executed during the recursive call. The + // problem can appear when hasEmptyPath() is true but + // isEmpty is false. In this case, the config needs to be + // split into two contexts - one with just the empty path + // and another with everything but the empty path. + // Unfortunately, the current algorithm does not allow + // getEpsilonTarget to return two configurations, so + // additional modifications are needed before we can support + // the split operation. 
+ LexerActionExecutor lexerActionExecutor = LexerActionExecutor.append( + config.lexerActionExecutor, + atn.lexerActions[(t as ActionTransition).actionIndex]); + c = new LexerATNConfig.dup(config, t.target, + lexerActionExecutor: lexerActionExecutor); + } else { + // ignore actions in referenced rules + c = new LexerATNConfig.dup(config, t.target); + } + break; + + case TransitionType.EPSILON: + c = new LexerATNConfig.dup(config, t.target); + break; + + case TransitionType.ATOM: + case TransitionType.RANGE: + case TransitionType.SET: + if (treatEofAsEpsilon) { + if (t.matches( + IntStream.EOF, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) { + c = new LexerATNConfig.dup(config, t.target); + break; + } + } + break; + case TransitionType.NOT_SET: + break; + case TransitionType.WILDCARD: + break; + case TransitionType.INVALID: + throw ArgumentError.value(t.type, "TransitionType"); + break; + } + + return c; + } + + /** + * Evaluate a predicate specified in the lexer. + * + *
+ * If [speculative] is [true], this method was called before
+ * {@link #consume} for the matched character. This method should call
+ * {@link #consume} before evaluating the predicate to ensure position
+ * sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine},
+ * and {@link Lexer#getCharPositionInLine}, properly reflect the current
+ * lexer state. This method should restore [input] and the simulator
+ * to the original state before returning (i.e. undo the actions made by the
+ * call to {@link #consume}).
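+ *
+ * For example, a predicated rule in a hypothetical lexer grammar,
+ *
+ *   KEYWORD : 'kw' {allowKeywords}? ;
+ *
+ * reaches this method with the rule and predicate indexes assigned to that
+ * predicate at code generation time.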
+ * + * @param input The input stream. + * @param ruleIndex The rule containing the predicate. + * @param predIndex The index of the predicate within the rule. + * @param speculative [true] if the current index in [input] is + * one character before the predicate's location. + * + * @return [true] if the specified predicate evaluates to + * [true]. + */ + bool evaluatePredicate( + CharStream input, int ruleIndex, int predIndex, bool speculative) { + // assume true if no recognizer was provided + if (recog == null) { + return true; + } + + if (!speculative) { + return recog.sempred(null, ruleIndex, predIndex); + } + + int savedCharPositionInLine = charPositionInLine; + int savedLine = line; + int index = input.index; + int marker = input.mark(); + try { + consume(input); + return recog.sempred(null, ruleIndex, predIndex); + } finally { + charPositionInLine = savedCharPositionInLine; + line = savedLine; + input.seek(index); + input.release(marker); + } + } + + void captureSimState(SimState settings, CharStream input, DFAState dfaState) { + settings.index = input.index; + settings.line = line; + settings.charPos = charPositionInLine; + settings.dfaState = dfaState; + } + + DFAState addDFAEdgeByConfig(DFAState from, int t, ATNConfigSet q) { + /* leading to this call, ATNConfigSet.hasSemanticContext is used as a + * marker indicating dynamic predicate evaluation makes this edge + * dependent on the specific input sequence, so the static edge in the + * DFA should be omitted. The target DFAState is still created since + * execATN has the ability to resynchronize with the DFA state cache + * following the predicate evaluation step. + * + * TJP notes: next time through the DFA, we see a pred again and eval. + * If that gets us to a previously created (but dangling) DFA + * state, we can continue in pure DFA mode from there. + */ + bool suppressEdge = q.hasSemanticContext; + q.hasSemanticContext = false; + + DFAState to = addDFAState(q); + + if (suppressEdge) { + return to; + } + + addDFAEdge(from, t, to); + return to; + } + + void addDFAEdge(DFAState p, int t, DFAState q) { + if (t < MIN_DFA_EDGE || t > MAX_DFA_EDGE) { + // Only track edges within the DFA bounds + return; + } + + if (debug) { + log("EDGE $p -> $q upon ${String.fromCharCode(t)}", + level: Level.FINE.value); + } + + if (p.edges == null) { + // make room for tokens 1..n and -1 masquerading as index 0 + p.edges = List(MAX_DFA_EDGE - MIN_DFA_EDGE + 1); + } + p.edges[t - MIN_DFA_EDGE] = q; // connect + } + + /** Add a new DFA state if there isn't one with this set of + configurations already. This method also detects the first + configuration containing an ATN rule stop state. Later, when + traversing the DFA, we will know which rule to accept. + */ + + DFAState addDFAState(ATNConfigSet configs) { + /* the lexer evaluates predicates on-the-fly; by this point configs + * should not contain any configurations with unevaluated predicates. 
+ */ + assert(!configs.hasSemanticContext); + + DFAState proposed = new DFAState(configs: configs); + ATNConfig firstConfigWithRuleStopState = null; + for (ATNConfig c in configs) { + if (c.state is RuleStopState) { + firstConfigWithRuleStopState = c; + break; + } + } + + if (firstConfigWithRuleStopState != null) { + proposed.isAcceptState = true; + proposed.lexerActionExecutor = + (firstConfigWithRuleStopState as LexerATNConfig) + .lexerActionExecutor; + proposed.prediction = + atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]; + } + + DFA dfa = decisionToDFA[mode]; + DFAState existing = dfa.states[proposed]; + if (existing != null) return existing; + + DFAState newState = proposed; + + newState.stateNumber = dfa.states.length; + configs.readOnly = true; + newState.configs = configs; + dfa.states[newState] = newState; + return newState; + } + + DFA getDFA(int mode) { + return decisionToDFA[mode]; + } + + /** Get the text matched so far for the current token. + */ + + String getText(CharStream input) { + // index is first lookahead char, don't include. + return input.getText(Interval.of(startIndex, input.index - 1)); + } + + void consume(CharStream input) { + int curChar = input.LA(1); + if (curChar == 10) { // Is new line + line++; + charPositionInLine = 0; + } else { + charPositionInLine++; + } + input.consume(); + } + + String getTokenName(int t) { + if (t == -1) return "EOF"; + //if ( atn.g!=null ) return atn.g.getTokenDisplayName(t); + return "'${String.fromCharCode(t)}'"; + } +} diff --git a/runtime/Dart/lib/src/atn/src/parser_atn_simulator.dart b/runtime/Dart/lib/src/atn/src/parser_atn_simulator.dart new file mode 100644 index 000000000..01be3ba57 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/parser_atn_simulator.dart @@ -0,0 +1,2701 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:collection'; +import 'dart:developer'; + +import 'package:logging/logging.dart'; + +import '../../vocabulary.dart'; +import '../../dfa/dfa.dart'; +import '../../error/error.dart'; +import '../../input_stream.dart'; +import '../../interval_set.dart'; +import '../../misc/pair.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../prediction_context.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import '../../token_stream.dart'; +import '../../util/bit_set.dart'; +import '../../util/murmur_hash.dart'; +import 'atn.dart'; +import 'atn_config.dart'; +import 'atn_config_set.dart'; +import 'atn_simulator.dart'; +import 'atn_state.dart'; +import 'semantic_context.dart'; +import 'transition.dart'; + +/** + * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. + * + *
+ * The basic complexity of the adaptive strategy makes it harder to understand.
+ * We begin with ATN simulation to build paths in a DFA. Subsequent prediction
+ * requests go through the DFA first. If they reach a state without an edge for
+ * the current symbol, the algorithm fails over to the ATN simulation to
+ * complete the DFA path for the current input (until it finds a conflict state
+ * or uniquely predicting state).
+ *
+ * All of that is done without using the outer context because we want to create
+ * a DFA that is not dependent upon the rule invocation stack when we do a
+ * prediction. One DFA works in all contexts. We avoid using context not
+ * necessarily because it's slower, although it can be, but because of the DFA
+ * caching problem. The closure routine only considers the rule invocation stack
+ * created during prediction beginning in the decision rule. For example, if
+ * prediction occurs without invoking another rule's ATN, there are no context
+ * stacks in the configurations. When lack of context leads to a conflict, we
+ * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
+ * strategy (versus full LL(*)).
+ *
+ * When SLL yields a configuration set with conflict, we rewind the input and
+ * retry the ATN simulation, this time using full outer context without adding
+ * to the DFA. Configuration context stacks will be the full invocation stacks
+ * from the start rule. If we get a conflict using full context, then we can
+ * definitively say we have a true ambiguity for that input sequence. If we
+ * don't get a conflict, it implies that the decision is sensitive to the outer
+ * context. (It is not context-sensitive in the sense of context-sensitive
+ * grammars.)
+ *
+ * The next time we reach this DFA state with an SLL conflict, through DFA
+ * simulation, we will again retry the ATN simulation using full context mode.
+ * This is slow because we can't save the results and have to "interpret" the
+ * ATN each time we get that input.
+ *
+ * CACHING FULL CONTEXT PREDICTIONS
+ *
+ * We could cache results from full context to predicted alternative easily and
+ * that saves a lot of time but doesn't work in presence of predicates. The set
+ * of visible predicates from the ATN start state changes depending on the
+ * context, because closure can fall off the end of a rule. I tried to cache
+ * tuples (stack context, semantic context, predicted alt) but it was slower
+ * than interpreting and much more complicated. Also required a huge amount of
+ * memory. The goal is not to create the world's fastest parser anyway. I'd like
+ * to keep this algorithm simple. By launching multiple threads, we can improve
+ * the speed of parsing across a large number of files.
+ *
+ * There is no strict ordering between the amount of input used by SLL vs LL,
+ * which makes it really hard to build a cache for full context. Let's say that
+ * we have input A B C that leads to an SLL conflict with full context X. That
+ * implies that using X we might only use A B but we could also use A B C D to
+ * resolve conflict. Input A B C D could predict alternative 1 in one position
+ * in the input and A B C E could predict alternative 2 in another position in
+ * input. The conflicting SLL configurations could still be non-unique in the
+ * full context prediction, which would lead us to requiring more input than the
+ * original A B C. To make a prediction cache work, we have to track the exact
+ * input used during the previous prediction. That amounts to a cache that maps
+ * X to a specific DFA for that context.
+ *
+ * Something should be done for left-recursive expression predictions. They are
+ * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry
+ * with full LL thing Sam does.
+ *
+ * AVOIDING FULL CONTEXT PREDICTION
+ *
+ * We avoid doing full context retry when the outer context is empty, we did not
+ * dip into the outer context by falling off the end of the decision state rule,
+ * or when we force SLL mode.
+ *
+ * As an example of the not dip into outer context case, consider as super
+ * constructor calls versus function calls. One grammar might look like
+ * this:
+ *
+ * ctorBody
+ *   : '{' superCall? stat* '}'
+ *   ;
+ *
+ * Or, you might see something like
+ *
+ * stat
+ *   : superCall ';'
+ *   | expression ';'
+ *   | ...
+ *   ;
+ *
+ * In both cases I believe that no closure operations will dip into the outer
+ * context. In the first case ctorBody in the worst case will stop at the '}'.
+ * In the 2nd case it should stop at the ';'. Both cases should stay within the
+ * entry rule and not dip into the outer context.
+ *
+ * PREDICATES
+ *
+ * Predicates are always evaluated if present in either SLL or LL both. SLL and
+ * LL simulation deals with predicates differently. SLL collects predicates as
+ * it performs closure operations like ANTLR v3 did. It delays predicate
+ * evaluation until it reaches an accept state. This allows us to cache the SLL
+ * ATN simulation whereas, if we had evaluated predicates on-the-fly during
+ * closure, the DFA state configuration sets would be different and we couldn't
+ * build up a suitable DFA.
+ *
+ * When building a DFA accept state during ATN simulation, we evaluate any
+ * predicates and return the sole semantically valid alternative. If there is
+ * more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
+ * we throw an exception. Alternatives without predicates act like they have
+ * true predicates. The simple way to think about it is to strip away all
+ * alternatives with false predicates and choose the minimum alternative that
+ * remains.
+ *
+ * When we start in the DFA and reach an accept state that's predicated, we test
+ * those and return the minimum semantically viable alternative. If no
+ * alternatives are viable, we throw an exception.
+ *
+ * During full LL ATN simulation, closure always evaluates predicates
+ * on-the-fly. This is crucial to reducing the configuration set size during
+ * closure. It hits a landmine when parsing with the Java grammar, for example,
+ * without this on-the-fly evaluation.
+ *
+ * SHARING DFA
+ *
+ * All instances of the same parser share the same decision DFAs through a
+ * static field. Each instance gets its own ATN simulator but they share the
+ * same {@link #decisionToDFA} field. They also share a
+ * [PredictionContextCache] object that makes sure that all
+ * [PredictionContext] objects are shared among the DFA states. This makes
+ * a big size difference.
+ *
+ * THREAD SAFETY
+ *
+ * The [ParserATNSimulator] locks on the {@link #decisionToDFA} field when
+ * it adds a new DFA object to that array. {@link #addDFAEdge}
+ * locks on the DFA for the current decision when setting the
+ * {@link DFAState#edges} field. {@link #addDFAState} locks on
+ * the DFA for the current decision when looking up a DFA state to see if it
+ * already exists. We must make sure that all requests to add DFA states that
+ * are equivalent result in the same shared DFA object. This is because lots of
+ * threads will be trying to update the DFA at once. The
+ * {@link #addDFAState} method also locks inside the DFA lock
+ * but this time on the shared context cache when it rebuilds the
+ * configurations' [PredictionContext] objects using cached
+ * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
+ * safe as long as we can guarantee that all threads referencing
+ * {@code s.edge[t]} get the same physical target [DFAState], or
+ * null. Once into the DFA, the DFA simulation does not reference the
+ * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new
+ * targets. The DFA simulator will either find {@link DFAState#edges} to be
+ * null, to be non-null and {@code dfa.edges[t]} null, or
+ * {@code dfa.edges[t]} to be non-null. The
+ * {@link #addDFAEdge} method could be racing to set the field
+ * but in either case the DFA simulator works; if null, and requests ATN
+ * simulation. It could also race trying to get {@code dfa.edges[t]}, but either
+ * way it will work because it's not doing a test and set operation.
+ *
+ * Starting with SLL then failing to combined SLL/LL (Two-Stage
+ * Parsing)
+ *
+ * Sam pointed out that if SLL does not give a syntax error, then there is no
+ * point in doing full LL, which is slower. We only have to try LL if we get a
+ * syntax error. For maximum speed, Sam starts the parser set to pure SLL
+ * mode with the [BailErrorStrategy]:
+ *
+ * parser.{@link Parser#interpreter interpreter}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
+ * parser.{@link Parser#setErrorHandler setErrorHandler}(new [BailErrorStrategy]());
+ *
+ * If it does not get a syntax error, then we're done. If it does get a syntax
+ * error, we need to retry with the combined SLL/LL strategy.
+ *
+ * The reason this works is as follows. If there are no SLL conflicts, then the
+ * grammar is SLL (at least for that input set). If there is an SLL conflict,
+ * the full LL analysis must yield a set of viable alternatives which is a
+ * subset of the alternatives reported by SLL. If the LL set is a singleton,
+ * then the grammar is LL but not SLL. If the LL set is the same size as the SLL
+ * set, the decision is SLL. If the LL set has size > 1, then that decision
+ * is truly ambiguous on the current input. If the LL set is smaller, then the
+ * SLL conflict resolution might choose an alternative that the full LL would
+ * rule out as a possibility based upon better context information. If that's
+ * the case, then the SLL parse will definitely get an error because the full LL
+ * analysis says it's not viable. If SLL conflict resolution chooses an
+ * alternative within the LL set, then both SLL and LL would choose the same
+ * alternative because they both choose the minimum of multiple conflicting
+ * alternatives.
+ *
+ * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
+ * a smaller LL set called s. If s is {@code {2, 3}}, then SLL
+ * parsing will get an error because SLL will pursue alternative 1. If
+ * s is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
+ * choose the same alternative because alternative one is the minimum of either
+ * set. If s is {@code {2}} or {@code {3}} then SLL will get a syntax
+ * error. If s is {@code {1}} then SLL will succeed.
+ *
+ * Of course, if the input is invalid, then we will get an error for sure in
+ * both SLL and LL parsing. Erroneous input will therefore require 2 passes over
+ * the input.
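+ *
+ * A hedged Dart sketch of the two-stage strategy above (property and
+ * method names here are illustrative and may differ in this runtime):
+ *
+ *   parser.interpreter.predictionMode = PredictionMode.SLL;
+ *   parser.errorHandler = BailErrorStrategy();
+ *   try {
+ *     return parser.startRule();
+ *   } on ParseCancellationException {
+ *     input.seek(0); // rewind the token stream
+ *     parser.reset();
+ *     parser.errorHandler = DefaultErrorStrategy();
+ *     parser.interpreter.predictionMode = PredictionMode.LL;
+ *     return parser.startRule();
+ *   }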
+ */ +class ParserATNSimulator extends ATNSimulator { + static final bool debug = false; + static final bool debug_list_atn_decisions = false; + static final bool dfa_debug = false; + static final bool retry_debug = false; + + /** Just in case this optimization is bad, add an ENV variable to turn it off */ + static final bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = + bool.fromEnvironment("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT"); + + final Parser parser; + + final List decisionToDFA; + + /** SLL, LL, or LL + exact ambig detection? */ + + PredictionMode predictionMode = PredictionMode.LL; + + /** Each prediction operation uses a cache for merge of prediction contexts. + * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap + * isn't synchronized but we're ok since two threads shouldn't reuse same + * parser/atnsim object because it can only handle one input at a time. + * This maps graphs a and b to merged result c. (a,b)→c. We can avoid + * the merge if we ever see a and b again. Note that (b,a)→c should + * also be examined during cache lookup. + */ + Map, PredictionContext> mergeCache; + + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + TokenStream input; + int startIndex = 0; + ParserRuleContext _outerContext; + DFA _dfa; + + ParserATNSimulator(Parser this.parser, ATN atn, this.decisionToDFA, + PredictionContextCache sharedContextCache) + : super(atn, sharedContextCache) { + // DOTGenerator dot = new DOTGenerator(null); + // log(dot.getDOT(atn.rules.get(0), parser.getRuleNames())); + // log(dot.getDOT(atn.rules.get(1), parser.getRuleNames())); + } + + void reset() {} + + void clearDFA() { + for (int d = 0; d < decisionToDFA.length; d++) { + decisionToDFA[d] = new DFA(atn.getDecisionState(d), d); + } + } + + int adaptivePredict( + TokenStream input_, int decision, ParserRuleContext outerContext) { + if (debug || debug_list_atn_decisions) { + log("adaptivePredict decision $decision" + + " exec LA(1)==" + + getLookaheadName(input_) + + " line ${input_.LT(1).line}:${input_.LT(1).charPositionInLine}"); + } + + input = input_; + startIndex = input_.index; + _outerContext = outerContext; + DFA dfa = decisionToDFA[decision]; + _dfa = dfa; + + int m = input_.mark(); + int index = startIndex; + + // Now we are certain to have a specific decision's DFA + // But, do we still need an initial state? + try { + DFAState s0; + if (dfa.isPrecedenceDfa()) { + // the start state for a precedence DFA depends on the current + // parser precedence, and is provided by a DFA method. + s0 = dfa.getPrecedenceStartState(parser.precedence); + } else { + // the start state for a "regular" DFA is just s0 + s0 = dfa.s0; + } + + if (s0 == null) { + if (outerContext == null) outerContext = ParserRuleContext.EMPTY; + if (debug || debug_list_atn_decisions) { + log("predictATN decision ${dfa.decision}" + + " exec LA(1)==" + + getLookaheadName(input_) + + ", outerContext=" + + outerContext.toString(recog: parser)); + } + + bool fullCtx = false; + ATNConfigSet s0_closure = computeStartState( + dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx); + + if (dfa.isPrecedenceDfa()) { + /* If this is a precedence DFA, we use applyPrecedenceFilter + * to convert the computed start state to a precedence start + * state. We then use DFA.setPrecedenceStartState to set the + * appropriate start state for the precedence level rather + * than simply setting DFA.s0. 
+ */ + dfa.s0.configs = + s0_closure; // not used for prediction but useful to know start configs anyway + s0_closure = applyPrecedenceFilter(s0_closure); + s0 = addDFAState(dfa, new DFAState(configs: s0_closure)); + dfa.setPrecedenceStartState(parser.precedence, s0); + } else { + s0 = addDFAState(dfa, new DFAState(configs: s0_closure)); + dfa.s0 = s0; + } + } + + int alt = execATN(dfa, s0, input_, index, outerContext); + if (debug) + log("DFA after predictATN: " + dfa.toString(parser.vocabulary)); + return alt; + } finally { + mergeCache = null; // wack cache after each prediction + _dfa = null; + input_.seek(index); + input_.release(m); + } + } + + /** Performs ATN simulation to compute a predicted alternative based + * upon the remaining input, but also updates the DFA cache to avoid + * having to traverse the ATN again for the same input sequence. + + There are some key conditions we're looking for after computing a new + set of ATN configs (proposed DFA state): + * if the set is empty, there is no viable alternative for current symbol + * does the state uniquely predict an alternative? + * does the state have a conflict that would prevent us from + putting it on the work list? + + We also have some key operations to do: + * add an edge from previous DFA state to potentially new DFA state, D, + upon current symbol but only if adding to work list, which means in all + cases except no viable alternative (and possibly non-greedy decisions?) + * collecting predicates and adding semantic context to DFA accept states + * adding rule context to context-sensitive DFA accept states + * consuming an input symbol + * reporting a conflict + * reporting an ambiguity + * reporting a context sensitivity + * reporting insufficient predicates + + cover these cases: + dead end + single alt + single alt + preds + conflict + conflict + preds + */ + int execATN(DFA dfa, DFAState s0, TokenStream input, int startIndex, + ParserRuleContext outerContext) { + if (debug || debug_list_atn_decisions) { + log("execATN decision ${dfa.decision}" + + " exec LA(1)==" + + getLookaheadName(input) + + " line ${input.LT(1).line}" + + ":${input.LT(1).charPositionInLine}"); + } + + DFAState previousD = s0; + + if (debug) log("s0 = $s0"); + + int t = input.LA(1); + + while (true) { + // while more work + DFAState D = getExistingTargetState(previousD, t); + if (D == null) { + D = computeTargetState(dfa, previousD, t); + } + + if (D == ATNSimulator.ERROR) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for SLL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. 
+ NoViableAltException e = + noViableAlt(input, outerContext, previousD.configs, startIndex); + input.seek(startIndex); + int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( + previousD.configs, outerContext); + if (alt != ATN.INVALID_ALT_NUMBER) { + return alt; + } + throw e; + } + + if (D.requiresFullContext && predictionMode != PredictionMode.SLL) { + // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + BitSet conflictingAlts = D.configs.conflictingAlts; + if (D.predicates != null) { + if (debug) log("DFA state has preds in DFA sim LL failover"); + int conflictIndex = input.index; + if (conflictIndex != startIndex) { + input.seek(startIndex); + } + + conflictingAlts = + evalSemanticContext(D.predicates, outerContext, true); + if (conflictingAlts.cardinality == 1) { + if (debug) log("Full LL avoided"); + return conflictingAlts.nextset(0); + } + + if (conflictIndex != startIndex) { + // restore the index so reporting the fallback to full + // context occurs with the index at the correct spot + input.seek(conflictIndex); + } + } + + if (dfa_debug) log("ctx sensitive state ${outerContext} in $D"); + bool fullCtx = true; + ATNConfigSet s0_closure = + computeStartState(dfa.atnStartState, outerContext, fullCtx); + reportAttemptingFullContext( + dfa, conflictingAlts, D.configs, startIndex, input.index); + int alt = execATNWithFullContext( + dfa, D, s0_closure, input, startIndex, outerContext); + return alt; + } + + if (D.isAcceptState) { + if (D.predicates == null) { + return D.prediction; + } + + int stopIndex = input.index; + input.seek(startIndex); + BitSet alts = evalSemanticContext(D.predicates, outerContext, true); + switch (alts.cardinality) { + case 0: + throw noViableAlt(input, outerContext, D.configs, startIndex); + + case 1: + return alts.nextset(0); + + default: + // report ambiguity after predicate evaluation to make sure the correct + // set of ambig alts is reported. + reportAmbiguity( + dfa, D, startIndex, stopIndex, false, alts, D.configs); + return alts.nextset(0); + } + } + + previousD = D; + + if (t != IntStream.EOF) { + input.consume(); + t = input.LA(1); + } + } + } + + /** + * Get an existing target state for an edge in the DFA. If the target state + * for the edge has not yet been computed or is otherwise not available, + * this method returns null. + * + * @param previousD The current DFA state + * @param t The next input symbol + * @return The existing target DFA state for the given input symbol + * [t], or null if the target state for this edge is not + * already cached + */ + DFAState getExistingTargetState(DFAState previousD, int t) { + List edges = previousD.edges; + if (edges == null || t + 1 < 0 || t + 1 >= edges.length) { + return null; + } + + return edges[t + 1]; + } + + /** + * Compute a target state for an edge in the DFA, and attempt to add the + * computed state and corresponding edge to the DFA. + * + * @param dfa The DFA + * @param previousD The current DFA state + * @param t The next input symbol + * + * @return The computed target DFA state for the given input symbol + * [t]. If [t] does not lead to a valid DFA state, this method + * returns {@link #ERROR}. 
+ */ + DFAState computeTargetState(DFA dfa, DFAState previousD, int t) { + ATNConfigSet reach = computeReachSet(previousD.configs, t, false); + if (reach == null) { + addDFAEdge(dfa, previousD, t, ATNSimulator.ERROR); + return ATNSimulator.ERROR; + } + + // create new target state; we'll add to DFA after it's complete + DFAState D = new DFAState(configs: reach); + + int predictedAlt = getUniqueAlt(reach); + + if (debug) { + List altSubSets = + PredictionModeExtension.getConflictingAltSubsets(reach); + log("SLL altSubSets=$altSubSets" + + ", configs=$reach" + + ", predict=$predictedAlt, allSubsetsConflict=${PredictionModeExtension.allSubsetsConflict(altSubSets)}" + + ", conflictingAlts=${getConflictingAlts(reach)}"); + } + + if (predictedAlt != ATN.INVALID_ALT_NUMBER) { + // NO CONFLICT, UNIQUELY PREDICTED ALT + D.isAcceptState = true; + D.configs.uniqueAlt = predictedAlt; + D.prediction = predictedAlt; + } else if (PredictionModeExtension.hasSLLConflictTerminatingPrediction( + predictionMode, reach)) { + // MORE THAN ONE VIABLE ALTERNATIVE + D.configs.conflictingAlts = getConflictingAlts(reach); + D.requiresFullContext = true; + // in SLL-only mode, we will stop at this state and return the minimum alt + D.isAcceptState = true; + D.prediction = D.configs.conflictingAlts.nextset(0); + } + + if (D.isAcceptState && D.configs.hasSemanticContext) { + predicateDFAState(D, atn.getDecisionState(dfa.decision)); + if (D.predicates != null) { + D.prediction = ATN.INVALID_ALT_NUMBER; + } + } + + // all adds to dfa are done after we've created full D state + D = addDFAEdge(dfa, previousD, t, D); + return D; + } + + void predicateDFAState(DFAState dfaState, DecisionState decisionState) { + // We need to test all predicates, even in DFA states that + // uniquely predict alternative. + int nalts = decisionState.numberOfTransitions; + // Update DFA so reach becomes accept state with (predicate,alt) + // pairs if preds found for conflicting alts + BitSet altsToCollectPredsFrom = + getConflictingAltsOrUniqueAlt(dfaState.configs); + List altToPred = + getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts); + if (altToPred != null) { + dfaState.predicates = + getPredicatePredictions(altsToCollectPredsFrom, altToPred); + dfaState.prediction = ATN.INVALID_ALT_NUMBER; // make sure we use preds + } else { + // There are preds in configs but they might go away + // when OR'd together like {p}? || NONE == NONE. If neither + // alt has preds, resolve to min alt + dfaState.prediction = altsToCollectPredsFrom.nextset(0); + } + } + + // comes back with reach.uniqueAlt set to a valid alt + int execATNWithFullContext( + DFA dfa, + DFAState D, // how far we got in SLL DFA before failing over + ATNConfigSet s0, + TokenStream input, + int startIndex, + ParserRuleContext outerContext) { + if (debug || debug_list_atn_decisions) { + log("execATNWithFullContext $s0"); + } + bool fullCtx = true; + bool foundExactAmbig = false; + ATNConfigSet reach = null; + ATNConfigSet previous = s0; + input.seek(startIndex); + int t = input.LA(1); + int predictedAlt; + while (true) { + // while more work +// log("LL REACH "+getLookaheadName(input)+ +// " from configs.size="+previous.length+ +// " line "+input.LT(1).getLine()+":"+input.LT(1).getCharPositionInLine()); + reach = computeReachSet(previous, t, fullCtx); + if (reach == null) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for LL decision. 
Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. + NoViableAltException e = + noViableAlt(input, outerContext, previous, startIndex); + input.seek(startIndex); + int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( + previous, outerContext); + if (alt != ATN.INVALID_ALT_NUMBER) { + return alt; + } + throw e; + } + + List altSubSets = + PredictionModeExtension.getConflictingAltSubsets(reach); + if (debug) { + log("LL altSubSets=$altSubSets" + + ", predict=${PredictionModeExtension.getUniqueAlt(altSubSets)}" + + ", resolvesToJustOneViableAlt=${PredictionModeExtension.resolvesToJustOneViableAlt(altSubSets)}"); + } + +// log("altSubSets: "+altSubSets); +// log("reach="+reach+", "+reach.conflictingAlts, level: Level.SEVERE.value); + reach.uniqueAlt = getUniqueAlt(reach); + // unique prediction? + if (reach.uniqueAlt != ATN.INVALID_ALT_NUMBER) { + predictedAlt = reach.uniqueAlt; + break; + } + if (predictionMode != PredictionMode.LL_EXACT_AMBIG_DETECTION) { + predictedAlt = + PredictionModeExtension.resolvesToJustOneViableAlt(altSubSets); + if (predictedAlt != ATN.INVALID_ALT_NUMBER) { + break; + } + } else { + // In exact ambiguity mode, we never try to terminate early. + // Just keeps scarfing until we know what the conflict is + if (PredictionModeExtension.allSubsetsConflict(altSubSets) && + PredictionModeExtension.allSubsetsEqual(altSubSets)) { + foundExactAmbig = true; + predictedAlt = PredictionModeExtension.getSingleViableAlt(altSubSets); + break; + } + // else there are multiple non-conflicting subsets or + // we're not sure what the ambiguity is yet. + // So, keep going. + } + + previous = reach; + if (t != IntStream.EOF) { + input.consume(); + t = input.LA(1); + } + } + + // If the configuration set uniquely predicts an alternative, + // without conflict, then we know that it's a full LL decision + // not SLL. + if (reach.uniqueAlt != ATN.INVALID_ALT_NUMBER) { + reportContextSensitivity( + dfa, predictedAlt, reach, startIndex, input.index); + return predictedAlt; + } + + // We do not check predicates here because we have checked them + // on-the-fly when doing full context prediction. + + /* + In non-exact ambiguity detection mode, we might actually be able to + detect an exact ambiguity, but I'm not going to spend the cycles + needed to check. We only emit ambiguity warnings in exact ambiguity + mode. + + For example, we might know that we have conflicting configurations. + But, that does not mean that there is no way forward without a + conflict. It's possible to have nonconflicting alt subsets as in: + + LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + from + + [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + + In this case, (17,1,[5 $]) indicates there is some next sequence that + would resolve this without conflict to alternative 1. Any other viable + next sequence, however, is associated with a conflict. We stop + looking for input because no amount of further lookahead will alter + the fact that we should predict alternative 1. We just can't say for + sure that there is an ambiguity without looking further. 
+ */ + reportAmbiguity(dfa, D, startIndex, input.index, foundExactAmbig, + reach.alts, reach); + + return predictedAlt; + } + + ATNConfigSet computeReachSet(ATNConfigSet config, int t, bool fullCtx) { + if (debug) log("in computeReachSet, starting closure: $config"); + + if (mergeCache == null) { + mergeCache = {}; + } + + ATNConfigSet intermediate = new ATNConfigSet(fullCtx); + + /* Configurations already in a rule stop state indicate reaching the end + * of the decision rule (local context) or end of the start rule (full + * context). Once reached, these configurations are never updated by a + * closure operation, so they are handled separately for the performance + * advantage of having a smaller intermediate set when calling closure. + * + * For full-context reach operations, separate handling is required to + * ensure that the alternative matching the longest overall sequence is + * chosen when multiple such configurations can match the input. + */ + List skippedStopStates = null; + + // First figure out where we can reach on input t + for (ATNConfig c in config) { + if (debug) log("testing " + getTokenName(t) + " at " + c.toString()); + + if (c.state is RuleStopState) { + assert(c.context.isEmpty); + if (fullCtx || t == IntStream.EOF) { + if (skippedStopStates == null) { + skippedStopStates = []; + } + + skippedStopStates.add(c); + } + + continue; + } + + int n = c.state.numberOfTransitions; + for (int ti = 0; ti < n; ti++) { + // for each transition + Transition trans = c.state.transition(ti); + ATNState target = getReachableTarget(trans, t); + if (target != null) { + intermediate.add(new ATNConfig.dup(c, state: target), mergeCache); + } + } + } + + // Now figure out where the reach operation can take us... + + ATNConfigSet reach = null; + + /* This block optimizes the reach operation for intermediate sets which + * trivially indicate a termination state for the overall + * adaptivePredict operation. + * + * The conditions assume that intermediate + * contains all configurations relevant to the reach set, but this + * condition is not true when one or more configurations have been + * withheld in skippedStopStates, or when the current symbol is EOF. + */ + if (skippedStopStates == null && t != Token.EOF) { + if (intermediate.length == 1) { + // Don't pursue the closure if there is just one state. + // It can only have one alternative; just add to result + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = intermediate; + } else if (getUniqueAlt(intermediate) != ATN.INVALID_ALT_NUMBER) { + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = intermediate; + } + } + + /* If the reach set could not be trivially determined, perform a closure + * operation on the intermediate set to compute its initial value. + */ + if (reach == null) { + reach = new ATNConfigSet(fullCtx); + Set closureBusy = Set(); + bool treatEofAsEpsilon = t == Token.EOF; + for (ATNConfig c in intermediate) { + closure(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon); + } + } + + if (t == IntStream.EOF) { + /* After consuming EOF no additional input is possible, so we are + * only interested in configurations which reached the end of the + * decision rule (local context) or end of the start rule (full + * context). Update reach to contain only these configurations. 
+ * This handles both explicit EOF transitions in the grammar and implicit
+ * EOF transitions following the end of the decision or start rule.
+ *
+ * When reach==intermediate, no closure operation was performed. In
+ * this case, removeAllConfigsNotInRuleStopState needs to check for
+ * reachable rule stop states as well as configurations already in
+ * a rule stop state.
+ *
+ * This is handled before the configurations in skippedStopStates,
+ * because any configurations potentially added from that list are
+ * already guaranteed to meet this condition whether or not it's
+ * required.
+ */
+ reach = removeAllConfigsNotInRuleStopState(reach, reach == intermediate);
+ }
+
+ /* If skippedStopStates is not null, then it contains at least one
+ * configuration. For full-context reach operations, these
+ * configurations reached the end of the start rule, in which case we
+ * only add them back to reach if no configuration during the current
+ * closure operation reached such a state. This ensures adaptivePredict
+ * chooses an alternative matching the longest overall sequence when
+ * multiple alternatives are viable.
+ */
+ if (skippedStopStates != null &&
+ (!fullCtx ||
+ !PredictionModeExtension.hasConfigInRuleStopState(reach))) {
+ assert(!skippedStopStates.isEmpty);
+ for (ATNConfig c in skippedStopStates) {
+ reach.add(c, mergeCache);
+ }
+ }
+
+ if (reach.isEmpty) return null;
+ return reach;
+ }
+
+ /**
+ * Return a configuration set containing only the configurations from
+ * [configs] which are in a [RuleStopState]. If all
+ * configurations in [configs] are already in a rule stop state, this
+ * method simply returns [configs].
+ *
+ * When [lookToEndOfRule] is true, this method uses
+ * {@link ATN#nextTokens} for each configuration in [configs] which is
+ * not already in a rule stop state to see if a rule stop state is reachable
+ * from the configuration via epsilon-only transitions.
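+ *
+ * That reachability test reduces to asking whether EPSILON is in the
+ * follow set of the configuration's state (a minimal excerpt mirroring
+ * the method body below):
+ *
+ *     if (atn.nextTokens(config.state).contains(Token.EPSILON)) {
+ *       // a rule stop state is reachable via epsilon-only transitions
+ *     }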
+ * + * @param configs the configuration set to update + * @param lookToEndOfRule when true, this method checks for rule stop states + * reachable by epsilon-only transitions from each configuration in + * [configs]. + * + * @return [configs] if all configurations in [configs] are in a + * rule stop state, otherwise return a new configuration set containing only + * the configurations from [configs] which are in a rule stop state + */ + ATNConfigSet removeAllConfigsNotInRuleStopState( + ATNConfigSet configs, bool lookToEndOfRule) { + if (PredictionModeExtension.allConfigsInRuleStopStates(configs)) { + return configs; + } + + ATNConfigSet result = new ATNConfigSet(configs.fullCtx); + for (ATNConfig config in configs) { + if (config.state is RuleStopState) { + result.add(config, mergeCache); + continue; + } + + if (lookToEndOfRule && config.state.onlyHasEpsilonTransitions()) { + IntervalSet nextTokens = atn.nextTokens(config.state); + if (nextTokens.contains(Token.EPSILON)) { + ATNState endOfRuleState = atn.ruleToStopState[config.state.ruleIndex]; + result.add( + new ATNConfig.dup(config, state: endOfRuleState), mergeCache); + } + } + } + + return result; + } + + ATNConfigSet computeStartState(ATNState p, RuleContext ctx, bool fullCtx) { + // always at least the implicit call to start rule + PredictionContext initialContext = + PredictionContext.fromRuleContext(atn, ctx); + ATNConfigSet configs = new ATNConfigSet(fullCtx); + + for (int i = 0; i < p.numberOfTransitions; i++) { + ATNState target = p.transition(i).target; + ATNConfig c = new ATNConfig(target, i + 1, initialContext); + Set closureBusy = Set(); + closure(c, configs, closureBusy, true, fullCtx, false); + } + + return configs; + } + + /* parrt internal source braindump that doesn't mess up + * external API spec. + context-sensitive in that they can only be properly evaluated + in the context of the proper prec argument. Without pruning, + these predicates are normal predicates evaluated when we reach + conflict state (or unique prediction). As we cannot evaluate + these predicates out of context, the resulting conflict leads + to full LL evaluation and nonlinear prediction which shows up + very clearly with fairly large expressions. + + Example grammar: + + e : e '*' e + | e '+' e + | INT + ; + + We convert that to the following: + + e[int prec] + : INT + ( {3>=prec}? '*' e[4] + | {2>=prec}? '+' e[3] + )* + ; + + The (..)* loop has a decision for the inner block as well as + an enter or exit decision, which is what concerns us here. At + the 1st + of input 1+2+3, the loop entry sees both predicates + and the loop exit also sees both predicates by falling off the + edge of e. This is because we have no stack information with + SLL and find the follow of e, which will hit the return states + inside the loop after e[4] and e[3], which brings it back to + the enter or exit decision. In this case, we know that we + cannot evaluate those predicates because we have fallen off + the edge of the stack and will in general not know which prec + parameter is the right one to use in the predicate. + + Because we have special information, that these are precedence + predicates, we can resolve them without failing over to full + LL despite their context sensitive nature. We make an + assumption that prec[-1] <= prec[0], meaning that the current + precedence level is greater than or equal to the precedence + level of recursive invocations above us in the stack. For + example, if predicate {3>=prec}? 
is true of the current prec, + then one option is to enter the loop to match it now. The + other option is to exit the loop and the left recursive rule + to match the current operator in rule invocation further up + the stack. But, we know that all of those prec are lower or + the same value and so we can decide to enter the loop instead + of matching it later. That means we can strip out the other + configuration for the exit branch. + + So imagine we have (14,1,$,{2>=prec}?) and then + (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization + allows us to collapse these two configurations. We know that + if {2>=prec}? is true for the current prec parameter, it will + also be true for any prec from an invoking e call, indicated + by dipsIntoOuterContext. As the predicates are both true, we + have the option to evaluate them early in the decision start + state. We do this by stripping both predicates and choosing to + enter the loop as it is consistent with the notion of operator + precedence. It's also how the full LL conflict resolution + would work. + + The solution requires a different DFA start state for each + precedence level. + + The basic filter mechanism is to remove configurations of the + form (p, 2, pi) if (p, 1, pi) exists for the same p and pi. In + other words, for the same ATN state and predicate context, + remove any configuration associated with an exit branch if + there is a configuration associated with the enter branch. + + It's also the case that the filter evaluates precedence + predicates and resolves conflicts according to precedence + levels. For example, for input 1+2+3 at the first +, we see + prediction filtering + + [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1), + (11,2,[$],up=1), (14,2,[$],up=1)],hasSemanticContext=true,dipsIntoOuterContext + + to + + [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext + + This filters because {3>=prec}? evals to true and collapses + (11,1,[$],{3>=prec}?) and (11,2,[$],up=1) since early conflict + resolution based upon rules of operator precedence fits with + our usual match first alt upon conflict. + + We noticed a problem where a recursive call resets precedence + to 0. Sam's fix: each config has flag indicating if it has + returned from an expr[0] call. then just don't filter any + config with that flag set. flag is carried along in + closure(). so to avoid adding field, set bit just under sign + bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER). + With the change you filter "unless (p, 2, pi) was reached + after leaving the rule stop state of the LR rule containing + state p, corresponding to a rule invocation with precedence + level 0" + */ + + /** + * This method transforms the start state computed by + * {@link #computeStartState} to the special start state used by a + * precedence DFA for a particular precedence value. The transformation + * process applies the following changes to the start state's configuration + * set. + * + *
+ * 1. Evaluate the precedence predicates for each configuration using
+ *    {@link SemanticContext#evalPrecedence}.
+ * 2. When {@link ATNConfig#isPrecedenceFilterSuppressed} is [false],
+ *    remove all configurations which predict an alternative greater than 1,
+ *    for which another configuration that predicts alternative 1 is in the
+ *    same ATN state with the same prediction context. This transformation is
+ *    valid for the following reasons:
+ *    - The closure block cannot contain any epsilon transitions which bypass
+ *      the body of the closure, so all states reachable via alternative 1 are
+ *      part of the precedence alternatives of the transformed left-recursive
+ *      rule.
+ *    - The "primary" portion of a left recursive rule cannot contain an
+ *      epsilon transition, so the only way an alternative other than 1 can
+ *      exist in a state that is also reachable via alternative 1 is by
+ *      nesting calls to the left-recursive rule, with the outer calls not
+ *      being at the preferred precedence level. The
+ *      {@link ATNConfig#isPrecedenceFilterSuppressed} property marks ATN
+ *      configurations which do not meet this condition, and therefore are
+ *      not eligible for elimination during the filtering process.
+ *
+ * The prediction context must be considered by this filter to address
+ * situations like the following.
+ *
+ *     grammar TA;
+ *     prog: statement* EOF;
+ *     statement: letterA | statement letterA 'b' ;
+ *     letterA: 'a';
+ *
+ * In the above grammar, the ATN state immediately before the token
+ * reference {@code 'a'} in [letterA] is reachable from the left edge
+ * of both the primary and closure blocks of the left-recursive rule
+ * [statement]. The prediction context associated with each of these
+ * configurations distinguishes between them, and prevents the alternative
+ * which stepped out to [prog] (and then back in to [statement])
+ * from being eliminated by the filter.
+ *
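+ * Concretely, the elimination test for an alt>1 configuration is a state
+ * number plus prediction context comparison against the alt-1 entries
+ * collected in a first pass (an excerpt mirroring the method body below,
+ * with `statesFromAlt1` as the local map built there):
+ *
+ *     final context = statesFromAlt1[config.state.stateNumber];
+ *     if (context != null && context == config.context) {
+ *       continue; // eliminated by the filter
+ *     }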
+ * + * @param configs The configuration set computed by + * {@link #computeStartState} as the start state for the DFA. + * @return The transformed configuration set representing the start state + * for a precedence DFA at a particular precedence level (determined by + * calling {@link Parser#getPrecedence}). + */ + ATNConfigSet applyPrecedenceFilter(ATNConfigSet configs) { + Map statesFromAlt1 = {}; + ATNConfigSet configSet = new ATNConfigSet(configs.fullCtx); + for (ATNConfig config in configs) { + // handle alt 1 first + if (config.alt != 1) { + continue; + } + + SemanticContext updatedContext = + config.semanticContext.evalPrecedence(parser, _outerContext); + if (updatedContext == null) { + // the configuration was eliminated + continue; + } + + statesFromAlt1[config.state.stateNumber] = config.context; + if (updatedContext != config.semanticContext) { + configSet.add( + new ATNConfig.dup(config, semanticContext: updatedContext), + mergeCache); + } else { + configSet.add(config, mergeCache); + } + } + + for (ATNConfig config in configs) { + if (config.alt == 1) { + // already handled + continue; + } + + if (!config.isPrecedenceFilterSuppressed()) { + /* In the future, this elimination step could be updated to also + * filter the prediction context for alternatives predicting alt>1 + * (basically a graph subtraction algorithm). + */ + PredictionContext context = statesFromAlt1[config.state.stateNumber]; + if (context != null && context == config.context) { + // eliminated + continue; + } + } + + configSet.add(config, mergeCache); + } + + return configSet; + } + + ATNState getReachableTarget(Transition trans, int ttype) { + if (trans.matches(ttype, 0, atn.maxTokenType)) { + return trans.target; + } + + return null; + } + + List getPredsForAmbigAlts( + BitSet ambigAlts, ATNConfigSet configs, int nalts) { + // REACH=[1|1|[]|0:0, 1|2|[]|0:1] + /* altToPred starts as an array of all null contexts. The entry at index i + * corresponds to alternative i. altToPred[i] may have one of three values: + * 1. null: no ATNConfig c is found such that c.alt==i + * 2. SemanticContext.NONE: At least one ATNConfig c exists such that + * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, + * alt i has at least one unpredicated config. + * 3. Non-NONE Semantic Context: There exists at least one, and for all + * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. + * + * From this, it is clear that NONE||anything==NONE. 
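+ *
+ * For example (illustrative values): with nalts=2, alt 1 guarded by {p}?
+ * and alt 2 unpredicated, the loop below produces
+ *
+ *   altToPred = [null, {p}?, NONE]   // index 0 is unused
+ *
+ * and getPredicatePredictions turns that into the pairs ({p}?, 1) and
+ * (NONE, 2).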
+ */ + List altToPred = List(nalts + 1); + for (ATNConfig c in configs) { + if (ambigAlts[c.alt]) { + altToPred[c.alt] = + SemanticContext.or(altToPred[c.alt], c.semanticContext); + } + } + + int nPredAlts = 0; + for (int i = 1; i <= nalts; i++) { + if (altToPred[i] == null) { + altToPred[i] = SemanticContext.NONE; + } else if (altToPred[i] != SemanticContext.NONE) { + nPredAlts++; + } + } + +// // Optimize away p||p and p&&p TODO: optimize() was a no-op +// for (int i = 0; i < altToPred.length; i++) { +// altToPred[i] = altToPred[i].optimize(); +// } + + // nonambig alts are null in altToPred + if (nPredAlts == 0) altToPred = null; + if (debug) log("getPredsForAmbigAlts result $altToPred"); + return altToPred; + } + + List getPredicatePredictions( + BitSet ambigAlts, List altToPred) { + List pairs = []; + bool containsPredicate = false; + for (int i = 1; i < altToPred.length; i++) { + SemanticContext pred = altToPred[i]; + + // unpredicated is indicated by SemanticContext.NONE + assert(pred != null); + + if (ambigAlts != null && ambigAlts[i]) { + pairs.add(new PredPrediction(pred, i)); + } + if (pred != SemanticContext.NONE) containsPredicate = true; + } + + if (!containsPredicate) { + return null; + } + +// log(Arrays.toString(altToPred)+"->"+pairs); + return pairs; + } + + /** + * This method is used to improve the localization of error messages by + * choosing an alternative rather than throwing a + * [NoViableAltException] in particular prediction scenarios where the + * {@link #ERROR} state was reached during ATN simulation. + * + *
+ * The default implementation of this method uses the following
+ * algorithm to identify an ATN configuration which successfully parsed the
+ * decision entry rule. Choosing such an alternative ensures that the
+ * [ParserRuleContext] returned by the calling rule will be complete
+ * and valid, and the syntax error will be reported later at a more
+ * localized location.
+ *
+ * - If a syntactically valid path or paths reach the end of the decision
+ *   rule and they are semantically valid if predicated, return the minimum
+ *   associated alt.
+ * - Else, if a semantically invalid but syntactically valid path or paths
+ *   exist, return the minimum associated alt.
+ * - Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.
+ *
+ * In some scenarios, the algorithm described above could predict an + * alternative which will result in a [FailedPredicateException] in + * the parser. Specifically, this could occur if the only configuration + * capable of successfully parsing to the end of the decision rule is + * blocked by a semantic predicate. By choosing this alternative within + * {@link #adaptivePredict} instead of throwing a + * [NoViableAltException], the resulting + * [FailedPredicateException] in the parser will identify the specific + * predicate which is preventing the parser from successfully parsing the + * decision rule, which helps developers identify and correct logic errors + * in semantic predicates. + *
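+ * For example, given a predicated alternative such as the following
+ * (a hypothetical grammar fragment; `allowLet()` stands for some helper
+ * defined in the grammar's action code):
+ *
+ *     stat: {allowLet()}? 'let' ID '=' expr ';'
+ *         | expr ';'
+ *         ;
+ *
+ * if the only configuration that reaches the end of the decision rule is
+ * the predicated one, choosing that alternative surfaces a
+ * [FailedPredicateException] naming `allowLet()` rather than a generic
+ * [NoViableAltException].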
+ * + * @param configs The ATN configurations which were valid immediately before + * the {@link #ERROR} state was reached + * @param outerContext The is the \gamma_0 initial parser context from the paper + * or the parser stack at the instant before prediction commences. + * + * @return The value to return from {@link #adaptivePredict}, or + * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not + * identified and {@link #adaptivePredict} should report an error instead. + */ + int getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( + ATNConfigSet configs, ParserRuleContext outerContext) { + Pair sets = + splitAccordingToSemanticValidity(configs, outerContext); + ATNConfigSet semValidConfigs = sets.a; + ATNConfigSet semInvalidConfigs = sets.b; + int alt = getAltThatFinishedDecisionEntryRule(semValidConfigs); + if (alt != ATN.INVALID_ALT_NUMBER) { + // semantically/syntactically viable path exists + return alt; + } + // Is there a syntactically valid path with a failed pred? + if (semInvalidConfigs.length > 0) { + alt = getAltThatFinishedDecisionEntryRule(semInvalidConfigs); + if (alt != ATN.INVALID_ALT_NUMBER) { + // syntactically viable path exists + return alt; + } + } + return ATN.INVALID_ALT_NUMBER; + } + + int getAltThatFinishedDecisionEntryRule(ATNConfigSet configs) { + IntervalSet alts = new IntervalSet(); + for (ATNConfig c in configs) { + if (c.outerContextDepth > 0 || + (c.state is RuleStopState && c.context.hasEmptyPath())) { + alts.addOne(c.alt); + } + } + if (alts.length == 0) return ATN.INVALID_ALT_NUMBER; + return alts.minElement; + } + + /** Walk the list of configurations and split them according to + * those that have preds evaluating to true/false. If no pred, assume + * true pred and include in succeeded set. Returns Pair of sets. + * + * Create a new set so as not to alter the incoming parameter. + * + * Assumption: the input stream has been restored to the starting point + * prediction, which is where predicates need to evaluate. + */ + Pair splitAccordingToSemanticValidity( + ATNConfigSet configs, ParserRuleContext outerContext) { + ATNConfigSet succeeded = new ATNConfigSet(configs.fullCtx); + ATNConfigSet failed = new ATNConfigSet(configs.fullCtx); + for (ATNConfig c in configs) { + if (c.semanticContext != SemanticContext.NONE) { + bool predicateEvaluationResult = evalSemanticContextOne( + c.semanticContext, outerContext, c.alt, configs.fullCtx); + if (predicateEvaluationResult) { + succeeded.add(c); + } else { + failed.add(c); + } + } else { + succeeded.add(c); + } + } + return new Pair(succeeded, failed); + } + + /** Look through a list of predicate/alt pairs, returning alts for the + * pairs that win. A [NONE] predicate indicates an alt containing an + * unpredicated config which behaves as "always true." If !complete + * then we stop at the first predicate that evaluates to true. This + * includes pairs with null predicates. 
+ */
+ BitSet evalSemanticContext(List predPredictions,
+ ParserRuleContext outerContext, bool complete) {
+ BitSet predictions = new BitSet();
+ for (PredPrediction pair in predPredictions) {
+ if (pair.pred == SemanticContext.NONE) {
+ predictions.set(pair.alt);
+ if (!complete) {
+ break;
+ }
+ continue;
+ }
+
+ bool fullCtx = false; // in dfa
+ bool predicateEvaluationResult =
+ evalSemanticContextOne(pair.pred, outerContext, pair.alt, fullCtx);
+ if (debug || dfa_debug) {
+ log("eval pred $pair=$predicateEvaluationResult");
+ }
+
+ if (predicateEvaluationResult) {
+ if (debug || dfa_debug) log("PREDICT ${pair.alt}");
+ predictions.set(pair.alt);
+ if (!complete) {
+ break;
+ }
+ }
+ }
+
+ return predictions;
+ }
+
+ /**
+ * Evaluate a semantic context within a specific parser context.
+ *
+ * This method might not be called for every semantic context evaluated
+ * during the prediction process. In particular, we currently do not
+ * evaluate the following but it may change in the future:
+ *
+ * - Precedence predicates (represented by
+ *   {@link SemanticContext.PrecedencePredicate}) are not currently evaluated
+ *   through this method.
+ * - Operator predicates (represented by {@link SemanticContext.AND} and
+ *   {@link SemanticContext.OR}) are evaluated as a single semantic
+ *   context, rather than evaluating the operands individually.
+ *   Implementations which require evaluation results from individual
+ *   predicates should override this method to explicitly handle evaluation
+ *   of the operands within operator predicates.
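+ *
+ * A subclass can override this hook, e.g. to trace every evaluation
+ * (a sketch; the subclass and logging are hypothetical):
+ *
+ *     @override
+ *     bool evalSemanticContextOne(SemanticContext pred,
+ *         ParserRuleContext parserCallStack, int alt, bool fullCtx) {
+ *       final result = pred.eval(parser, parserCallStack);
+ *       log('pred $pred guarding alt $alt => $result');
+ *       return result;
+ *     }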
+ * + * @param pred The semantic context to evaluate + * @param parserCallStack The parser context in which to evaluate the + * semantic context + * @param alt The alternative which is guarded by [pred] + * @param fullCtx [true] if the evaluation is occurring during LL + * prediction; otherwise, [false] if the evaluation is occurring + * during SLL prediction + * + * @since 4.3 + */ + bool evalSemanticContextOne(SemanticContext pred, + ParserRuleContext parserCallStack, int alt, bool fullCtx) { + return pred.eval(parser, parserCallStack); + } + + /* TODO: If we are doing predicates, there is no point in pursuing + closure operations if we reach a DFA state that uniquely predicts + alternative. We will not be caching that DFA state and it is a + waste to pursue the closure. Might have to advance when we do + ambig detection thought :( + */ + + void closure( + ATNConfig config, + ATNConfigSet configs, + Set closureBusy, + bool collectPredicates, + bool fullCtx, + bool treatEofAsEpsilon) { + final int initialDepth = 0; + closureCheckingStopState(config, configs, closureBusy, collectPredicates, + fullCtx, initialDepth, treatEofAsEpsilon); + assert(!fullCtx || !configs.dipsIntoOuterContext); + } + + void closureCheckingStopState( + ATNConfig config, + ATNConfigSet configs, + Set closureBusy, + bool collectPredicates, + bool fullCtx, + int depth, + bool treatEofAsEpsilon) { + if (debug) log("closure(" + config.toString(parser, true) + ")"); + + if (config.state is RuleStopState) { + // We hit rule end. If we have context info, use it + // run thru all possible stack tops in ctx + if (!config.context.isEmpty) { + for (int i = 0; i < config.context.length; i++) { + if (config.context.getReturnState(i) == + PredictionContext.EMPTY_RETURN_STATE) { + if (fullCtx) { + configs.add( + new ATNConfig.dup(config, + state: config.state, context: PredictionContext.EMPTY), + mergeCache); + continue; + } else { + // we have no context info, just chase follow links (if greedy) + if (debug) + log("FALLING off rule " + getRuleName(config.state.ruleIndex)); + closure_(config, configs, closureBusy, collectPredicates, fullCtx, + depth, treatEofAsEpsilon); + } + continue; + } + ATNState returnState = atn.states[config.context.getReturnState(i)]; + PredictionContext newContext = + config.context.getParent(i); // "pop" return state + ATNConfig c = new ATNConfig( + returnState, config.alt, newContext, config.semanticContext); + // While we have context to pop back from, we may have + // gotten that context AFTER having falling off a rule. + // Make sure we track that we are now out of context. + // + // This assignment also propagates the + // isPrecedenceFilterSuppressed() value to the new + // configuration. 
+ c.reachesIntoOuterContext = config.reachesIntoOuterContext; +// assert(depth > int.MIN_VALUE); + closureCheckingStopState(c, configs, closureBusy, collectPredicates, + fullCtx, depth - 1, treatEofAsEpsilon); + } + return; + } else if (fullCtx) { + // reached end of start rule + configs.add(config, mergeCache); + return; + } else { + // else if we have no context info, just chase follow links (if greedy) + if (debug) + log("FALLING off rule " + getRuleName(config.state.ruleIndex)); + } + } + + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, + treatEofAsEpsilon); + } + + /** Do the actual work of walking epsilon edges */ + void closure_( + ATNConfig config, + ATNConfigSet configs, + Set closureBusy, + bool collectPredicates, + bool fullCtx, + int depth, + bool treatEofAsEpsilon) { + ATNState p = config.state; + // optimization + if (!p.onlyHasEpsilonTransitions()) { + configs.add(config, mergeCache); + // make sure to not return here, because EOF transitions can act as + // both epsilon transitions and non-epsilon transitions. +// if ( debug ) log("added config "+configs); + } + + for (int i = 0; i < p.numberOfTransitions; i++) { + if (i == 0 && canDropLoopEntryEdgeInLeftRecursiveRule(config)) continue; + + Transition t = p.transition(i); + bool continueCollecting = !(t is ActionTransition) && collectPredicates; + ATNConfig c = getEpsilonTarget(config, t, continueCollecting, depth == 0, + fullCtx, treatEofAsEpsilon); + if (c != null) { + int newDepth = depth; + if (config.state is RuleStopState) { + assert(!fullCtx); + // target fell off end of rule; mark resulting c as having dipped into outer context + // We can't get here if incoming config was rule stop and we had context + // track how far we dip into outer context. Might + // come in handy and we avoid evaluating context dependent + // preds if this is > 0. + + if (_dfa != null && _dfa.isPrecedenceDfa()) { + int outermostPrecedenceReturn = + (t as EpsilonTransition).outermostPrecedenceReturn; + if (outermostPrecedenceReturn == _dfa.atnStartState.ruleIndex) { + c.setPrecedenceFilterSuppressed(true); + } + } + + c.reachesIntoOuterContext++; + + if (!closureBusy.add(c)) { + // avoid infinite recursion for right-recursive rules + continue; + } + + // TODO: can remove? only care when we add to set per middle of this method + configs.dipsIntoOuterContext = true; +// assert(newDepth > int.MIN_VALUE); + newDepth--; + if (debug) log("dips into outer ctx: $c"); + } else { + if (!t.isEpsilon && !closureBusy.add(c)) { + // avoid infinite recursion for EOF* and EOF+ + continue; + } + + if (t is RuleTransition) { + // latch when newDepth goes negative - once we step out of the entry context we can't return + if (newDepth >= 0) { + newDepth++; + } + } + } + + closureCheckingStopState(c, configs, closureBusy, continueCollecting, + fullCtx, newDepth, treatEofAsEpsilon); + } + } + } + + /** Implements first-edge (loop entry) elimination as an optimization + * during closure operations. See antlr/antlr4#1398. + * + * The optimization is to avoid adding the loop entry config when + * the exit path can only lead back to the same + * StarLoopEntryState after popping context at the rule end state + * (traversing only epsilon edges, so we're still in closure, in + * this same rule). + * + * We need to detect any state that can reach loop entry on + * epsilon w/o exiting rule. We don't have to look at FOLLOW + * links, just ensure that all stack tops for config refer to key + * states in LR rule. 
+ * + * To verify we are in the right situation we must first check + * closure is at a StarLoopEntryState generated during LR removal. + * Then we check that each stack top of context is a return state + * from one of these cases: + * + * 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state + * 2. expr op expr. The return state is the block end of internal block of (...)* + * 3. 'between' expr 'and' expr. The return state of 2nd expr reference. + * That state points at block end of internal block of (...)*. + * 4. expr '?' expr ':' expr. The return state points at block end, + * which points at loop entry state. + * + * If any is true for each stack top, then closure does not add a + * config to the current config set for edge[0], the loop entry branch. + * + * Conditions fail if any context for the current config is: + * + * a. empty (we'd fall out of expr to do a global FOLLOW which could + * even be to some weird spot in expr) or, + * b. lies outside of expr or, + * c. lies within expr but at a state not the BlockEndState + * generated during LR removal + * + * Do we need to evaluate predicates ever in closure for this case? + * + * No. Predicates, including precedence predicates, are only + * evaluated when computing a DFA start state. I.e., only before + * the lookahead (but not parser) consumes a token. + * + * There are no epsilon edges allowed in LR rule alt blocks or in + * the "primary" part (ID here). If closure is in + * StarLoopEntryState any lookahead operation will have consumed a + * token as there are no epsilon-paths that lead to + * StarLoopEntryState. We do not have to evaluate predicates + * therefore if we are in the generated StarLoopEntryState of a LR + * rule. Note that when making a prediction starting at that + * decision point, decision d=2, compute-start-state performs + * closure starting at edges[0], edges[1] emanating from + * StarLoopEntryState. That means it is not performing closure on + * StarLoopEntryState during compute-start-state. + * + * How do we know this always gives same prediction answer? + * + * Without predicates, loop entry and exit paths are ambiguous + * upon remaining input +b (in, say, a+b). Either paths lead to + * valid parses. Closure can lead to consuming + immediately or by + * falling out of this call to expr back into expr and loop back + * again to StarLoopEntryState to match +b. In this special case, + * we choose the more efficient path, which is to take the bypass + * path. + * + * The lookahead language has not changed because closure chooses + * one path over the other. Both paths lead to consuming the same + * remaining input during a lookahead operation. If the next token + * is an operator, lookahead will enter the choice block with + * operators. If it is not, lookahead will exit expr. Same as if + * closure had chosen to enter the choice block immediately. + * + * Closure is examining one config (some loopentrystate, some alt, + * context) which means it is considering exactly one alt. Closure + * always copies the same alt to any derived configs. + * + * How do we know this optimization doesn't mess up precedence in + * our parse trees? + * + * Looking through expr from left edge of stat only has to confirm + * that an input, say, a+b+c; begins with any valid interpretation + * of an expression. The precedence actually doesn't matter when + * making a decision in stat seeing through expr. 
It is only when + * parsing rule expr that we must use the precedence to get the + * right interpretation and, hence, parse tree. + * + * @since 4.6 + */ + bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig config) { + if (TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT) return false; + ATNState p = config.state; + // First check to see if we are in StarLoopEntryState generated during + // left-recursion elimination. For efficiency, also check if + // the context has an empty stack case. If so, it would mean + // global FOLLOW so we can't perform optimization + if (p.stateType != StateType.STAR_LOOP_ENTRY || + !(p as StarLoopEntryState) + .isPrecedenceDecision || // Are we the special loop entry/exit state? + config.context.isEmpty || // If SLL wildcard + config.context.hasEmptyPath()) { + return false; + } + + // Require all return states to return back to the same rule + // that p is in. + int numCtxs = config.context.length; + for (int i = 0; i < numCtxs; i++) { + // for each stack context + ATNState returnState = atn.states[config.context.getReturnState(i)]; + if (returnState.ruleIndex != p.ruleIndex) return false; + } + + BlockStartState decisionStartState = p.transition(0).target; + int blockEndStateNum = decisionStartState.endState.stateNumber; + BlockEndState blockEndState = atn.states[blockEndStateNum]; + + // Verify that the top of each stack context leads to loop entry/exit + // state through epsilon edges and w/o leaving rule. + for (int i = 0; i < numCtxs; i++) { + // for each stack context + int returnStateNumber = config.context.getReturnState(i); + ATNState returnState = atn.states[returnStateNumber]; + // all states must have single outgoing epsilon edge + if (returnState.numberOfTransitions != 1 || + !returnState.transition(0).isEpsilon) { + return false; + } + // Look for prefix op case like 'not expr', (' type ')' expr + ATNState returnStateTarget = returnState.transition(0).target; + if (returnState.stateType == StateType.BLOCK_END && + returnStateTarget == p) { + continue; + } + // Look for 'expr op expr' or case where expr's return state is block end + // of (...)* internal block; the block end points to loop back + // which points to p but we don't need to check that + if (returnState == blockEndState) { + continue; + } + // Look for ternary expr ? expr : expr. 
The return state points at block end, + // which points at loop entry state + if (returnStateTarget == blockEndState) { + continue; + } + // Look for complex prefix 'between expr and expr' case where 2nd expr's + // return state points at block end state of (...)* internal block + if (returnStateTarget.stateType == StateType.BLOCK_END && + returnStateTarget.numberOfTransitions == 1 && + returnStateTarget.transition(0).isEpsilon && + returnStateTarget.transition(0).target == p) { + continue; + } + + // anything else ain't conforming + return false; + } + + return true; + } + + String getRuleName(int index) { + if (parser != null && index >= 0) return parser.ruleNames[index]; + return ""; + } + + ATNConfig getEpsilonTarget( + ATNConfig config, + Transition t, + bool collectPredicates, + bool inContext, + bool fullCtx, + bool treatEofAsEpsilon) { + switch (t.type) { + case TransitionType.RULE: + return ruleTransition(config, t); + + case TransitionType.PRECEDENCE: + return precedenceTransition( + config, t, collectPredicates, inContext, fullCtx); + + case TransitionType.PREDICATE: + return predTransition(config, t, collectPredicates, inContext, fullCtx); + case TransitionType.ACTION: + return actionTransition(config, t); + + case TransitionType.EPSILON: + return new ATNConfig.dup(config, state: t.target); + + case TransitionType.ATOM: + case TransitionType.RANGE: + case TransitionType.SET: + // EOF transitions act like epsilon transitions after the first EOF + // transition is traversed + if (treatEofAsEpsilon) { + if (t.matches(Token.EOF, 0, 1)) { + return new ATNConfig.dup(config, state: t.target); + } + } + + return null; + + default: + return null; + } + } + + ATNConfig actionTransition(ATNConfig config, ActionTransition t) { + if (debug) log("ACTION edge ${t.ruleIndex}:${t.actionIndex}"); + return new ATNConfig.dup(config, state: t.target); + } + + ATNConfig precedenceTransition( + ATNConfig config, + PrecedencePredicateTransition pt, + bool collectPredicates, + bool inContext, + bool fullCtx) { + if (debug) { + log("PRED (collectPredicates=$collectPredicates) ${pt.precedence}>=_p, ctx dependent=true"); + if (parser != null) { + log("context surrounding pred is ${parser.getRuleInvocationStack()}"); + } + } + + ATNConfig c = null; + if (collectPredicates && inContext) { + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. 
+ int currentPosition = input.index; + input.seek(startIndex); + bool predSucceeds = evalSemanticContextOne( + pt.predicate, _outerContext, config.alt, fullCtx); + input.seek(currentPosition); + if (predSucceeds) { + c = new ATNConfig.dup(config, state: pt.target); // no pred context + } + } else { + SemanticContext newSemCtx = + SemanticContext.and(config.semanticContext, pt.predicate); + c = new ATNConfig.dup(config, + state: pt.target, semanticContext: newSemCtx); + } + } else { + c = new ATNConfig.dup(config, state: pt.target); + } + + if (debug) log("config from pred transition=$c"); + return c; + } + + ATNConfig predTransition(ATNConfig config, PredicateTransition pt, + bool collectPredicates, bool inContext, bool fullCtx) { + if (debug) { + log("PRED (collectPredicates=$collectPredicates) " + + "${pt.ruleIndex}:${pt.predIndex}" + + ", ctx dependent=${pt.isCtxDependent}"); + if (parser != null) { + log("context surrounding pred is ${parser.getRuleInvocationStack()}"); + } + } + + ATNConfig c = null; + if (collectPredicates && + (!pt.isCtxDependent || (pt.isCtxDependent && inContext))) { + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. + int currentPosition = input.index; + input.seek(startIndex); + bool predSucceeds = evalSemanticContextOne( + pt.predicate, _outerContext, config.alt, fullCtx); + input.seek(currentPosition); + if (predSucceeds) { + c = new ATNConfig.dup(config, state: pt.target); // no pred context + } + } else { + SemanticContext newSemCtx = + SemanticContext.and(config.semanticContext, pt.predicate); + c = new ATNConfig.dup(config, + state: pt.target, semanticContext: newSemCtx); + } + } else { + c = new ATNConfig.dup(config, state: pt.target); + } + + if (debug) log("config from pred transition=$c"); + return c; + } + + ATNConfig ruleTransition(ATNConfig config, RuleTransition t) { + if (debug) { + log("CALL rule " + + getRuleName(t.target.ruleIndex) + + ", ctx=${config.context}"); + } + + ATNState returnState = t.followState; + PredictionContext newContext = SingletonPredictionContext.create( + config.context, returnState.stateNumber); + return new ATNConfig.dup(config, state: t.target, context: newContext); + } + + /** + * Gets a [BitSet] containing the alternatives in [configs] + * which are part of one or more conflicting alternative subsets. + * + * @param configs The [ATNConfigSet] to analyze. + * @return The alternatives in [configs] which are part of one or more + * conflicting alternative subsets. If [configs] does not contain any + * conflicting subsets, this method returns an empty [BitSet]. + */ + BitSet getConflictingAlts(ATNConfigSet configs) { + List altsets = + PredictionModeExtension.getConflictingAltSubsets(configs); + return PredictionModeExtension.getAlts(altsets); + } + + /** + Sam pointed out a problem with the previous definition, v3, of + ambiguous states. If we have another state associated with conflicting + alternatives, we should keep going. For example, the following grammar + + s : (ID | ID ID?) ';' ; + + When the ATN simulation reaches the state before ';', it has a DFA + state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally + 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node + because alternative to has another way to continue, via [6|2|[]]. 
+ The key is that we have a single state that has config's only associated + with a single alternative, 2, and crucially the state transitions + among the configurations are all non-epsilon transitions. That means + we don't consider any conflicts that include alternative 2. So, we + ignore the conflict between alts 1 and 2. We ignore a set of + conflicting alts when there is an intersection with an alternative + associated with a single alt state in the state→config-list map. + + It's also the case that we might have two conflicting configurations but + also a 3rd nonconflicting configuration for a different alternative: + [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: + + a : A | A | A B ; + + After matching input A, we reach the stop state for rule A, state 1. + State 8 is the state right before B. Clearly alternatives 1 and 2 + conflict and no amount of further lookahead will separate the two. + However, alternative 3 will be able to continue and so we do not + stop working on this state. In the previous example, we're concerned + with states associated with the conflicting alternatives. Here alt + 3 is not associated with the conflicting configs, but since we can continue + looking for input reasonably, I don't declare the state done. We + ignore a set of conflicting alts when we have an alternative + that we still need to pursue. + */ + BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) { + BitSet conflictingAlts; + if (configs.uniqueAlt != ATN.INVALID_ALT_NUMBER) { + conflictingAlts = new BitSet(); + conflictingAlts.set(configs.uniqueAlt); + } else { + conflictingAlts = configs.conflictingAlts; + } + return conflictingAlts; + } + + String getTokenName(int t) { + if (t == Token.EOF) { + return "EOF"; + } + + Vocabulary vocabulary = parser != null + ? parser.vocabulary + : VocabularyImpl.EMPTY_VOCABULARY; + String displayName = vocabulary.getDisplayName(t); + if (displayName == t.toString()) { + return displayName; + } + + return displayName + "<$t>"; + } + + String getLookaheadName(TokenStream input) { + return getTokenName(input.LA(1)); + } + + /** Used for debugging in adaptivePredict around execATN but I cut + * it out for clarity now that alg. works well. We can leave this + * "dead" code for a bit. + */ + void dumpDeadEndConfigs(NoViableAltException nvae) { + log("dead end configs: ", level: Level.SEVERE.value); + for (ATNConfig c in nvae.deadEndConfigs) { + String trans = "no edges"; + if (c.state.numberOfTransitions > 0) { + Transition t = c.state.transition(0); + if (t is AtomTransition) { + AtomTransition at = t; + trans = "Atom " + getTokenName(at.atomLabel); + } else if (t is SetTransition) { + SetTransition st = t; + bool not = st is NotSetTransition; + trans = (not ? "~" : "") + "Set " + st.label.toString(); + } + } + log(c.toString(parser, true) + ":" + trans, level: Level.SEVERE.value); + } + } + + NoViableAltException noViableAlt(TokenStream input, + ParserRuleContext outerContext, ATNConfigSet configs, int startIndex) { + return new NoViableAltException(parser, input, input.get(startIndex), + input.LT(1), configs, outerContext); + } + + static int getUniqueAlt(ATNConfigSet configs) { + int alt = ATN.INVALID_ALT_NUMBER; + for (ATNConfig c in configs) { + if (alt == ATN.INVALID_ALT_NUMBER) { + alt = c.alt; // found first alt + } else if (c.alt != alt) { + return ATN.INVALID_ALT_NUMBER; + } + } + return alt; + } + + /** + * Add an edge to the DFA, if possible. 
+ * This method calls
+ * {@link #addDFAState} to ensure the [to] state is present in the
+ * DFA. If [from] is null, or if [t] is outside the
+ * range of edges that can be represented in the DFA tables, this method
+ * returns without adding the edge to the DFA.
+ *
+ * If [to] is null, this method returns null.
+ * Otherwise, this method returns the [DFAState] returned by calling
+ * {@link #addDFAState} for the [to] state.
+ * + * @param dfa The DFA + * @param from The source state for the edge + * @param t The input symbol + * @param to The target state for the edge + * + * @return If [to] is null, this method returns null; + * otherwise this method returns the result of calling {@link #addDFAState} + * on [to] + */ + DFAState addDFAEdge(DFA dfa, DFAState from, int t, DFAState to) { + if (debug) { + log("EDGE $from -> $to upon " + getTokenName(t)); + } + + if (to == null) { + return null; + } + + to = addDFAState(dfa, to); // used existing if possible not incoming + if (from == null || t < -1 || t > atn.maxTokenType) { + return to; + } + + if (from.edges == null) { + from.edges = List(atn.maxTokenType + 1 + 1); + } + + from.edges[t + 1] = to; // connect + + if (debug) { + log("DFA=\n" + + dfa.toString(parser != null + ? parser.vocabulary + : VocabularyImpl.EMPTY_VOCABULARY)); + } + + return to; + } + + /** + * Add state [D] to the DFA if it is not already present, and return + * the actual instance stored in the DFA. If a state equivalent to [D] + * is already in the DFA, the existing state is returned. Otherwise this + * method returns [D] after adding it to the DFA. + * + *
+ * If [D] is {@link #ERROR}, this method returns {@link #ERROR} and
+ * does not change the DFA.
+ * + * @param dfa The dfa + * @param D The DFA state to add + * @return The state stored in the DFA. This will be either the existing + * state if [D] is already in the DFA, or [D] itself if the + * state was not already present. + */ + DFAState addDFAState(DFA dfa, DFAState D) { + if (D == ATNSimulator.ERROR) { + return D; + } + + DFAState existing = dfa.states[D]; + if (existing != null) return existing; + + D.stateNumber = dfa.states.length; + if (!D.configs.readOnly) { + D.configs.optimizeConfigs(this); + D.configs.readOnly = true; + } + dfa.states[D] = D; + if (debug) log("adding new DFA state: $D"); + return D; + } + + void reportAttemptingFullContext(DFA dfa, BitSet conflictingAlts, + ATNConfigSet configs, int startIndex, int stopIndex) { + if (debug || retry_debug) { + Interval interval = Interval.of(startIndex, stopIndex); + log("reportAttemptingFullContext decision=${dfa.decision}:$configs" + + ", input=" + + parser.tokenStream.getText(interval)); + } + if (parser != null) + parser.errorListenerDispatch.reportAttemptingFullContext( + parser, dfa, startIndex, stopIndex, conflictingAlts, configs); + } + + void reportContextSensitivity(DFA dfa, int prediction, ATNConfigSet configs, + int startIndex, int stopIndex) { + if (debug || retry_debug) { + Interval interval = Interval.of(startIndex, stopIndex); + log("reportContextSensitivity decision=${dfa.decision}:$configs" + + ", input=" + + parser.tokenStream.getText(interval)); + } + if (parser != null) + parser.errorListenerDispatch.reportContextSensitivity( + parser, dfa, startIndex, stopIndex, prediction, configs); + } + + /** If context sensitive parsing, we know it's ambiguity not conflict */ + void reportAmbiguity( + DFA dfa, + DFAState D, // the DFA state from execATN() that had SLL conflicts + int startIndex, + int stopIndex, + bool exact, + BitSet ambigAlts, + ATNConfigSet configs) // configs that LL not SLL considered conflicting + { + if (debug || retry_debug) { + Interval interval = Interval.of(startIndex, stopIndex); + log("reportAmbiguity $ambigAlts:$configs" + + ", input=" + + parser.tokenStream.getText(interval)); + } + if (parser != null) + parser.errorListenerDispatch.reportAmbiguity( + parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } +} + +/** + * This enumeration defines the prediction modes available in ANTLR 4 along with + * utility methods for analyzing configuration sets for conflicts and/or + * ambiguities. + */ +enum PredictionMode { + /** + * The SLL(*) prediction mode. This prediction mode ignores the current + * parser context when making predictions. This is the fastest prediction + * mode, and provides correct results for many grammars. This prediction + * mode is more powerful than the prediction mode provided by ANTLR 3, but + * may result in syntax errors for grammar and input combinations which are + * not SLL. + * + *
+ * When using this prediction mode, the parser will either return a correct
+ * parse tree (i.e. the same parse tree that would be returned with the
+ * {@link #LL} prediction mode), or it will report a syntax error. If a
+ * syntax error is encountered when using the {@link #SLL} prediction mode,
+ * it may be due to either an actual syntax error in the input or indicate
+ * that the particular combination of grammar and input requires the more
+ * powerful {@link #LL} prediction abilities to complete successfully.
+ *
+ * This prediction mode does not provide any guarantees for prediction
+ * behavior for syntactically-incorrect inputs.
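+ *
+ * A common two-stage pattern is to attempt the parse in SLL mode first
+ * and rerun in [LL] mode only if it fails (a sketch; it assumes an error
+ * strategy that surfaces syntax errors as exceptions, a generated parser
+ * with a `prog` start rule, and that the simulator is exposed via an
+ * `interpreter` accessor as elsewhere in this runtime):
+ *
+ *     parser.interpreter.predictionMode = PredictionMode.SLL;
+ *     try {
+ *       parser.prog();
+ *     } catch (e) {
+ *       // could be an SLL weakness rather than a true error; retry with LL
+ *       parser.reset();
+ *       parser.interpreter.predictionMode = PredictionMode.LL;
+ *       parser.prog();
+ *     }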
+ */ + SLL, + /** + * The LL(*) prediction mode. This prediction mode allows the current parser + * context to be used for resolving SLL conflicts that occur during + * prediction. This is the fastest prediction mode that guarantees correct + * parse results for all combinations of grammars with syntactically correct + * inputs. + * + *
+ * When using this prediction mode, the parser will make correct decisions
+ * for all syntactically-correct grammar and input combinations. However, in
+ * cases where the grammar is truly ambiguous this prediction mode might not
+ * report a precise answer for exactly which alternatives are
+ * ambiguous.
+ *
+ * This prediction mode does not provide any guarantees for prediction
+ * behavior for syntactically-incorrect inputs.
+ */ + LL, + /** + * The LL(*) prediction mode with exact ambiguity detection. In addition to + * the correctness guarantees provided by the {@link #LL} prediction mode, + * this prediction mode instructs the prediction algorithm to determine the + * complete and exact set of ambiguous alternatives for every ambiguous + * decision encountered while parsing. + * + *
+ * This prediction mode may be used for diagnosing ambiguities during
+ * grammar development. Due to the performance overhead of calculating sets
+ * of ambiguous alternatives, this prediction mode should be avoided when
+ * the exact results are not necessary.
+ *
+ * This prediction mode does not provide any guarantees for prediction
+ * behavior for syntactically-incorrect inputs.
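+ *
+ * During grammar development this mode is typically paired with a
+ * [DiagnosticErrorListener] so each exact ambiguity is reported (a
+ * sketch; `parser` is any generated parser):
+ *
+ *     parser.interpreter.predictionMode =
+ *         PredictionMode.LL_EXACT_AMBIG_DETECTION;
+ *     parser.addErrorListener(DiagnosticErrorListener());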
+ */ + LL_EXACT_AMBIG_DETECTION, +} + +extension PredictionModeExtension on PredictionMode { + /** + * Computes the SLL prediction termination condition. + * + *
+ * This method computes the SLL prediction termination condition for both of + * the following cases.

+ * + *
    + *
  • The usual SLL+LL fallback upon SLL conflict
  • + *
  • Pure SLL without LL fallback
  • + *
+ * + *

COMBINED SLL+LL PARSING

+ * + *

When LL-fallback is enabled upon SLL conflict, correct predictions are + * ensured regardless of how the termination condition is computed by this + * method. Due to the substantially higher cost of LL prediction, the + * prediction should only fall back to LL when the additional lookahead + * cannot lead to a unique SLL prediction.

+ * + *

Assuming combined SLL+LL parsing, an SLL configuration set with only + * conflicting subsets should fall back to full LL, even if the + * configuration sets don't resolve to the same alternative (e.g. + * {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting + * configuration, SLL could continue with the hopes that more lookahead will + * resolve via one of those non-conflicting configurations.

+ * + *

Here's the prediction termination rule them: SLL (for SLL+LL parsing) + * stops when it sees only conflicting configuration subsets. In contrast, + * full LL keeps going when there is uncertainty.

+ * + *

HEURISTIC

+ * + *

As a heuristic, we stop prediction when we see any conflicting subset + * unless we see a state that only has one alternative associated with it. + * The single-alt-state thing lets prediction continue upon rules like + * (otherwise, it would admit defeat too soon):

+ * + *

{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}

+ * + *

When the ATN simulation reaches the state before {@code ';'}, it has a + * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally + * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop + * processing this node because alternative to has another way to continue, + * via {@code [6|2|[]]}.

+ * + *

It also let's us continue for this rule:

+ * + *

{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}

+ * + *

After matching input A, we reach the stop state for rule A, state 1. + * State 8 is the state right before B. Clearly alternatives 1 and 2 + * conflict and no amount of further lookahead will separate the two. + * However, alternative 3 will be able to continue and so we do not stop + * working on this state. In the previous example, we're concerned with + * states associated with the conflicting alternatives. Here alt 3 is not + * associated with the conflicting configs, but since we can continue + * looking for input reasonably, don't declare the state done.

+ * + *

PURE SLL PARSING

+ * + *

To handle pure SLL parsing, all we have to do is make sure that we + * combine stack contexts for configurations that differ only by semantic + * predicate. From there, we can do the usual SLL termination heuristic.

+ * + *

PREDICATES IN SLL+LL PARSING

+ * + *

SLL decisions don't evaluate predicates until after they reach DFA stop + * states because they need to create the DFA cache that works in all + * semantic situations. In contrast, full LL evaluates predicates collected + * during start state computation so it can ignore predicates thereafter. + * This means that SLL termination detection can totally ignore semantic + * predicates.

+ * + *

Implementation-wise, [ATNConfigSet] combines stack contexts but not + * semantic predicate contexts so we might see two configurations like the + * following.

+ * + *

{@code (s, 1, x, {}), (s, 1, x', {p})}

+ * + *

Before testing these configurations against others, we have to merge + * [x] and {@code x'} (without modifying the existing configurations). + * For example, we test {@code (x+x')==x''} when looking for conflicts in + * the following configurations.

+ * + *

{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}

+ * + *

If the configuration set has predicates (as indicated by + * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of + * the configurations to strip out all of the predicates so that a standard + * [ATNConfigSet] will merge everything ignoring predicates.

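+ *
+ * As a rough sketch, a caller inside an SLL simulation loop might apply this
+ * check as follows; the surrounding loop and the `reachConfigs` variable are
+ * hypothetical, while the method itself is defined just below:
+ *
+ * <pre>
+ * if (PredictionModeExtension.hasSLLConflictTerminatingPrediction(
+ *     PredictionMode.SLL, reachConfigs)) {
+ *   // only conflicting subsets remain: stop SLL prediction here and either
+ *   // resolve to the minimum alternative or fail over to full LL
+ * }
+ * </pre>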
+ */ + static bool hasSLLConflictTerminatingPrediction( + PredictionMode mode, ATNConfigSet configs) { +/* Configs in rule stop states indicate reaching the end of the decision + * rule (local context) or end of start rule (full context). If all + * configs meet this condition, then none of the configurations is able + * to match additional input so we terminate prediction. + */ + if (allConfigsInRuleStopStates(configs)) { + return true; + } + +// pure SLL mode parsing + if (mode == PredictionMode.SLL) { +// Don't bother with combining configs from different semantic +// contexts if we can fail over to full LL; costs more time +// since we'll often fail over anyway. + if (configs.hasSemanticContext) { +// dup configs, tossing out semantic predicates + ATNConfigSet dup = new ATNConfigSet(); + for (ATNConfig c in configs) { + c = new ATNConfig.dup(c, semanticContext: SemanticContext.NONE); + dup.add(c); + } + configs = dup; + } +// now we have combined contexts for configs with dissimilar preds + } + +// pure SLL or combined SLL+LL mode parsing + + List altsets = getConflictingAltSubsets(configs); + bool heuristic = + hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); + return heuristic; + } + + /** + * Checks if any configuration in [configs] is in a + * [RuleStopState]. Configurations meeting this condition have reached + * the end of the decision rule (local context) or end of start rule (full + * context). + * + * @param configs the configuration set to test + * @return [true] if any configuration in [configs] is in a + * [RuleStopState], otherwise [false] + */ + static bool hasConfigInRuleStopState(ATNConfigSet configs) { + for (ATNConfig c in configs) { + if (c.state is RuleStopState) { + return true; + } + } + + return false; + } + + /** + * Checks if all configurations in [configs] are in a + * [RuleStopState]. Configurations meeting this condition have reached + * the end of the decision rule (local context) or end of start rule (full + * context). + * + * @param configs the configuration set to test + * @return [true] if all configurations in [configs] are in a + * [RuleStopState], otherwise [false] + */ + static bool allConfigsInRuleStopStates(ATNConfigSet configs) { + for (ATNConfig config in configs) { + if (!(config.state is RuleStopState)) { + return false; + } + } + + return true; + } + + /** + * Full LL prediction termination. + * + *
+ * Can we stop looking ahead during ATN simulation or is there some
+ * uncertainty as to which alternative we will ultimately pick, after
+ * consuming more input? Even if there are partial conflicts, we might know
+ * that everything is going to resolve to the same minimum alternative. That
+ * means we can stop since no more lookahead will change that fact. On the
+ * other hand, there might be multiple conflicts that resolve to different
+ * minimums. That means we need more look ahead to decide which of those
+ * alternatives we should predict.
+ *
+ * The basic idea is to split the set of configurations [C], into
+ * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
+ * non-conflicting configurations. Two configurations conflict if they have
+ * identical {@link ATNConfig#state} and {@link ATNConfig#context} values
+ * but a different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
+ * and {@code (s, j, ctx, _)} for {@code i!=j}.
+ *
+ * Reduce these configuration subsets to the set of possible alternatives.
+ * You can compute the alternative subsets in one pass as follows:
+ *
+ * {@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
+ * [C] holding [s] and [ctx] fixed.
+ *
+ * Or in pseudo-code, for each configuration [c] in [C]:
+ *
+   * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
+   * alt and not pred
+ *
+ * The values in [map] are the set of {@code A_s,ctx} sets.
+ *
+ * If {@code |A_s,ctx|=1} then there is no conflict associated with
+ * [s] and [ctx].
+ *
+ * Reduce the subsets to singletons by choosing a minimum of each subset. If
+ * the union of these alternative subsets is a singleton, then no amount of
+ * more lookahead will help us. We will always pick that alternative. If,
+ * however, there is more than one alternative, then we are uncertain which
+ * alternative to predict and must continue looking for resolution. We may
+ * or may not discover an ambiguity in the future, even if there are no
+ * conflicting subsets this round.
+ *
+ * The biggest sin is to terminate early because it means we've made a
+ * decision but were uncertain as to the eventual outcome. We haven't used
+ * enough lookahead. On the other hand, announcing a conflict too late is no
+ * big deal; you will still have the conflict. It's just inefficient. It
+ * might even look until the end of file.
+ *
+ * No special consideration for semantic predicates is required because
+ * predicates are evaluated on-the-fly for full LL prediction, ensuring that
+ * no configuration contains a semantic context during the termination
+ * check.
+ *
+ * CONFLICTING CONFIGS
+ *
+ * Two configurations {@code (s, i, x)} and {@code (s, j, x')} conflict
+ * when {@code i!=j} but {@code x=x'}. Because we merge all
+ * {@code (s, i, _)} configurations together, that means that there are at
+ * most [n] configurations associated with state [s] for
+ * [n] possible alternatives in the decision. The merged stacks
+ * complicate the comparison of configuration contexts [x] and
+ * {@code x'}. Sam checks to see if one is a subset of the other by calling
+ * merge and checking to see if the merged result is either [x] or
+ * {@code x'}. If the [x] associated with the lowest alternative [i]
+ * is the superset, then [i] is the only possible prediction since the
+ * others resolve to {@code min(i)} as well. However, if [x] is
+ * associated with {@code j>i} then at least one stack configuration for
+ * [j] is not in conflict with alternative [i]. The algorithm
+ * should keep going, looking for more lookahead due to the uncertainty.
+ *
+ * For simplicity, I'm doing an equality check between [x] and
+ * {@code x'} that lets the algorithm continue to consume lookahead longer
+ * than necessary. The reason I like the equality is of course the
+ * simplicity, but also because that is the test you need to detect the
+ * alternatives that are actually in conflict.
+ *
+ * CONTINUE/STOP RULE
+ *
+ * Continue if the union of resolved alternative sets from non-conflicting
+ * and conflicting alternative subsets has more than one alternative. We are
+ * uncertain about which alternative to predict.
+ *
+ * The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
+ * alternatives are still in the running for the amount of input we've
+ * consumed at this point. The conflicting sets let us strip away
+ * configurations that won't lead to more states because we resolve
+ * conflicts to the configuration with a minimum alternate for the
+ * conflicting set.
+ *
+ * CASES
+ *
+ * - no conflicts and more than 1 alternative in set => continue
+ *
+ * - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
+ *   {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
+ *   {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
+ *   {@code {1,3}} => continue
+ *
+ * - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
+ *   {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
+ *   {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
+ *   {@code {1}} => stop and predict 1
+ *
+ * - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
+ *   {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
+ *   {@code {1}} = {@code {1}} => stop and predict 1, can announce
+ *   ambiguity {@code {1,2}}
+ *
+ * - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
+ *   {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
+ *   {@code {2}} = {@code {1,2}} => continue
+ *
+ * - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
+ *   {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
+ *   {@code {3}} = {@code {1,3}} => continue
+ *
+ * EXACT AMBIGUITY DETECTION
+ *
+ * If all states report the same conflicting set of alternatives, then we
+ * know we have the exact ambiguity set:
+ *
+ * {@code |A_i| > 1} and {@code A_i = A_j} for all [i], [j].
+ *
+ * In other words, we continue examining lookahead until all {@code A_i}
+ * have more than one alternative and all {@code A_i} are the same. If
+ * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
+ * because the resolved set is {@code {1}}. To determine what the real
+ * ambiguity is, we have to know whether the ambiguity is between one and
+ * two or one and three so we keep going. We can only stop prediction when
+ * we need exact ambiguity detection when the sets look like
+ * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...
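+ *
+ * A hypothetical full-LL termination check built from helpers defined below
+ * (only the `reach` configuration set is assumed; the two methods are real
+ * members of this extension):
+ *
+ * <pre>
+ * final altSubsets = PredictionModeExtension.getConflictingAltSubsets(reach);
+ * final predicted = PredictionModeExtension.resolvesToJustOneViableAlt(altSubsets);
+ * if (predicted != ATN.INVALID_ALT_NUMBER) {
+ *   // every conflicting subset resolves to the same minimum alternative,
+ *   // so no amount of further lookahead can change the outcome: stop.
+ * }
+ * </pre>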
+ */ + static int resolvesToJustOneViableAlt(List altsets) { + return getSingleViableAlt(altsets); + } + + /** + * Determines if every alternative subset in [altsets] contains more + * than one alternative. + * + * @param altsets a collection of alternative subsets + * @return [true] if every [BitSet] in [altsets] has + * {@link BitSet#cardinality cardinality} > 1, otherwise [false] + */ + static bool allSubsetsConflict(List altsets) { + return !hasNonConflictingAltSet(altsets); + } + + /** + * Determines if any single alternative subset in [altsets] contains + * exactly one alternative. + * + * @param altsets a collection of alternative subsets + * @return [true] if [altsets] contains a [BitSet] with + * {@link BitSet#cardinality cardinality} 1, otherwise [false] + */ + static bool hasNonConflictingAltSet(List altsets) { + for (BitSet alts in altsets) { + if (alts.cardinality == 1) { + return true; + } + } + return false; + } + + /** + * Determines if any single alternative subset in [altsets] contains + * more than one alternative. + * + * @param altsets a collection of alternative subsets + * @return [true] if [altsets] contains a [BitSet] with + * {@link BitSet#cardinality cardinality} > 1, otherwise [false] + */ + static bool hasConflictingAltSet(List altsets) { + for (BitSet alts in altsets) { + if (alts.cardinality > 1) { + return true; + } + } + return false; + } + + /** + * Determines if every alternative subset in [altsets] is equivalent. + * + * @param altsets a collection of alternative subsets + * @return [true] if every member of [altsets] is equal to the + * others, otherwise [false] + */ + static bool allSubsetsEqual(List altsets) { + final first = altsets.first; + return altsets.every((e) => e == first); + } + + /** + * Returns the unique alternative predicted by all alternative subsets in + * [altsets]. If no such alternative exists, this method returns + * {@link ATN#INVALID_ALT_NUMBER}. + * + * @param altsets a collection of alternative subsets + */ + static int getUniqueAlt(List altsets) { + BitSet all = getAlts(altsets); + if (all.cardinality == 1) return all.nextset(0); + return ATN.INVALID_ALT_NUMBER; + } + + /** + * Gets the complete set of represented alternatives for a collection of + * alternative subsets. This method returns the union of each [BitSet] + * in [altsets]. + * + * @param altsets a collection of alternative subsets + * @return the set of represented alternatives in [altsets] + */ + static BitSet getAlts(List altsets) { + BitSet all = new BitSet(); + for (BitSet alts in altsets) { + all.or(alts); + } + return all; + } + + /** + * Get union of all alts from configs. + * + * @since 4.5.1 + */ + static BitSet getAltsFromConfigs(ATNConfigSet configs) { + BitSet alts = new BitSet(); + for (ATNConfig config in configs) { + alts.set(config.alt); + } + return alts; + } + + /** + * This function gets the conflicting alt subsets from a configuration set. + * For each configuration [c] in [configs]: + * + *
+   * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
+   * alt and not pred
+   *
+ */ + static List getConflictingAltSubsets(ATNConfigSet configs) { + final configToAlts = + new HashMap(equals: (ATNConfig a, ATNConfig b) { + if (identical(a, b)) return true; + if (a == null || b == null) return false; + return a.state.stateNumber == b.state.stateNumber && + a.context == b.context; + }, hashCode: (ATNConfig o) { + /** + * The hash code is only a function of the {@link ATNState#stateNumber} + * and {@link ATNConfig#context}. + */ + int hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, o.state.stateNumber); + hashCode = MurmurHash.update(hashCode, o.context); + hashCode = MurmurHash.finish(hashCode, 2); + return hashCode; + }); + for (ATNConfig c in configs) { + BitSet alts = configToAlts[c]; + if (alts == null) { + alts = new BitSet(); + configToAlts[c] = alts; + } + alts.set(c.alt); + } + return configToAlts.values.toList(); + } + + /** + * Get a map from state to alt subset from a configuration set. For each + * configuration [c] in [configs]: + * + *
+   * map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
+   *
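+   * For example (illustrative), the "one alt per state" heuristic used by
+   * {@code hasSLLConflictTerminatingPrediction} can be phrased in terms of
+   * this map:
+   *
+   * <pre>
+   * final stateToAlt = PredictionModeExtension.getStateToAltMap(configs);
+   * final singleAltState =
+   *     stateToAlt.values.any((alts) => alts.cardinality == 1);
+   * </pre>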
+ */ + static Map getStateToAltMap(ATNConfigSet configs) { + Map m = {}; + for (ATNConfig c in configs) { + BitSet alts = m[c.state]; + if (alts == null) { + alts = new BitSet(); + m[c.state] = alts; + } + alts.set(c.alt); + } + return m; + } + + static bool hasStateAssociatedWithOneAlt(ATNConfigSet configs) { + Map x = getStateToAltMap(configs); + for (BitSet alts in x.values) { + if (alts.cardinality == 1) return true; + } + return false; + } + + static int getSingleViableAlt(List altsets) { + BitSet viableAlts = new BitSet(); + for (BitSet alts in altsets) { + int minAlt = alts.nextset(0); + viableAlts.set(minAlt); + if (viableAlts.cardinality > 1) { + // more than 1 viable alt + return ATN.INVALID_ALT_NUMBER; + } + } + return viableAlts.nextset(0); + } +} diff --git a/runtime/Dart/lib/src/atn/src/profiling_atn_simulator.dart b/runtime/Dart/lib/src/atn/src/profiling_atn_simulator.dart new file mode 100644 index 000000000..06984d47f --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/profiling_atn_simulator.dart @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:math'; + +import '../../dfa/dfa.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../token_stream.dart'; +import '../../util/bit_set.dart'; +import 'atn_config_set.dart'; +import 'atn_simulator.dart'; +import 'info.dart'; +import 'parser_atn_simulator.dart'; +import 'semantic_context.dart'; + +class ProfilingATNSimulator extends ParserATNSimulator { + List decisions; + int numDecisions; + + int _sllStopIndex; + int _llStopIndex; + + int currentDecision; + DFAState currentState; + + /** At the point of LL failover, we record how SLL would resolve the conflict so that + * we can determine whether or not a decision / input pair is context-sensitive. + * If LL gives a different result than SLL's predicted alternative, we have a + * context sensitivity for sure. The converse is not necessarily true, however. + * It's possible that after conflict resolution chooses minimum alternatives, + * SLL could get the same answer as LL. Regardless of whether or not the result indicates + * an ambiguity, it is not treated as a context sensitivity because LL prediction + * was not required in order to produce a correct prediction for this decision and input sequence. + * It may in fact still be a context sensitivity but we don't know by looking at the + * minimum alternatives for the current input. 
+ */ + int conflictingAltResolvedBySLL; + + ProfilingATNSimulator(Parser parser) + : super(parser, parser.interpreter.atn, parser.interpreter.decisionToDFA, + parser.interpreter.sharedContextCache) { + numDecisions = atn.decisionToState.length; + decisions = List(numDecisions); + for (int i = 0; i < numDecisions; i++) { + decisions[i] = new DecisionInfo(i); + } + } + + int adaptivePredict( + TokenStream input, int decision, ParserRuleContext outerContext) { + try { + this._sllStopIndex = -1; + this._llStopIndex = -1; + this.currentDecision = decision; + + final start = + DateTime.now(); // TODO get nano seconds expensive but useful info + int alt = super.adaptivePredict(input, decision, outerContext); + final stop = DateTime.now(); + decisions[decision].timeInPrediction += + (stop.difference(start)).inMicroseconds; + decisions[decision].invocations++; + + int SLL_k = _sllStopIndex - startIndex + 1; + decisions[decision].SLL_TotalLook += SLL_k; + decisions[decision].SLL_MinLook = decisions[decision].SLL_MinLook == 0 + ? SLL_k + : min(decisions[decision].SLL_MinLook, SLL_k); + if (SLL_k > decisions[decision].SLL_MaxLook) { + decisions[decision].SLL_MaxLook = SLL_k; + decisions[decision].SLL_MaxLookEvent = new LookaheadEventInfo( + decision, null, alt, input, startIndex, _sllStopIndex, false); + } + + if (_llStopIndex >= 0) { + int LL_k = _llStopIndex - startIndex + 1; + decisions[decision].LL_TotalLook += LL_k; + decisions[decision].LL_MinLook = decisions[decision].LL_MinLook == 0 + ? LL_k + : min(decisions[decision].LL_MinLook, LL_k); + if (LL_k > decisions[decision].LL_MaxLook) { + decisions[decision].LL_MaxLook = LL_k; + decisions[decision].LL_MaxLookEvent = new LookaheadEventInfo( + decision, null, alt, input, startIndex, _llStopIndex, true); + } + } + + return alt; + } finally { + this.currentDecision = -1; + } + } + + DFAState getExistingTargetState(DFAState previousD, int t) { + // this method is called after each time the input position advances + // during SLL prediction + _sllStopIndex = input.index; + + DFAState existingTargetState = super.getExistingTargetState(previousD, t); + if (existingTargetState != null) { + decisions[currentDecision] + .SLL_DFATransitions++; // count only if we transition over a DFA state + if (existingTargetState == ATNSimulator.ERROR) { + decisions[currentDecision].errors.add(new ErrorInfo(currentDecision, + previousD.configs, input, startIndex, _sllStopIndex, false)); + } + } + + currentState = existingTargetState; + return existingTargetState; + } + + DFAState computeTargetState(DFA dfa, DFAState previousD, int t) { + DFAState state = super.computeTargetState(dfa, previousD, t); + currentState = state; + return state; + } + + ATNConfigSet computeReachSet(ATNConfigSet closure, int t, bool fullCtx) { + if (fullCtx) { + // this method is called after each time the input position advances + // during full context prediction + _llStopIndex = input.index; + } + + ATNConfigSet reachConfigs = super.computeReachSet(closure, t, fullCtx); + if (fullCtx) { + decisions[currentDecision] + .LL_ATNTransitions++; // count computation even if error + if (reachConfigs != null) { + } else { + // no reach on current lookahead symbol. ERROR. 
+ // TODO: does not handle delayed errors per getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule() + decisions[currentDecision].errors.add(new ErrorInfo( + currentDecision, closure, input, startIndex, _llStopIndex, true)); + } + } else { + decisions[currentDecision].SLL_ATNTransitions++; + if (reachConfigs != null) { + } else { + // no reach on current lookahead symbol. ERROR. + decisions[currentDecision].errors.add(new ErrorInfo( + currentDecision, closure, input, startIndex, _sllStopIndex, false)); + } + } + return reachConfigs; + } + + bool evalSemanticContextOne(SemanticContext pred, + ParserRuleContext parserCallStack, int alt, bool fullCtx) { + bool result = + super.evalSemanticContextOne(pred, parserCallStack, alt, fullCtx); + if (!(pred is PrecedencePredicate)) { + bool fullContext = _llStopIndex >= 0; + int stopIndex = fullContext ? _llStopIndex : _sllStopIndex; + decisions[currentDecision].predicateEvals.add(new PredicateEvalInfo( + currentDecision, + input, + startIndex, + stopIndex, + pred, + result, + alt, + fullCtx)); + } + + return result; + } + + void reportAttemptingFullContext(DFA dfa, BitSet conflictingAlts, + ATNConfigSet configs, int startIndex, int stopIndex) { + if (conflictingAlts != null) { + conflictingAltResolvedBySLL = conflictingAlts.nextset(0); + } else { + conflictingAltResolvedBySLL = configs.alts.nextset(0); + } + decisions[currentDecision].LL_Fallback++; + super.reportAttemptingFullContext( + dfa, conflictingAlts, configs, startIndex, stopIndex); + } + + void reportContextSensitivity(DFA dfa, int prediction, ATNConfigSet configs, + int startIndex, int stopIndex) { + if (prediction != conflictingAltResolvedBySLL) { + decisions[currentDecision].contextSensitivities.add( + new ContextSensitivityInfo( + currentDecision, configs, input, startIndex, stopIndex)); + } + super.reportContextSensitivity( + dfa, prediction, configs, startIndex, stopIndex); + } + + void reportAmbiguity(DFA dfa, DFAState D, int startIndex, int stopIndex, + bool exact, BitSet ambigAlts, ATNConfigSet configs) { + final int prediction = + ambigAlts != null ? ambigAlts.nextset(0) : configs.alts.nextset(0); + if (configs.fullCtx && prediction != conflictingAltResolvedBySLL) { + // Even though this is an ambiguity we are reporting, we can + // still detect some context sensitivities. Both SLL and LL + // are showing a conflict, hence an ambiguity, but if they resolve + // to different minimum alternatives we have also identified a + // context sensitivity. + decisions[currentDecision].contextSensitivities.add( + new ContextSensitivityInfo( + currentDecision, configs, input, startIndex, stopIndex)); + } + decisions[currentDecision].ambiguities.add(new AmbiguityInfo( + currentDecision, + configs, + ambigAlts, + input, + startIndex, + stopIndex, + configs.fullCtx)); + super.reportAmbiguity( + dfa, D, startIndex, stopIndex, exact, ambigAlts, configs); + } + + // --------------------------------------------------------------------- + + List get decisionInfo { + return decisions; + } +} diff --git a/runtime/Dart/lib/src/atn/src/semantic_context.dart b/runtime/Dart/lib/src/atn/src/semantic_context.dart new file mode 100644 index 000000000..cc117f829 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/semantic_context.dart @@ -0,0 +1,399 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+import 'package:collection/collection.dart';
+
+import '../../recognizer.dart';
+import '../../rule_context.dart';
+import '../../util/murmur_hash.dart';
+
+/** A tree structure used to record the semantic context in which
+ * an ATN configuration is valid. It's either a single predicate,
+ * a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}.
+ *
+ * I have scoped the [AND], [OR], and [Predicate] subclasses of
+ * [SemanticContext] within the scope of this outer class.
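+ *
+ * The [and] and [or] factory methods below build and simplify these trees.
+ * An illustrative combination of two predicates `p1` and `p2` (the operand
+ * names are hypothetical):
+ *
+ * <pre>
+ * final both = SemanticContext.and(p1, p2);   // conjunction p1&&p2
+ * final either = SemanticContext.or(p1, p2);  // disjunction p1||p2
+ * final ok = both.eval(parser, ctx);          // short-circuiting evaluation
+ * </pre>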
+ */ +abstract class SemanticContext { + /** + * The default [SemanticContext], which is semantically equivalent to + * a predicate of the form {@code {true}?}. + */ + static const SemanticContext NONE = const Predicate(); + + const SemanticContext(); + + /** + * For context independent predicates, we evaluate them without a local + * context (i.e., null context). That way, we can evaluate them without + * having to create proper rule-specific context during prediction (as + * opposed to the parser, which creates them naturally). In a practical + * sense, this avoids a cast exception from RuleContext to myruleContext. + * + *
+ * For context dependent predicates, we must pass in a local context so that
+ * references such as $arg evaluate properly as _localctx.arg. We only
+ * capture context dependent predicates in the context in which we begin
+ * prediction, so we passed in the outer context here in case of context
+ * dependent predicate evaluation.
+ */ + bool eval(Recognizer parser, RuleContext parserCallStack); + + /** + * Evaluate the precedence predicates for the context and reduce the result. + * + * @param parser The parser instance. + * @param parserCallStack + * @return The simplified semantic context after precedence predicates are + * evaluated, which will be one of the following values. + *
    + *
+ * - {@link #NONE}: if the predicate simplifies to [true] after
+ *   precedence predicates are evaluated.
+ * - null: if the predicate simplifies to [false] after
+ *   precedence predicates are evaluated.
+ * - [this]: if the semantic context is not changed as a result of
+ *   precedence predicate evaluation.
+ * - A non-null [SemanticContext]: the new simplified
+ *   semantic context after precedence predicates are evaluated.
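+ *
+ * An illustrative reduction once the parser's precedence is known (`ctx` and
+ * `outerContext` are hypothetical):
+ *
+ * <pre>
+ * final reduced = ctx.evalPrecedence(parser, outerContext);
+ * if (reduced == SemanticContext.NONE) {
+ *   // the context is now unconditionally true; no predicate check remains
+ * }
+ * </pre>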
+ */ + SemanticContext evalPrecedence(Recognizer parser, + RuleContext parserCallStack) { + return this; + } + + static SemanticContext and(SemanticContext a, SemanticContext b) { + if (a == null || a == NONE) return b; + if (b == null || b == NONE) return a; + AND result = new AND(a, b); + if (result.opnds.length == 1) { + return result.opnds[0]; + } + + return result; + } + + /** + * + * @see ParserATNSimulator#getPredsForAmbigAlts + */ + static SemanticContext or(SemanticContext a, SemanticContext b) { + if (a == null) return b; + if (b == null) return a; + if (a == NONE || b == NONE) return NONE; + OR result = new OR(a, b); + if (result.opnds.length == 1) { + return result.opnds[0]; + } + + return result; + } + + static Iterable filterPrecedencePredicates( + Iterable collection) { + return collection.where((e) => e is PrecedencePredicate).map((e)=> e as PrecedencePredicate); + } + + static Iterable filterNonPrecedencePredicates( + Iterable collection) { + return collection.where((e) => !(e is PrecedencePredicate)); + } +} + +class Predicate extends SemanticContext { + final int ruleIndex; + final int predIndex; + final bool isCtxDependent; // e.g., $i ref in pred + + const Predicate( + [this.ruleIndex = -1, this.predIndex = -1, this.isCtxDependent = false]); + + bool eval(Recognizer parser, RuleContext parserCallStack) { + RuleContext localctx = isCtxDependent ? parserCallStack : null; + return parser.sempred(localctx, ruleIndex, predIndex); + } + + int get hashCode { + int hashCode = MurmurHash.initialize(); + hashCode = MurmurHash.update(hashCode, ruleIndex); + hashCode = MurmurHash.update(hashCode, predIndex); + hashCode = MurmurHash.update(hashCode, isCtxDependent ? 1 : 0); + hashCode = MurmurHash.finish(hashCode, 3); + return hashCode; + } + + operator ==(Object obj) { + return obj is Predicate && + this.ruleIndex == obj.ruleIndex && + this.predIndex == obj.predIndex && + this.isCtxDependent == obj.isCtxDependent; + } + + String toString() { + return "{$ruleIndex:$predIndex}?"; + } +} + +class PrecedencePredicate extends SemanticContext + implements Comparable { + final int precedence; + + PrecedencePredicate([this.precedence = 0]); + + bool eval(Recognizer parser, RuleContext parserCallStack) { + return parser.precpred(parserCallStack, precedence); + } + + SemanticContext evalPrecedence(Recognizer parser, + RuleContext parserCallStack) { + if (parser.precpred(parserCallStack, precedence)) { + return SemanticContext.NONE; + } else { + return null; + } + } + + int compareTo(PrecedencePredicate o) { + return precedence - o.precedence; + } + + get hashCode { + int hashCode = 1; + hashCode = 31 * hashCode + precedence; + return hashCode; + } + + operator ==(Object obj) { + if (!(obj is PrecedencePredicate)) { + return false; + } + PrecedencePredicate other = obj; + return this.precedence == other.precedence; + } + +// precedence >= _precedenceStack.peek() + String toString() { + return "{$precedence>=prec}?"; + } +} + +/** + * This is the base class for semantic context "operators", which operate on + * a collection of semantic context "operands". + * + * @since 4.3 + */ +abstract class Operator extends SemanticContext { + /** + * Gets the operands for the semantic context operator. + * + * @return a collection of [SemanticContext] operands for the + * operator. + * + * @since 4.3 + */ + List get operands; +} + +/** + * A semantic context which is true whenever none of the contained contexts + * is false. 
+ */ + +class AND extends Operator { + List opnds; + + AND(SemanticContext a, SemanticContext b) { + Set operands = Set(); + if (a is AND) + operands.addAll(a.opnds); + else + operands.add(a); + if (b is AND) + operands.addAll(b.opnds); + else + operands.add(b); + + Iterable precedencePredicates = + SemanticContext.filterPrecedencePredicates(operands); + + operands = SemanticContext.filterNonPrecedencePredicates(operands).toSet(); + if (!precedencePredicates.isEmpty) { + // interested in the transition with the lowest precedence + PrecedencePredicate reduced = + precedencePredicates.reduce((a, b) => a.compareTo(b) <= 0 ? a : b); + operands.add(reduced); + } + + opnds = operands.toList(); + } + + List get operands { + return opnds; + } + + operator ==(Object obj) { + if (!(obj is AND)) return false; + AND other = obj; + return ListEquality().equals(this.opnds, other.opnds); + } + + get hashCode { + return MurmurHash.getHashCode(opnds, this.runtimeType.hashCode); + } + + /** + * {@inheritDoc} + * + *
+ * The evaluation of predicates by this context is short-circuiting, but
+ * unordered.
+ */ + + bool eval(Recognizer parser, RuleContext parserCallStack) { + for (SemanticContext opnd in opnds) { + if (!opnd.eval(parser, parserCallStack)) return false; + } + return true; + } + + SemanticContext evalPrecedence(Recognizer parser, + RuleContext parserCallStack) { + bool differs = false; + List operands = []; + for (SemanticContext context in opnds) { + SemanticContext evaluated = + context.evalPrecedence(parser, parserCallStack); + differs |= (evaluated != context); + if (evaluated == null) { + // The AND context is false if any element is false + return null; + } else if (evaluated != SemanticContext.NONE) { + // Reduce the result by skipping true elements + operands.add(evaluated); + } + } + + if (!differs) { + return this; + } + + if (operands.isEmpty) { + // all elements were true, so the AND context is true + return SemanticContext.NONE; + } + + SemanticContext result = operands[0]; + for (int i = 1; i < operands.length; i++) { + result = SemanticContext.and(result, operands[i]); + } + + return result; + } + + String toString() { + return opnds.join("&&"); + } +} + +/** + * A semantic context which is true whenever at least one of the contained + * contexts is true. + */ +class OR extends Operator { + List opnds; + + OR(SemanticContext a, SemanticContext b) { + Set operands = Set(); + if (a is OR) + operands.addAll(a.opnds); + else + operands.add(a); + if (b is OR) + operands.addAll(b.opnds); + else + operands.add(b); + + Iterable precedencePredicates = + SemanticContext.filterPrecedencePredicates(operands); + + operands = SemanticContext.filterNonPrecedencePredicates(operands).toSet(); + if (!precedencePredicates.isEmpty) { + // interested in the transition with the highest precedence + PrecedencePredicate reduced = + precedencePredicates.reduce((a, b) => a.compareTo(b) >= 0 ? a : b); + operands.add(reduced); + } + + this.opnds = operands.toList(); + } + + List get operands { + return opnds; + } + + operator ==(Object obj) { + if (!(obj is OR)) return false; + OR other = obj; + return ListEquality().equals(this.opnds, other.opnds); + } + + get hashCode { + return MurmurHash.getHashCode(opnds, this.runtimeType.hashCode); + } + + /** + * {@inheritDoc} + * + *
+ * The evaluation of predicates by this context is short-circuiting, but
+ * unordered.
+ */ + + bool eval(Recognizer parser, RuleContext parserCallStack) { + for (SemanticContext opnd in opnds) { + if (opnd.eval(parser, parserCallStack)) return true; + } + return false; + } + + SemanticContext evalPrecedence(Recognizer parser, + RuleContext parserCallStack) { + bool differs = false; + List operands = []; + for (SemanticContext context in opnds) { + SemanticContext evaluated = + context.evalPrecedence(parser, parserCallStack); + differs |= (evaluated != context); + if (evaluated == SemanticContext.NONE) { + // The OR context is true if any element is true + return SemanticContext.NONE; + } else if (evaluated != null) { + // Reduce the result by skipping false elements + operands.add(evaluated); + } + } + + if (!differs) { + return this; + } + + if (operands.isEmpty) { + // all elements were false, so the OR context is false + return null; + } + + SemanticContext result = operands[0]; + for (int i = 1; i < operands.length; i++) { + result = SemanticContext.or(result, operands[i]); + } + + return result; + } + + String toString() { + return opnds.join("||"); + } +} diff --git a/runtime/Dart/lib/src/atn/src/transition.dart b/runtime/Dart/lib/src/atn/src/transition.dart new file mode 100644 index 000000000..3a13afa3b --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/transition.dart @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../interval_set.dart'; +import '../../token.dart'; +import 'atn_state.dart'; +import 'semantic_context.dart'; + +enum TransitionType { + INVALID, // 0 is not used + EPSILON, + RANGE, + RULE, + PREDICATE, // e.g., {isType(input.LT(1))}? + ATOM, + ACTION, + SET, // ~(A|B) or ~atom, wildcard, which convert to next 2 + NOT_SET, + WILDCARD, + PRECEDENCE, +} + +/** An ATN transition between any two ATN states. Subclasses define + * atom, set, epsilon, action, predicate, rule transitions. + * + *
+ * This is a one way link. It emanates from a state (usually via a list of
+ * transitions) and has a target state.
+ *
+ * Since we never have to change the ATN transitions once we construct it,
+ * we can fix these transitions as specific classes. The DFA transitions, on
+ * the other hand, need to update the labels as they add transitions to the
+ * states. We'll use the term Edge for the DFA to distinguish them from ATN
+ * transitions.
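+ *
+ * An illustrative walk over a state's outgoing transitions, assuming the
+ * [ATNState] exposes its `transitions` list as in this runtime; `state` and
+ * `symbol` are hypothetical:
+ *
+ * <pre>
+ * for (final t in state.transitions) {
+ *   if (!t.isEpsilon && t.matches(symbol, Token.MIN_USER_TOKEN_TYPE, 0xFFFF)) {
+ *     // `symbol` can be consumed along this edge, moving to t.target
+ *   }
+ * }
+ * </pre>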
+ */ +abstract class Transition { + /** The target of this transition. */ + ATNState target; + + Transition(this.target) { + if (target == null) { + throw new ArgumentError.notNull("target cannot be null."); + } + } + + TransitionType get type; + + /** + * Determines if the transition is an "epsilon" transition. + * + *
+ * The default implementation returns [false].
+ * + * @return [true] if traversing this transition in the ATN does not + * consume an input symbol; otherwise, [false] if traversing this + * transition consumes (matches) an input symbol. + */ + bool get isEpsilon => false; + + IntervalSet get label => null; + + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol); +} + +class EpsilonTransition extends Transition { + /** + * @return the rule index of a precedence rule for which this transition is + * returning from, where the precedence value is 0; otherwise, -1. + * + * @see ATNConfig#isPrecedenceFilterSuppressed() + * @see ParserATNSimulator#applyPrecedenceFilter(ATNConfigSet) + * @since 4.4.1 + */ + final int outermostPrecedenceReturn; + + EpsilonTransition(ATNState target, [this.outermostPrecedenceReturn = -1]) + : super(target); + + get isEpsilon => true; + + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) { + return false; + } + + String toString() { + return "epsilon"; + } + + @override + TransitionType get type => TransitionType.EPSILON; +} + +class RangeTransition extends Transition { + final int from; + final int to; + + RangeTransition(ATNState target, this.from, this.to) : super(target); + + IntervalSet get label { + return IntervalSet.ofRange(from, to); + } + + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) { + return symbol >= from && symbol <= to; + } + + String toString() { + return "'$from..$to'"; + } + + @override + TransitionType get type => TransitionType.RANGE; +} + +class RuleTransition extends Transition { + /** Ptr to the rule definition object for this rule ref */ + final int ruleIndex; // no Rule object at runtime + + final int precedence; + + /** What node to begin computations following ref to rule */ + ATNState followState; + + RuleTransition(RuleStartState ruleStart, this.ruleIndex, this.precedence, + this.followState) + : super(ruleStart); + + get isEpsilon => true; + + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) { + return false; + } + + @override + TransitionType get type => TransitionType.RULE; +} + +abstract class AbstractPredicateTransition extends Transition { + AbstractPredicateTransition(ATNState target) : super(target); +} + +class PredicateTransition extends AbstractPredicateTransition { + final int ruleIndex; + final int predIndex; + final bool isCtxDependent; // e.g., $i ref in pred + + PredicateTransition( + target, this.ruleIndex, this.predIndex, this.isCtxDependent) + : super(target); + + get isEpsilon => true; + + matches(symbol, minVocabSymbol, maxVocabSymbol) { + return false; + } + + get predicate => new Predicate(this.ruleIndex, this.predIndex, this.isCtxDependent); + + + toString() { + return "pred_$ruleIndex:$predIndex"; + } + + @override + TransitionType get type => TransitionType.PREDICATE; +} + +/** TODO: make all transitions sets? no, should remove set edges */ +class AtomTransition extends Transition { + /** The token type or character value; or, signifies special label. 
*/ + final int atomLabel; + + AtomTransition(ATNState target, this.atomLabel) : super(target); + + IntervalSet get label { + return IntervalSet.ofOne(atomLabel); + } + + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) { + return atomLabel == symbol; + } + + String toString() { + return label.toString(); + } + + @override + TransitionType get type => TransitionType.ATOM; +} + +class ActionTransition extends Transition { + final int ruleIndex; + final int actionIndex; + final bool isCtxDependent; // e.g., $i ref in pred + + ActionTransition(target, this.ruleIndex, + [this.actionIndex = -1, this.isCtxDependent = false]) + : super(target); + + bool get isEpsilon => + true; // we are to be ignored by analysis 'cept for predicates + + matches(symbol, minVocabSymbol, maxVocabSymbol) => false; + + toString() { + return "action_$ruleIndex:$actionIndex"; + } + + @override + TransitionType get type => TransitionType.ACTION; +} + +// A transition containing a set of values. +class SetTransition extends Transition { + IntervalSet label; + + SetTransition(ATNState target, [IntervalSet st]) : super(target) { + this.label = st ?? IntervalSet.ofOne(Token.INVALID_TYPE); + } + + matches(symbol, minVocabSymbol, maxVocabSymbol) { + return this.label.contains(symbol); + } + + toString() { + return this.label.toString(); + } + + @override + TransitionType get type => TransitionType.SET; +} + +class NotSetTransition extends SetTransition { + NotSetTransition(target, st) : super(target, st); + + matches(symbol, minVocabSymbol, maxVocabSymbol) { + return symbol >= minVocabSymbol && + symbol <= maxVocabSymbol && + !super.matches(symbol, minVocabSymbol, maxVocabSymbol); + } + + toString() { + return '~' + super.toString(); + } + + @override + TransitionType get type => TransitionType.NOT_SET; +} + +class WildcardTransition extends Transition { + WildcardTransition(target) : super(target); + + matches(symbol, minVocabSymbol, maxVocabSymbol) { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol; + } + + toString() { + return "."; + } + + @override + TransitionType get type => TransitionType.WILDCARD; +} + +class PrecedencePredicateTransition extends AbstractPredicateTransition { + final int precedence; + + PrecedencePredicateTransition(target, this.precedence) : super(target); + + get isEpsilon => true; + + matches(symbol, minVocabSymbol, maxVocabSymbol) => false; + + PrecedencePredicate get predicate { + return new PrecedencePredicate(precedence); + } + + toString() => "$precedence >= _p"; + + @override + TransitionType get type => TransitionType.PRECEDENCE; +} diff --git a/runtime/Dart/lib/src/dfa/dfa.dart b/runtime/Dart/lib/src/dfa/dfa.dart new file mode 100644 index 000000000..552ddef7d --- /dev/null +++ b/runtime/Dart/lib/src/dfa/dfa.dart @@ -0,0 +1,8 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +export 'src/dfa.dart'; +export 'src/dfa_state.dart'; diff --git a/runtime/Dart/lib/src/dfa/src/dfa.dart b/runtime/Dart/lib/src/dfa/src/dfa.dart new file mode 100644 index 000000000..092d63e65 --- /dev/null +++ b/runtime/Dart/lib/src/dfa/src/dfa.dart @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +import '../../vocabulary.dart'; +import '../../atn/atn.dart'; +import 'dfa_serializer.dart'; +import 'dfa_state.dart'; + +class DFA { + /** A set of all DFA states. Use [Map] so we can get old state back + * ([Set] only allows you to see if it's there). + */ + + Map states = {}; + + DFAState s0; + + final int decision; + + /** From which ATN state did we create this DFA? */ + + DecisionState atnStartState; + + /** + * [true] if this DFA is for a precedence decision; otherwise, + * [false]. This is the backing field for {@link #isPrecedenceDfa}. + */ + bool precedenceDfa; + + DFA(this.atnStartState, [this.decision]) { + bool precedenceDfa = false; + if (atnStartState is StarLoopEntryState) { + if ((atnStartState as StarLoopEntryState).isPrecedenceDecision) { + precedenceDfa = true; + DFAState precedenceState = new DFAState(configs: ATNConfigSet()); + precedenceState.edges = []; + precedenceState.isAcceptState = false; + precedenceState.requiresFullContext = false; + this.s0 = precedenceState; + } + } + + this.precedenceDfa = precedenceDfa; + } + + /** + * Gets whether this DFA is a precedence DFA. Precedence DFAs use a special + * start state {@link #s0} which is not stored in {@link #states}. The + * {@link DFAState#edges} array for this start state contains outgoing edges + * supplying individual start states corresponding to specific precedence + * values. + * + * @return [true] if this is a precedence DFA; otherwise, + * [false]. + * @see Parser#getPrecedence() + */ + bool isPrecedenceDfa() { + return precedenceDfa; + } + + /** + * Get the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @return The start state corresponding to the specified precedence, or + * null if no start state exists for the specified precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + DFAState getPrecedenceStartState(int precedence) { + if (!isPrecedenceDfa()) { + throw new StateError( + "Only precedence DFAs may contain a precedence start state."); + } + + // s0.edges is never null for a precedence DFA + if (precedence < 0 || precedence >= s0.edges.length) { + return null; + } + + return s0.edges[precedence]; + } + + /** + * Set the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @param startState The start state corresponding to the specified + * precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + void setPrecedenceStartState(int precedence, DFAState startState) { + if (!isPrecedenceDfa()) { + throw new StateError( + "Only precedence DFAs may contain a precedence start state."); + } + + if (precedence < 0) { + return; + } + + // synchronization on s0 here is ok. when the DFA is turned into a + // precedence DFA, s0 will be initialized once and not updated again + // s0.edges is never null for a precedence DFA + if (precedence >= s0.edges.length) { + final original = s0.edges; + s0.edges = List(precedence + 1); + List.copyRange(s0.edges, 0, original); + } + + s0.edges[precedence] = startState; + } + + /** + * Return a list of all states in this DFA, ordered by state number. + */ + + List getStates() { + List result = states.keys.toList(); + result.sort((DFAState o1, DFAState o2) { + return o1.stateNumber - o2.stateNumber; + }); + + return result; + } + + String toString([Vocabulary vocabulary]) { + vocabulary = vocabulary ?? 
VocabularyImpl.EMPTY_VOCABULARY; + if (s0 == null) { + return ""; + } + + DFASerializer serializer = new DFASerializer(this, vocabulary); + return serializer.toString(); + } + + String toLexerString() { + if (s0 == null) return ""; + DFASerializer serializer = new LexerDFASerializer(this); + return serializer.toString(); + } +} diff --git a/runtime/Dart/lib/src/dfa/src/dfa_serializer.dart b/runtime/Dart/lib/src/dfa/src/dfa_serializer.dart new file mode 100644 index 000000000..f7661f613 --- /dev/null +++ b/runtime/Dart/lib/src/dfa/src/dfa_serializer.dart @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../vocabulary.dart'; +import '../../util/utils.dart'; +import 'dfa.dart'; +import 'dfa_state.dart'; + +/** A DFA walker that knows how to dump them to serialized strings. */ +class DFASerializer { + final DFA dfa; + + final Vocabulary vocabulary; + + DFASerializer(this.dfa, this.vocabulary); + + String toString() { + if (dfa.s0 == null) return null; + StringBuffer buf = new StringBuffer(); + List states = dfa.getStates(); + for (DFAState s in states) { + int n = 0; + if (s.edges != null) n = s.edges.length; + for (int i = 0; i < n; i++) { + DFAState t = s.edges[i]; + if (t != null && t.stateNumber != 0x7FFFFFFF) { + buf.write(getStateString(s)); + String label = getEdgeLabel(i); + buf.write("-"); + buf.write(label); + buf.write("->"); + buf.write(getStateString(t)); + buf.write('\n'); + } + } + } + + String output = buf.toString(); + if (output.length == 0) return null; + //return Utils.sortLinesInString(output); + return output; + } + + String getEdgeLabel(int i) { + return vocabulary.getDisplayName(i - 1); + } + + String getStateString(DFAState s) { + int n = s.stateNumber; + final String baseStateStr = (s.isAcceptState ? ":" : "") + + "s$n" + + (s.requiresFullContext ? "^" : ""); + if (s.isAcceptState) { + if (s.predicates != null) { + return baseStateStr + "=>${arrayToString(s.predicates)}"; + } else { + return baseStateStr + "=>${s.prediction}"; + } + } else { + return baseStateStr; + } + } +} + +class LexerDFASerializer extends DFASerializer { + LexerDFASerializer(dfa) : super(dfa, VocabularyImpl.EMPTY_VOCABULARY); + + String getEdgeLabel(i) { + return "'" + String.fromCharCode(i) + "'"; + } +} diff --git a/runtime/Dart/lib/src/dfa/src/dfa_state.dart b/runtime/Dart/lib/src/dfa/src/dfa_state.dart new file mode 100644 index 000000000..13926f325 --- /dev/null +++ b/runtime/Dart/lib/src/dfa/src/dfa_state.dart @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../atn/atn.dart'; +import '../../util/murmur_hash.dart'; +import '../../util/utils.dart'; + +/** Map a predicate to a predicted alternative. */ +class PredPrediction { + var alt; + var pred; + + PredPrediction(this.pred, this.alt); + + toString() { + return "($pred, $alt)"; + } +} + +/** A DFA state represents a set of possible ATN configurations. + * As Aho, Sethi, Ullman p. 117 says "The DFA uses its state + * to keep track of all possible states the ATN can be in after + * reading each input symbol. 
+ * That is to say, after reading
+ * input a1a2..an, the DFA is in a state that represents the
+ * subset T of the states of the ATN that are reachable from the
+ * ATN's start state along some path labeled a1a2..an."
+ * In conventional NFA→DFA conversion, therefore, the subset T
+ * would be a bitset representing the set of states the
+ * ATN could be in. We need to track the alt predicted by each
+ * state as well, however. More importantly, we need to maintain
+ * a stack of states, tracking the closure operations as they
+ * jump from rule to rule, emulating rule invocations (method calls).
+ * I have to add a stack to simulate the proper lookahead sequences for
+ * the underlying LL grammar from which the ATN was derived.
+ *
+ * I use a set of ATNConfig objects, not simple states. An ATNConfig
+ * is both a state (a la normal conversion) and a RuleContext describing
+ * the chain of rules (if any) followed to arrive at that state.
+ *
+ * A DFA state may have multiple references to a particular state,
+ * but with different ATN contexts (with same or different alts)
+ * meaning that state was reached via a different set of rule invocations.
+ */ +class DFAState { + int stateNumber = -1; + + ATNConfigSet configs = new ATNConfigSet(); + + /** {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) + * {@link Token#EOF} maps to {@code edges[0]}. + */ + + List edges; + + bool isAcceptState = false; + + /** if accept state, what ttype do we match or alt do we predict? + * This is set to {@link ATN#INVALID_ALT_NUMBER} when {@link #predicates}{@code !=null} or + * {@link #requiresFullContext}. + */ + int prediction = 0; + + LexerActionExecutor lexerActionExecutor; + + /** + * Indicates that this state was created during SLL prediction that + * discovered a conflict between the configurations in the state. Future + * {@link ParserATNSimulator#execATN} invocations immediately jumped doing + * full context prediction if this field is true. + */ + bool requiresFullContext = false; + + /** During SLL parsing, this is a list of predicates associated with the + * ATN configurations of the DFA state. When we have predicates, + * {@link #requiresFullContext} is [false] since full context prediction evaluates predicates + * on-the-fly. If this is not null, then {@link #prediction} is + * {@link ATN#INVALID_ALT_NUMBER}. + * + *
+ * We only use these for non-{@link #requiresFullContext} but conflicting states. That
+ * means we know from the context (it's $ or we don't dip into outer
+ * context) that it's an ambiguity not a conflict.
+ *
+ * This list is computed by {@link ParserATNSimulator#predicateDFAState}.
+ */ + + List predicates; + + DFAState({this.stateNumber, this.configs}); + + /** Get the set of all alts mentioned by all ATN configurations in this + * DFA state. + */ + Set get altSet { + Set alts = new Set(); + if (configs != null) { + for (ATNConfig c in configs) { + alts.add(c.alt); + } + } + if (alts.isEmpty) return null; + return alts; + } + + int get hashCode { + int hash = MurmurHash.initialize(7); + hash = MurmurHash.update(hash, configs.hashCode); + hash = MurmurHash.finish(hash, 1); + return hash; + } + + /** + * Two [DFAState] instances are equal if their ATN configuration sets + * are the same. This method is used to see if a state already exists. + * + *
+ * Because the number of alternatives and number of ATN configurations are
+ * finite, there is a finite number of DFA states that can be processed.
+ * This is necessary to show that the algorithm terminates.
+ *
+ * Cannot test the DFA state numbers here because in
+ * {@link ParserATNSimulator#addDFAState} we need to know if any other state
+ * exists that has this exact set of ATN configurations. The
+ * {@link #stateNumber} is irrelevant.
+ */ + + bool operator ==(Object o) { + // compare set of ATN configurations in this set with other + if (identical(this, o)) return true; + + if (!(o is DFAState)) { + return false; + } + + DFAState other = o; + // TODO (sam): what to do when configs==null? + bool sameSet = this.configs == other.configs; +// System.out.println("DFAState.equals: "+configs+(sameSet?"==":"!=")+other.configs); + return sameSet; + } + + String toString() { + StringBuffer buf = new StringBuffer(); + buf.write("$stateNumber:$configs"); + if (isAcceptState) { + buf.write("=>"); + if (predicates != null) { + buf.write(arrayToString(predicates)); + } else { + buf.write(prediction); + } + } + return buf.toString(); + } +} diff --git a/runtime/Dart/lib/src/error/error.dart b/runtime/Dart/lib/src/error/error.dart new file mode 100644 index 000000000..8b96c26b9 --- /dev/null +++ b/runtime/Dart/lib/src/error/error.dart @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +export 'src/diagnostic_error_listener.dart'; +export 'src/error_listener.dart'; +export 'src/error_strategy.dart'; +export 'src/errors.dart'; diff --git a/runtime/Dart/lib/src/error/src/diagnostic_error_listener.dart b/runtime/Dart/lib/src/error/src/diagnostic_error_listener.dart new file mode 100644 index 000000000..7ded001e4 --- /dev/null +++ b/runtime/Dart/lib/src/error/src/diagnostic_error_listener.dart @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../atn/atn.dart'; +import '../../dfa/dfa.dart'; +import '../../interval_set.dart'; +import '../../parser.dart'; +import '../../util/bit_set.dart'; +import 'error_listener.dart'; + +/** + * This implementation of [ANTLRErrorListener] can be used to identify + * certain potential correctness and performance problems in grammars. "Reports" + * are made by calling {@link Parser#notifyErrorListeners} with the appropriate + * message. + * + *
+ * <ul>
+ * <li><b>Ambiguities</b>: These are cases where more than one path through the
+ * grammar can match the input.</li>
+ * <li><b>Weak context sensitivity</b>: These are cases where full-context
+ * prediction resolved an SLL conflict to a unique alternative which equaled the
+ * minimum alternative of the SLL conflict.</li>
+ * <li><b>Strong (forced) context sensitivity</b>: These are cases where the
+ * full-context prediction resolved an SLL conflict to a unique alternative,
+ * and the minimum alternative of the SLL conflict was found to not be
+ * a truly viable alternative. Two-stage parsing cannot be used for inputs where
+ * this situation occurs.</li>
+ * </ul>
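+ *
+ * <p>A minimal usage sketch (the generated parser name below is hypothetical;
+ * the listener only reports, it does not alter recovery):</p>
+ *
+ * <pre>
+ * final parser = MyParser(tokens);
+ * parser.addErrorListener(DiagnosticErrorListener());
+ * parser.startRule();
+ * </pre>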
+ */ +class DiagnosticErrorListener extends BaseErrorListener { + /** + * When [true], only exactly known ambiguities are reported. + */ + final bool exactOnly; + + /** + * Initializes a new instance of [DiagnosticErrorListener], specifying + * whether all ambiguities or only exact ambiguities are reported. + * + * @param exactOnly [true] to report only exact ambiguities, otherwise + * [false] to report all ambiguities. + */ + DiagnosticErrorListener([this.exactOnly = true]); + + void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, bool exact, BitSet ambigAlts, ATNConfigSet configs) { + if (exactOnly && !exact) { + return; + } + + final decision = getDecisionDescription(recognizer, dfa); + final conflictingAlts = getConflictingAlts(ambigAlts, configs); + final text = + recognizer.tokenStream.getText(Interval.of(startIndex, stopIndex)); + final message = + "reportAmbiguity d=$decision: ambigAlts=$conflictingAlts, input='$text'"; + recognizer.notifyErrorListeners(message); + } + + void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, BitSet conflictingAlts, ATNConfigSet configs) { + final decision = getDecisionDescription(recognizer, dfa); + final text = + recognizer.tokenStream.getText(Interval.of(startIndex, stopIndex)); + final message = "reportAttemptingFullContext d=$decision, input='$text'"; + recognizer.notifyErrorListeners(message); + } + + void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, int prediction, ATNConfigSet configs) { + String decision = getDecisionDescription(recognizer, dfa); + String text = + recognizer.tokenStream.getText(Interval.of(startIndex, stopIndex)); + String message = "reportContextSensitivity d=$decision, input='$text'"; + recognizer.notifyErrorListeners(message); + } + + String getDecisionDescription(Parser recognizer, DFA dfa) { + int decision = dfa.decision; + int ruleIndex = dfa.atnStartState.ruleIndex; + + final ruleNames = recognizer.ruleNames; + if (ruleIndex < 0 || ruleIndex >= ruleNames.length) { + return decision.toString(); + } + + final ruleName = ruleNames[ruleIndex]; + if (ruleName == null || ruleName.isEmpty) { + return decision.toString(); + } + + return "$decision ($ruleName)"; + } + + /** + * Computes the set of conflicting or ambiguous alternatives from a + * configuration set, if that information was not already provided by the + * parser. + * + * @param reportedAlts The set of conflicting or ambiguous alternatives, as + * reported by the parser. + * @param configs The conflicting or ambiguous configuration set. + * @return Returns [reportedAlts] if it is not null, otherwise + * returns the set of alternatives represented in [configs]. + */ + BitSet getConflictingAlts(BitSet reportedAlts, ATNConfigSet configs) { + if (reportedAlts != null) { + return reportedAlts; + } + + BitSet result = new BitSet(); + for (ATNConfig config in configs) { + result.set(config.alt); + } + + return result; + } +} diff --git a/runtime/Dart/lib/src/error/src/error_listener.dart b/runtime/Dart/lib/src/error/src/error_listener.dart new file mode 100644 index 000000000..b3103a9e7 --- /dev/null +++ b/runtime/Dart/lib/src/error/src/error_listener.dart @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +import 'dart:io'; + +import '../../atn/atn.dart'; +import '../../dfa/dfa.dart'; +import '../../parser.dart'; +import '../../recognizer.dart'; +import '../../util/bit_set.dart'; +import 'errors.dart'; + +abstract class ErrorListener { + /** + * Upon syntax error, notify any interested parties. This is not how to + * recover from errors or compute error messages. [ANTLRErrorStrategy] + * specifies how to recover from syntax errors and how to compute error + * messages. This listener's job is simply to emit a computed message, + * though it has enough information to create its own message in many cases. + * + *
+   * <p>The [RecognitionException] is non-null for all syntax errors except
+   * when we discover mismatched token errors that we can recover from
+   * in-line, without returning from the surrounding rule (via the single
+   * token insertion and deletion mechanism).</p>
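+   *
+   * <p>A sketch of a listener that collects messages instead of printing them
+   * (hypothetical class, built on the [BaseErrorListener] defined below):</p>
+   *
+   * <pre>
+   * class CollectingErrorListener extends BaseErrorListener {
+   *   final messages = <String>[];
+   *
+   *   @override
+   *   void syntaxError(Recognizer recognizer, Object offendingSymbol, int line,
+   *       int charPositionInLine, String msg, RecognitionException e) {
+   *     messages.add('line $line:$charPositionInLine $msg');
+   *   }
+   * }
+   * </pre>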
+ * + * @param recognizer + * What parser got the error. From this + * object, you can access the context as well + * as the input stream. + * @param offendingSymbol + * The offending token in the input token + * stream, unless recognizer is a lexer (then it's null). If + * no viable alternative error, [e] has token at which we + * started production for the decision. + * @param line + * The line number in the input where the error occurred. + * @param charPositionInLine + * The character position within that line where the error occurred. + * @param msg + * The message to emit. + * @param e + * The exception generated by the parser that led to + * the reporting of an error. It is null in the case where + * the parser was able to recover in line without exiting the + * surrounding rule. + */ + void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, + int charPositionInLine, String msg, RecognitionException e); + + /** + * This method is called by the parser when a full-context prediction + * results in an ambiguity. + * + *
+   * <p>Each full-context prediction which does not result in a syntax error
+   * will call either {@link #reportContextSensitivity} or
+   * {@link #reportAmbiguity}.</p>
+   *
+   * <p>When [ambigAlts] is not null, it contains the set of potentially
+   * viable alternatives identified by the prediction algorithm. When
+   * [ambigAlts] is null, use {@link ATNConfigSet#getAlts} to obtain the
+   * represented alternatives from the [configs] argument.</p>
+   *
+   * <p>When [exact] is [true], all of the potentially
+   * viable alternatives are truly viable, i.e. this is reporting an exact
+   * ambiguity. When [exact] is [false], at least two of
+   * the potentially viable alternatives are viable for the current input, but
+   * the prediction algorithm terminated as soon as it determined that at
+   * least the minimum potentially viable alternative is truly
+   * viable.</p>
+   *
+   * <p>When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction
+   * mode is used, the parser is required to identify exact ambiguities so
+   * [exact] will always be [true].</p>
+   *
+   * <p>This method is not used by lexers.</p>
+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input input where the ambiguity was identified + * @param exact [true] if the ambiguity is exactly known, otherwise + * [false]. This is always [true] when + * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used. + * @param ambigAlts the potentially ambiguous alternatives, or null + * to indicate that the potentially ambiguous alternatives are the complete + * set of represented alternatives in [configs] + * @param configs the ATN configuration set where the ambiguity was + * identified + */ + void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, bool exact, BitSet ambigAlts, ATNConfigSet configs); + + /** + * This method is called when an SLL conflict occurs and the parser is about + * to use the full context information to make an LL decision. + * + *
+   * <p>If one or more configurations in [configs] contains a semantic
+   * predicate, the predicates are evaluated before this method is called. The
+   * subset of alternatives which are still viable after predicates are
+   * evaluated is reported in [conflictingAlts].</p>
+   *
+   * <p>This method is not used by lexers.</p>
+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the SLL conflict occurred + * @param conflictingAlts The specific conflicting alternatives. If this is + * null, the conflicting alternatives are all alternatives + * represented in [configs]. At the moment, conflictingAlts is non-null + * (for the reference implementation, but Sam's optimized version can see this + * as null). + * @param configs the ATN configuration set where the SLL conflict was + * detected + */ + void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, BitSet conflictingAlts, ATNConfigSet configs); + + /** + * This method is called by the parser when a full-context prediction has a + * unique result. + * + *
+   * <p>Each full-context prediction which does not result in a syntax error
+   * will call either {@link #reportContextSensitivity} or
+   * {@link #reportAmbiguity}.</p>
+   *
+   * <p>For prediction implementations that only evaluate full-context
+   * predictions when an SLL conflict is found (including the default
+   * [ParserATNSimulator] implementation), this method reports cases
+   * where SLL conflicts were resolved to unique full-context predictions,
+   * i.e. the decision was context-sensitive. This report does not necessarily
+   * indicate a problem, and it may appear even in completely unambiguous
+   * grammars.</p>
+   *
+   * <p>[configs] may have more than one represented alternative if the
+   * full-context prediction algorithm does not evaluate predicates before
+   * beginning the full-context prediction. In all cases, the final prediction
+   * is passed as the [prediction] argument.</p>
+   *
+   * <p>Note that the definition of "context sensitivity" in this method
+   * differs from the concept in {@link DecisionInfo#contextSensitivities}.
+   * This method reports all instances where an SLL conflict occurred but LL
+   * parsing produced a unique result, whether or not that unique result
+   * matches the minimum alternative in the SLL conflicting set.</p>
+   *
+   * <p>This method is not used by lexers.</p>
+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the context sensitivity was + * finally determined + * @param prediction the unambiguous result of the full-context prediction + * @param configs the ATN configuration set where the unambiguous prediction + * was determined + */ + void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, int prediction, ATNConfigSet configs); +} + +class BaseErrorListener extends ErrorListener { + @override + void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, bool exact, BitSet ambigAlts, ATNConfigSet configs) {} + + @override + void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, BitSet conflictingAlts, ATNConfigSet configs) {} + + @override + void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, int prediction, ATNConfigSet configs) {} + + @override + void syntaxError(Recognizer recognizer, Object offendingSymbol, + int line, int charPositionInLine, String msg, RecognitionException e) {} +} + +class ConsoleErrorListener extends BaseErrorListener { + /** + * Provides a default instance of [ConsoleErrorListener]. + */ + static final INSTANCE = ConsoleErrorListener(); + + /** + * {@inheritDoc} + * + *
+   * <p>This implementation prints messages to [stderr] containing the
+   * values of [line], [charPositionInLine], and [msg] using
+   * the following format.</p>
+   *
+   * <pre>
+   * line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
+   * </pre>
+ */ + syntaxError(recognizer, offendingSymbol, line, column, msg, e) { + stderr.writeln("line $line:$column $msg"); + } +} + +/** + * This implementation of [ErrorListener] dispatches all calls to a + * collection of delegate listeners. This reduces the effort required to support multiple + * listeners. + */ +class ProxyErrorListener implements ErrorListener { + final List delegates; + + ProxyErrorListener(this.delegates) { + if (delegates == null) { + throw new ArgumentError.notNull("delegates"); + } + } + + void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, + int charPositionInLine, String msg, RecognitionException e) { + for (final listener in delegates) { + listener.syntaxError( + recognizer, offendingSymbol, line, charPositionInLine, msg, e); + } + } + + void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, bool exact, BitSet ambigAlts, ATNConfigSet configs) { + for (final listener in delegates) { + listener.reportAmbiguity( + recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } + } + + void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, BitSet conflictingAlts, ATNConfigSet configs) { + for (final listener in delegates) { + listener.reportAttemptingFullContext( + recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs); + } + } + + void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, int prediction, ATNConfigSet configs) { + for (final listener in delegates) { + listener.reportContextSensitivity( + recognizer, dfa, startIndex, stopIndex, prediction, configs); + } + } +} diff --git a/runtime/Dart/lib/src/error/src/error_strategy.dart b/runtime/Dart/lib/src/error/src/error_strategy.dart new file mode 100644 index 000000000..c9d06dff7 --- /dev/null +++ b/runtime/Dart/lib/src/error/src/error_strategy.dart @@ -0,0 +1,955 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; + +import 'package:logging/logging.dart'; + +import '../../atn/atn.dart'; +import '../../interval_set.dart'; +import '../../misc/pair.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import '../../token_stream.dart'; +import '../../tree/tree.dart'; +import 'errors.dart'; + +/** + * The interface for defining strategies to deal with syntax errors encountered + * during a parse by ANTLR-generated parsers. We distinguish between three + * different kinds of errors: + * + *
+ * <ul>
+ * <li>The parser could not figure out which path to take in the ATN (none of
+ * the available alternatives could possibly match)</li>
+ * <li>The current input does not match what we were looking for</li>
+ * <li>A predicate evaluated to false</li>
+ * </ul>
+ *
+ * <p>Implementations of this interface report syntax errors by calling
+ * {@link Parser#notifyErrorListeners}.</p>
+ *
+ * <p>TODO: what to do about lexers</p>
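+ *
+ * <p>A minimal sketch of installing a strategy (hypothetical generated parser;
+ * assumes the [Parser.errorHandler] property holds the active strategy, as in
+ * the other ANTLR runtimes):</p>
+ *
+ * <pre>
+ * final parser = MyParser(tokens);
+ * parser.errorHandler = BailErrorStrategy(); // bail out on first error
+ * </pre>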
+ */ +abstract class ErrorStrategy { + /** + * Reset the error handler state for the specified [recognizer]. + * @param recognizer the parser instance + */ + void reset(Parser recognizer); + + /** + * This method is called when an unexpected symbol is encountered during an + * inline match operation, such as {@link Parser#match}. If the error + * strategy successfully recovers from the match failure, this method + * returns the [Token] instance which should be treated as the + * successful result of the match. + * + *
+   * <p>This method handles the consumption of any tokens - the caller should
+   * not call {@link Parser#consume} after a successful recovery.</p>
+   *
+   * <p>Note that the calling code will not report an error if this method
+   * returns successfully. The error strategy implementation is responsible
+   * for calling {@link Parser#notifyErrorListeners} as appropriate.</p>
+ * + * @param recognizer the parser instance + * @ if the error strategy was not able to + * recover from the unexpected input symbol + */ + Token recoverInline(Parser recognizer); + + /** + * This method is called to recover from exception [e]. This method is + * called after {@link #reportError} by the default exception handler + * generated for a rule method. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception to recover from + * @ if the error strategy could not recover from + * the recognition exception + */ + void recover(Parser recognizer, RecognitionException e); + + /** + * This method provides the error handler with an opportunity to handle + * syntactic or semantic errors in the input stream before they result in a + * [RecognitionException]. + * + *
+   * <p>The generated code currently contains calls to {@link #sync} after
+   * entering the decision state of a closure block ({@code (...)*} or
+   * {@code (...)+}).</p>
+   *
+   * <p>For an implementation based on Jim Idle's "magic sync" mechanism, see
+   * {@link DefaultErrorStrategy#sync}.</p>
+ * + * @see DefaultErrorStrategy#sync + * + * @param recognizer the parser instance + * @ if an error is detected by the error + * strategy but cannot be automatically recovered at the current state in + * the parsing process + */ + void sync(Parser recognizer); + + /** + * Tests whether or not [recognizer] is in the process of recovering + * from an error. In error recovery mode, {@link Parser#consume} adds + * symbols to the parse tree by calling + * {@link Parser#createErrorNode(ParserRuleContext, Token)} then + * {@link ParserRuleContext#addErrorNode(ErrorNode)} instead of + * {@link Parser#createTerminalNode(ParserRuleContext, Token)}. + * + * @param recognizer the parser instance + * @return [true] if the parser is currently recovering from a parse + * error, otherwise [false] + */ + bool inErrorRecoveryMode(Parser recognizer); + + /** + * This method is called by when the parser successfully matches an input + * symbol. + * + * @param recognizer the parser instance + */ + void reportMatch(Parser recognizer); + + /** + * Report any kind of [RecognitionException]. This method is called by + * the default exception handler generated for a rule method. + * + * @param recognizer the parser instance + * @param e the recognition exception to report + */ + void reportError(Parser recognizer, RecognitionException e); +} + +/** + * This is the default implementation of [ANTLRErrorStrategy] used for + * error reporting and recovery in ANTLR parsers. + */ +class DefaultErrorStrategy implements ErrorStrategy { + /** + * Indicates whether the error strategy is currently "recovering from an + * error". This is used to suppress reporting multiple error messages while + * attempting to recover from a detected syntax error. + * + * @see #inErrorRecoveryMode + */ + bool errorRecoveryMode = false; + + /** The index into the input stream where the last error occurred. + * This is used to prevent infinite loops where an error is found + * but no token is consumed during recovery...another error is found, + * ad nauseum. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. + */ + int lastErrorIndex = -1; + + IntervalSet lastErrorStates; + + /** + * This field is used to propagate information about the lookahead following + * the previous match. Since prediction prefers completing the current rule + * to error recovery efforts, error reporting may occur later than the + * original point where it was discoverable. The original context is used to + * compute the true expected sets as though the reporting occurred as early + * as possible. + */ + ParserRuleContext nextTokensContext; + + /** + * @see #nextTokensContext + */ + int nextTokensState; + + /** + * {@inheritDoc} + * + *
+   * <p>The default implementation simply calls {@link #endErrorCondition} to
+   * ensure that the handler is not in error recovery mode.</p>
+ */ + + void reset(Parser recognizer) { + endErrorCondition(recognizer); + } + + /** + * This method is called to enter error recovery mode when a recognition + * exception is reported. + * + * @param recognizer the parser instance + */ + void beginErrorCondition(Parser recognizer) { + errorRecoveryMode = true; + } + + /** + * {@inheritDoc} + */ + + bool inErrorRecoveryMode(Parser recognizer) { + return errorRecoveryMode; + } + + /** + * This method is called to leave error recovery mode after recovering from + * a recognition exception. + * + * @param recognizer + */ + void endErrorCondition(Parser recognizer) { + errorRecoveryMode = false; + lastErrorStates = null; + lastErrorIndex = -1; + } + + /** + * {@inheritDoc} + * + *
+   * <p>The default implementation simply calls {@link #endErrorCondition}.</p>
+ */ + + void reportMatch(Parser recognizer) { + endErrorCondition(recognizer); + } + + /** + * {@inheritDoc} + * + *
+   * <p>The default implementation returns immediately if the handler is already
+   * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
+   * and dispatches the reporting task based on the runtime type of [e]
+   * according to the following table.</p>
+   *
+   * <ul>
+   * <li>[NoViableAltException]: Dispatches the call to
+   * {@link #reportNoViableAlternative}</li>
+   * <li>[InputMismatchException]: Dispatches the call to
+   * {@link #reportInputMismatch}</li>
+   * <li>[FailedPredicateException]: Dispatches the call to
+   * {@link #reportFailedPredicate}</li>
+   * <li>All other types: calls {@link Parser#notifyErrorListeners} to report
+   * the exception</li>
+   * </ul>
+ */ + + void reportError(Parser recognizer, RecognitionException e) { + // if we've already reported an error and have not matched a token + // yet successfully, don't report any errors. + if (inErrorRecoveryMode(recognizer)) { +// System.err.print("[SPURIOUS] "); + return; // don't report spurious errors + } + beginErrorCondition(recognizer); + if (e is NoViableAltException) { + reportNoViableAlternative(recognizer, e); + } else if (e is InputMismatchException) { + reportInputMismatch(recognizer, e); + } else if (e is FailedPredicateException) { + reportFailedPredicate(recognizer, e); + } else { + log("unknown recognition error type: ${e.runtimeType}", + level: Level.SEVERE.value); + recognizer.notifyErrorListeners(e.message, e.offendingToken, e); + } + } + + /** + * {@inheritDoc} + * + *
+   * <p>The default implementation resynchronizes the parser by consuming tokens
+   * until we find one in the resynchronization set--loosely the set of tokens
+   * that can follow the current rule.</p>
+ */ + + void recover(Parser recognizer, RecognitionException e) { +// System.out.println("recover in "+recognizer.getRuleInvocationStack()+ +// " index="+recognizer.inputStream.index()+ +// ", lastErrorIndex="+ +// lastErrorIndex+ +// ", states="+lastErrorStates); + if (lastErrorIndex == recognizer.inputStream.index && + lastErrorStates != null && + lastErrorStates.contains(recognizer.state)) { + // uh oh, another error at same token index and previously-visited + // state in ATN; must be a case where LT(1) is in the recovery + // token set so nothing got consumed. Consume a single token + // at least to prevent an infinite loop; this is a failsafe. +// log("seen error condition before index=, level: Level.SEVERE.value"+ +// lastErrorIndex+", states="+lastErrorStates); +// log("FAILSAFE consumes "+recognizer.getTokenNames()[recognizer.inputStream.LA(1)], level: Level.SEVERE.value); + recognizer.consume(); + } + lastErrorIndex = recognizer.inputStream.index; + if (lastErrorStates == null) lastErrorStates = new IntervalSet(); + lastErrorStates.addOne(recognizer.state); + IntervalSet followSet = getErrorRecoverySet(recognizer); + consumeUntil(recognizer, followSet); + } + + /** + * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure + * that the current lookahead symbol is consistent with what were expecting + * at this point in the ATN. You can call this anytime but ANTLR only + * generates code to check before subrules/loops and each iteration. + * + *
+   * <p>Implements Jim Idle's magic sync mechanism in closures and optional
+   * subrules. E.g.,</p>
+   *
+   * <pre>
+   * a : sync ( stuff sync )* ;
+   * sync : {consume to what can follow sync} ;
+   * </pre>
+ * + * At the start of a sub rule upon error, {@link #sync} performs single + * token deletion, if possible. If it can't do that, it bails on the current + * rule and uses the default error recovery, which consumes until the + * resynchronization set of the current rule. + * + *
+   * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
+   * with an empty alternative), then the expected set includes what follows
+   * the subrule.</p>
+   *
+   * <p>During loop iteration, it consumes until it sees a token that can start a
+   * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
+   * stay in the loop as long as possible.</p>
+   *
+   * <p><strong>ORIGINS</strong></p>
+   *
+   * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
+   * A single mismatched token or missing token would force the parser to bail
+   * out of the entire rules surrounding the loop. So, for rule</p>
+   *
+   * <pre>
+   * classDef : 'class' ID '{' member* '}'
+   * </pre>
+   *
+   * input with an extra token between members would force the parser to
+   * consume until it found the next class definition rather than the next
+   * member definition of the current class.
+   *
+   * <p>This functionality cost a little bit of effort because the parser has to
+   * compare token set at the start of the loop and at each iteration. If for
+   * some reason speed is suffering for you, you can turn off this
+   * functionality by simply overriding this method as a blank { }.</p>
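+   *
+   * <p>A sketch of that override (a hypothetical subclass of
+   * [DefaultErrorStrategy]):</p>
+   *
+   * <pre>
+   * class NoSyncStrategy extends DefaultErrorStrategy {
+   *   @override
+   *   void sync(Parser recognizer) {} // turn off pre-loop resynchronization
+   * }
+   * </pre>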
+ */ + + void sync(Parser recognizer) { + ATNState s = recognizer.interpreter.atn.states[recognizer.state]; +// log("sync @ "+s.stateNumber+"="+s.getClass().getSimpleName(), level: Level.SEVERE.value); + // If already recovering, don't try to sync + if (inErrorRecoveryMode(recognizer)) { + return; + } + + TokenStream tokens = recognizer.inputStream; + int la = tokens.LA(1); + + // try cheaper subset first; might get lucky. seems to shave a wee bit off + IntervalSet nextTokens = recognizer.getATN().nextTokens(s); + if (nextTokens.contains(la)) { + // We are sure the token matches + nextTokensContext = null; + nextTokensState = ATNState.INVALID_STATE_NUMBER; + return; + } + + if (nextTokens.contains(Token.EPSILON)) { + if (nextTokensContext == null) { + // It's possible the next token won't match; information tracked + // by sync is restricted for performance. + nextTokensContext = recognizer.context; + nextTokensState = recognizer.state; + } + return; + } + + switch (s.stateType) { + case StateType.BLOCK_START: + case StateType.STAR_BLOCK_START: + case StateType.PLUS_BLOCK_START: + case StateType.STAR_LOOP_ENTRY: + // report error and recover if possible + if (singleTokenDeletion(recognizer) != null) { + return; + } + + throw new InputMismatchException(recognizer); + + case StateType.PLUS_LOOP_BACK: + case StateType.STAR_LOOP_BACK: +// log("at loop back: "+s.getClass().getSimpleName(), level: Level.SEVERE.value); + reportUnwantedToken(recognizer); + IntervalSet expecting = recognizer.expectedTokens; + IntervalSet whatFollowsLoopIterationOrRule = + expecting | getErrorRecoverySet(recognizer); + consumeUntil(recognizer, whatFollowsLoopIterationOrRule); + break; + + default: + // do nothing if we can't identify the exact kind of ATN state + break; + } + } + + /** + * This is called by {@link #reportError} when the exception is a + * [NoViableAltException]. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ + void reportNoViableAlternative(Parser recognizer, NoViableAltException e) { + TokenStream tokens = recognizer.inputStream; + String input; + if (tokens != null) { + if (e.startToken.type == Token.EOF) + input = ""; + else + input = tokens.getTextRange(e.startToken, e.offendingToken); + } else { + input = ""; + } + String msg = "no viable alternative at input " + escapeWSAndQuote(input); + recognizer.notifyErrorListeners(msg, e.offendingToken, e); + } + + /** + * This is called by {@link #reportError} when the exception is an + * [InputMismatchException]. + * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ + void reportInputMismatch(Parser recognizer, InputMismatchException e) { + String msg = "mismatched input " + + getTokenErrorDisplay(e.offendingToken) + + " expecting " + + e.expectedTokens.toString(vocabulary: recognizer.vocabulary); + recognizer.notifyErrorListeners(msg, e.offendingToken, e); + } + + /** + * This is called by {@link #reportError} when the exception is a + * [FailedPredicateException]. 
+ * + * @see #reportError + * + * @param recognizer the parser instance + * @param e the recognition exception + */ + void reportFailedPredicate(Parser recognizer, FailedPredicateException e) { + String ruleName = + recognizer.ruleNames[recognizer.context.ruleIndex]; + String msg = "rule " + ruleName + " " + e.message; + recognizer.notifyErrorListeners(msg, e.offendingToken, e); + } + + /** + * This method is called to report a syntax error which requires the removal + * of a token from the input stream. At the time this method is called, the + * erroneous symbol is current {@code LT(1)} symbol and has not yet been + * removed from the input stream. When this method returns, + * [recognizer] is in error recovery mode. + * + *
+   * <p>This method is called when {@link #singleTokenDeletion} identifies
+   * single-token deletion as a viable recovery strategy for a mismatched
+   * input error.</p>
+   *
+   * <p>The default implementation simply returns if the handler is already in
+   * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+   * enter error recovery mode, followed by calling
+   * {@link Parser#notifyErrorListeners}.</p>
+ * + * @param recognizer the parser instance + */ + void reportUnwantedToken(Parser recognizer) { + if (inErrorRecoveryMode(recognizer)) { + return; + } + + beginErrorCondition(recognizer); + + Token t = recognizer.currentToken; + String tokenName = getTokenErrorDisplay(t); + IntervalSet expecting = getExpectedTokens(recognizer); + String msg = "extraneous input " + + tokenName + + " expecting " + + expecting.toString(vocabulary: recognizer.vocabulary); + recognizer.notifyErrorListeners(msg, t, null); + } + + /** + * This method is called to report a syntax error which requires the + * insertion of a missing token into the input stream. At the time this + * method is called, the missing token has not yet been inserted. When this + * method returns, [recognizer] is in error recovery mode. + * + *
+   * <p>This method is called when {@link #singleTokenInsertion} identifies
+   * single-token insertion as a viable recovery strategy for a mismatched
+   * input error.</p>
+   *
+   * <p>The default implementation simply returns if the handler is already in
+   * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+   * enter error recovery mode, followed by calling
+   * {@link Parser#notifyErrorListeners}.</p>
+ * + * @param recognizer the parser instance + */ + void reportMissingToken(Parser recognizer) { + if (inErrorRecoveryMode(recognizer)) { + return; + } + + beginErrorCondition(recognizer); + + Token t = recognizer.currentToken; + IntervalSet expecting = getExpectedTokens(recognizer); + String msg = "missing " + + expecting.toString(vocabulary: recognizer.vocabulary) + + " at " + + getTokenErrorDisplay(t); + + recognizer.notifyErrorListeners(msg, t, null); + } + + /** + * {@inheritDoc} + * + *
+   * <p>The default implementation attempts to recover from the mismatched input
+   * by using single token insertion and deletion as described below. If the
+   * recovery attempt fails, this method throws an
+   * [InputMismatchException].</p>
+   *
+   * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
+   *
+   * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
+   * right token, however, then assume {@code LA(1)} is some extra spurious
+   * token and delete it. Then consume and return the next token (which was
+   * the {@code LA(2)} token) as the successful result of the match operation.</p>
+   *
+   * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
+   *
+   * <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
+   *
+   * <p>If current token (at {@code LA(1)}) is consistent with what could come
+   * after the expected {@code LA(1)} token, then assume the token is missing
+   * and use the parser's [TokenFactory] to create it on the fly. The
+   * "insertion" is performed by returning the created token as the successful
+   * result of the match operation.</p>
+   *
+   * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
+   *
+   * <p><strong>EXAMPLE</strong></p>
+   *
+   * <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
+   * the parser returns from the nested call to [expr], it will have
+   * call chain:</p>
+   *
+   * <pre>
+   * stat → expr → atom
+   * </pre>
+   *
+   * and it will be trying to match the {@code ')'} at this point in the
+   * derivation:
+   *
+   * <pre>
+   * => ID '=' '(' INT ')' ('+' atom)* ';'
+   *                    ^
+   * </pre>
+ * + * The attempt to match {@code ')'} will fail when it sees {@code ';'} and + * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} + * is in the set of tokens that can follow the {@code ')'} token reference + * in rule [atom]. It can assume that you forgot the {@code ')'}. + */ + + Token recoverInline(Parser recognizer) { +// SINGLE TOKEN DELETION + Token matchedSymbol = singleTokenDeletion(recognizer); + if (matchedSymbol != null) { +// we have deleted the extra token. +// now, move past ttype token as if all were ok + recognizer.consume(); + return matchedSymbol; + } + +// SINGLE TOKEN INSERTION + if (singleTokenInsertion(recognizer)) { + return getMissingSymbol(recognizer); + } + +// even that didn't work; must throw the exception + InputMismatchException e; + if (nextTokensContext == null) { + e = new InputMismatchException(recognizer); + } else { + e = new InputMismatchException( + recognizer, nextTokensState, nextTokensContext); + } + + throw e; + } + + /** + * This method implements the single-token insertion inline error recovery + * strategy. It is called by {@link #recoverInline} if the single-token + * deletion strategy fails to recover from the mismatched input. If this + * method returns [true], [recognizer] will be in error recovery + * mode. + * + *
+   * <p>This method determines whether or not single-token insertion is viable by
+   * checking if the {@code LA(1)} input symbol could be successfully matched
+   * if it were instead the {@code LA(2)} symbol. If this method returns
+   * [true], the caller is responsible for creating and inserting a
+   * token with the correct type to produce this behavior.</p>
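+   *
+   * <p>Worked sketch: if the parser expects {@code ';' ID} but sees {@code ID}
+   * at {@code LA(1)}, the {@code ID} would match as {@code LA(2)}, so a missing
+   * {@code ';'} is assumed, reported via {@link #reportMissingToken}, and
+   * conjured by {@link #getMissingSymbol}.</p>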
+ * + * @param recognizer the parser instance + * @return [true] if single-token insertion is a viable recovery + * strategy for the current mismatched input, otherwise [false] + */ + bool singleTokenInsertion(Parser recognizer) { + int currentSymbolType = recognizer.inputStream.LA(1); + // if current token is consistent with what could come after current + // ATN state, then we know we're missing a token; error recovery + // is free to conjure up and insert the missing token + ATNState currentState = + recognizer.interpreter.atn.states[recognizer.state]; + ATNState next = currentState.transition(0).target; + ATN atn = recognizer.interpreter.atn; + IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer.context); +// System.out.println("LT(2) set="+expectingAtLL2.toString(recognizer.getTokenNames())); + if (expectingAtLL2.contains(currentSymbolType)) { + reportMissingToken(recognizer); + return true; + } + return false; + } + + /** + * This method implements the single-token deletion inline error recovery + * strategy. It is called by {@link #recoverInline} to attempt to recover + * from mismatched input. If this method returns null, the parser and error + * handler state will not have changed. If this method returns non-null, + * [recognizer] will not be in error recovery mode since the + * returned token was a successful match. + * + *
+   * <p>If the single-token deletion is successful, this method calls
+   * {@link #reportUnwantedToken} to report the error, followed by
+   * {@link Parser#consume} to actually "delete" the extraneous token. Then,
+   * before returning {@link #reportMatch} is called to signal a successful
+   * match.</p>
+ * + * @param recognizer the parser instance + * @return the successfully matched [Token] instance if single-token + * deletion successfully recovers from the mismatched input, otherwise + * null + */ + Token singleTokenDeletion(Parser recognizer) { + int nextTokenType = recognizer.inputStream.LA(2); + IntervalSet expecting = getExpectedTokens(recognizer); + if (expecting.contains(nextTokenType)) { + reportUnwantedToken(recognizer); + /* + log("recoverFromMismatchedToken deleting , level: Level.SEVERE.value"+ + ((TokenStream)recognizer.inputStream).LT(1)+ + " since "+((TokenStream)recognizer.inputStream).LT(2)+ + " is what we want"); + */ + recognizer.consume(); // simply delete extra token + // we want to return the token we're actually matching + Token matchedSymbol = recognizer.currentToken; + reportMatch(recognizer); // we know current token is correct + return matchedSymbol; + } + return null; + } + + /** Conjure up a missing token during error recovery. + * + * The recognizer attempts to recover from single missing + * symbols. But, actions might refer to that missing symbol. + * For example, x=ID {f($x);}. The action clearly assumes + * that there has been an identifier matched previously and that + * $x points at that token. If that token is missing, but + * the next token in the stream is what we want we assume that + * this token is missing and we keep going. Because we + * have to return some token to replace the missing token, + * we have to conjure one up. This method gives the user control + * over the tokens returned for missing tokens. Mostly, + * you will want to create something special for identifier + * tokens. For literals such as '{' and ',', the default + * action in the parser or tree parser works. It simply creates + * a CommonToken of the appropriate type. The text will be the token. + * If you change what tokens must be created by the lexer, + * override this method to create the appropriate tokens. + */ + Token getMissingSymbol(Parser recognizer) { + Token currentSymbol = recognizer.currentToken; + IntervalSet expecting = getExpectedTokens(recognizer); + int expectedTokenType = Token.INVALID_TYPE; + if (!expecting.isNil) { + expectedTokenType = expecting.minElement; // get any element + } + String tokenText; + if (expectedTokenType == Token.EOF) + tokenText = ""; + else + tokenText = ""; + Token current = currentSymbol; + Token lookback = recognizer.inputStream.LT(-1); + if (current.type == Token.EOF && lookback != null) { + current = lookback; + } + return recognizer.tokenFactory.create( + expectedTokenType, + tokenText, + Pair(current.tokenSource, current.tokenSource.inputStream), + Token.DEFAULT_CHANNEL, + -1, + -1, + current.line, + current.charPositionInLine); + } + + IntervalSet getExpectedTokens(Parser recognizer) { + return recognizer.expectedTokens; + } + + /** How should a token be displayed in an error message? The default + * is to display just the text, but during development you might + * want to have a lot of information spit out. Override in that case + * to use t.toString() (which, for CommonToken, dumps everything about + * the token). This is better than forcing you to override a method in + * your token objects because you don't have to go modify your lexer + * so that it creates a new Java type. 
+ */ + String getTokenErrorDisplay(Token t) { + if (t == null) return ""; + String s = getSymbolText(t); + if (s == null) { + if (getSymbolType(t) == Token.EOF) { + s = ""; + } else { + s = "<${getSymbolType(t)}>"; + } + } + return escapeWSAndQuote(s); + } + + String getSymbolText(Token symbol) { + return symbol.text; + } + + int getSymbolType(Token symbol) { + return symbol.type; + } + + String escapeWSAndQuote(String s) { +// if ( s==null ) return s; + s = s.replaceAll("\n", r"\n"); + s = s.replaceAll("\r", r"\r"); + s = s.replaceAll("\t", r"\t"); + return "'" + s + "'"; + } + +/* Compute the error recovery set for the current rule. During + * rule invocation, the parser pushes the set of tokens that can + * follow that rule reference on the stack; this amounts to + * computing FIRST of what follows the rule reference in the + * enclosing rule. See LinearApproximator.FIRST(). + * This local follow set only includes tokens + * from within the rule; i.e., the FIRST computation done by + * ANTLR stops at the end of a rule. + * + * EXAMPLE + * + * When you find a "no viable alt exception", the input is not + * consistent with any of the alternatives for rule r. The best + * thing to do is to consume tokens until you see something that + * can legally follow a call to r *or* any rule that called r. + * You don't want the exact set of viable next tokens because the + * input might just be missing a token--you might consume the + * rest of the input looking for one of the missing tokens. + * + * Consider grammar: + * + * a : '[' b ']' + * | '(' b ')' + * ; + * b : c '^' INT ; + * c : ID + * | INT + * ; + * + * At each rule invocation, the set of tokens that could follow + * that rule is pushed on a stack. Here are the various + * context-sensitive follow sets: + * + * FOLLOW(b1_in_a) = FIRST(']') = ']' + * FOLLOW(b2_in_a) = FIRST(')') = ')' + * FOLLOW(c_in_b) = FIRST('^') = '^' + * + * Upon erroneous input "[]", the call chain is + * + * a -> b -> c + * + * and, hence, the follow context stack is: + * + * depth follow set start of rule execution + * 0 a (from main()) + * 1 ']' b + * 2 '^' c + * + * Notice that ')' is not included, because b would have to have + * been called from a different context in rule a for ')' to be + * included. + * + * For error recovery, we cannot consider FOLLOW(c) + * (context-sensitive or otherwise). We need the combined set of + * all context-sensitive FOLLOW sets--the set of all tokens that + * could follow any reference in the call chain. We need to + * resync to one of those tokens. Note that FOLLOW(c)='^' and if + * we resync'd to that token, we'd consume until EOF. We need to + * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + * In this case, for input "[]", LA(1) is ']' and in the set, so we would + * not consume anything. After printing an error, rule c would + * return normally. Rule b would not find the required '^' though. + * At this point, it gets a mismatched token error and throws an + * exception (since LA(1) is not in the viable following token + * set). The rule exception handler tries to recover, but finds + * the same recovery set and doesn't consume anything. Rule b + * exits normally returning to rule a. Now it finds the ']' (and + * with the successful match exits errorRecovery mode). + * + * So, you can see that the parser walks up the call chain looking + * for the token that was a member of the recovery set. + * + * Errors are not generated in errorRecovery mode. 
+ * + * ANTLR's error recovery mechanism is based upon original ideas: + * + * "Algorithms + Data Structures = Programs" by Niklaus Wirth + * + * and + * + * "A note on error recovery in recursive descent parsers": + * http://portal.acm.org/citation.cfm?id=947902.947905 + * + * Later, Josef Grosch had some good ideas: + * + * "Efficient and Comfortable Error Recovery in Recursive Descent + * Parsers": + * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + * + * Like Grosch I implement context-sensitive FOLLOW sets that are combined + * at run-time upon error to avoid overhead during parsing. + */ + IntervalSet getErrorRecoverySet(Parser recognizer) { + ATN atn = recognizer.interpreter.atn; + RuleContext ctx = recognizer.context; + IntervalSet recoverSet = new IntervalSet(); + while (ctx != null && ctx.invokingState >= 0) { + // compute what follows who invoked us + ATNState invokingState = atn.states[ctx.invokingState]; + RuleTransition rt = invokingState.transition(0); + IntervalSet follow = atn.nextTokens(rt.followState); + recoverSet.addAll(follow); + ctx = ctx.parent; + } + recoverSet.remove(Token.EPSILON); +// System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames())); + return recoverSet; + } + + /** Consume tokens until one matches the given token set. */ + void consumeUntil(Parser recognizer, IntervalSet set) { +// log("consumeUntil("+set.toString(recognizer.getTokenNames())+")", level: Level.SEVERE.value); + int ttype = recognizer.inputStream.LA(1); + while (ttype != Token.EOF && !set.contains(ttype)) { + //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); +// recognizer.inputStream.consume(); + recognizer.consume(); + ttype = recognizer.inputStream.LA(1); + } + } +} + +/** + * This implementation of [ANTLRErrorStrategy] responds to syntax errors + * by immediately canceling the parse operation with a + * [ParseCancellationException]. The implementation ensures that the + * {@link ParserRuleContext#exception} field is set for all parse tree nodes + * that were not completed prior to encountering the error. + * + *
+ *
+ * <p>This error strategy is useful in the following scenarios.</p>
+ *
+ * <ul>
+ * <li><b>Two-stage parsing:</b> This error strategy allows the first
+ * stage of two-stage parsing to immediately terminate if an error is
+ * encountered, and immediately fall back to the second stage. In addition to
+ * avoiding wasted work by attempting to recover from errors here, the empty
+ * implementation of {@link BailErrorStrategy#sync} improves the performance of
+ * the first stage.</li>
+ * <li><b>Silent validation:</b> When syntax errors are not being
+ * reported or logged, and the parse result is simply ignored if errors occur,
+ * the [BailErrorStrategy] avoids wasting work on recovering from errors
+ * when the result will be ignored either way.</li>
+ * </ul>
+ *
+ * <p>{@code myparser.errorHandler = BailErrorStrategy();}</p>
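+ *
+ * <p>A sketch of the two-stage pattern this enables (hypothetical generated
+ * parser and start rule; assumes [PredictionMode] and the interpreter's
+ * predictionMode field as in the other runtimes):</p>
+ *
+ * <pre>
+ * parser.interpreter.predictionMode = PredictionMode.SLL;
+ * parser.errorHandler = BailErrorStrategy();
+ * ParserRuleContext tree;
+ * try {
+ *   tree = parser.startRule();            // fast SLL attempt
+ * } on ParseCancellationException {
+ *   tokens.seek(0);                       // rewind the input
+ *   parser.reset();
+ *   parser.errorHandler = DefaultErrorStrategy();
+ *   parser.interpreter.predictionMode = PredictionMode.LL;
+ *   tree = parser.startRule();            // full LL reparse
+ * }
+ * </pre>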
+ * + * @see Parser#setErrorHandler(ANTLRErrorStrategy) + */ +class BailErrorStrategy extends DefaultErrorStrategy { + /** Instead of recovering from exception [e], re-throw it wrapped + * in a [ParseCancellationException] so it is not caught by the + * rule function catches. Use {@link Exception#getCause()} to get the + * original [RecognitionException]. + */ + + void recover(Parser recognizer, RecognitionException e) { + for (ParserRuleContext context = recognizer.context; + context != null; + context = context.parent) { + context.exception = e; + } + + throw new ParseCancellationException(e.message); + } + + /** Make sure we don't attempt to recover inline; if the parser + * successfully recovers, it won't throw an exception. + */ + + Token recoverInline(Parser recognizer) { + InputMismatchException e = new InputMismatchException(recognizer); + for (ParserRuleContext context = recognizer.context; + context != null; + context = context.parent) { + context.exception = e; + } + + throw new ParseCancellationException(e.message); + } + + /** Make sure we don't attempt to recover from problems in subrules. */ + + void sync(Parser recognizer) {} +} diff --git a/runtime/Dart/lib/src/error/src/errors.dart b/runtime/Dart/lib/src/error/src/errors.dart new file mode 100644 index 000000000..3acde4b22 --- /dev/null +++ b/runtime/Dart/lib/src/error/src/errors.dart @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../atn/atn.dart'; +import '../../input_stream.dart'; +import '../../interval_set.dart'; +import '../../lexer.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../recognizer.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import '../../token_stream.dart'; +import '../../util/utils.dart'; + +/** The root of the ANTLR exception hierarchy. In general, ANTLR tracks just + * 3 kinds of errors: prediction errors, failed predicate errors, and + * mismatched input errors. In each case, the parser knows where it is + * in the input, where it is in the ATN, the rule invocation stack, + * and what kind of problem occurred. + */ +class RecognitionException extends StateError { + /** + * Gets the [Recognizer] where this exception occurred. + * + *
+   * <p>If the recognizer is not available, this method returns null.</p>
+ * + * @return The recognizer where this exception occurred, or null if + * the recognizer is not available. + */ + final Recognizer recognizer; + + /** + * Gets the [RuleContext] at the time this exception was thrown. + * + *
+   * <p>If the context is not available, this method returns null.</p>
+ * + * @return The [RuleContext] at the time this exception was thrown. + * If the context is not available, this method returns null. + */ + final RuleContext ctx; + + /** + * Gets the input stream which is the symbol source for the recognizer where + * this exception was thrown. + * + *
+   * <p>If the input stream is not available, this method returns null.</p>
+ * + * @return The input stream which is the symbol source for the recognizer + * where this exception was thrown, or null if the stream is not + * available. + */ + final IntStream inputStream; + + /** + * The current [Token] when an error occurred. Since not all streams + * support accessing symbols by index, we have to track the [Token] + * instance itself. + */ + Token offendingToken; + + /** + * Get the ATN state number the parser was in at the time the error + * occurred. For [NoViableAltException] and + * [LexerNoViableAltException] exceptions, this is the + * [DecisionState] number. For others, it is the state whose outgoing + * edge we couldn't match. + * + *
+   * <p>If the state number is not known, this method returns -1.</p>
+ */ + int offendingState = -1; + + RecognitionException(this.recognizer, this.inputStream, this.ctx, + [String message = ""]) + : super(message) { + if (recognizer != null) this.offendingState = recognizer.state; + } + + /** + * Gets the set of input symbols which could potentially follow the + * previously matched symbol at the time this exception was thrown. + * + *
+   * <p>If the set of expected tokens is not known and could not be computed,
+   * this method returns null.</p>
+ * + * @return The set of token types that could potentially follow the current + * state in the ATN, or null if the information is not available. + */ + IntervalSet get expectedTokens { + if (recognizer != null) { + return recognizer.getATN().getExpectedTokens(offendingState, ctx); + } + return null; + } +} + +class LexerNoViableAltException extends RecognitionException { + /** Matching attempted at what input index? */ + final int startIndex; + + /** Which configurations did we try at input.index() that couldn't match input.LA(1)? */ + final ATNConfigSet deadEndConfigs; + + LexerNoViableAltException( + Lexer lexer, CharStream input, this.startIndex, this.deadEndConfigs) + : super(lexer, input, null); + + CharStream get inputStream { + return super.inputStream; + } + + String toString() { + String symbol = ""; + if (startIndex >= 0 && startIndex < inputStream.size) { + symbol = inputStream.getText(Interval.of(startIndex, startIndex)); + symbol = escapeWhitespace(symbol); + } + + return "${LexerNoViableAltException}('${symbol}')"; + } +} + +/** Indicates that the parser could not decide which of two or more paths + * to take based upon the remaining input. It tracks the starting token + * of the offending input and also knows where the parser was + * in the various paths when the error. Reported by reportNoViableAlternative() + */ +class NoViableAltException extends RecognitionException { + /** Which configurations did we try at input.index() that couldn't match input.LT(1)? */ + + final ATNConfigSet deadEndConfigs; + + /** The token object at the start index; the input stream might + * not be buffering tokens so get a reference to it. (At the + * time the error occurred, of course the stream needs to keep a + * buffer all of the tokens but later we might not have access to those.) + */ + + final Token startToken; + +// NoViableAltException(Parser recognizer) { // LL(1) error +// this(recognizer, +// recognizer.inputStream, +// recognizer.getCurrentToken(), +// recognizer.getCurrentToken(), +// null, +// recognizer._ctx); +// } + + NoViableAltException._(Parser recognizer, TokenStream input, this.startToken, + Token offendingToken, this.deadEndConfigs, ParserRuleContext ctx) + : super(recognizer, input, ctx) { + this.offendingToken = offendingToken; + } + + NoViableAltException(Parser recognizer, + [TokenStream input, + Token startToken, + Token offendingToken, + ATNConfigSet deadEndConfigs, + ParserRuleContext ctx]) + : this._( + recognizer, + input ?? recognizer.inputStream, + startToken ?? recognizer.currentToken, + offendingToken ?? recognizer.currentToken, + deadEndConfigs ?? null, + ctx ?? recognizer.context); +} + +/** This signifies any kind of mismatched input exceptions such as + * when the current input does not match the expected token. + */ +class InputMismatchException extends RecognitionException { + InputMismatchException(Parser recognizer, + [int state = -1, ParserRuleContext ctx]) + : super(recognizer, recognizer.inputStream, ctx ?? recognizer.context) { + if (state != -1 && ctx != null) { + this.offendingState = state; + } + this.offendingToken = recognizer.currentToken; + } +} + +/** A semantic predicate failed during validation. Validation of predicates + * occurs when normally parsing the alternative just like matching a token. + * Disambiguating predicate evaluation occurs when we test a predicate during + * prediction. 
+ */ +class FailedPredicateException extends RecognitionException { + int ruleIndex; + int predIndex; + final String predicate; + + FailedPredicateException(Parser recognizer, + [this.predicate = null, String message = null]) + : super(recognizer, recognizer.inputStream, recognizer.context, + formatMessage(predicate, message)) { + ATNState s = recognizer.interpreter.atn.states[recognizer.state]; + + AbstractPredicateTransition trans = s.transition(0); + if (trans is PredicateTransition) { + this.ruleIndex = (trans as PredicateTransition).ruleIndex; + this.predIndex = (trans as PredicateTransition).predIndex; + } + this.offendingToken = recognizer.currentToken; + } + + static String formatMessage(String predicate, String message) { + if (message != null) { + return message; + } + + return "failed predicate: {$predicate}?"; + } +} diff --git a/runtime/Dart/lib/src/input_stream.dart b/runtime/Dart/lib/src/input_stream.dart new file mode 100644 index 000000000..580c2a3ee --- /dev/null +++ b/runtime/Dart/lib/src/input_stream.dart @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:async'; +import 'dart:convert'; +import 'dart:io'; +import 'dart:math'; + +import 'interval_set.dart'; +import 'token.dart'; + +/** + * A simple stream of symbols whose values are represented as integers. This + * interface provides marked ranges with support for a minimum level + * of buffering necessary to implement arbitrary lookahead during prediction. + * For more information on marked ranges, see {@link #mark}. + * + *
+ * <p><strong>Initializing Methods:</strong> Some methods in this interface have
+ * unspecified behavior if no call to an initializing method has occurred after
+ * the stream was constructed. The following is a list of initializing methods:</p>
+ *
+ * <ul>
+ * <li>{@link #LA}</li>
+ * <li>{@link #consume}</li>
+ * <li>{@link #size}</li>
+ * </ul>
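+ *
+ * <p>A hedged sketch (assuming the [InputStream.fromString] constructor added
+ * elsewhere in this patch): construct the stream, then make one of the
+ * initializing calls before using the rest of the API.</p>
+ *
+ * <pre>
+ * final input = InputStream.fromString('ab');
+ * final first = input.LA(1); // initializing call
+ * </pre>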
+ */ +abstract class IntStream { + /** + * The value returned by {@link #LA LA()} when the end of the stream is + * reached. + */ + static const int EOF = -1; + + /** + * The value returned by {@link #getSourceName} when the actual name of the + * underlying source is not known. + */ + static const UNKNOWN_SOURCE_NAME = ""; + + /** + * Consumes the current symbol in the stream. This method has the following + * effects: + * + *
+   * <ul>
+   * <li><strong>Forward movement:</strong> The value of {@link #index index()}
+   * before calling this method is less than the value of {@code index()}
+   * after calling this method.</li>
+   * <li><strong>Ordered lookahead:</strong> The value of {@code LA(1)} before
+   * calling this method becomes the value of {@code LA(-1)} after calling
+   * this method.</li>
+   * </ul>
+ * + * Note that calling this method does not guarantee that {@code index()} is + * incremented by exactly 1, as that would preclude the ability to implement + * filtering streams (e.g. [CommonTokenStream] which distinguishes + * between "on-channel" and "off-channel" tokens). + * + * @throws IllegalStateException if an attempt is made to consume the + * end of the stream (i.e. if {@code LA(1)==}{@link #EOF EOF} before calling + * [consume]). + */ + void consume(); + + /** + * Gets the value of the symbol at offset [i] from the current + * position. When {@code i==1}, this method returns the value of the current + * symbol in the stream (which is the next symbol to be consumed). When + * {@code i==-1}, this method returns the value of the previously read + * symbol in the stream. It is not valid to call this method with + * {@code i==0}, but the specific behavior is unspecified because this + * method is frequently called from performance-critical code. + * + *

+ * <p>This method is guaranteed to succeed if any of the following are true:</p>
+ *
+ * <ul>
+ *   <li>{@code i>0}</li>
+ *   <li>{@code i==-1} and {@link #index index()} returns a value greater
+ *   than the value of {@code index()} after the stream was constructed
+ *   and {@code LA(1)} was called in that order. Specifying the current
+ *   {@code index()} relative to the index after the stream was created
+ *   allows for filtering implementations that do not return every symbol
+ *   from the underlying source. Specifying the call to {@code LA(1)}
+ *   allows for lazily initialized streams.</li>
+ *   <li>{@code LA(i)} refers to a symbol consumed within a marked region
+ *   that has not yet been released.</li>
+ * </ul>
+ *
+ * <p>If [i] represents a position at or beyond the end of the stream,
+ * this method returns {@link #EOF}.</p>
+ *
+ * <p>The return value is unspecified if {@code i<0} and fewer than {@code -i}
+ * calls to {@link #consume consume()} have occurred from the beginning of
+ * the stream before calling this method.</p>
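+ *
+ * <p>A short sketch of the offset convention, assuming the concrete
+ * [InputStream] defined later in this file over the text "ab":</p>
+ * <pre>
+ * final stream = InputStream.fromString("ab");
+ * stream.consume();
+ * stream.LA(-1); // 97, the code unit of 'a' (the symbol just consumed)
+ * stream.LA(1);  // 98, the code unit of 'b' (the next symbol to consume)
+ * </pre>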

+ * + * @throws UnsupportedOperationException if the stream does not support + * retrieving the value of the specified symbol + */ + int LA(int i); + + /** + * A mark provides a guarantee that {@link #seek seek()} operations will be + * valid over a "marked range" extending from the index where {@code mark()} + * was called to the current {@link #index index()}. This allows the use of + * streaming input sources by specifying the minimum buffering requirements + * to support arbitrary lookahead during prediction. + * + *

+ * <p>The returned mark is an opaque handle (type [int]) which is passed
+ * to {@link #release release()} when the guarantees provided by the marked
+ * range are no longer necessary. When calls to
+ * {@code mark()}/{@code release()} are nested, the marks must be released
+ * in reverse order of which they were obtained. Since marked regions are
+ * used during performance-critical sections of prediction, the specific
+ * behavior of invalid usage is unspecified (i.e. a mark is not released, or
+ * a mark is released twice, or marks are not released in reverse order from
+ * which they were created).</p>
+ *
+ * <p>The behavior of this method is unspecified if no call to an
+ * {@link IntStream initializing method} has occurred after this stream was
+ * constructed.</p>
+ *
+ * <p>This method does not change the current position in the input stream.</p>
+ *
+ * <p>The following example shows the use of {@link #mark mark()},
+ * {@link #release release(mark)}, {@link #index index()}, and
+ * {@link #seek seek(index)} as part of an operation to safely work within a
+ * marked region, then restore the stream position to its original value and
+ * release the mark.</p>
+ * <pre>
+   * IntStream stream = ...;
+   * int index = -1;
+   * int mark = stream.mark();
+   * try {
+   *   index = stream.index; // index is a getter in the Dart port
+   *   // perform work here...
+   * } finally {
+   *   if (index != -1) {
+   *     stream.seek(index);
+   *   }
+   *   stream.release(mark);
+   * }
+   * </pre>
+ * + * @return An opaque marker which should be passed to + * {@link #release release()} when the marked range is no longer required. + */ + int mark(); + + /** + * This method releases a marked range created by a call to + * {@link #mark mark()}. Calls to {@code release()} must appear in the + * reverse order of the corresponding calls to {@code mark()}. If a mark is + * released twice, or if marks are not released in reverse order of the + * corresponding calls to {@code mark()}, the behavior is unspecified. + * + *

For more information and an example, see {@link #mark}.

+ * + * @param marker A marker returned by a call to {@code mark()}. + * @see #mark + */ + void release(int marker); + + /** + * Return the index into the stream of the input symbol referred to by + * {@code LA(1)}. + * + *

The behavior of this method is unspecified if no call to an + * {@link IntStream initializing method} has occurred after this stream was + * constructed.

+ */ + int get index; + + /** + * Set the input cursor to the position indicated by [index]. If the + * specified index lies past the end of the stream, the operation behaves as + * though [index] was the index of the EOF symbol. After this method + * returns without throwing an exception, then at least one of the following + * will be true. + * + *
+ * <ul>
+ *   <li>{@link #index index()} will return the index of the first symbol
+ *   appearing at or after the specified [index]. Specifically,
+ *   implementations which filter their sources should automatically
+ *   adjust [index] forward the minimum amount required for the
+ *   operation to target a non-ignored symbol.</li>
+ *   <li>{@code LA(1)} returns {@link #EOF}</li>
+ * </ul>
+ * + * This operation is guaranteed to not throw an exception if [index] + * lies within a marked region. For more information on marked regions, see + * {@link #mark}. The behavior of this method is unspecified if no call to + * an {@link IntStream initializing method} has occurred after this stream + * was constructed. + * + * @param index The absolute index to seek to. + * + * @throws IllegalArgumentException if [index] is less than 0 + * @throws UnsupportedOperationException if the stream does not support + * seeking to the specified index + */ + void seek(int index); + + /** + * Returns the total number of symbols in the stream, including a single EOF + * symbol. + * + * @throws UnsupportedOperationException if the size of the stream is + * unknown. + */ + int get size; + + /** + * Gets the name of the underlying symbol source. This method returns a + * non-null, non-empty string. If such a name is not known, this method + * returns {@link #UNKNOWN_SOURCE_NAME}. + */ + + String get sourceName; +} + +/** A source of characters for an ANTLR lexer. */ +abstract class CharStream extends IntStream { + /** + * This method returns the text for a range of characters within this input + * stream. This method is guaranteed to not throw an exception if the + * specified [interval] lies entirely within a marked range. For more + * information about marked ranges, see {@link IntStream#mark}. + * + * @param interval an interval within the stream + * @return the text of the specified interval + * + * @throws NullPointerException if [interval] is null + * @throws IllegalArgumentException if {@code interval.a < 0}, or if + * {@code interval.b < interval.a - 1}, or if {@code interval.b} lies at or + * past the end of the stream + * @throws UnsupportedOperationException if the stream does not support + * getting the text of the specified interval + */ + String getText(Interval interval); +} + +// Vacuum all input from a string and then treat it like a buffer. +class InputStream extends CharStream { + var name = ""; + List data; + int _index = 0; + bool decodeToUnicodeCodePoints = false; + + InputStream(List data) { + this.data = data; + } + + InputStream.fromString(String data) { + this.data = data.runes.toList(growable: false); + } + + static Future fromStringStream(Stream stream) async { + final data = StringBuffer(); + await stream.listen((buf) { + data.write(buf); + }).asFuture(); + return InputStream.fromString(data.toString()); + } + + static Future fromStream(Stream> stream, + {Encoding encoding = utf8}) { + final data = stream.transform(encoding.decoder); + return fromStringStream(data); + } + + static Future fromPath(String path, {Encoding encoding = utf8}) { + return fromStream(File(path).openRead()); + } + + get index { + return this._index; + } + + get size { + return this.data.length; + } + + /// Reset the stream so that it's in the same state it was + /// when the object was created *except* the data array is not + /// touched. 
+ reset() { + this._index = 0; + } + + consume() { + if (this._index >= this.size) { + // assert this.LA(1) == Token.EOF + throw ("cannot consume EOF"); + } + this._index += 1; + } + + int LA(int offset) { + if (offset == 0) { + return 0; // undefined + } + if (offset < 0) { + offset += 1; // e.g., translate LA(-1) to use offset=0 + } + var pos = this._index + offset - 1; + if (pos < 0 || pos >= this.size) { + // invalid + return Token.EOF; + } + return this.data[pos]; + } + + /// mark/release do nothing; we have entire buffer + int mark() { + return -1; + } + + release(int marker) {} + + /// consume() ahead until p==_index; can't just set p=_index as we must + /// update line and column. If we seek backwards, just set p + seek(int _index) { + if (_index <= this._index) { + this._index = _index; // just jump; don't update stream state (line, + // ...) + return; + } + // seek forward + this._index = min(_index, this.size); + } + + String getText(Interval interval) { + final startIdx = min(interval.a, size); + final len = min(interval.b - interval.a + 1, size - startIdx); + return String.fromCharCodes(this.data, startIdx, startIdx + len); + } + + toString() { + return String.fromCharCodes(this.data); + } + + @override + String get sourceName { + // TODO: implement getSourceName + return IntStream.UNKNOWN_SOURCE_NAME; + } +} diff --git a/runtime/Dart/lib/src/interval_set.dart b/runtime/Dart/lib/src/interval_set.dart new file mode 100644 index 000000000..ae4276563 --- /dev/null +++ b/runtime/Dart/lib/src/interval_set.dart @@ -0,0 +1,750 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:math'; + +import 'package:collection/collection.dart'; + +import 'lexer.dart'; +import 'token.dart'; +import 'util/murmur_hash.dart'; +import 'vocabulary.dart'; + +/** An immutable inclusive interval a..b */ +class Interval { + static final int INTERVAL_POOL_MAX_VALUE = 1000; + + static final Interval INVALID = new Interval(-1, -2); + + static List cache = List(INTERVAL_POOL_MAX_VALUE + 1); + + int a; + int b; + + static int creates = 0; + static int misses = 0; + static int hits = 0; + static int outOfRange = 0; + + Interval(this.a, this.b); + + /** Interval objects are used readonly so share all with the + * same single value a==b up to some max size. Use an array as a perfect hash. + * Return shared object for 0..INTERVAL_POOL_MAX_VALUE or a new + * Interval object with a..a in it. On Java.g4, 218623 IntervalSets + * have a..a (set with 1 element). + */ + static Interval of(int a, int b) { + // cache just a..a + if (a != b || a < 0 || a > INTERVAL_POOL_MAX_VALUE) { + return new Interval(a, b); + } + if (cache[a] == null) { + cache[a] = new Interval(a, a); + } + return cache[a]; + } + + /** return number of elements between a and b inclusively. x..x is length 1. + * if b < a, then length is 0. 9..10 has length 2. + */ + int get length { + if (b < a) return 0; + return b - a + 1; + } + + bool operator ==(Object o) { + if (o == null || !(o is Interval)) { + return false; + } + Interval other = o; + return this.a == other.a && this.b == other.b; + } + + int get hashCode { + int hash = 23; + hash = hash * 31 + a; + hash = hash * 31 + b; + return hash; + } + + /** Does this start completely before other? 
Disjoint */ + bool startsBeforeDisjoint(Interval other) { + return this.a < other.a && this.b < other.a; + } + + /** Does this start at or before other? Nondisjoint */ + bool startsBeforeNonDisjoint(Interval other) { + return this.a <= other.a && this.b >= other.a; + } + + /** Does this.a start after other.b? May or may not be disjoint */ + bool startsAfter(Interval other) { + return this.a > other.a; + } + + /** Does this start completely after other? Disjoint */ + bool startsAfterDisjoint(Interval other) { + return this.a > other.b; + } + + /** Does this start after other? NonDisjoint */ + bool startsAfterNonDisjoint(Interval other) { + return this.a > other.a && this.a <= other.b; // this.b>=other.b implied + } + + /** Are both ranges disjoint? I.e., no overlap? */ + bool disjoint(Interval other) { + return startsBeforeDisjoint(other) || startsAfterDisjoint(other); + } + + /** Are two intervals adjacent such as 0..41 and 42..42? */ + bool adjacent(Interval other) { + return this.a == other.b + 1 || this.b == other.a - 1; + } + + bool properlyContains(Interval other) { + return other.a >= this.a && other.b <= this.b; + } + + /** Return the interval computed from combining this and other */ + Interval union(Interval other) { + return Interval.of(min(a, other.a), max(b, other.b)); + } + + /** Return the interval in common between this and o */ + Interval intersection(Interval other) { + return Interval.of(max(a, other.a), min(b, other.b)); + } + + /** Return the interval with elements from this not in other; + * other must not be totally enclosed (properly contained) + * within this, which would result in two disjoint intervals + * instead of the single one returned by this method. + */ + Interval differenceNotProperlyContained(Interval other) { + Interval diff = null; + // other.a to left of this.a (or same) + if (other.startsBeforeNonDisjoint(this)) { + diff = Interval.of(max(this.a, other.b + 1), this.b); + } + + // other.a to right of this.a + else if (other.startsAfterNonDisjoint(this)) { + diff = Interval.of(this.a, other.a - 1); + } + return diff; + } + + String toString() { + return "$a..$b"; + } +} + +/** + * This class implements the [IntervalSet] backed by a sorted array of + * non-overlapping intervals. It is particularly efficient for representing + * large collections of numbers, where the majority of elements appear as part + * of a sequential range of numbers that are all part of the set. For example, + * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }. + * + *

+ * This class is able to represent sets containing any combination of values in + * the range {@link int#MIN_VALUE} to {@link int#MAX_VALUE} + * (inclusive).
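+ *
+ * <p>A minimal usage sketch:</p>
+ * <pre>
+ * final set = IntervalSet.ofRange(1, 4); // {1..4}
+ * set.addRange(7, 8);                    // {1..4, 7..8}
+ * set.addOne(5);                         // 5 is adjacent to 1..4: {1..5, 7..8}
+ * set.contains(6);                       // false
+ * </pre>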

+ */ +class IntervalSet { + static final IntervalSet COMPLETE_CHAR_SET = + IntervalSet.ofRange(Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE) + ..setReadonly(true); + + static final IntervalSet EMPTY_SET = new IntervalSet([])..setReadonly(true); + + /** The list of sorted, disjoint intervals. */ + List intervals = []; + + bool readonly = false; + + IntervalSet([List intervals]) { + this.intervals = intervals ?? []; + } + + IntervalSet.ofSet(IntervalSet set) { + addAll(set); + } + +// TODO +// IntervalSet(int... els) { +//if ( els==null ) { +//intervals = new ArrayList(2); // most sets are 1 or 2 elements +//} +//else { +//intervals = new ArrayList(els.length); +//for (int e : els) add(e); +//} +//} + + /** Create a set with a single element, el. */ + + IntervalSet.ofOne(int a) { + addOne(a); + } + + /** Create a set with all ints within range [a..b] (inclusive) */ + static IntervalSet ofRange(int a, int b) { + IntervalSet s = new IntervalSet(); + s.addRange(a, b); + return s; + } + + void clear() { + if (readonly) throw new StateError("can't alter readonly IntervalSet"); + intervals.clear(); + } + + /** Add a single element to the set. An isolated element is stored + * as a range el..el. + */ + + void addOne(int el) { + if (readonly) throw new StateError("can't alter readonly IntervalSet"); + addRange(el, el); + } + + /** Add interval; i.e., add all integers from a to b to set. + * If b<a, do nothing. + * Keep list in sorted order (by left range value). + * If overlap, combine ranges. For example, + * If this is {1..5, 10..20}, adding 6..7 yields + * {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}. + */ + void addRange(int a, int b) { + add(Interval.of(a, b)); + } + + // copy on write so we can cache a..a intervals and sets of that + void add(Interval addition) { + if (readonly) throw new StateError("can't alter readonly IntervalSet"); + //System.out.println("add "+addition+" to "+intervals.toString()); + if (addition.b < addition.a) { + return; + } + for (int i = 0; i < intervals.length; i++) { + Interval r = intervals[i]; + if (addition == r) { + return; + } + if (addition.adjacent(r) || !addition.disjoint(r)) { + // next to each other, make a single larger interval + Interval bigger = addition.union(r); + intervals[i] = bigger; + + // make sure we didn't just create an interval that + // should be merged with next interval in list + for (i++; i < intervals.length; i++) { + Interval next = intervals[i]; + if (!bigger.adjacent(next) && bigger.disjoint(next)) { + break; + } + + // if we bump up against or overlap next, merge + intervals.removeAt(i); // remove this one + intervals[i - 1] = + bigger.union(next); // set previous to 3 merged ones + } + return; + } + if (addition.startsBeforeDisjoint(r)) { + // insert before r + intervals.insert(i, addition); + return; + } + // if disjoint and after r, a future iteration will handle it + + } + // ok, must be after last interval (and disjoint from last interval) + // just add it + intervals.add(addition); + } + + /** combine all sets in the array returned the or'd value */ + static IntervalSet or(List sets) { + IntervalSet r = new IntervalSet(); + for (IntervalSet s in sets) r.addAll(s); + return r; + } + + IntervalSet operator |(IntervalSet a) { + IntervalSet o = new IntervalSet(); + o.addAll(this); + o.addAll(a); + return o; + } + + IntervalSet addAll(IntervalSet set) { + if (set == null) { + return this; + } + + if (set is IntervalSet) { + IntervalSet other = set; + // walk set and add each interval + int n = other.intervals.length; + 
for (int i = 0; i < n; i++) { + Interval I = other.intervals[i]; + this.addRange(I.a, I.b); + } + } else { + for (int value in set.toList()) { + addOne(value); + } + } + + return this; + } + + IntervalSet complementRange(int minElement, int maxElement) { + return this.complement(IntervalSet.ofRange(minElement, maxElement)); + } + + /** {@inheritDoc} */ + IntervalSet complement(IntervalSet vocabulary) { + if (vocabulary == null || vocabulary.isNil) { + return null; // nothing in common with null set + } + IntervalSet vocabularyIS; + if (vocabulary is IntervalSet) { + vocabularyIS = vocabulary; + } else { + vocabularyIS = new IntervalSet(); + vocabularyIS.addAll(vocabulary); + } + + return vocabularyIS - this; + } + + IntervalSet operator -(IntervalSet a) { + if (a == null || a.isNil) { + return new IntervalSet.ofSet(this); + } + + if (a is IntervalSet) { + return subtract(this, a); + } + + IntervalSet other = new IntervalSet(); + other.addAll(a); + return subtract(this, other); + } + + /** + * Compute the set difference between two interval sets. The specific + * operation is {@code left - right}. If either of the input sets is + * null, it is treated as though it was an empty set. + */ + static IntervalSet subtract(IntervalSet left, IntervalSet right) { + if (left == null || left.isNil) { + return new IntervalSet(); + } + + IntervalSet result = new IntervalSet.ofSet(left); + if (right == null || right.isNil) { + // right set has no elements; just return the copy of the current set + return result; + } + + int resultI = 0; + int rightI = 0; + while ( + resultI < result.intervals.length && rightI < right.intervals.length) { + Interval resultInterval = result.intervals[resultI]; + Interval rightInterval = right.intervals[rightI]; + +// operation: (resultInterval - rightInterval) and update indexes + + if (rightInterval.b < resultInterval.a) { + rightI++; + continue; + } + + if (rightInterval.a > resultInterval.b) { + resultI++; + continue; + } + + Interval beforeCurrent = null; + Interval afterCurrent = null; + if (rightInterval.a > resultInterval.a) { + beforeCurrent = new Interval(resultInterval.a, rightInterval.a - 1); + } + + if (rightInterval.b < resultInterval.b) { + afterCurrent = new Interval(rightInterval.b + 1, resultInterval.b); + } + + if (beforeCurrent != null) { + if (afterCurrent != null) { +// split the current interval into two + result.intervals[resultI] = beforeCurrent; + result.intervals.insert(resultI + 1, afterCurrent); + resultI++; + rightI++; + continue; + } else { +// replace the current interval + result.intervals[resultI] = beforeCurrent; + resultI++; + continue; + } + } else { + if (afterCurrent != null) { +// replace the current interval + result.intervals[resultI] = afterCurrent; + rightI++; + continue; + } else { +// remove the current interval (thus no need to increment resultI) + result.intervals.removeAt(resultI); + continue; + } + } + } + +// If rightI reached right.intervals.length, no more intervals to subtract from result. +// If resultI reached result.intervals.length, we would be subtracting from an empty set. +// Either way, we are done. 
+ return result; + } + + /** {@inheritDoc} */ + IntervalSet operator +(IntervalSet other) { + if (other == null) { + //|| !(other is IntervalSet) ) { + return null; // nothing in common with null set + } + + List myIntervals = this.intervals; + List theirIntervals = (other).intervals; + IntervalSet intersection = null; + int mySize = myIntervals.length; + int theirSize = theirIntervals.length; + int i = 0; + int j = 0; +// iterate down both interval lists looking for nondisjoint intervals + while (i < mySize && j < theirSize) { + Interval mine = myIntervals[i]; + Interval theirs = theirIntervals[j]; +//System.out.println("mine="+mine+" and theirs="+theirs); + if (mine.startsBeforeDisjoint(theirs)) { +// move this iterator looking for interval that might overlap + i++; + } else if (theirs.startsBeforeDisjoint(mine)) { +// move other iterator looking for interval that might overlap + j++; + } else if (mine.properlyContains(theirs)) { +// overlap, add intersection, get next theirs + if (intersection == null) { + intersection = new IntervalSet(); + } + intersection.add(mine.intersection(theirs)); + j++; + } else if (theirs.properlyContains(mine)) { +// overlap, add intersection, get next mine + if (intersection == null) { + intersection = new IntervalSet(); + } + intersection.add(mine.intersection(theirs)); + i++; + } else if (!mine.disjoint(theirs)) { +// overlap, add intersection + if (intersection == null) { + intersection = new IntervalSet(); + } + intersection.add(mine.intersection(theirs)); +// Move the iterator of lower range [a..b], but not +// the upper range as it may contain elements that will collide +// with the next iterator. So, if mine=[0..115] and +// theirs=[115..200], then intersection is 115 and move mine +// but not theirs as theirs may collide with the next range +// in thisIter. +// move both iterators to next ranges + if (mine.startsAfterNonDisjoint(theirs)) { + j++; + } else if (theirs.startsAfterNonDisjoint(mine)) { + i++; + } + } + } + if (intersection == null) { + return new IntervalSet(); + } + return intersection; + } + + /** {@inheritDoc} */ + + bool contains(int el) { + int n = intervals.length; + int l = 0; + int r = n - 1; +// Binary search for the element in the (sorted, +// disjoint) array of intervals. + while (l <= r) { + int m = ((l + r) / 2).floor(); + Interval I = intervals[m]; + int a = I.a; + int b = I.b; + if (b < el) { + l = m + 1; + } else if (a > el) { + r = m - 1; + } else { + // el >= a && el <= b + return true; + } + } + return false; + } + + /** {@inheritDoc} */ + + bool get isNil { + return intervals == null || intervals.isEmpty; + } + + /** + * Returns the maximum value contained in the set if not isNil(). + * + * @return the maximum value contained in the set. + * @throws RuntimeException if set is empty + */ + int get maxElement { + if (isNil) { + throw new StateError("set is empty"); + } + return intervals.last.b; + } + + /** + * Returns the minimum value contained in the set if not isNil(). + * + * @return the minimum value contained in the set. + * @throws RuntimeException if set is empty + */ + int get minElement { + if (isNil) { + throw new StateError("set is empty"); + } + + return intervals.first.a; + } + + int get hashCode { + int hash = MurmurHash.initialize(); + for (Interval I in intervals) { + hash = MurmurHash.update(hash, I.a); + hash = MurmurHash.update(hash, I.b); + } + + hash = MurmurHash.finish(hash, intervals.length * 2); + return hash; + } + + /** Are two IntervalSets equal? 
Because all intervals are sorted + * and disjoint, equals is a simple linear walk over both lists + * to make sure they are the same. Interval.equals() is used + * by the List.equals() method to check the ranges. + */ + + bool operator ==(Object obj) { + if (obj == null || !(obj is IntervalSet)) { + return false; + } + IntervalSet other = obj; + return ListEquality().equals(this.intervals, other.intervals); + } + + String toString({bool elemAreChar = false, Vocabulary vocabulary}) { + if (this.intervals == null || this.intervals.isEmpty) { + return "{}"; + } + + final elemStr = this.intervals.map((I) { + StringBuffer buf = new StringBuffer(); + int a = I.a; + int b = I.b; + if (a == b) { + if (vocabulary != null) { + buf.write(elementName(vocabulary, a)); + } else { + if (a == Token.EOF) { + buf.write(""); + } else if (elemAreChar) { + buf.write("'"); + buf.writeCharCode(a); + buf.write("'"); + } else { + buf.write(a); + } + } + } else { + if (vocabulary != null) { + for (int i = a; i <= b; i++) { + if (i > a) buf.write(", "); + buf.write(elementName(vocabulary, i)); + } + } else { + if (elemAreChar) { + buf.write("'"); + buf.writeCharCode(a); + buf.write("'..'"); + buf.writeCharCode(b); + buf.write("'"); + } else { + buf.write(a); + buf.write(".."); + buf.write(b); + } + } + } + return buf; + }).join(", "); + if (this.length > 1) { + return "{$elemStr}"; + } + return elemStr; + } + + String elementName(Vocabulary vocabulary, int a) { + if (a == Token.EOF) { + return ""; + } else if (a == Token.EPSILON) { + return ""; + } else { + return vocabulary.getDisplayName(a); + } + } + + int get length { + int n = 0; + int numIntervals = intervals.length; + if (numIntervals == 1) { + Interval firstInterval = this.intervals[0]; + return firstInterval.b - firstInterval.a + 1; + } + for (int i = 0; i < numIntervals; i++) { + Interval I = intervals[i]; + n += (I.b - I.a + 1); + } + return n; + } + + List toIntegerList() { + List values = new List(length); + int n = intervals.length; + for (int i = 0; i < n; i++) { + Interval I = intervals[i]; + int a = I.a; + int b = I.b; + for (int v = a; v <= b; v++) { + values.add(v); + } + } + return values; + } + + List toList() { + List values = []; + int n = intervals.length; + for (int i = 0; i < n; i++) { + Interval I = intervals[i]; + int a = I.a; + int b = I.b; + for (int v = a; v <= b; v++) { + values.add(v); + } + } + return values; + } + + Set toSet() { + Set s = new Set(); + for (Interval I in intervals) { + int a = I.a; + int b = I.b; + for (int v = a; v <= b; v++) { + s.add(v); + } + } + return s; + } + + /** Get the ith element of ordered set. Used only by RandomPhrase so + * don't bother to implement if you're not doing that for a new + * ANTLR code gen target. 
+ */ + int get(int i) { + int n = intervals.length; + int index = 0; + for (int j = 0; j < n; j++) { + Interval I = intervals[j]; + int a = I.a; + int b = I.b; + for (int v = a; v <= b; v++) { + if (index == i) { + return v; + } + index++; + } + } + return -1; + } + + void remove(int el) { + if (readonly) throw new StateError("can't alter readonly IntervalSet"); + int n = intervals.length; + for (int i = 0; i < n; i++) { + Interval I = intervals[i]; + int a = I.a; + int b = I.b; + if (el < a) { + break; // list is sorted and el is before this interval; not here + } +// if whole interval x..x, rm + if (el == a && el == b) { + intervals.removeAt(i); + break; + } +// if on left edge x..b, adjust left + if (el == a) { + I.a++; + break; + } +// if on right edge a..x, adjust right + if (el == b) { + I.b--; + break; + } +// if in middle a..x..b, split interval + if (el > a && el < b) { + // found in this interval + int oldb = I.b; + I.b = el - 1; // [a..x-1] + addRange(el + 1, oldb); // add [x+1..b] + } + } + } + + bool isReadonly() { + return readonly; + } + + void setReadonly(bool readonly) { + if (this.readonly && !readonly) + throw new StateError("can't alter readonly IntervalSet"); + this.readonly = readonly; + } +} diff --git a/runtime/Dart/lib/src/lexer.dart b/runtime/Dart/lib/src/lexer.dart new file mode 100644 index 000000000..b8c61ad00 --- /dev/null +++ b/runtime/Dart/lib/src/lexer.dart @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; + +import 'atn/atn.dart'; +import 'error/error.dart'; +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'misc/pair.dart'; +import 'recognizer.dart'; +import 'token.dart'; +import 'token_factory.dart'; +import 'token_source.dart'; +import 'util/utils.dart'; + +abstract class Lexer extends Recognizer + implements TokenSource { + static final DEFAULT_MODE = 0; + static final MORE = -2; + static final SKIP = -3; + + static final DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL; + static final HIDDEN = Token.HIDDEN_CHANNEL; + static final MIN_CHAR_VALUE = 0x0000; + static final MAX_CHAR_VALUE = 0x10FFFF; + + CharStream _input; + + Pair _tokenFactorySourcePair; + TokenFactory tokenFactory = CommonTokenFactory.DEFAULT; + + // The goal of all lexer rules/methods is to create a token object. + // this is an instance variable as multiple rules may collaborate to + // create a single token. nextToken will return this object after + // matching lexer rule(s). If you subclass to allow multiple token + // emissions, then set this to the last token to be matched or + // something nonnull so that the auto token emit mechanism will not + // emit another token. + Token _token = null; + + // What character index in the stream did the current token start at? + // Needed, for example, to get the text for current token. Set at + // the start of nextToken. + int tokenStartCharIndex = -1; + + // The line on which the first character of the token resides/// + int tokenStartLine = -1; + + // The character position of first character within the line/// + int tokenStartCharPositionInLine = -1; + + // Once we see EOF on char stream, next token will be EOF. + // If you have DONE : EOF ; then you see DONE EOF. 
+ bool _hitEOF = false; + + // The channel number for the current token/// + int channel = Token.DEFAULT_CHANNEL; + + // The token type for the current token/// + int type = Token.INVALID_TYPE; + + List _modeStack = []; + int mode_ = Lexer.DEFAULT_MODE; + + /// You can set the text for the current token to override what is in + /// the input char buffer. Use setText() or can set this instance var. + String _text = null; + + Lexer(CharStream input) { + this._input = input; + this._tokenFactorySourcePair = Pair(this, input); + } + + reset() { + // wack Lexer state variables + if (_input != null) { + _input.seek(0); // rewind the input + } + _token = null; + type = Token.INVALID_TYPE; + channel = Token.DEFAULT_CHANNEL; + tokenStartCharIndex = -1; + tokenStartCharPositionInLine = -1; + tokenStartLine = -1; + _text = null; + + _hitEOF = false; + mode_ = Lexer.DEFAULT_MODE; + _modeStack.clear(); + + interpreter.reset(); + } + + /// Return a token from this source; i.e., match a token on the char stream. + Token nextToken() { + if (_input == null) { + throw new StateError("nextToken requires a non-null input stream."); + } + + // Mark start location in char stream so unbuffered streams are + // guaranteed at least have text of current token + int tokenStartMarker = _input.mark(); + try { + outer: + while (true) { + if (_hitEOF) { + emitEOF(); + return _token; + } + + _token = null; + channel = Token.DEFAULT_CHANNEL; + tokenStartCharIndex = _input.index; + tokenStartCharPositionInLine = interpreter.charPositionInLine; + tokenStartLine = interpreter.line; + _text = null; + do { + type = Token.INVALID_TYPE; +// System.out.println("nextToken line "+tokenStartLine+" at "+((char)input.LA(1))+ +// " in mode "+mode+ +// " at index "+input.index()); + int ttype; + try { + ttype = interpreter.match(_input, mode_); + } on LexerNoViableAltException catch (e) { + notifyListeners(e); // report error + recover(e); + ttype = SKIP; + } + if (_input.LA(1) == IntStream.EOF) { + _hitEOF = true; + } + if (type == Token.INVALID_TYPE) type = ttype; + if (type == SKIP) { + continue outer; + } + } while (type == MORE); + if (_token == null) emit(); + return _token; + } + } finally { + // make sure we release marker after match or + // unbuffered char stream will keep buffering + _input.release(tokenStartMarker); + } + } + + /// Instruct the lexer to skip creating a token for current lexer rule + /// and look for another token. nextToken() knows to keep looking when + /// a lexer rule finishes with token set to SKIP_TOKEN. Recall that + /// if token==null at end of any token rule, it creates one for you + /// and emits it. 
+ skip() { + this.type = Lexer.SKIP; + } + + more() { + this.type = Lexer.MORE; + } + + mode(int m) { + this.mode_ = m; + } + + pushMode(int m) { + if (LexerATNSimulator.debug) { + log("pushMode $m"); + } + _modeStack.add(mode_); + mode(m); + } + + int popMode() { + if (_modeStack.isEmpty) throw new StateError(""); + if (LexerATNSimulator.debug) log("popMode back to ${_modeStack.last}"); + mode(_modeStack.removeLast()); + return mode_; + } + + /** Set the char stream and reset the lexer */ + void set inputStream(IntStream input) { + this._input = null; + this._tokenFactorySourcePair = + new Pair(this, _input); + reset(); + this._input = input; + this._tokenFactorySourcePair = + new Pair(this, _input); + } + + String get sourceName { + return _input.sourceName; + } + + CharStream get inputStream { + return _input; + } + + /** By default does not support multiple emits per nextToken invocation + * for efficiency reasons. Subclass and override this method, nextToken, + * and getToken (to push tokens into a list and pull from that list + * rather than a single variable as this implementation does). + */ + void emitToken(Token token) { + //System.err.println("emit "+token); + this._token = token; + } + + /** The standard method called to automatically emit a token at the + * outermost lexical rule. The token object should point into the + * char buffer start..stop. If there is a text override in 'text', + * use that to set the token's text. Override this method to emit + * custom Token objects or provide a new factory. + */ + Token emit() { + Token t = tokenFactory.create( + type, + _text, + _tokenFactorySourcePair, + channel, + tokenStartCharIndex, + charIndex - 1, + tokenStartLine, + tokenStartCharPositionInLine); + emitToken(t); + return t; + } + + Token emitEOF() { + int cpos = charPositionInLine; + Token eof = tokenFactory.create(Token.EOF, null, _tokenFactorySourcePair, + Token.DEFAULT_CHANNEL, _input.index, _input.index - 1, line, cpos); + emitToken(eof); + return eof; + } + + int get charPositionInLine { + return interpreter.charPositionInLine; + } + + int get line { + return interpreter.line; + } + + void set line(int line) { + interpreter.line = line; + } + + void set charPositionInLine(int charPositionInLine) { + interpreter.charPositionInLine = charPositionInLine; + } + + /** What is the index of the current character of lookahead? */ + int get charIndex { + return _input.index; + } + + /** Return the text matched so far for the current token or any + * text override. + */ + String get text { + if (_text != null) { + return _text; + } + return interpreter.getText(_input); + } + + /** Set the complete text of this token; it wipes any previous + * changes to the text. + */ + void set text(String text) { + this._text = text; + } + + /** Override if emitting multiple tokens. */ + Token get token { + return _token; + } + + void setToken(Token _token) { + this._token = _token; + } + + List get channelNames => null; + + List get modeNames => null; + + /** Return a list of all Token objects in input char stream. + * Forces load of all tokens. Does not include EOF token. 
+ */ + List get allTokens { + List tokens = []; + Token t = nextToken(); + while (t.type != Token.EOF) { + tokens.add(t); + t = nextToken(); + } + return tokens; + } + + void notifyListeners(LexerNoViableAltException e) { + String text = + _input.getText(Interval.of(tokenStartCharIndex, _input.index)); + String msg = "token recognition error at: '" + getErrorDisplay(text) + "'"; + + ErrorListener listener = errorListenerDispatch; + listener.syntaxError( + this, null, tokenStartLine, tokenStartCharPositionInLine, msg, e); + } + + String getErrorDisplay(String s) { + return escapeWhitespace(s); + } + + String getCharErrorDisplay(int c) { + String s = getErrorDisplay(String.fromCharCode(c)); + return "'$s'"; + } + + /** Lexers can normally match any char in it's vocabulary after matching + * a token, so do the easy thing and just kill a character and hope + * it all works out. You can instead use the rule invocation stack + * to do sophisticated error recovery if you are in a fragment rule. + */ + void recover(RecognitionException re) { + if (re is LexerNoViableAltException) { + if (_input.LA(1) != IntStream.EOF) { + // skip a char and try again + interpreter.consume(_input); + } + } else { + //System.out.println("consuming char "+(char)input.LA(1)+" during recovery"); + //re.printStackTrace(); + // TODO: Do we lose character or line position information? + _input.consume(); + } + } +} diff --git a/runtime/Dart/lib/src/ll1_analyzer.dart b/runtime/Dart/lib/src/ll1_analyzer.dart new file mode 100644 index 000000000..45646299d --- /dev/null +++ b/runtime/Dart/lib/src/ll1_analyzer.dart @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import './util/bit_set.dart'; +import 'atn/atn.dart'; +import 'interval_set.dart'; +import 'prediction_context.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'util/bit_set.dart'; + +class LL1Analyzer { + /** Special value added to the lookahead sets to indicate that we hit + * a predicate during analysis if {@code seeThruPreds==false}. + */ + static final int HIT_PRED = Token.INVALID_TYPE; + + final ATN atn; + + LL1Analyzer(this.atn); + + /** + * Calculates the SLL(1) expected lookahead set for each outgoing transition + * of an [ATNState]. The returned array has one element for each + * outgoing transition in [s]. If the closure from transition + * i leads to a semantic predicate before matching a symbol, the + * element at index i of the result will be null. + * + * @param s the ATN state + * @return the expected symbols for each outgoing transition of [s]. + */ + List getDecisionLookahead(ATNState s) { +// System.out.println("LOOK("+s.stateNumber+")"); + if (s == null) { + return null; + } + + List look = List(s.numberOfTransitions); + for (int alt = 0; alt < s.numberOfTransitions; alt++) { + look[alt] = new IntervalSet(); + Set lookBusy = Set(); + bool seeThruPreds = false; // fail to get lookahead upon pred + _LOOK(s.transition(alt).target, null, PredictionContext.EMPTY, look[alt], + lookBusy, new BitSet(), seeThruPreds, false); + // Wipe out lookahead for this alternative if we found nothing + // or we had a predicate when we !seeThruPreds + if (look[alt].length == 0 || look[alt].contains(HIT_PRED)) { + look[alt] = null; + } + } + return look; + } + + /** + * Compute set of tokens that can follow [s] in the ATN in the + * specified [ctx]. + * + *

If [ctx] is null and the end of the rule containing + * [s] is reached, {@link Token#EPSILON} is added to the result set. + * If [ctx] is not null and the end of the outermost rule is + * reached, {@link Token#EOF} is added to the result set.
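+ *
+ * <p>A minimal sketch; [atn], [decisionState] and [ctx] are hypothetical
+ * names assumed to be in scope:</p>
+ * <pre>
+ * final analyzer = LL1Analyzer(atn);
+ * IntervalSet follow = analyzer.LOOK(decisionState, ctx);
+ * </pre>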

+ * + * @param s the ATN state + * @param stopState the ATN state to stop at. This can be a + * [BlockEndState] to detect epsilon paths through a closure. + * @param ctx the complete parser context, or null if the context + * should be ignored + * + * @return The set of tokens that can follow [s] in the ATN in the + * specified [ctx]. + */ + + IntervalSet LOOK(ATNState s, RuleContext ctx, [ATNState stopState = null]) { + IntervalSet r = new IntervalSet(); + bool seeThruPreds = true; // ignore preds; get all lookahead + PredictionContext lookContext = + ctx != null ? PredictionContext.fromRuleContext(s.atn, ctx) : null; + _LOOK( + s, stopState, lookContext, r, Set(), new BitSet(), seeThruPreds, true); + return r; + } + + /** + * Compute set of tokens that can follow [s] in the ATN in the + * specified [ctx]. + * + *

If [ctx] is null and [stopState] or the end of the + * rule containing [s] is reached, {@link Token#EPSILON} is added to + * the result set. If [ctx] is not null and [addEOF] is + * [true] and [stopState] or the end of the outermost rule is + * reached, {@link Token#EOF} is added to the result set.

+ * + * @param s the ATN state. + * @param stopState the ATN state to stop at. This can be a + * [BlockEndState] to detect epsilon paths through a closure. + * @param ctx The outer context, or null if the outer context should + * not be used. + * @param look The result lookahead set. + * @param lookBusy A set used for preventing epsilon closures in the ATN + * from causing a stack overflow. Outside code should pass + * {@code new HashSet} for this argument. + * @param calledRuleStack A set used for preventing left recursion in the + * ATN from causing a stack overflow. Outside code should pass + * {@code new BitSet()} for this argument. + * @param seeThruPreds [true] to true semantic predicates as + * implicitly [true] and "see through them", otherwise [false] + * to treat semantic predicates as opaque and add {@link #HIT_PRED} to the + * result if one is encountered. + * @param addEOF Add {@link Token#EOF} to the result if the end of the + * outermost context is reached. This parameter has no effect if [ctx] + * is null. + */ + void _LOOK( + ATNState s, + ATNState stopState, + PredictionContext ctx, + IntervalSet look, + Set lookBusy, + BitSet calledRuleStack, + bool seeThruPreds, + bool addEOF) { +// System.out.println("_LOOK("+s.stateNumber+", ctx="+ctx); + ATNConfig c = new ATNConfig(s, 0, ctx); + if (!lookBusy.add(c)) return; + + if (s == stopState) { + if (ctx == null) { + look.addOne(Token.EPSILON); + return; + } else if (ctx.isEmpty && addEOF) { + look.addOne(Token.EOF); + return; + } + } + + if (s is RuleStopState) { + if (ctx == null) { + look.addOne(Token.EPSILON); + return; + } else if (ctx.isEmpty && addEOF) { + look.addOne(Token.EOF); + return; + } + + if (ctx != PredictionContext.EMPTY) { + // run thru all possible stack tops in ctx + bool removed = calledRuleStack[s.ruleIndex]; + try { + calledRuleStack.clear(s.ruleIndex); + for (int i = 0; i < ctx.length; i++) { + ATNState returnState = atn.states[ctx.getReturnState(i)]; +// System.out.println("popping back to "+retState); + _LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, + calledRuleStack, seeThruPreds, addEOF); + } + } finally { + if (removed) { + calledRuleStack.set(s.ruleIndex); + } + } + return; + } + } + + for (int i = 0; i < s.numberOfTransitions; i++) { + Transition t = s.transition(i); + if (t is RuleTransition) { + if (calledRuleStack[t.target.ruleIndex]) { + continue; + } + + PredictionContext newContext = + SingletonPredictionContext.create(ctx, t.followState.stateNumber); + + try { + calledRuleStack.set(t.target.ruleIndex); + _LOOK(t.target, stopState, newContext, look, lookBusy, + calledRuleStack, seeThruPreds, addEOF); + } finally { + calledRuleStack.clear(t.target.ruleIndex); + } + } else if (t is AbstractPredicateTransition) { + if (seeThruPreds) { + _LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, + seeThruPreds, addEOF); + } else { + look.addOne(HIT_PRED); + } + } else if (t.isEpsilon) { + _LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, + seeThruPreds, addEOF); + } else if (t is WildcardTransition) { + look.addAll( + IntervalSet.ofRange(Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType)); + } else { +// System.out.println("adding "+ t); + IntervalSet set = t.label; + if (set != null) { + if (t is NotSetTransition) { + set = set.complement(IntervalSet.ofRange( + Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType)); + } + look.addAll(set); + } + } + } + } +} diff --git a/runtime/Dart/lib/src/misc/multi_map.dart b/runtime/Dart/lib/src/misc/multi_map.dart new file mode 
100644 index 000000000..27bcfdca4 --- /dev/null +++ b/runtime/Dart/lib/src/misc/multi_map.dart @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:collection'; + +import 'package:collection/collection.dart'; + +import 'pair.dart'; + +class MultiMap extends DelegatingMap> { + MultiMap() : super(LinkedHashMap()); + + void put(K key, V value) { + List elementsForKey = this[key]; + if (elementsForKey == null) { + elementsForKey = []; + this[key] = elementsForKey; + } + elementsForKey.add(value); + } + + List> get pairs { + List> pairs = []; + for (K key in keys) { + for (V value in this[key]) { + pairs.add(new Pair(key, value)); + } + } + return pairs; + } +} diff --git a/runtime/Dart/lib/src/misc/pair.dart b/runtime/Dart/lib/src/misc/pair.dart new file mode 100644 index 000000000..7041da97b --- /dev/null +++ b/runtime/Dart/lib/src/misc/pair.dart @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +class Pair { + final A a; + final B b; + + const Pair(this.a, this.b); + + @override + bool operator ==(other) { + return other is Pair && a == other.a && b == other.b; + } + + String toString() { + return "($a, $b)"; + } + + @override + int get hashCode { + return a.hashCode ^ b.hashCode; + } +} diff --git a/runtime/Dart/lib/src/parser.dart b/runtime/Dart/lib/src/parser.dart new file mode 100644 index 000000000..584c23144 --- /dev/null +++ b/runtime/Dart/lib/src/parser.dart @@ -0,0 +1,839 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; +import 'dart:io'; + +import 'atn/atn.dart'; +import 'dfa/dfa.dart'; +import 'error/error.dart'; +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'lexer.dart'; +import 'parser_rule_context.dart'; +import 'recognizer.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'token_factory.dart'; +import 'token_source.dart'; +import 'token_stream.dart'; +import 'tree/tree.dart'; + +/** This is all the parsing support code essentially; most of it is error recovery stuff. */ +abstract class Parser extends Recognizer { + /** + * This field maps from the serialized ATN string to the deserialized [ATN] with + * bypass alternatives. + * + * @see ATNDeserializationOptions#isGenerateRuleBypassTransitions() + */ + static final Map bypassAltsAtnCache = {}; + + /** + * The error handling strategy for the parser. The default value is a new + * instance of [DefaultErrorStrategy]. + * + * @see #getErrorHandler + * @see #setErrorHandler + */ + + ErrorStrategy errorHandler = new DefaultErrorStrategy(); + + /** + * The input stream. + * + * @see #getInputStream + * @see #setInputStream + */ + TokenStream _input; + + final List _precedenceStack = [0]; + + /** + * The [ParserRuleContext] object for the currently executing rule. + * This is always non-null during the parsing process. + */ + ParserRuleContext _ctx; + + /** + * Specifies whether or not the parser should construct a parse tree during + * the parsing process. The default value is [true]. 
+ * + * @see #getBuildParseTree + * @see #setBuildParseTree + */ + bool _buildParseTrees = true; + + /** + * When {@link #setTrace}{@code (true)} is called, a reference to the + * [TraceListener] is stored here so it can be easily removed in a + * later call to {@link #setTrace}{@code (false)}. The listener itself is + * implemented as a parser listener so this field is not directly used by + * other parser methods. + */ + TraceListener _tracer; + + /** + * The list of [ParseTreeListener] listeners registered to receive + * events during the parse. + * + * @see #addParseListener + */ + List _parseListeners; + + /** + * The number of syntax errors reported during parsing. This value is + * incremented each time {@link #notifyErrorListeners} is called. + */ + int _syntaxErrors = 0; + + /** Indicates parser has match()ed EOF token. See {@link #exitRule()}. */ + bool matchedEOF = false; + + Parser(TokenStream input) { + inputStream = input; + } + + /** reset the parser's state */ + void reset() { + if (inputStream != null) inputStream.seek(0); + errorHandler.reset(this); + _ctx = null; + _syntaxErrors = 0; + matchedEOF = false; + setTrace(false); + _precedenceStack.clear(); + _precedenceStack.add(0); + if (interpreter != null) { + interpreter.reset(); + } + } + + /** + * Match current input symbol against [ttype]. If the symbol type + * matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are + * called to complete the match process. + * + *

If the symbol type does not match, + * {@link ANTLRErrorStrategy#recoverInline} is called on the current error + * strategy to attempt recovery. If {@link #getBuildParseTree} is + * [true] and the token index of the symbol returned by + * {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to + * the parse tree by calling {@link #createErrorNode(ParserRuleContext, Token)} then + * {@link ParserRuleContext#addErrorNode(ErrorNode)}.
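+ *
+ * <p>A sketch of the call a generated rule body makes; the token type
+ * constant shown is hypothetical:</p>
+ * <pre>
+ * Token id = match(MyParser.TOKEN_ID);
+ * </pre>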

+ * + * @param ttype the token type to match + * @return the matched symbol + * @throws RecognitionException if the current input symbol did not match + * [ttype] and the error strategy could not recover from the + * mismatched symbol + */ + Token match(int ttype) { + Token t = currentToken; + if (t.type == ttype) { + if (ttype == Token.EOF) { + matchedEOF = true; + } + errorHandler.reportMatch(this); + consume(); + } else { + t = errorHandler.recoverInline(this); + if (_buildParseTrees && t.tokenIndex == -1) { + // we must have conjured up a new token during single token insertion + // if it's not the current symbol + _ctx.addErrorNode(createErrorNode(_ctx, t)); + } + } + return t; + } + + /** + * Match current input symbol as a wildcard. If the symbol type matches + * (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch} + * and {@link #consume} are called to complete the match process. + * + *

If the symbol type does not match, + * {@link ANTLRErrorStrategy#recoverInline} is called on the current error + * strategy to attempt recovery. If {@link #getBuildParseTree} is + * [true] and the token index of the symbol returned by + * {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to + * the parse tree by calling {@link Parser#createErrorNode(ParserRuleContext, Token)}. then + * {@link ParserRuleContext#addErrorNode(ErrorNode)}

+ * + * @return the matched symbol + * @throws RecognitionException if the current input symbol did not match + * a wildcard and the error strategy could not recover from the mismatched + * symbol + */ + Token matchWildcard() { + Token t = currentToken; + if (t.type > 0) { + errorHandler.reportMatch(this); + consume(); + } else { + t = errorHandler.recoverInline(this); + if (_buildParseTrees && t.tokenIndex == -1) { + // we must have conjured up a new token during single token insertion + // if it's not the current symbol + _ctx.addErrorNode(createErrorNode(_ctx, t)); + } + } + + return t; + } + + /** + * Track the [ParserRuleContext] objects during the parse and hook + * them up using the {@link ParserRuleContext#children} list so that it + * forms a parse tree. The [ParserRuleContext] returned from the start + * rule represents the root of the parse tree. + * + *

Note that if we are not building parse trees, rule contexts only point + * upwards. When a rule exits, it returns the context but that gets garbage + * collected if nobody holds a reference. It points upwards but nobody + * points at it.

+ * + *

When we build parse trees, we are adding all of these contexts to + * {@link ParserRuleContext#children} list. Contexts are then not candidates + * for garbage collection.
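+ *
+ * <p>A minimal sketch; the parser class name is hypothetical:</p>
+ * <pre>
+ * final parser = MyParser(tokens);
+ * parser.buildParseTree = false; // rule contexts will only point upwards
+ * </pre>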

+ */ + void set buildParseTree(bool buildParseTrees) { + this._buildParseTrees = buildParseTrees; + } + + /** + * Gets whether or not a complete parse tree will be constructed while + * parsing. This property is [true] for a newly constructed parser. + * + * @return [true] if a complete parse tree will be constructed while + * parsing, otherwise [false] + */ + bool get buildParseTree { + return _buildParseTrees; + } + + /** + * Trim the internal lists of the parse tree during parsing to conserve memory. + * This property is set to [false] by default for a newly constructed parser. + * + * @param trimParseTrees [true] to trim the capacity of the {@link ParserRuleContext#children} + * list to its size after a rule is parsed. + */ + void set trimParseTree(bool trimParseTrees) { + if (trimParseTrees) { + if (trimParseTree) return; + addParseListener(TrimToSizeListener.INSTANCE); + } else { + removeParseListener(TrimToSizeListener.INSTANCE); + } + } + + /** + * @return [true] if the {@link ParserRuleContext#children} list is trimmed + * using the default {@link Parser.TrimToSizeListener} during the parse process. + */ + bool get trimParseTree { + return parseListeners.contains(TrimToSizeListener.INSTANCE); + } + + List get parseListeners => _parseListeners; + + /** + * Registers [listener] to receive events during the parsing process. + * + *

To support output-preserving grammar transformations (including but not + * limited to left-recursion removal, automated left-factoring, and + * optimized code generation), calls to listener methods during the parse + * may differ substantially from calls made by + * {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In + * particular, rule entry and exit events may occur in a different order + * during the parse than after the parser. In addition, calls to certain + * rule entry methods may be omitted.

+ * + *

With the following specific exceptions, calls to listener events are + * deterministic, i.e. for identical input the calls to listener + * methods will be the same.

+ * + *
+ * <ul>
+ *   <li>Alterations to the grammar used to generate code may change the
+ *   behavior of the listener calls.</li>
+ *   <li>Alterations to the command line options passed to ANTLR 4 when
+ *   generating the parser may change the behavior of the listener calls.</li>
+ *   <li>Changing the version of the ANTLR Tool used to generate the parser
+ *   may change the behavior of the listener calls.</li>
+ * </ul>
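+ *
+ * <p>A minimal sketch; the listener class is hypothetical:</p>
+ * <pre>
+ * parser.addParseListener(MyParseListener());
+ * </pre>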
+ * + * @param listener the listener to add + * + * @throws NullPointerException if {@code} listener is null + */ + void addParseListener(ParseTreeListener listener) { + if (listener == null) { + throw new ArgumentError.notNull("listener"); + } + + if (_parseListeners == null) { + _parseListeners = []; + } + + this._parseListeners.add(listener); + } + + /** + * Remove [listener] from the list of parse listeners. + * + *

If [listener] is null or has not been added as a parse + * listener, this method does nothing.

+ * + * @see #addParseListener + * + * @param listener the listener to remove + */ + void removeParseListener(ParseTreeListener listener) { + if (_parseListeners != null) { + if (_parseListeners.remove(listener)) { + if (_parseListeners.isEmpty) { + _parseListeners = null; + } + } + } + } + + /** + * Remove all parse listeners. + * + * @see #addParseListener + */ + void removeParseListeners() { + _parseListeners = null; + } + + /** + * Notify any parse listeners of an enter rule event. + * + * @see #addParseListener + */ + void triggerEnterRuleEvent() { + for (ParseTreeListener listener in _parseListeners) { + listener.enterEveryRule(_ctx); + _ctx.enterRule(listener); + } + } + + /** + * Notify any parse listeners of an exit rule event. + * + * @see #addParseListener + */ + void triggerExitRuleEvent() { + // reverse order walk of listeners + for (int i = _parseListeners.length - 1; i >= 0; i--) { + ParseTreeListener listener = _parseListeners[i]; + _ctx.exitRule(listener); + listener.exitEveryRule(_ctx); + } + } + + /** + * Gets the number of syntax errors reported during parsing. This value is + * incremented each time {@link #notifyErrorListeners} is called. + * + * @see #notifyErrorListeners + */ + int get numberOfSyntaxErrors { + return _syntaxErrors; + } + + TokenFactory get tokenFactory { + return _input.tokenSource.tokenFactory; + } + + /** Tell our token source and error strategy about a new way to create tokens. */ + + void set tokenFactory(TokenFactory factory) { + _input.tokenSource.tokenFactory = factory; + } + + /** + * The ATN with bypass alternatives is expensive to create so we create it + * lazily. + * + * @throws UnsupportedOperationException if the current parser does not + * implement the {@link #getSerializedATN()} method. + */ + ATN get ATNWithBypassAlts { + String serializedAtn = serializedATN; + if (serializedAtn == null) { + throw new UnsupportedError( + "The current parser does not support an ATN with bypass alternatives."); + } + + ATN result = bypassAltsAtnCache[serializedAtn]; + if (result == null) { + ATNDeserializationOptions deserializationOptions = + new ATNDeserializationOptions(); + deserializationOptions.setGenerateRuleBypassTransitions(true); + result = new ATNDeserializer(deserializationOptions) + .deserialize(serializedAtn.codeUnits); + bypassAltsAtnCache[serializedAtn] = result; + } + + return result; + } + + /** + * The preferred method of getting a tree pattern. For example, here's a + * sample use: + * + *
+   * ParseTree t = parser.expr();
+   * ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
+   * ParseTreeMatch m = p.match(t);
+   * String id = m.get("ID");
+   * 
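+   *
+   * If no [lexer] is supplied, the parser reuses the lexer behind its own
+   * token stream, and throws an UnsupportedError if none can be found.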
+ */ + ParseTreePattern compileParseTreePattern(String pattern, int patternRuleIndex, + [Lexer lexer]) { + if (lexer == null) { + TokenSource tokenSource = tokenStream?.tokenSource; + if (tokenSource == null || !(tokenSource is Lexer)) { + throw new UnsupportedError("Parser can't discover a lexer to use"); + } + lexer = tokenSource; + } + + ParseTreePatternMatcher m = new ParseTreePatternMatcher(lexer, this); + return m.compile(pattern, patternRuleIndex); + } + + TokenStream get inputStream => tokenStream; + + void set inputStream(IntStream input) { + setTokenStream(input); + } + + TokenStream get tokenStream => _input; + + /** Set the token stream and reset the parser. */ + void setTokenStream(TokenStream input) { + this._input = null; + reset(); + this._input = input; + } + + /** Match needs to return the current input symbol, which gets put + * into the label for the associated token ref; e.g., x=ID. + */ + + Token get currentToken { + return _input.LT(1); + } + + void notifyErrorListeners(String msg, + [Token offendingToken = null, RecognitionException e = null]) { + offendingToken = offendingToken ?? currentToken; + _syntaxErrors++; + int line = -1; + int charPositionInLine = -1; + line = offendingToken.line; + charPositionInLine = offendingToken.charPositionInLine; + + ErrorListener listener = errorListenerDispatch; + listener.syntaxError( + this, offendingToken, line, charPositionInLine, msg, e); + } + + /** + * Consume and return the {@linkplain #getCurrentToken current symbol}. + * + *

+   * E.g., given the following input with [A] being the current
+   * lookahead symbol, this function moves the cursor to [B] and returns
+   * [A].
+   *
+   *  A B
+   *  ^
+   * 
+ * + * If the parser is not in error recovery mode, the consumed symbol is added + * to the parse tree using {@link ParserRuleContext#addChild}, and + * {@link ParseTreeListener#visitTerminal} is called on any parse listeners. + * If the parser is in error recovery mode, the consumed symbol is + * added to the parse tree using {@link #createErrorNode(ParserRuleContext, Token)} then + * {@link ParserRuleContext#addErrorNode(ErrorNode)} and + * {@link ParseTreeListener#visitErrorNode} is called on any parse + * listeners. + */ + Token consume() { + Token o = currentToken; + if (o.type != IntStream.EOF) { + inputStream.consume(); + } + bool hasListener = _parseListeners != null && !_parseListeners.isEmpty; + if (_buildParseTrees || hasListener) { + if (errorHandler.inErrorRecoveryMode(this)) { + ErrorNode node = _ctx.addErrorNode(createErrorNode(_ctx, o)); + if (_parseListeners != null) { + for (ParseTreeListener listener in _parseListeners) { + listener.visitErrorNode(node); + } + } + } else { + TerminalNode node = _ctx.addChild(createTerminalNode(_ctx, o)); + if (_parseListeners != null) { + for (ParseTreeListener listener in _parseListeners) { + listener.visitTerminal(node); + } + } + } + } + return o; + } + + /** How to create a token leaf node associated with a parent. + * Typically, the terminal node to create is not a function of the parent. + * + * @since 4.7 + */ + TerminalNode createTerminalNode(ParserRuleContext parent, Token t) { + return new TerminalNodeImpl(t); + } + + /** How to create an error node, given a token, associated with a parent. + * Typically, the error node to create is not a function of the parent. + * + * @since 4.7 + */ + ErrorNode createErrorNode(ParserRuleContext parent, Token t) { + return new ErrorNodeImpl(t); + } + + void addContextToParseTree() { + ParserRuleContext parent = _ctx.parent; + // add current context to parent if we have a parent + if (parent != null) { + parent.addAnyChild(_ctx); + } + } + + /** + * Always called by generated parsers upon entry to a rule. Access field + * {@link #_ctx} get the current context. + */ + void enterRule(ParserRuleContext localctx, int state, int ruleIndex) { + this.state = state; + _ctx = localctx; + _ctx.start = _input.LT(1); + if (_buildParseTrees) addContextToParseTree(); + if (_parseListeners != null) triggerEnterRuleEvent(); + } + + void exitRule() { + if (matchedEOF) { + // if we have matched EOF, it cannot consume past EOF so we use LT(1) here + _ctx.stop = _input.LT(1); // LT(1) will be end of file + } else { + _ctx.stop = _input.LT(-1); // stop node is what we just matched + } + // trigger event on _ctx, before it reverts to parent + if (_parseListeners != null) triggerExitRuleEvent(); + state = _ctx.invokingState; + _ctx = _ctx.parent; + } + + void enterOuterAlt(ParserRuleContext localctx, int altNum) { + localctx.altNumber = altNum; + // if we have new localctx, make sure we replace existing ctx + // that is previous child of parse tree + if (_buildParseTrees && _ctx != localctx) { + ParserRuleContext parent = _ctx.parent; + if (parent != null) { + parent.removeLastChild(); + parent.addAnyChild(localctx); + } + } + _ctx = localctx; + } + + /** + * Get the precedence level for the top-most precedence rule. + * + * @return The precedence level for the top-most precedence rule, or -1 if + * the parser context is not nested within a precedence rule. 
+ */ + int get precedence { + if (_precedenceStack.isEmpty) { + return -1; + } + + return _precedenceStack.last; + } + + void enterRecursionRule( + ParserRuleContext localctx, int state, int ruleIndex, int precedence) { + this.state = state; + _precedenceStack.add(precedence); + _ctx = localctx; + _ctx.start = _input.LT(1); + if (_parseListeners != null) { + triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules + } + } + + /** Like {@link #enterRule} but for recursive rules. + * Make the current context the child of the incoming localctx. + */ + void pushNewRecursionContext( + ParserRuleContext localctx, int state, int ruleIndex) { + ParserRuleContext previous = _ctx; + previous.parent = localctx; + previous.invokingState = state; + previous.stop = _input.LT(-1); + + _ctx = localctx; + _ctx.start = previous.start; + if (_buildParseTrees) { + _ctx.addAnyChild(previous); + } + + if (_parseListeners != null) { + triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules + } + } + + void unrollRecursionContexts(ParserRuleContext _parentctx) { + _precedenceStack.removeLast(); + _ctx.stop = _input.LT(-1); + ParserRuleContext retctx = _ctx; // save current ctx (return value) + + // unroll so _ctx is as it was before call to recursive method + if (_parseListeners != null) { + while (_ctx != _parentctx) { + triggerExitRuleEvent(); + _ctx = _ctx.parent; + } + } else { + _ctx = _parentctx; + } + + // hook into tree + retctx.parent = _parentctx; + + if (_buildParseTrees && _parentctx != null) { + // add return ctx into invoking rule's tree + _parentctx.addAnyChild(retctx); + } + } + + ParserRuleContext getInvokingContext(int ruleIndex) { + ParserRuleContext p = _ctx; + while (p != null) { + if (p.ruleIndex == ruleIndex) return p; + p = p.parent; + } + return null; + } + + ParserRuleContext get context { + return _ctx; + } + + set context(ParserRuleContext ctx) { + _ctx = ctx; + } + + bool precpred(RuleContext localctx, int precedence) { + return precedence >= _precedenceStack.last; + } + + bool inContext(String context) { + // TODO: useful in parser? + return false; + } + + /** + * Checks whether or not [symbol] can follow the current state in the + * ATN. The behavior of this method is equivalent to the following, but is + * implemented such that the complete context-sensitive follow set does not + * need to be explicitly constructed. + * + *
+   * return expectedTokens.contains(symbol);
+   * 
+ * + * @param symbol the symbol type to check + * @return [true] if [symbol] can follow the current state in + * the ATN, otherwise [false]. + */ + bool isExpectedToken(int symbol) { +// return interpreter.atn.nextTokens(_ctx); + ATN atn = interpreter.atn; + ParserRuleContext ctx = _ctx; + ATNState s = atn.states[state]; + IntervalSet following = atn.nextTokens(s); + if (following.contains(symbol)) { + return true; + } +// log("following "+s+"="+following); + if (!following.contains(Token.EPSILON)) return false; + + while (ctx != null && + ctx.invokingState >= 0 && + following.contains(Token.EPSILON)) { + ATNState invokingState = atn.states[ctx.invokingState]; + RuleTransition rt = invokingState.transition(0); + following = atn.nextTokens(rt.followState); + if (following.contains(symbol)) { + return true; + } + + ctx = ctx.parent; + } + + if (following.contains(Token.EPSILON) && symbol == Token.EOF) { + return true; + } + + return false; + } + + bool isMatchedEOF() { + return matchedEOF; + } + + /** + * Computes the set of input symbols which could follow the current parser + * state and context, as given by {@link #getState} and {@link #getContext}, + * respectively. + * + * @see ATN#getExpectedTokens(int, RuleContext) + */ + IntervalSet get expectedTokens { + return getATN().getExpectedTokens(state, context); + } + + IntervalSet get expectedTokensWithinCurrentRule { + ATN atn = interpreter.atn; + ATNState s = atn.states[state]; + return atn.nextTokens(s); + } + + /** Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found. */ + int getRuleIndex(String ruleName) { + int ruleIndex = ruleIndexMap[ruleName]; + if (ruleIndex != null) return ruleIndex; + return -1; + } + + ParserRuleContext get ruleContext { + return _ctx; + } + + List get ruleInvocationStack => getRuleInvocationStack(); + + /** Return List<String> of the rule names in your parser instance + * leading up to a call to the current rule. You could override if + * you want more details such as the file/line info of where + * in the ATN a rule is invoked. + * + * This is very useful for error messages. + */ + List getRuleInvocationStack([RuleContext p]) { + p = p ?? _ctx; + final _ruleNames = ruleNames; + List stack = []; + while (p != null) { + // compute what follows who invoked us + int ruleIndex = p.ruleIndex; + if (ruleIndex < 0) + stack.add("n/a"); + else + stack.add(_ruleNames[ruleIndex]); + p = p.parent; + } + return stack; + } + + /** For debugging and other purposes. */ + List get dfaStrings { + List s = []; + for (int d = 0; d < interpreter.decisionToDFA.length; d++) { + DFA dfa = interpreter.decisionToDFA[d]; + s.add(dfa.toString(vocabulary)); + } + return s; + } + + /** For debugging and other purposes. 
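+   * Prints each decision's DFA to standard output, skipping decisions
+   * whose DFA is empty.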
*/ + void dumpDFA() { + bool seenOne = false; + for (int d = 0; d < interpreter.decisionToDFA.length; d++) { + DFA dfa = interpreter.decisionToDFA[d]; + if (!dfa.states.isEmpty) { + if (seenOne) print(""); + print("Decision ${dfa.decision}:"); + stdout.write(dfa.toString(vocabulary)); + seenOne = true; + } + } + } + + String get sourceName { + return _input.sourceName; + } + + ParseInfo get parseInfo { + ParserATNSimulator interp = interpreter; + if (interp is ProfilingATNSimulator) { + return new ParseInfo(interp); + } + return null; + } + + /** + * @since 4.3 + */ + void setProfile(bool profile) { + ParserATNSimulator interp = interpreter; + PredictionMode saveMode = interp.predictionMode; + if (profile) { + if (!(interp is ProfilingATNSimulator)) { + interpreter = new ProfilingATNSimulator(this); + } + } else if (interp is ProfilingATNSimulator) { + ParserATNSimulator sim = new ParserATNSimulator( + this, getATN(), interp.decisionToDFA, interp.sharedContextCache); + interpreter = sim; + } + interpreter.predictionMode = saveMode; + } + + /** During a parse is sometimes useful to listen in on the rule entry and exit + * events as well as token matches. This is for quick and dirty debugging. + */ + void setTrace(bool trace) { + if (!trace) { + removeParseListener(_tracer); + _tracer = null; + } else { + if (_tracer != null) + removeParseListener(_tracer); + else + _tracer = new TraceListener(this); + addParseListener(_tracer); + } + } + + /** + * Gets whether a [TraceListener] is registered as a parse listener + * for the parser. + * + * @see #setTrace(bool) + */ + bool isTrace() { + return _tracer != null; + } +} diff --git a/runtime/Dart/lib/src/parser_interpreter.dart b/runtime/Dart/lib/src/parser_interpreter.dart new file mode 100644 index 000000000..238b9442c --- /dev/null +++ b/runtime/Dart/lib/src/parser_interpreter.dart @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:collection'; + +import 'atn/atn.dart'; +import 'dfa/dfa.dart'; +import 'error/error.dart'; +import 'misc/pair.dart'; +import 'parser.dart'; +import 'parser_rule_context.dart'; +import 'token.dart'; +import 'token_stream.dart'; +import 'vocabulary.dart'; + +/** A parser simulator that mimics what ANTLR's generated + * parser code does. A ParserATNSimulator is used to make + * predictions via adaptivePredict but this class moves a pointer through the + * ATN to simulate parsing. ParserATNSimulator just + * makes us efficient rather than having to backtrack, for example. + * + * This properly creates parse trees even for left recursive rules. + * + * We rely on the left recursive rule invocation and special predicate + * transitions to make left recursive rules work. + * + * See TestParserInterpreter for examples. 
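+ *
+ * A minimal usage sketch (the vocabulary, rule names, ATN, and token
+ * stream are assumed to come from a grammar loaded elsewhere):
+ *
+ *   var parser = ParserInterpreter(
+ *       'Expr.g4', vocabulary, ruleNames, atn, tokens);
+ *   ParserRuleContext tree = parser.parse(startRuleIndex);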
+ */ +class ParserInterpreter extends Parser { + final String grammarFileName; + final ATN atn; + + List decisionToDFA; // not shared like it is for generated parsers + final PredictionContextCache sharedContextCache = + new PredictionContextCache(); + + final List ruleNames; + + final Vocabulary vocabulary; + + /** This stack corresponds to the _parentctx, _parentState pair of locals + * that would exist on call stack frames with a recursive descent parser; + * in the generated function for a left-recursive rule you'd see: + * + * EContext e(int _p) throws RecognitionException { + * ParserRuleContext _parentctx = context; // Pair.a + * int _parentState = state; // Pair.b + * ... + * } + * + * Those values are used to create new recursive rule invocation contexts + * associated with left operand of an alt like "expr '*' expr". + */ + final DoubleLinkedQueue> _parentContextStack = + new DoubleLinkedQueue(); + + /** We need a map from (decision,inputIndex)->forced alt for computing ambiguous + * parse trees. For now, we allow exactly one override. + */ + int overrideDecision = -1; + int overrideDecisionInputIndex = -1; + int overrideDecisionAlt = -1; + bool overrideDecisionReached = + false; // latch and only override once; error might trigger infinite loop + + /** What is the current context when we override a decisions? This tells + * us what the root of the parse tree is when using override + * for an ambiguity/lookahead check. + */ + InterpreterRuleContext overrideDecisionRoot = null; + + /** Return the root of the parse, which can be useful if the parser + * bails out. You still can access the top node. Note that, + * because of the way left recursive rules add children, it's possible + * that the root will not have any children if the start rule immediately + * called and left recursive rule that fails. 
+ * + * @since 4.5.1 + */ + InterpreterRuleContext rootContext; + + ParserInterpreter(this.grammarFileName, this.vocabulary, this.ruleNames, + this.atn, TokenStream input) + : super(input) { + // init decision DFA + int numberOfDecisions = atn.numberOfDecisions; + this.decisionToDFA = new List(numberOfDecisions); + for (int i = 0; i < numberOfDecisions; i++) { + DecisionState decisionState = atn.getDecisionState(i); + decisionToDFA[i] = new DFA(decisionState, i); + } + + // get atn simulator that knows how to do predictions + interpreter = + new ParserATNSimulator(this, atn, decisionToDFA, sharedContextCache); + } + + void reset() { + super.reset(); + overrideDecisionReached = false; + overrideDecisionRoot = null; + } + + ATN getATN() { + return atn; + } + + /** Begin parsing at startRuleIndex */ + ParserRuleContext parse(int startRuleIndex) { + RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex]; + + rootContext = createInterpreterRuleContext( + null, ATNState.INVALID_STATE_NUMBER, startRuleIndex); + if (startRuleStartState.isLeftRecursiveRule) { + enterRecursionRule( + rootContext, startRuleStartState.stateNumber, startRuleIndex, 0); + } else { + enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex); + } + + while (true) { + ATNState p = atnState; + switch (p.stateType) { + case StateType.RULE_STOP: + // pop; return from rule + if (context.isEmpty) { + if (startRuleStartState.isLeftRecursiveRule) { + ParserRuleContext result = context; + Pair parentContext = + _parentContextStack.removeLast(); + unrollRecursionContexts(parentContext.a); + return result; + } else { + exitRule(); + return rootContext; + } + } + + visitRuleStopState(p); + break; + + default: + try { + visitState(p); + } on RecognitionException catch (e) { + state = atn.ruleToStopState[p.ruleIndex].stateNumber; + context.exception = e; + errorHandler.reportError(this, e); + recover(e); + } + + break; + } + } + } + + void enterRecursionRule( + ParserRuleContext localctx, int state, int ruleIndex, int precedence) { + Pair pair = + new Pair(context, localctx.invokingState); + _parentContextStack.add(pair); + super.enterRecursionRule(localctx, state, ruleIndex, precedence); + } + + ATNState get atnState { + return atn.states[state]; + } + + void visitState(ATNState p) { +// System.out.println("visitState "+p.stateNumber); + int predictedAlt = 1; + if (p is DecisionState) { + predictedAlt = visitDecisionState(p); + } + + Transition transition = p.transition(predictedAlt - 1); + switch (transition.type) { + case TransitionType.EPSILON: + if (p.stateType == StateType.STAR_LOOP_ENTRY && + (p as StarLoopEntryState).isPrecedenceDecision && + !(transition.target is LoopEndState)) { + // We are at the start of a left recursive rule's (...)* loop + // and we're not taking the exit branch of loop. 
+ InterpreterRuleContext localctx = createInterpreterRuleContext( + _parentContextStack.last.a, + _parentContextStack.last.b, + context.ruleIndex); + pushNewRecursionContext(localctx, + atn.ruleToStartState[p.ruleIndex].stateNumber, context.ruleIndex); + } + break; + + case TransitionType.ATOM: + match((transition as AtomTransition).atomLabel); + break; + + case TransitionType.RANGE: + case TransitionType.SET: + case TransitionType.NOT_SET: + if (!transition.matches( + inputStream.LA(1), Token.MIN_USER_TOKEN_TYPE, 65535)) { + recoverInline(); + } + matchWildcard(); + break; + + case TransitionType.WILDCARD: + matchWildcard(); + break; + + case TransitionType.RULE: + RuleStartState ruleStartState = transition.target; + int ruleIndex = ruleStartState.ruleIndex; + InterpreterRuleContext newctx = + createInterpreterRuleContext(context, p.stateNumber, ruleIndex); + if (ruleStartState.isLeftRecursiveRule) { + enterRecursionRule(newctx, ruleStartState.stateNumber, ruleIndex, + (transition as RuleTransition).precedence); + } else { + enterRule(newctx, transition.target.stateNumber, ruleIndex); + } + break; + + case TransitionType.PREDICATE: + PredicateTransition predicateTransition = transition; + if (!sempred(context, predicateTransition.ruleIndex, + predicateTransition.predIndex)) { + throw new FailedPredicateException(this); + } + + break; + + case TransitionType.ACTION: + ActionTransition actionTransition = transition; + action( + context, actionTransition.ruleIndex, actionTransition.actionIndex); + break; + + case TransitionType.PRECEDENCE: + if (!precpred(context, + (transition as PrecedencePredicateTransition).precedence)) { + throw new FailedPredicateException(this, + "precpred(context, ${(transition as PrecedencePredicateTransition).precedence})"); + } + break; + + default: + throw new UnsupportedError("Unrecognized ATN transition type."); + } + + state = transition.target.stateNumber; + } + + /** Method visitDecisionState() is called when the interpreter reaches + * a decision state (instance of DecisionState). It gives an opportunity + * for subclasses to track interesting things. + */ + int visitDecisionState(DecisionState p) { + int predictedAlt = 1; + if (p.numberOfTransitions > 1) { + errorHandler.sync(this); + int decision = p.decision; + if (decision == overrideDecision && + inputStream.index == overrideDecisionInputIndex && + !overrideDecisionReached) { + predictedAlt = overrideDecisionAlt; + overrideDecisionReached = true; + } else { + predictedAlt = + interpreter.adaptivePredict(inputStream, decision, context); + } + } + return predictedAlt; + } + + /** Provide simple "factory" for InterpreterRuleContext's. + * @since 4.5.1 + */ + InterpreterRuleContext createInterpreterRuleContext( + ParserRuleContext parent, int invokingStateNumber, int ruleIndex) { + return new InterpreterRuleContext(parent, invokingStateNumber, ruleIndex); + } + + void visitRuleStopState(ATNState p) { + RuleStartState ruleStartState = atn.ruleToStartState[p.ruleIndex]; + if (ruleStartState.isLeftRecursiveRule) { + Pair parentContext = + _parentContextStack.removeLast(); + unrollRecursionContexts(parentContext.a); + state = parentContext.b; + } else { + exitRule(); + } + + RuleTransition ruleTransition = atn.states[state].transition(0); + state = ruleTransition.followState.stateNumber; + } + + /** Override this parser interpreters normal decision-making process + * at a particular decision and input token index. 
Instead of
+   * allowing the adaptive prediction mechanism to choose the
+   * first alternative within a block that leads to a successful parse,
+   * force it to take the alternative, 1..n for n alternatives.
+   *
+   * As an implementation limitation right now, you can only specify one
+   * override. This is sufficient to allow construction of different
+   * parse trees for ambiguous input. It means re-parsing the entire input
+   * in general because you're never sure where an ambiguous sequence would
+   * live in the various parse trees. For example, in one interpretation,
+   * an ambiguous input sequence would be matched completely in expression
+   * but in another it could match all the way back to the root.
+   *
+   * s : e '!'? ;
+   * e : ID
+   *   | ID '!'
+   *   ;
+   *
+   * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
+   * case, the ambiguous sequence is fully contained only by the root.
+   * In the second case, the ambiguous sequence is fully contained within
+   * just e, as in: (e ID !).
+   *
+   * Rather than trying to optimize this and make
+   * some intelligent decisions for optimization purposes, I settled on
+   * just re-parsing the whole input and then using
+   * {@link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
+   * subtree that contains the ambiguous sequence. I originally tried to
+   * record the call stack at the point the parser detected an ambiguity but
+   * left recursive rules create a parse tree stack that does not reflect
+   * the actual call stack. That impedance mismatch was enough to make
+   * it challenging to restart the parser at a deeply nested rule
+   * invocation.
+   *
+   * Only parser interpreters can override decisions so as to avoid inserting
+   * override checking code in the critical ALL(*) prediction execution path.
+   *
+   * @since 4.5.1
+   */
+  void addDecisionOverride(int decision, int tokenIndex, int forcedAlt) {
+    overrideDecision = decision;
+    overrideDecisionInputIndex = tokenIndex;
+    overrideDecisionAlt = forcedAlt;
+  }
+
+  /** Rely on the error handler for this parser but, if no tokens are consumed
+   * to recover, add an error node. Otherwise, nothing is seen in the parse
+   * tree.
+ */ + void recover(RecognitionException e) { + int i = inputStream.index; + errorHandler.recover(this, e); + if (inputStream.index == i) { + // no input consumed, better add an error node + if (e is InputMismatchException) { + InputMismatchException ime = e; + Token tok = e.offendingToken; + int expectedTokenType = Token.INVALID_TYPE; + if (!ime.expectedTokens.isNil) { + expectedTokenType = ime.expectedTokens.minElement; // get any element + } + Token errToken = tokenFactory.create( + expectedTokenType, + tok.text, + new Pair(tok.tokenSource, tok.tokenSource.inputStream), + Token.DEFAULT_CHANNEL, + -1, + -1, + // invalid start/stop + tok.line, + tok.charPositionInLine); + context.addErrorNode(createErrorNode(context, errToken)); + } else { + // NoViableAlt + Token tok = e.offendingToken; + Token errToken = tokenFactory.create( + Token.INVALID_TYPE, + tok.text, + new Pair(tok.tokenSource, tok.tokenSource.inputStream), + Token.DEFAULT_CHANNEL, + -1, + -1, + // invalid start/stop + tok.line, + tok.charPositionInLine); + context.addErrorNode(createErrorNode(context, errToken)); + } + } + } + + Token recoverInline() { + return errorHandler.recoverInline(this); + } +} diff --git a/runtime/Dart/lib/src/parser_rule_context.dart b/runtime/Dart/lib/src/parser_rule_context.dart new file mode 100644 index 000000000..e6743bd1e --- /dev/null +++ b/runtime/Dart/lib/src/parser_rule_context.dart @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'error/error.dart'; +import 'interval_set.dart'; +import 'parser.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'tree/tree.dart'; + +/** A rule invocation record for parsing. + * + * Contains all of the information about the current rule not stored in the + * RuleContext. It handles parse tree children list, Any ATN state + * tracing, and the default values available for rule invocations: + * start, stop, rule index, current alt number. + * + * Subclasses made for each rule and grammar track the parameters, + * return values, locals, and labels specific to that rule. These + * are the objects that are returned from rules. + * + * Note text is not an actual field of a rule return value; it is computed + * from start and stop using the input stream's toString() method. I + * could add a ctor to this so that we can pass in and store the input + * stream, but I'm not sure we want to do that. It would seem to be undefined + * to get the .text property anyway if the rule matches tokens from multiple + * input streams. + * + * I do not use getters for fields of objects that are used simply to + * group values such as this aggregate. The getters/setters are there to + * satisfy the superclass interface. + */ +class ParserRuleContext extends RuleContext { + /** + * If we are debugging or building a parse tree for a visitor, + * we need to track all of the tokens and rule invocations associated + * with this rule's context. This is empty for parsing w/o tree constr. + * operation because we don't the need to track the details about + * how we parse this rule. + */ + List children; + + /** + * Get the initial/final token in this context. + * Note that the range from start to stop is inclusive, so for rules that do not consume anything + * (for example, zero length or error productions) this token may exceed stop. 
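+   * For a zero-length match this means start.tokenIndex ==
+   * stop.tokenIndex + 1; [sourceInterval] below reports an empty
+   * interval in that case.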
+ */ + Token start, stop; + + /// The exception that forced this rule to return. If the rule successfully + /// completed, this is null. + RecognitionException exception = null; + + ParserRuleContext([RuleContext parent, int invokingStateNumber]) + : super(parent: parent, invokingState: invokingStateNumber); + + /** COPY a ctx (I'm deliberately not using copy constructor) to avoid + * confusion with creating node with parent. Does not copy children + * (except error leaves). + * + * This is used in the generated parser code to flip a generic XContext + * node for rule X to a YContext for alt label Y. In that sense, it is + * not really a generic copy function. + * + * If we do an error sync() at start of a rule, we might add error nodes + * to the generic XContext so this function must copy those nodes to + * the YContext as well else they are lost! + */ + void copyFrom(ParserRuleContext ctx) { + this.parent = ctx.parent; + this.invokingState = ctx.invokingState; + + this.start = ctx.start; + this.stop = ctx.stop; + + // copy any error nodes to alt label node + if (ctx.children != null) { + this.children = []; + // reset parent pointer for any error nodes + for (ParseTree child in ctx.children) { + if (child is ErrorNode) { + addChild(child); + } + } + } + } + + // Double dispatch methods for listeners + + void enterRule(ParseTreeListener listener) {} + + void exitRule(ParseTreeListener listener) {} + + /** Add a parse tree node to this as a child. Works for + * internal and leaf nodes. Does not set parent link; + * other add methods must do that. Other addChild methods + * call this. + * + * We cannot set the parent pointer of the incoming node + * because the existing interfaces do not have a setParent() + * method and I don't want to break backward compatibility for this. + * + * @since 4.7 + */ + T addAnyChild(T t) { + if (children == null) children = []; + children.add(t); + return t; + } + + /** Add a token leaf node child and force its parent to be this node. */ + TerminalNode addChild(TerminalNode t) { + t.parent = this; + return addAnyChild(t); + } + + /** Add an error node child and force its parent to be this node. + * + * @since 4.7 + */ + ErrorNode addErrorNode(ErrorNode errorNode) { + errorNode.parent=this; + return addAnyChild(errorNode); + } + + /** Used by enterOuterAlt to toss out a RuleContext previously added as + * we entered a rule. If we have # label, we will need to remove + * generic ruleContext object. + */ + void removeLastChild() { + if (children != null) { + children.removeLast(); + } + } + + // Override to make type more specific + ParserRuleContext get parent { + return super.parent; + } + + @override + ParseTree getChild(int i) { + if (children == null || i < 0 || i >= children.length) { + return null; + } + + if (T == null) { + return children[i]; + } + int j = -1; // what element have we found with ctxType? + for (ParseTree o in children) { + if (o is T) { + j++; + if (j == i) { + return o; + } + } + } + return null; + } + + TerminalNode getToken(int ttype, int i) { + if (children == null || i < 0 || i >= children.length) { + return null; + } + + int j = -1; // what token with ttype have we found? 
+ for (ParseTree o in children) { + if (o is TerminalNode) { + TerminalNode tnode = o; + Token symbol = tnode.symbol; + if (symbol.type == ttype) { + j++; + if (j == i) { + return tnode; + } + } + } + } + + return null; + } + + List getTokens(int ttype) { + if (children == null) { + return []; + } + + List tokens = null; + for (ParseTree o in children) { + if (o is TerminalNode) { + TerminalNode tnode = o; + Token symbol = tnode.symbol; + if (symbol.type == ttype) { + if (tokens == null) { + tokens = []; + } + tokens.add(tnode); + } + } + } + + if (tokens == null) { + return []; + } + + return tokens; + } + + T getRuleContext(int i) { + return getChild(i); + } + + List getRuleContexts() { + if (children == null) { + return []; + } + + List contexts = null; + for (ParseTree o in children) { + if (o is T) { + if (contexts == null) { + contexts = []; + } + + contexts.add(o); + } + } + + if (contexts == null) { + return []; + } + + return contexts; + } + + int get childCount => children?.length ?? 0; + + Interval get sourceInterval { + if (start == null) { + return Interval.INVALID; + } + if (stop == null || stop.tokenIndex < start.tokenIndex) { + return Interval(start.tokenIndex, start.tokenIndex - 1); // empty + } + return Interval(start.tokenIndex, stop.tokenIndex); + } + + /** Used for rule context info debugging during parse-time, not so much for ATN debugging */ + String toInfoString(Parser recognizer) { + List rules = recognizer.getRuleInvocationStack(this); + + return "ParserRuleContext${rules.reversed}{start=$start, stop=$stop}'"; + } + + static final EMPTY = ParserRuleContext(); +} + +/** + * This class extends [ParserRuleContext] by allowing the value of + * {@link #getRuleIndex} to be explicitly set for the context. + * + *

+ * [ParserRuleContext] does not include field storage for the rule index
+ * since the context classes created by the code generator override the
+ * {@link #getRuleIndex} method to return the correct value for that context.
+ * Since the parser interpreter does not use the context classes generated for a
+ * parser, this class (with slightly more memory overhead per node) is used to
+ * provide equivalent functionality.
+ */ +class InterpreterRuleContext extends ParserRuleContext { + int ruleIndex = -1; + + /** + * Constructs a new [InterpreterRuleContext] with the specified + * parent, invoking state, and rule index. + * + * @param parent The parent context. + * @param invokingStateNumber The invoking state number. + * @param ruleIndex The rule index for the current context. + */ + InterpreterRuleContext( + ParserRuleContext parent, int invokingStateNumber, this.ruleIndex) + : super(parent, invokingStateNumber); +} diff --git a/runtime/Dart/lib/src/prediction_context.dart b/runtime/Dart/lib/src/prediction_context.dart new file mode 100644 index 000000000..c2bc71c5a --- /dev/null +++ b/runtime/Dart/lib/src/prediction_context.dart @@ -0,0 +1,874 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'package:collection/collection.dart'; + +import 'atn/atn.dart'; +import 'misc/pair.dart'; +import 'recognizer.dart'; +import 'rule_context.dart'; +import 'util/murmur_hash.dart'; + +abstract class PredictionContext { + /** + * Represents {@code $} in local context prediction, which means wildcard. + * {@code *+x = *}. + */ + static final EmptyPredictionContext EMPTY = new EmptyPredictionContext(); + + /** + * Represents {@code $} in an array in full context mode, when {@code $} + * doesn't mean wildcard: {@code $ + x = [$,x]}. Here, + * {@code $} = {@link #EMPTY_RETURN_STATE}. + */ + static final int EMPTY_RETURN_STATE = 0x7FFFFFFF; + + static final int INITIAL_HASH = 1; + + static int globalNodeCount = 0; + int id = globalNodeCount++; + + /** + * Stores the computed hash code of this [PredictionContext]. The hash + * code is computed in parts to match the following reference algorithm. + * + *
+   *   int referenceHashCode() {
+   *      int hash = {@link MurmurHash#initialize MurmurHash.initialize}({@link #INITIAL_HASH});
+   *
+   *      for (int i = 0; i < {@link #size()}; i++) {
+   *          hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getParent getParent}(i));
+   *      }
+   *
+   *      for (int i = 0; i < {@link #size()}; i++) {
+   *          hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getReturnState getReturnState}(i));
+   *      }
+   *
+   *      hash = {@link MurmurHash#finish MurmurHash.finish}(hash, 2 * {@link #size()});
+   *      return hash;
+   *  }
+   * 
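+   *
+   * (The static calculateHashCode helpers below implement this computation
+   * with the [MurmurHash] utilities.)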
+ */ + final int cachedHashCode; + + PredictionContext(this.cachedHashCode); + + /** Convert a [RuleContext] tree to a [PredictionContext] graph. + * Return {@link #EMPTY} if [outerContext] is empty or null. + */ + static PredictionContext fromRuleContext(ATN atn, RuleContext outerContext) { + if (outerContext == null) outerContext = RuleContext.EMPTY; + + // if we are in RuleContext of start rule, s, then PredictionContext + // is EMPTY. Nobody called us. (if we are empty, return empty) + if (outerContext.parent == null || outerContext == RuleContext.EMPTY) { + return PredictionContext.EMPTY; + } + + // If we have a parent, convert it to a PredictionContext graph + PredictionContext parent = EMPTY; + parent = PredictionContext.fromRuleContext(atn, outerContext.parent); + + ATNState state = atn.states[outerContext.invokingState]; + RuleTransition transition = state.transition(0); + return SingletonPredictionContext.create( + parent, transition.followState.stateNumber); + } + + int get length; + + PredictionContext getParent(int index); + + int getReturnState(int index); + + /** This means only the {@link #EMPTY} (wildcard? not sure) context is in set. */ + bool get isEmpty { + return this == EMPTY; + } + + bool hasEmptyPath() { + // since EMPTY_RETURN_STATE can only appear in the last position, we check last one + return getReturnState(length - 1) == EMPTY_RETURN_STATE; + } + + int get hashCode { + return cachedHashCode; + } + + bool operator ==(Object obj); + + static int calculateEmptyHashCode() { + int hash = MurmurHash.initialize(INITIAL_HASH); + hash = MurmurHash.finish(hash, 0); + return hash; + } + + static int calculateHashCode( + List parents, List returnStates) { + int hash = MurmurHash.initialize(INITIAL_HASH); + + for (PredictionContext parent in parents) { + hash = MurmurHash.update(hash, parent); + } + + for (int returnState in returnStates) { + hash = MurmurHash.update(hash, returnState); + } + + hash = MurmurHash.finish(hash, 2 * parents.length); + return hash; + } + + // dispatch + static PredictionContext merge( + PredictionContext a, + PredictionContext b, + bool rootIsWildcard, + Map, PredictionContext> + mergeCache) { + assert(a != null && b != null); // must be empty context, never null + + // share same graph if both same + if (a == b || a == b) return a; + + if (a is SingletonPredictionContext && b is SingletonPredictionContext) { + return mergeSingletons(a, b, rootIsWildcard, mergeCache); + } + + // At least one of a or b is array + // If one is $ and rootIsWildcard, return $ as * wildcard + if (rootIsWildcard) { + if (a is EmptyPredictionContext) return a; + if (b is EmptyPredictionContext) return b; + } + + // convert singleton so both are arrays to normalize + if (a is SingletonPredictionContext) { + a = new ArrayPredictionContext.of(a); + } + if (b is SingletonPredictionContext) { + b = new ArrayPredictionContext.of(b); + } + return mergeArrays(a, b, rootIsWildcard, mergeCache); + } + + /** + * Merge two [SingletonPredictionContext] instances. + * + *

+   * Stack tops equal, parents merge is same; return left graph.
+   *
+   * Same stack top, parents differ; merge parents giving array node, then
+   * remainders of those graphs. A new root node is created to point to the
+   * merged parents.
+   *
+   * Different stack tops pointing to same parent. Make array node for the
+   * root where both elements in the root point to the same (original)
+   * parent.
+   *
+   * Different stack tops pointing to different parents. Make array node for
+   * the root where each element points to the corresponding original
+   * parent.
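+   *
+   * A sketch of the "different tops, same parent" case (using EMPTY as the
+   * shared parent is only for illustration):
+   *
+   *   final a = SingletonPredictionContext.create(PredictionContext.EMPTY, 7);
+   *   final b = SingletonPredictionContext.create(PredictionContext.EMPTY, 9);
+   *   // yields an array node [7, 9] whose elements share that parent
+   *   final merged = PredictionContext.mergeSingletons(a, b, true, null);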
+ * + * @param a the first [SingletonPredictionContext] + * @param b the second [SingletonPredictionContext] + * @param rootIsWildcard [true] if this is a local-context merge, + * otherwise false to indicate a full-context merge + * @param mergeCache + */ + static PredictionContext mergeSingletons( + SingletonPredictionContext a, + SingletonPredictionContext b, + bool rootIsWildcard, + Map, PredictionContext> + mergeCache) { + if (mergeCache != null) { + PredictionContext previous = mergeCache[Pair(a, b)]; + if (previous != null) return previous; + previous = mergeCache[Pair(b, a)]; + if (previous != null) return previous; + } + + PredictionContext rootMerge = mergeRoot(a, b, rootIsWildcard); + if (rootMerge != null) { + if (mergeCache != null) mergeCache[Pair(a, b)] = rootMerge; + return rootMerge; + } + + if (a.returnState == b.returnState) { + // a == b + PredictionContext parent = + merge(a.parent, b.parent, rootIsWildcard, mergeCache); + // if parent is same as existing a or b parent or reduced to a parent, return it + if (parent == a.parent) return a; // ax + bx = ax, if a=b + if (parent == b.parent) return b; // ax + bx = bx, if a=b + // else: ax + ay = a'[x,y] + // merge parents x and y, giving array node with x,y then remainders + // of those graphs. dup a, a' points at merged array + // new joined parent so create new singleton pointing to it, a' + PredictionContext a_ = + SingletonPredictionContext.create(parent, a.returnState); + if (mergeCache != null) mergeCache[Pair(a, b)] = a_; + return a_; + } else { + // a != b payloads differ + // see if we can collapse parents due to $+x parents if local ctx + PredictionContext singleParent = null; + if (a == b || (a.parent != null && a.parent == b.parent)) { + // ax + bx = [a,b]x + singleParent = a.parent; + } + if (singleParent != null) { + // parents are same + // sort payloads and use same parent + List payloads = [a.returnState, b.returnState]; + if (a.returnState > b.returnState) { + payloads[0] = b.returnState; + payloads[1] = a.returnState; + } + List parents = [singleParent, singleParent]; + PredictionContext a_ = new ArrayPredictionContext(parents, payloads); + if (mergeCache != null) mergeCache[Pair(a, b)] = a_; + return a_; + } + // parents differ and can't merge them. Just pack together + // into array; can't merge. + // ax + by = [ax,by] + List payloads = [a.returnState, b.returnState]; + List parents = [a.parent, b.parent]; + if (a.returnState > b.returnState) { + // sort by payload + payloads[0] = b.returnState; + payloads[1] = a.returnState; + parents = [b.parent, a.parent]; + } + PredictionContext a_ = new ArrayPredictionContext(parents, payloads); + if (mergeCache != null) mergeCache[Pair(a, b)] = a_; + return a_; + } + } + + /** + * Handle case where at least one of [a] or [b] is + * {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used + * to represent {@link #EMPTY}. + * + *

+   * Local-Context Merges
+   *
+   * These local-context merge operations are used when [rootIsWildcard]
+   * is true.
+   *
+   * {@link #EMPTY} is superset of any graph; return {@link #EMPTY}.
+   *
+   * {@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
+   * {@code #EMPTY}; return left graph.
+   *
+   * Special case of last merge if local context.
+   *
+   * Full-Context Merges
+   *
+   * These full-context merge operations are used when [rootIsWildcard]
+   * is false.
+   *
+   * Must keep all contexts; {@link #EMPTY} in array is a special value (and
+   * null parent).
+ * + * @param a the first [SingletonPredictionContext] + * @param b the second [SingletonPredictionContext] + * @param rootIsWildcard [true] if this is a local-context merge, + * otherwise false to indicate a full-context merge + */ + static PredictionContext mergeRoot(SingletonPredictionContext a, + SingletonPredictionContext b, bool rootIsWildcard) { + if (rootIsWildcard) { + if (a == EMPTY) return EMPTY; // * + b = * + if (b == EMPTY) return EMPTY; // a + * = * + } else { + if (a == EMPTY && b == EMPTY) return EMPTY; // $ + $ = $ + if (a == EMPTY) { + // $ + x = [x,$] + List payloads = [b.returnState, EMPTY_RETURN_STATE]; + List parents = [b.parent, null]; + PredictionContext joined = + new ArrayPredictionContext(parents, payloads); + return joined; + } + if (b == EMPTY) { + // x + $ = [x,$] ($ is always last if present) + List payloads = [a.returnState, EMPTY_RETURN_STATE]; + final parents = [a.parent, null]; + PredictionContext joined = + new ArrayPredictionContext(parents, payloads); + return joined; + } + } + return null; + } + + /** + * Merge two [ArrayPredictionContext] instances. + * + *

+   * Different tops, different parents.
+   *
+   * Shared top, same parents.
+   *
+   * Shared top, different parents.
+   *
+   * Shared top, all shared parents.
+   *
+   * Equal tops, merge parents and reduce top to
+   * [SingletonPredictionContext].
+ */ + static PredictionContext mergeArrays( + ArrayPredictionContext a, + ArrayPredictionContext b, + bool rootIsWildcard, + Map, PredictionContext> + mergeCache) { + if (mergeCache != null) { + PredictionContext previous = mergeCache[Pair(a, b)]; + if (previous != null) return previous; + previous = mergeCache[Pair(b, a)]; + if (previous != null) return previous; + } + + // merge sorted payloads a + b => M + int i = 0; // walks a + int j = 0; // walks b + int k = 0; // walks target M array + + List mergedReturnStates = List( + a.returnStates.length + b.returnStates.length); // TODO Will it grow? + var mergedParents = List( + a.returnStates.length + b.returnStates.length); // TODO Will it grow? + // walk and merge to yield mergedParents, mergedReturnStates + while (i < a.returnStates.length && j < b.returnStates.length) { + PredictionContext a_parent = a.parents[i]; + PredictionContext b_parent = b.parents[j]; + if (a.returnStates[i] == b.returnStates[j]) { + // same payload (stack tops are equal), must yield merged singleton + int payload = a.returnStates[i]; + // $+$ = $ + bool both$ = payload == EMPTY_RETURN_STATE && + a_parent == null && + b_parent == null; + bool ax_ax = (a_parent != null && b_parent != null) && + a_parent == b_parent; // ax+ax -> ax + if (both$ || ax_ax) { + mergedParents[k] = a_parent; // choose left + mergedReturnStates[k] = payload; + } else { + // ax+ay -> a'[x,y] + PredictionContext mergedParent = + merge(a_parent, b_parent, rootIsWildcard, mergeCache); + mergedParents[k] = mergedParent; + mergedReturnStates[k] = payload; + } + i++; // hop over left one as usual + j++; // but also skip one in right side since we merge + } else if (a.returnStates[i] < b.returnStates[j]) { + // copy a[i] to M + mergedParents[k] = a_parent; + mergedReturnStates[k] = a.returnStates[i]; + i++; + } else { + // b > a, copy b[j] to M + mergedParents[k] = b_parent; + mergedReturnStates[k] = b.returnStates[j]; + j++; + } + k++; + } + + // copy over any payloads remaining in either array + if (i < a.returnStates.length) { + for (int p = i; p < a.returnStates.length; p++) { + mergedParents[k] = a.parents[p]; + mergedReturnStates[k] = a.returnStates[p]; + k++; + } + } else { + for (int p = j; p < b.returnStates.length; p++) { + mergedParents[k] = b.parents[p]; + mergedReturnStates[k] = b.returnStates[p]; + k++; + } + } + + // trim merged if we combined a few that had same stack tops + if (k < mergedParents.length) { + // write index < last position; trim + if (k == 1) { + // for just one merged element, return singleton top + PredictionContext a_ = SingletonPredictionContext.create( + mergedParents[0], mergedReturnStates[0]); + if (mergeCache != null) mergeCache[Pair(a, b)] = a_; + return a_; + } + mergedParents = List(k)..setRange(0, k, mergedParents); + mergedReturnStates = List(k)..setRange(0, k, mergedReturnStates); + } + + PredictionContext M = + new ArrayPredictionContext(mergedParents, mergedReturnStates); + + // if we created same array as a or b, return that instead + // TODO: track whether this is possible above during merge sort for speed + if (M == a) { + if (mergeCache != null) mergeCache[Pair(a, b)] = a; + return a; + } + if (M == b) { + if (mergeCache != null) mergeCache[Pair(a, b)] = b; + return b; + } + + combineCommonParents(mergedParents); + + if (mergeCache != null) mergeCache[Pair(a, b)] = M; + return M; + } + + /** + * Make pass over all M [parents]; merge any {@code equals()} + * ones. 
+ */ + static void combineCommonParents(List parents) { + Map uniqueParents = + new Map(); + + for (int p = 0; p < parents.length; p++) { + PredictionContext parent = parents[p]; + if (!uniqueParents.containsKey(parent)) { + // don't replace + uniqueParents[parent] = parent; + } + } + + for (int p = 0; p < parents.length; p++) { + parents[p] = uniqueParents[parents[p]]; + } + } + + static String toDOTString(PredictionContext context) { + if (context == null) return ""; + StringBuffer buf = new StringBuffer(); + buf.write("digraph G {\n"); + buf.write("rankdir=LR;\n"); + + List nodes = getAllContextNodes(context); + nodes.sort((PredictionContext o1, PredictionContext o2) { + return o1.id - o2.id; + }); + + for (PredictionContext current in nodes) { + if (current is SingletonPredictionContext) { + String s = current.id.toString(); + buf.write(" s"); + buf.write(s); + String returnState = current.getReturnState(0).toString(); + if (current is EmptyPredictionContext) returnState = r"$"; + buf.write(" [label=\""); + buf.write(returnState); + buf.write("\"];\n"); + continue; + } + ArrayPredictionContext arr = current; + buf.write(" s"); + buf.write(arr.id); + buf.write(" [shape=box, label=\""); + buf.write("["); + bool first = true; + for (int inv in arr.returnStates) { + if (!first) buf.write(", "); + if (inv == EMPTY_RETURN_STATE) + buf.write(r"$"); + else + buf.write(inv); + first = false; + } + buf.write("]"); + buf.write("\"];\n"); + } + + for (PredictionContext current in nodes) { + if (current == EMPTY) continue; + for (int i = 0; i < current.length; i++) { + if (current.getParent(i) == null) continue; + String s = current.id.toString(); + buf.write(" s"); + buf.write(s); + buf.write("->"); + buf.write("s"); + buf.write(current.getParent(i).id); + if (current.length > 1) + buf.write(" [label=\"parent[$i]\"];\n"); + else + buf.write(";\n"); + } + } + + buf.write("}\n"); + return buf.toString(); + } + + // From Sam + static PredictionContext getCachedContext( + PredictionContext context, + PredictionContextCache contextCache, + Map visited) { + if (context.isEmpty) { + return context; + } + + PredictionContext existing = visited[context]; + if (existing != null) { + return existing; + } + + existing = contextCache[context]; + if (existing != null) { + visited[context] = existing; + return existing; + } + + bool changed = false; + var parents = List(context.length); + for (int i = 0; i < parents.length; i++) { + PredictionContext parent = + getCachedContext(context.getParent(i), contextCache, visited); + if (changed || parent != context.getParent(i)) { + if (!changed) { + parents = List(context.length); + for (int j = 0; j < context.length; j++) { + parents[j] = context.getParent(j); + } + + changed = true; + } + + parents[i] = parent; + } + } + + if (!changed) { + contextCache.add(context); + visited[context] = context; + return context; + } + + PredictionContext updated; + if (parents.length == 0) { + updated = EMPTY; + } else if (parents.length == 1) { + updated = SingletonPredictionContext.create( + parents[0], context.getReturnState(0)); + } else { + ArrayPredictionContext arrayPredictionContext = context; + updated = new ArrayPredictionContext( + parents, arrayPredictionContext.returnStates); + } + + contextCache.add(updated); + visited[updated] = updated; + visited[context] = updated; + + return updated; + } + +// // extra structures, but cut/paste/morphed works, so leave it. 
+// // seems to do a breadth-first walk +// static List getAllNodes(PredictionContext context) { +// Map visited = +// new IdentityHashMap(); +// Deque workList = new ArrayDeque(); +// workList.add(context); +// visited.put(context, context); +// List nodes = new ArrayList(); +// while (!workList.isEmpty) { +// PredictionContext current = workList.pop(); +// nodes.add(current); +// for (int i = 0; i < current.length; i++) { +// PredictionContext parent = current.getParent(i); +// if ( parent!=null && visited.put(parent, parent) == null) { +// workList.push(parent); +// } +// } +// } +// return nodes; +// } + + // ter's recursive version of Sam's getAllNodes() + static List getAllContextNodes(PredictionContext context) { + List nodes = List(); + Map visited = + Map(); + getAllContextNodes_(context, nodes, visited); + return nodes; + } + + static void getAllContextNodes_( + PredictionContext context, + List nodes, + Map visited) { + if (context == null || visited.containsKey(context)) return; + visited[context] = context; + nodes.add(context); + for (int i = 0; i < context.length; i++) { + getAllContextNodes_(context.getParent(i), nodes, visited); + } + } + + // FROM SAM + List toStrings( + Recognizer recognizer, PredictionContext stop, int currentState) { + List result = []; + + outer: + for (int perm = 0;; perm++) { + int offset = 0; + bool last = true; + PredictionContext p = this; + int stateNumber = currentState; + StringBuffer localBuffer = new StringBuffer(); + localBuffer.write("["); + while (!p.isEmpty && p != stop) { + int index = 0; + if (p.length > 0) { + int bits = 1; + while ((1 << bits) < p.length) { + bits++; + } + + int mask = (1 << bits) - 1; + index = (perm >> offset) & mask; + last &= index >= p.length - 1; + if (index >= p.length) { + continue outer; + } + offset += bits; + } + + if (recognizer != null) { + if (localBuffer.length > 1) { + // first char is '[', if more than that this isn't the first rule + localBuffer.write(' '); + } + + ATN atn = recognizer.getATN(); + ATNState s = atn.states[stateNumber]; + String ruleName = recognizer.ruleNames[s.ruleIndex]; + localBuffer.write(ruleName); + } else if (p.getReturnState(index) != EMPTY_RETURN_STATE) { + if (!p.isEmpty) { + if (localBuffer.length > 1) { + // first char is '[', if more than that this isn't the first rule + localBuffer.write(' '); + } + + localBuffer.write(p.getReturnState(index)); + } + } + stateNumber = p.getReturnState(index); + p = p.getParent(index); + } + localBuffer.write("]"); + result.add(localBuffer.toString()); + + if (last) { + break; + } + } + + return result; + } +} + +class SingletonPredictionContext extends PredictionContext { + final PredictionContext parent; + final int returnState; + + SingletonPredictionContext(PredictionContext this.parent, this.returnState) + : super(parent != null + ? 
PredictionContext.calculateHashCode([parent], [returnState]) + : PredictionContext.calculateEmptyHashCode()) { + assert(this.returnState != ATNState.INVALID_STATE_NUMBER); + } + + static SingletonPredictionContext create( + PredictionContext parent, int returnState) { + if (returnState == PredictionContext.EMPTY_RETURN_STATE && parent == null) { + // someone can pass in the bits of an array ctx that mean $ + return PredictionContext.EMPTY; + } + return new SingletonPredictionContext(parent, returnState); + } + + int get length { + return 1; + } + + PredictionContext getParent(int index) { + assert(index == 0); + return parent; + } + + int getReturnState(int index) { + assert(index == 0); + return returnState; + } + + bool operator ==(Object o) { + if (identical(this, o)) { + return true; + } else if (o is SingletonPredictionContext) { + if (this.hashCode != o.hashCode) { + return false; // can't be same if hash is different + } + + SingletonPredictionContext s = o; + return returnState == s.returnState && + (parent != null && parent == s.parent); + } + return false; + } + + String toString() { + String up = parent != null ? parent.toString() : ""; + if (up.length == 0) { + if (returnState == PredictionContext.EMPTY_RETURN_STATE) { + return r"$"; + } + return returnState.toString(); + } + return "$returnState $up"; + } +} + +class EmptyPredictionContext extends SingletonPredictionContext { + EmptyPredictionContext() : super(null, PredictionContext.EMPTY_RETURN_STATE); + + bool get isEmpty { + return true; + } + + int get length { + return 1; + } + + PredictionContext getParent(int index) { + return null; + } + + int getReturnState(int index) { + return returnState; + } + + String toString() { + return r"$"; + } +} + +class ArrayPredictionContext extends PredictionContext { + /** Parent can be null only if full ctx mode and we make an array + * from {@link #EMPTY} and non-empty. We merge {@link #EMPTY} by using null parent and + * returnState == {@link #EMPTY_RETURN_STATE}. + */ + List parents; + + /** Sorted for merge, no duplicates; if present, + * {@link #EMPTY_RETURN_STATE} is always last. 
+ */ + List returnStates; + + ArrayPredictionContext.of(SingletonPredictionContext a) + : this([a.parent], [a.returnState]); + + ArrayPredictionContext( + List parents, List returnStates) + : super(PredictionContext.calculateHashCode(parents, returnStates)) { + assert(parents != null && parents.length > 0); + assert(returnStates != null && returnStates.length > 0); +// System.err.println("CREATE ARRAY: "+Arrays.toString(parents)+", "+Arrays.toString(returnStates)); + this.parents = parents; + this.returnStates = returnStates; + } + + bool get isEmpty { + // since EMPTY_RETURN_STATE can only appear in the last position, we + // don't need to verify that size==1 + return returnStates[0] == PredictionContext.EMPTY_RETURN_STATE; + } + + int get length { + return returnStates.length; + } + + PredictionContext getParent(int index) { + return parents[index]; + } + + int getReturnState(int index) { + return returnStates[index]; + } + +// int findReturnState(int returnState) { +// return Arrays.binarySearch(returnStates, returnState); +// } + + bool operator ==(Object o) { + if (identical(this, o)) { + return true; + } else if (o is ArrayPredictionContext) { + if (this.hashCode != o.hashCode) { + return false; // can't be same if hash is different + } + + ArrayPredictionContext a = o; + return ListEquality().equals(returnStates, a.returnStates) && + ListEquality().equals(parents, a.parents); + } + return false; + } + + String toString() { + if (isEmpty) return "[]"; + StringBuffer buf = new StringBuffer(); + buf.write("["); + for (int i = 0; i < returnStates.length; i++) { + if (i > 0) buf.write(", "); + if (returnStates[i] == PredictionContext.EMPTY_RETURN_STATE) { + buf.write(r"$"); + continue; + } + buf.write(returnStates[i]); + if (parents[i] != null) { + buf.write(' '); + buf.write(parents[i].toString()); + } else { + buf.write("null"); + } + } + buf.write("]"); + return buf.toString(); + } +} diff --git a/runtime/Dart/lib/src/recognizer.dart b/runtime/Dart/lib/src/recognizer.dart new file mode 100644 index 000000000..ff575c4ca --- /dev/null +++ b/runtime/Dart/lib/src/recognizer.dart @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'vocabulary.dart'; +import 'atn/atn.dart'; +import 'error/error.dart'; +import 'input_stream.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'token_factory.dart'; +import 'util/utils.dart'; + +abstract class Recognizer { + static const EOF = -1; + + static final Map> tokenTypeMapCache = {}; + static final Map, Map> ruleIndexMapCache = {}; + List _listeners = [ConsoleErrorListener.INSTANCE]; + + /// The ATN interpreter used by the recognizer for prediction. + ATNInterpreter interpreter; + int _stateNumber = -1; + + List get ruleNames; + + /** + * Get the vocabulary used by the recognizer. + * + * @return A [Vocabulary] instance providing information about the + * vocabulary used by the grammar. + */ + Vocabulary get vocabulary; + + /** + * Get a map from token names to token types. + * + *
+ * Used for XPath and tree pattern compilation.
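+ * A minimal lookup sketch, for illustration only (the parser instance and
+ * the token name "ID" are assumed, not part of this API):
+   * int idType = parser.tokenTypeMap["ID"] ?? Token.INVALID_TYPE;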
+ */ + Map get tokenTypeMap { + Vocabulary _vocabulary = vocabulary; + + Map result = tokenTypeMapCache[_vocabulary]; + if (result == null) { + result = {}; + for (int i = 0; i <= getATN().maxTokenType; i++) { + String literalName = _vocabulary.getLiteralName(i); + if (literalName != null) { + result[literalName] = i; + } + + String symbolicName = _vocabulary.getSymbolicName(i); + if (symbolicName != null) { + result[symbolicName] = i; + } + } + + result["EOF"] = Token.EOF; + result = Map.unmodifiable(result); + tokenTypeMapCache[_vocabulary] = result; + } + + return result; + } + + /** + * Get a map from rule names to rule indexes. + * + *
+ * Used for XPath and tree pattern compilation.
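+ * A hypothetical counterpart for rules (parser and rule name assumed):
+   * int exprIndex = parser.ruleIndexMap["expr"] ?? -1;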
+ */ + Map get ruleIndexMap { + final _ruleNames = ruleNames; + if (_ruleNames == null) { + throw UnsupportedError( + "The current recognizer does not provide a list of rule names."); + } + + var result = ruleIndexMapCache[_ruleNames]; + if (result == null) { + result = Map.unmodifiable(toMap(_ruleNames)); + ruleIndexMapCache[_ruleNames] = result; + } + + return result; + } + + int getTokenType(String tokenName) { + final ttype = tokenTypeMap[tokenName]; + if (ttype != null) return ttype; + return Token.INVALID_TYPE; + } + + /** + * If this recognizer was generated, it will have a serialized ATN + * representation of the grammar. + * + *
+ * For interpreters, we don't know their serialized ATN despite having + * created the interpreter from it.
+ */ + String get serializedATN { + throw new UnsupportedError("there is no serialized ATN"); + } + + /** For debugging and other purposes, might want the grammar name. + * Have ANTLR generate an implementation for this method. + */ + String get grammarFileName; + + /** + * Get the [ATN] used by the recognizer for prediction. + * + * @return The [ATN] used by the recognizer for prediction. + */ + ATN getATN(); + + /** If profiling during the parse/lex, this will return DecisionInfo records + * for each decision in recognizer in a ParseInfo object. + * + * @since 4.3 + */ + ParseInfo get parseInfo { + return null; + } + + /** What is the error header, normally line/character position information? */ + String getErrorHeader(RecognitionException e) { + int line = e.offendingToken.line; + int charPositionInLine = e.offendingToken.charPositionInLine; + return "line $line:$charPositionInLine"; + } + + /** + * @exception NullPointerException if [listener] is null. + */ + void addErrorListener(ErrorListener listener) { + if (listener == null) { + throw new ArgumentError.notNull("listener"); + } + + _listeners.add(listener); + } + + void removeErrorListener(ErrorListener listener) { + _listeners.remove(listener); + } + + void removeErrorListeners() { + _listeners.clear(); + } + + List get errorListeners { + return _listeners; + } + + ErrorListener get errorListenerDispatch { + return new ProxyErrorListener(errorListeners); + } + + // subclass needs to override these if there are sempreds or actions + // that the ATN interp needs to execute + bool sempred(RuleContext _localctx, int ruleIndex, int actionIndex) { + return true; + } + + bool precpred(RuleContext localctx, int precedence) { + return true; + } + + void action(RuleContext _localctx, int ruleIndex, int actionIndex) {} + + int get state { + return _stateNumber; + } + + /** Indicate that the recognizer has changed internal state that is + * consistent with the ATN state passed in. This way we always know + * where we are in the ATN as the parser goes along. The rule + * context objects form a stack that lets us see the stack of + * invoking rules. Combine this and we have complete ATN + * configuration information. + */ + void set state(int atnState) { +// System.err.println("setState "+atnState); + _stateNumber = atnState; +// if ( traceATNStates ) _ctx.trace(atnState); + } + + IntStream get inputStream; + + void set inputStream(IntStream input); + + TokenFactory get tokenFactory; + + void set tokenFactory(TokenFactory input); +} diff --git a/runtime/Dart/lib/src/rule_context.dart b/runtime/Dart/lib/src/rule_context.dart new file mode 100644 index 000000000..36a3aeb68 --- /dev/null +++ b/runtime/Dart/lib/src/rule_context.dart @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'atn/atn.dart'; +import 'interval_set.dart'; +import 'parser.dart'; +import 'parser_rule_context.dart'; +import 'recognizer.dart'; +import 'tree/tree.dart'; + +/** A rule context is a record of a single rule invocation. + * + * We form a stack of these context objects using the parent + * pointer. A parent pointer of null indicates that the current + * context is the bottom of the stack. The ParserRuleContext subclass + * as a children list so that we can turn this data structure into a + * tree. + * + * The root node always has a null pointer and invokingState of -1. 
+ * + * Upon entry to parsing, the first invoked rule function creates a + * context object (a subclass specialized for that rule such as + * SContext) and makes it the root of a parse tree, recorded by field + * Parser._ctx. + * + * public final SContext s() throws RecognitionException { + * SContext _localctx = new SContext(_ctx, getState()); <-- create new node + * enterRule(_localctx, 0, RULE_s); <-- push it + * ... + * exitRule(); <-- pop back to _localctx + * return _localctx; + * } + * + * A subsequent rule invocation of r from the start rule s pushes a + * new context object for r whose parent points at s and use invoking + * state is the state with r emanating as edge label. + * + * The invokingState fields from a context object to the root + * together form a stack of rule indication states where the root + * (bottom of the stack) has a -1 sentinel value. If we invoke start + * symbol s then call r1, which calls r2, the would look like + * this: + * + * SContext[-1] <- root node (bottom of the stack) + * R1Context[p] <- p in rule s called r1 + * R2Context[q] <- q in rule r1 called r2 + * + * So the top of the stack, _ctx, represents a call to the current + * rule and it holds the return address from another rule that invoke + * to this rule. To invoke a rule, we must always have a current context. + * + * The parent contexts are useful for computing lookahead sets and + * getting error information. + * + * These objects are used during parsing and prediction. + * For the special case of parsers, we use the subclass + * ParserRuleContext. + * + * @see ParserRuleContext + */ +abstract class RuleContext extends RuleNode { + /// What context invoked this rule? + RuleContext parent = null; + + /// What state invoked the rule associated with this context? + /// The "return address" is the followState of invokingState + /// If parent is null, this should be -1. + int invokingState; + + RuleContext({this.parent, this.invokingState}) { + invokingState = invokingState ?? -1; + } + + int depth() { + var n = 0; + var p = this; + while (p != null) { + p = p.parent; + n++; + } + return n; + } + + /// A context is empty if there is no invoking state; meaning nobody call + /// current context. + bool get isEmpty => invokingState == -1; + + /// satisfy the ParseTree / SyntaxTree interface + Interval get sourceInterval => Interval.INVALID; + + RuleContext get ruleContext => this; + + RuleContext get payload => this; + + /** + * Return the combined text of all child nodes. This method only considers + * tokens which have been added to the parse tree. + *
+ * Since tokens on hidden channels (e.g. whitespace or comments) are not + * added to the parse trees, they will not appear in the output of this + * method. + */ + String get text { + if (childCount == 0) { + return ""; + } + + final builder = new StringBuffer(); + for (int i = 0; i < childCount; i++) { + builder.write(getChild(i).text); + } + + return builder.toString(); + } + + int get ruleIndex => -1; + + /// For rule associated with this parse tree internal node, return + /// the outer alternative number used to match the input. Default + /// implementation does not compute nor store this alt num. Create + /// a subclass of ParserRuleContext with backing field and set + /// option contextSuperClass. + /// to set it. + int get altNumber => ATN.INVALID_ALT_NUMBER; + + /// Set the outer alternative number for this context node. Default + /// implementation does nothing to avoid backing field overhead for + /// trees that don't need it. Create + /// a subclass of ParserRuleContext with backing field and set + /// option contextSuperClass. + set altNumber(int altNumber) {} + + ParseTree getChild(int i) { + return null; + } + + int get childCount => 0; + + T accept(ParseTreeVisitor visitor) { + return visitor.visitChildren(this); + } + + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// + String toStringTree({List ruleNames, Parser parser}) { + return Trees.toStringTree(this, ruleNames: ruleNames, recog: parser); + } + + String toString( + {List ruleNames, Recognizer recog, RuleContext stop}) { + ruleNames = ruleNames ?? recog?.ruleNames; + final buf = new StringBuffer(); + var p = this; + buf.write("["); + while (p != null && p != stop) { + if (ruleNames == null) { + if (!p.isEmpty) { + buf.write(p.invokingState); + } + } else { + int ruleIndex = p.ruleIndex; + String ruleName = ruleIndex >= 0 && ruleIndex < ruleNames.length + ? ruleNames[ruleIndex] + : ruleIndex.toString(); + buf.write(ruleName); + } + + if (p.parent != null && + (ruleNames != null || !p.parent.isEmpty)) { + buf.write(" "); + } + + p = p.parent; + } + + buf.write("]"); + return buf.toString(); + } + + static final EMPTY = new ParserRuleContext(); +} diff --git a/runtime/Dart/lib/src/runtime_meta_data.dart b/runtime/Dart/lib/src/runtime_meta_data.dart new file mode 100644 index 000000000..1d5f409be --- /dev/null +++ b/runtime/Dart/lib/src/runtime_meta_data.dart @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; +import 'dart:math' as math; + +import 'package:logging/logging.dart'; + +/** + * This class provides access to the current version of the ANTLR 4 runtime + * library as compile-time and runtime constants, along with methods for + * checking for matching version numbers and notifying listeners in the case + * where a version mismatch is detected. + * + *
+ * The runtime version information is provided by {@link #VERSION} and + * {@link #getRuntimeVersion()}. Detailed information about these values is + * provided in the documentation for each member.
+ * + *
+ * The runtime version check is implemented by {@link #checkVersion}. Detailed + * information about incorporating this call into user code, as well as its use + * in generated code, is provided in the documentation for the method.
+ * + *
+ * Version strings x.y and x.y.z are considered "compatible" and no error + * would be generated. Likewise, version strings x.y-SNAPSHOT and x.y.z are + * considered "compatible" because the major and minor components x.y + * are the same in each.
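+ * For example, {@link #getMajorMinorVersion} maps both "4.7.2" and
+ * "4.7-SNAPSHOT" to "4.7", so those two versions compare as compatible.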
+ * + *
+ * To trap any error messages issued by this code, listen for the SEVERE
+ * log records this runtime emits in your main() startup code.
+ * + * @since 4.3 + */ +class RuntimeMetaData { + /** + * A compile-time constant containing the current version of the ANTLR 4 + * runtime library. + * + *
+ * This compile-time constant value allows generated parsers and other + * libraries to include a literal reference to the version of the ANTLR 4 + * runtime library the code was compiled against. At each release, we + * change this value.
+ * + *
+ * Version numbers are assumed to have the form major.minor.patch.revision-suffix,
+ * with the individual components defined as follows.
+ * + *
+ *   • major is a required non-negative integer, and is equal to {@code 4} for ANTLR 4.
+ *   • minor is a required non-negative integer.
+ *   • patch is an optional non-negative integer. When patch is omitted, the {@code .} (dot) appearing before it is also omitted.
+ *   • revision is an optional non-negative integer, and may only be included when patch is also included. When revision is omitted, the {@code .} (dot) appearing before it is also omitted.
+ *   • suffix is an optional string. When suffix is omitted, the {@code -} (hyphen-minus) appearing before it is also omitted.
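+ * For example, "4.7.2" has major 4, minor 7, and patch 2, while
+ * "4.7-SNAPSHOT" omits patch and revision and carries the suffix "SNAPSHOT".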
+ */ + static final String VERSION = "4.7.2"; + + /** + * Gets the currently executing version of the ANTLR 4 runtime library. + * + *
+ * This method provides runtime access to the {@link #VERSION} field, as + * opposed to directly referencing the field as a compile-time constant.
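+ * For example, RuntimeMetaData.runtimeVersion simply returns {@link #VERSION},
+ * i.e. "4.7.2" for this release.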
+ * + * @return The currently executing version of the ANTLR 4 library + */ + + static String get runtimeVersion { + return VERSION; + } + + /** + * This method provides the ability to detect mismatches between the version + * of ANTLR 4 used to generate a parser, the version of the ANTLR runtime a + * parser was compiled against, and the version of the ANTLR runtime which + * is currently executing. + * + *
+ * The version check is designed to detect the following two specific + * scenarios.
+ * + *
+ *   • The ANTLR Tool version used for code generation does not match the currently executing runtime version.
+ *   • The ANTLR Runtime version referenced at the time a parser was compiled does not match the currently executing runtime version.
+ * + *
+ * Starting with ANTLR 4.3, the code generator emits a call to this method + * using two constants in each generated lexer and parser: a hard-coded + * constant indicating the version of the tool used to generate the parser + * and a reference to the compile-time constant {@link #VERSION}. At + * runtime, this method is called during the initialization of the generated + * parser to detect mismatched versions, and notify the registered listeners + * prior to creating instances of the parser.
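+ * A sketch of that emitted guard (the generating tool version string is
+ * assumed here for illustration):
+   * RuntimeMetaData.checkVersion("4.7.2", RuntimeMetaData.VERSION);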
+ * + *
+ * This method does not perform any detection or filtering of semantic + * changes between tool and runtime versions. It simply checks for a + * version match and emits an error to stderr if a difference + * is detected.
+ * + *
+ * Note that some breaking changes between releases could result in other + * types of runtime exceptions, such as a [LinkageError], prior to + * calling this method. In these cases, the underlying version mismatch will + * not be reported here. This method is primarily intended to + * notify users of potential semantic changes between releases that do not + * result in binary compatibility problems which would be detected by the + * class loader. As with semantic changes, changes that break binary + * compatibility between releases are mentioned in the release notes + * accompanying the affected release.
+ * + *
+ * Additional note for target developers: The version check + * implemented by this class is designed to address specific compatibility + * concerns that may arise during the execution of Java applications. Other + * targets should consider the implementation of this method in the context + * of that target's known execution environment, which may or may not + * resemble the design provided for the Java target.
+ * + * @param generatingToolVersion The version of the tool used to generate a parser. + * This value may be null when called from user code that was not generated + * by, and does not reference, the ANTLR 4 Tool itself. + * @param compileTimeVersion The version of the runtime the parser was + * compiled against. This should always be passed using a direct reference + * to {@link #VERSION}. + */ + static void checkVersion( + String generatingToolVersion, String compileTimeVersion) { + String runtimeVersion = VERSION; + bool runtimeConflictsWithGeneratingTool = false; + bool runtimeConflictsWithCompileTimeTool = false; + + if (generatingToolVersion != null) { + runtimeConflictsWithGeneratingTool = + !(runtimeVersion == generatingToolVersion) && + !(getMajorMinorVersion(runtimeVersion) == + getMajorMinorVersion(generatingToolVersion)); + } + + runtimeConflictsWithCompileTimeTool = + !(runtimeVersion == compileTimeVersion) && + !(getMajorMinorVersion(runtimeVersion) == + getMajorMinorVersion(compileTimeVersion)); + + if (runtimeConflictsWithGeneratingTool) { + log("ANTLR Tool version $generatingToolVersion used for code generation does not match the current runtime version $runtimeVersion", + level: Level.SEVERE.value); + } + if (runtimeConflictsWithCompileTimeTool) { + log("ANTLR Runtime version $compileTimeVersion used for parser compilation does not match the current runtime version $runtimeVersion", + level: Level.SEVERE.value); + } + } + + /** + * Gets the major and minor version numbers from a version string. For + * details about the syntax of the input [version]. + * E.g., from x.y.z return x.y. + * + * @param version The complete version string. + * @return A string of the form major.minor containing + * only the major and minor components of the version string. + */ + static String getMajorMinorVersion(String version) { + int firstDot = version.indexOf('.'); + int secondDot = firstDot >= 0 ? version.indexOf('.', firstDot + 1) : -1; + int firstDash = version.indexOf('-'); + int referenceLength = version.length; + if (secondDot >= 0) { + referenceLength = math.min(referenceLength, secondDot); + } + + if (firstDash >= 0) { + referenceLength = math.min(referenceLength, firstDash); + } + + return version.substring(0, referenceLength); + } +} diff --git a/runtime/Dart/lib/src/token.dart b/runtime/Dart/lib/src/token.dart new file mode 100644 index 000000000..4667896b0 --- /dev/null +++ b/runtime/Dart/lib/src/token.dart @@ -0,0 +1,479 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'misc/pair.dart'; +import 'recognizer.dart'; +import 'token_source.dart'; + +/** A token has properties: text, type, line, character position in the line + * (so we can ignore tabs), token channel, index, and source from which + * we obtained this token. + */ +abstract class Token { + static const int INVALID_TYPE = 0; + + /** During lookahead operations, this "token" signifies we hit rule end ATN state + * and did not follow it despite needing to. + */ + static const int EPSILON = -2; + + static const int MIN_USER_TOKEN_TYPE = 1; + + static const int EOF = IntStream.EOF; + + /** All tokens go to the parser (unless skip() is called in that rule) + * on a particular "channel". The parser tunes to a particular channel + * so that whitespace etc... can go to the parser on a "hidden" channel. 
+ */ + static const int DEFAULT_CHANNEL = 0; + + /** Anything on different channel than DEFAULT_CHANNEL is not parsed + * by parser. + */ + static const int HIDDEN_CHANNEL = 1; + + /** + * This is the minimum constant value which can be assigned to a + * user-defined token channel. + * + *
+ * The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are + * assigned to the predefined channels {@link #DEFAULT_CHANNEL} and + * {@link #HIDDEN_CHANNEL}.
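+ * A user-defined channel might therefore be declared as follows (the name
+ * is assumed for illustration):
+   * static const int COMMENTS_CHANNEL = 2; // >= MIN_USER_CHANNEL_VALUE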
+ * + * @see Token#getChannel() + */ + static const int MIN_USER_CHANNEL_VALUE = 2; + + /** + * Get the text of the token. + */ + String get text; + + /** Get the token type of the token */ + int get type; + + /** The line number on which the 1st character of this token was matched, + * line=1..n + */ + int get line; + + /** The index of the first character of this token relative to the + * beginning of the line at which it occurs, 0..n-1 + */ + int get charPositionInLine; + + /** Return the channel this token. Each token can arrive at the parser + * on a different channel, but the parser only "tunes" to a single channel. + * The parser ignores everything not on DEFAULT_CHANNEL. + */ + int get channel; + + /** An index from 0..n-1 of the token object in the input stream. + * This must be valid in order to print token streams and + * use TokenRewriteStream. + * + * Return -1 to indicate that this token was conjured up since + * it doesn't have a valid index. + */ + int get tokenIndex; + + /** The starting character index of the token + * This method is optional; return -1 if not implemented. + */ + int get startIndex; + + /** The last character index of the token. + * This method is optional; return -1 if not implemented. + */ + int get stopIndex; + + /** Gets the [TokenSource] which created this token. + */ + TokenSource get tokenSource; + + /** + * Gets the [CharStream] from which this token was derived. + */ + CharStream get inputStream; +} + +abstract class WritableToken extends Token { + void set text(String text); + + void set type(int ttype); + + void set line(int line); + + void set charPositionInLine(int pos); + + void set channel(int channel); + + void set tokenIndex(int index); +} + +class CommonToken extends WritableToken { + /** + * An empty [Pair] which is used as the default value of + * {@link #source} for tokens that do not have a source. + */ + static const Pair EMPTY_SOURCE = + const Pair(null, null); + + int type; + + int line; + + int charPositionInLine = -1; // set to invalid position + + int channel = Token.DEFAULT_CHANNEL; + + /** + * These properties share a field to reduce the memory footprint of + * [CommonToken]. Tokens created by a [CommonTokenFactory] from + * the same source and input stream share a reference to the same + * [Pair] containing these values.
+ */ + Pair source; + + /** + * This is the backing field for {@link #getText} when the token text is + * explicitly set in the constructor or via {@link #setText}. + * + * @see #getText() + */ + String _text; + + int tokenIndex = -1; + + int startIndex; + + int stopIndex; + + /** + * Constructs a new [CommonToken] with the specified token type and + * text. + * + * @param type The token type. + * @param text The text of the token. + */ + CommonToken(this.type, + {this.source = EMPTY_SOURCE, + this.channel = Token.DEFAULT_CHANNEL, + this.startIndex, + this.stopIndex, + text}) { + this._text = text; + if (source.a != null) { + this.line = source.a.line; + this.charPositionInLine = source.a.charPositionInLine; + } + } + + /** + * Constructs a new [CommonToken] as a copy of another [Token]. + * + *
+ * If [oldToken] is also a [CommonToken] instance, the newly + * constructed token will share a reference to the {@link #text} field and + * the [Pair] stored in {@link #source}. Otherwise, {@link #text} will + * be assigned the result of calling {@link #getText}, and {@link #source} + * will be constructed from the result of {@link Token#getTokenSource} and + * {@link Token#getInputStream}.
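+ * A minimal sketch (the original token variable is assumed):
+   * CommonToken dup = CommonToken.copy(original); // shares text/source when original is a CommonToken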
+ * + * @param oldToken The token to copy. + */ + CommonToken.copy(Token oldToken) { + type = oldToken.type; + line = oldToken.line; + tokenIndex = oldToken.tokenIndex; + charPositionInLine = oldToken.charPositionInLine; + channel = oldToken.channel; + startIndex = oldToken.startIndex; + stopIndex = oldToken.stopIndex; + + if (oldToken is CommonToken) { + _text = oldToken.text; + source = oldToken.source; + } else { + _text = oldToken.text; + source = new Pair( + oldToken.tokenSource, oldToken.inputStream); + } + } + + String get text { + if (_text != null) { + return _text; + } + + CharStream input = inputStream; + if (input == null) return null; + int n = input.size; + if (startIndex < n && stopIndex < n) { + return input.getText(Interval.of(startIndex, stopIndex)); + } else { + return ""; + } + } + + /** + * Explicitly set the text for this token. If {code text} is not + * null, then {@link #getText} will return this value rather than + * extracting the text from the input. + * + * @param text The explicit text of the token, or null if the text + * should be obtained from the input along with the start and stop indexes + * of the token. + */ + void set text(String text) { + this._text = text; + } + + TokenSource get tokenSource { + return source.a; + } + + CharStream get inputStream { + return source.b; + } + + String toString([Recognizer r]) { + var txt = this.text; + if (txt != null) { + txt = txt + .replaceAll("\n", r"\n") + .replaceAll("\r", r"\r") + .replaceAll("\t", r"\t"); + } else { + txt = ""; + } + return "[@$tokenIndex,$startIndex:$stopIndex='$txt',<$type>" + + (this.channel > 0 ? ",channel=$channel" : "") + + ",$line:$charPositionInLine]"; + } +} + +/** + * A [Token] object representing an entire subtree matched by a parser + * rule; e.g., {@code }. These tokens are created for [TagChunk] + * chunks where the tag corresponds to a parser rule. + */ +class RuleTagToken implements Token { + /** + * Gets the name of the rule associated with this rule tag. + * + * @return The name of the parser rule associated with this rule tag. + */ + final String ruleName; + + /** + * The token type for the current token. This is the token type assigned to + * the bypass alternative for the rule during ATN deserialization. + */ + final int bypassTokenType; + + /** + * Gets the label associated with the rule tag. + * + * @return The name of the label associated with the rule tag, or + * null if this is an unlabeled rule tag. + */ + final String label; + + /** + * Constructs a new instance of [RuleTagToken] with the specified rule + * name, bypass token type, and label. + * + * @param ruleName The name of the parser rule this rule tag matches. + * @param bypassTokenType The bypass token type assigned to the parser rule. + * @param label The label associated with the rule tag, or null if + * the rule tag is unlabeled. + * + * @exception ArgumentError.value(value) if [ruleName] is null + * or empty. + */ + RuleTagToken(this.ruleName, this.bypassTokenType, [this.label]) { + if (ruleName == null || ruleName.isEmpty) { + throw new ArgumentError.value( + ruleName, "ruleName", "cannot be null or empty."); + } + } + + /** + * {@inheritDoc} + * + *
+ * Rule tag tokens are always placed on the {@link #DEFAULT_CHANNEL}.
+ */ + + int get channel { + return Token.DEFAULT_CHANNEL; + } + + /** + * {@inheritDoc} + * + *
+ * This method returns the rule tag formatted with {@code <} and {@code >} + * delimiters.
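+ * For example, RuleTagToken("expr", 57, "e").text yields "<e:expr>" and the
+ * unlabeled RuleTagToken("expr", 57).text yields "<expr>" (token type 57 is
+ * an arbitrary illustration).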
+ */ + + String get text { + if (label != null) { + return "<" + label + ":" + ruleName + ">"; + } + + return "<" + ruleName + ">"; + } + + /** + * {@inheritDoc} + * + *
+ * Rule tag tokens have types assigned according to the rule bypass + * transitions created during ATN deserialization.
+ */ + + int get type { + return bypassTokenType; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [RuleTagToken] always returns 0.
+ */ + + int get line { + return 0; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [RuleTagToken] always returns -1.
+ */ + int get charPositionInLine { + return -1; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [RuleTagToken] always returns -1.
+ */ + int get tokenIndex { + return -1; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [RuleTagToken] always returns -1.
+ */ + int get startIndex { + return -1; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [RuleTagToken] always returns -1.
+ */ + + int get stopIndex { + return -1; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [RuleTagToken] always returns null.
+ */ + + TokenSource get tokenSource { + return null; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [RuleTagToken] always returns null.
+ */ + + CharStream get inputStream { + return null; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [RuleTagToken] returns a string of the form + * {@code ruleName:bypassTokenType}.
+ */ + + String toString() { + return ruleName + ":$bypassTokenType"; + } +} + +/** + * A [Token] object representing a token of a particular type; e.g., + * {@code }. These tokens are created for [TagChunk] chunks where the + * tag corresponds to a lexer rule or token type. + */ +class TokenTagToken extends CommonToken { + /** + * Gets the token name. + * @return The token name. + */ + final String tokenName; + + /** + * Gets the label associated with the rule tag. + * + * @return The name of the label associated with the rule tag, or + * null if this is an unlabeled rule tag. + */ + final String label; + + /** + * Constructs a new instance of [TokenTagToken] with the specified + * token name, type, and label. + * + * @param tokenName The token name. + * @param type The token type. + * @param label The label associated with the token tag, or null if + * the token tag is unlabeled. + */ + TokenTagToken(this.tokenName, type, [this.label]) : super(type); + + /** + * {@inheritDoc} + * + *
+ * The implementation for [TokenTagToken] returns the token tag + * formatted with {@code <} and {@code >} delimiters.
+ */ + + String get text { + if (label != null) { + return "<" + label + ":" + tokenName + ">"; + } + + return "<" + tokenName + ">"; + } + + /** + * {@inheritDoc} + * + *
+ * The implementation for [TokenTagToken] returns a string of the form + * {@code tokenName:type}.
+ */ + + String toString([recognizer]) { + return tokenName + ":$type"; + } +} diff --git a/runtime/Dart/lib/src/token_factory.dart b/runtime/Dart/lib/src/token_factory.dart new file mode 100644 index 000000000..94f1b64ea --- /dev/null +++ b/runtime/Dart/lib/src/token_factory.dart @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'misc/pair.dart'; +import 'token.dart'; +import 'token_source.dart'; + +/** The default mechanism for creating tokens. It's used by default in Lexer and + * the error handling strategy (to create missing tokens). Notifying the parser + * of a new factory means that it notifies its token source and error strategy. + */ +abstract class TokenFactory { + /** This is the method used to create tokens in the lexer and in the + * error handling strategy. If text!=null, than the start and stop positions + * are wiped to -1 in the text override is set in the CommonToken. + */ + Symbol create(int type, String text, + [Pair source, + int channel, + int start, + int stop, + int line, + int charPositionInLine]); +} + +/** + * This default implementation of [TokenFactory] creates + * [CommonToken] objects. + */ +class CommonTokenFactory implements TokenFactory { + /** + * The default [CommonTokenFactory] instance. + * + *
+ * This token factory does not explicitly copy token text when constructing + * tokens.
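+ * A minimal sketch; with no source pair, the factory simply wraps the
+ * explicit text (the token type 1 is arbitrary):
+   * var t = CommonTokenFactory.DEFAULT.create(1, "hi");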
+ */ + static final TokenFactory DEFAULT = new CommonTokenFactory(); + + /** + * Indicates whether {@link CommonToken#setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream#getText} in + * [UnbufferedCharStream] throws an + * [UnsupportedOperationException]). Explicitly setting the token text + * allows {@link Token#getText} to be called at any time regardless of the + * input stream implementation. + * + *
+ * The default value is [false] to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.
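+ * To opt in to copying, construct a dedicated factory instead:
+   * final factory = CommonTokenFactory(true);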
+ */ + final bool copyText; + + /** + * Constructs a [CommonTokenFactory] with the specified value for + * {@link #copyText}. + * + *
+ * When [copyText] is [false], the {@link #DEFAULT} instance + * should be used instead of constructing a new instance.
+ * + * @param copyText The value for {@link #copyText}. + */ + CommonTokenFactory([this.copyText = false]); + + CommonToken create(int type, String text, + [Pair source, + int channel, + int start, + int stop, + int line, + int charPositionInLine]) { + if (source == null) { + return CommonToken(type, text: text); + } + + CommonToken t = new CommonToken(type, + source: source, channel: channel, startIndex: start, stopIndex: stop); + t.line = line; + t.charPositionInLine = charPositionInLine; + if (text != null) { + t.text = text; + } else if (copyText && source.b != null) { + t.text = source.b.getText(Interval.of(start, stop)); + } + + return t; + } +} diff --git a/runtime/Dart/lib/src/token_source.dart b/runtime/Dart/lib/src/token_source.dart new file mode 100644 index 000000000..69fd6c573 --- /dev/null +++ b/runtime/Dart/lib/src/token_source.dart @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:math'; + +import 'input_stream.dart'; +import 'misc/pair.dart'; +import 'token.dart'; +import 'token_factory.dart'; + +/** + * A source of tokens must provide a sequence of tokens via {@link #nextToken()} + * and also must reveal it's source of characters; [CommonToken]'s text is + * computed from a [CharStream]; it only store indices into the char + * stream. + * + *
+ * Errors from the lexer are never passed to the parser. Either you want to keep + * going or you do not upon token recognition error. If you do not want to + * continue lexing then you do not want to continue parsing. Just throw an + * exception not derived from [RecognitionException] and Dart will naturally toss + * you all the way out of the recognizers. If you want to continue lexing then + * you should not throw an exception to the parser--it has already requested a + * token. Keep lexing until you get a valid one. Just report errors and keep + * going, looking for a valid token.
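+ * A typical drain loop, for illustration (the source variable is assumed):
+   * Token t;
+   * do { t = source.nextToken(); } while (t.type != Token.EOF);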
+ */ +abstract class TokenSource { + /** + * Return a [Token] object from your input stream (usually a + * [CharStream]). Do not fail/return upon lexing error; keep chewing + * on the characters until you get a good one; errors are not passed through + * to the parser. + */ + Token nextToken(); + + /** + * Get the line number for the current position in the input stream. The + * first line in the input is line 1. + * + * @return The line number for the current position in the input stream, or + * 0 if the current token source does not track line numbers. + */ + int get line; + + /** + * Get the index into the current line for the current position in the input + * stream. The first character on a line has position 0. + * + * @return The line number for the current position in the input stream, or + * -1 if the current token source does not track character positions. + */ + int get charPositionInLine; + + /** + * Get the [CharStream] from which this token source is currently + * providing tokens. + * + * @return The [CharStream] associated with the current position in + * the input, or null if no input stream is available for the token + * source. + */ + CharStream get inputStream; + + /** + * Gets the name of the underlying input source. This method returns a + * non-null, non-empty string. If such a name is not known, this method + * returns {@link IntStream#UNKNOWN_SOURCE_NAME}. + */ + String get sourceName; + + /** + * Set the [TokenFactory] this token source should use for creating + * [Token] objects from the input. + * + * @param factory The [TokenFactory] to use for creating tokens. + */ + void set tokenFactory(TokenFactory factory); + + /** + * Gets the [TokenFactory] this token source is currently using for + * creating [Token] objects from the input. + * + * @return The [TokenFactory] currently used by this token source. + */ + TokenFactory get tokenFactory; +} + +/** + * Provides an implementation of [TokenSource] as a wrapper around a list + * of [Token] objects. + * + *
+ * If the final token in the list is an {@link Token#EOF} token, it will be used + * as the EOF token for every call to {@link #nextToken} after the end of the + * list is reached. Otherwise, an EOF token will be created.
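+ * A minimal sketch (the token list is assumed):
+   * TokenSource src = ListTokenSource(myTokens);
+   * Token first = src.nextToken();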
+ */ +class ListTokenSource implements TokenSource { + /** + * The wrapped collection of [Token] objects to return. + */ + final List tokens; + + final String _sourceName; + + /** + * The index into {@link #tokens} of token to return by the next call to + * {@link #nextToken}. The end of the input is indicated by this value + * being greater than or equal to the number of items in {@link #tokens}. + */ + int i; + + /** + * This field caches the EOF token for the token source. + */ + Token eofToken; + + /** + * This is the backing field for {@link #getTokenFactory} and + * [setTokenFactory]. + */ + TokenFactory tokenFactory = CommonTokenFactory.DEFAULT; + + /** + * Constructs a new [ListTokenSource] instance from the specified + * collection of [Token] objects. + * + * @param tokens The collection of [Token] objects to provide as a + * [TokenSource]. + * @exception NullPointerException if [tokens] is null + */ + + /** + * Constructs a new [ListTokenSource] instance from the specified + * collection of [Token] objects and source name. + * + * @param tokens The collection of [Token] objects to provide as a + * [TokenSource]. + * @param sourceName The name of the [TokenSource]. If this value is + * null, {@link #getSourceName} will attempt to infer the name from + * the next [Token] (or the previous token if the end of the input has + * been reached). + * + * @exception NullPointerException if [tokens] is null + */ + ListTokenSource(this.tokens, [this._sourceName = null]) { + if (tokens == null) { + throw new ArgumentError.notNull("tokens"); + } + } + + /** + * {@inheritDoc} + */ + + int get charPositionInLine { + if (i < tokens.length) { + return tokens[i].charPositionInLine; + } else if (eofToken != null) { + return eofToken.charPositionInLine; + } else if (tokens.length > 0) { + // have to calculate the result from the line/column of the previous + // token, along with the text of the token. + Token lastToken = tokens[tokens.length - 1]; + String tokenText = lastToken.text; + if (tokenText != null) { + int lastNewLine = tokenText.lastIndexOf('\n'); + if (lastNewLine >= 0) { + return tokenText.length - lastNewLine - 1; + } + } + + return lastToken.charPositionInLine + + lastToken.stopIndex - + lastToken.startIndex + + 1; + } + + // only reach this if tokens is empty, meaning EOF occurs at the first + // position in the input + return 0; + } + + /** + * {@inheritDoc} + */ + + Token nextToken() { + if (i >= tokens.length) { + if (eofToken == null) { + int start = -1; + if (tokens.length > 0) { + int previousStop = tokens[tokens.length - 1].stopIndex; + if (previousStop != -1) { + start = previousStop + 1; + } + } + + int stop = max(-1, start - 1); + eofToken = tokenFactory.create(Token.EOF, "EOF", Pair(this, inputStream), + Token.DEFAULT_CHANNEL, start, stop, line, charPositionInLine); + } + + return eofToken; + } + + Token t = tokens[i]; + if (i == tokens.length - 1 && t.type == Token.EOF) { + eofToken = t; + } + + i++; + return t; + } + + /** + * {@inheritDoc} + */ + + int get line { + if (i < tokens.length) { + return tokens[i].line; + } else if (eofToken != null) { + return eofToken.line; + } else if (tokens.length > 0) { + // have to calculate the result from the line/column of the previous + // token, along with the text of the token. 
+ Token lastToken = tokens[tokens.length - 1]; + int line = lastToken.line; + + String tokenText = lastToken.text; + if (tokenText != null) { + for (int i = 0; i < tokenText.length; i++) { + if (tokenText[i] == '\n') { + line++; + } + } + } + + // if no text is available, assume the token did not contain any newline characters. + return line; + } + + // only reach this if tokens is empty, meaning EOF occurs at the first + // position in the input + return 1; + } + + /** + * {@inheritDoc} + */ + + CharStream get inputStream { + if (i < tokens.length) { + return tokens[i].inputStream; + } else if (eofToken != null) { + return eofToken.inputStream; + } else if (tokens.length > 0) { + return tokens[tokens.length - 1].inputStream; + } + + // no input stream information is available + return null; + } + + /** + * The name of the input source. If this value is null, a call to + * {@link #getSourceName} should return the source name used to create the + * the next token in {@link #tokens} (or the previous token if the end of + * the input has been reached). + */ + String get sourceName =>_sourceName ?? inputStream?.sourceName ?? "List"; +} diff --git a/runtime/Dart/lib/src/token_stream.dart b/runtime/Dart/lib/src/token_stream.dart new file mode 100644 index 000000000..9e6dc9b72 --- /dev/null +++ b/runtime/Dart/lib/src/token_stream.dart @@ -0,0 +1,650 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'lexer.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'token_source.dart'; + +/** + * An [IntStream] whose symbols are [Token] instances. + */ +abstract class TokenStream extends IntStream { + /** + * Get the [Token] instance associated with the value returned by + * {@link #LA LA(k)}. This method has the same pre- and post-conditions as + * {@link IntStream#LA}. In addition, when the preconditions of this method + * are met, the return value is non-null and the value of + * {@code LT(k).getType()==LA(k)}. + * + * @see IntStream#LA + */ + Token LT(int k); + + /** + * Gets the [Token] at the specified [index] in the stream. When + * the preconditions of this method are met, the return value is non-null. + * + *
+ * The preconditions for this method are the same as the preconditions of + * {@link IntStream#seek}. If the behavior of {@code seek(index)} is + * unspecified for the current state and given [index], then the + * behavior of this method is also unspecified.
+ * + *
+ * The symbol referred to by [index] differs from {@code seek()} only + * in the case of filtering streams where [index] lies before the end + * of the stream. Unlike {@code seek()}, this method does not adjust + * [index] to point to a non-ignored symbol.
+ * + * @throws IllegalArgumentException if {code index} is less than 0 + * @throws UnsupportedOperationException if the stream does not support + * retrieving the token at the specified index + */ + Token get(int index); + + /** + * Gets the underlying [TokenSource] which provides tokens for this + * stream. + */ + TokenSource get tokenSource; + + /** + * Return the text of all tokens within the specified [interval]. This + * method behaves like the following code (including potential exceptions + * for violating preconditions of {@link #get}, but may be optimized by the + * specific implementation. + * + *
+   * TokenStream stream = ...;
+   * String text = "";
+   * for (int i = interval.a; i <= interval.b; i++) {
+   *   text += stream.get(i).getText();
+   * }
+   * 
+ * + *
+   * TokenStream stream = ...;
+   * String text = stream.getText(new Interval(0, stream.length));
+   * 
+ * + *
+   * TokenStream stream = ...;
+   * String text = stream.getText(ctx.getSourceInterval());
+   * 
+ * + * @param interval The interval of tokens within this stream to get text + * for. + * @return The text of all tokens / within the specified interval in this + * stream. + */ + String getText([Interval interval]); + + String get text; + + /** + * Return the text of all tokens in the source interval of the specified + * context. This method behaves like the following code, including potential + * exceptions from the call to {@link #getText(Interval)}, but may be + * optimized by the specific implementation. + * + *
+ * If {@code ctx.getSourceInterval()} does not return a valid interval of + * tokens provided by this stream, the behavior is unspecified.
+ * + * @param ctx The context providing the source interval of tokens to get + * text for. + * @return The text of all tokens within the source interval of [ctx]. + */ + String getTextFromCtx(RuleContext ctx); + + /** + * Return the text of all tokens in this stream between [start] and + * [stop] (inclusive). + * + *
+ * If the specified [start] or [stop] token was not provided by + * this stream, or if the [stop] occurred before the [start] + * token, the behavior is unspecified.
+ * + *
+ * For streams which ensure that the {@link Token#getTokenIndex} method is + * accurate for all of its provided tokens, this method behaves like the + * following code. Other streams may implement this method in other ways + * provided the behavior is consistent with this at a high level.
+ * + *
+   * TokenStream stream = ...;
+   * String text = "";
+   * for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) {
+   *   text += stream.get(i).getText();
+   * }
+   * 
+ * + * @param start The first token in the interval to get text for. + * @param stop The last token in the interval to get text for (inclusive). + * @return The text of all tokens lying between the specified [start] + * and [stop] tokens. + * + * @throws UnsupportedOperationException if this stream does not support + * this method for the specified tokens + */ + String getTextRange(Token start, Token stop); +} + +/** + * This implementation of [TokenStream] loads tokens from a + * [TokenSource] on-demand, and places the tokens in a buffer to provide + * access to any previous token by index. + * + *
+ * This token stream ignores the value of {@link Token#getChannel}. If your + * parser requires the token stream to filter tokens down to only those on a + * particular channel, such as {@link Token#DEFAULT_CHANNEL} or + * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as a + * [CommonTokenStream].
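+ * A hypothetical wiring (constructor shapes assumed): a parser that should
+ * skip hidden-channel tokens would be fed CommonTokenStream(lexer) rather
+ * than a plain BufferedTokenStream(lexer).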
+ */ +class BufferedTokenStream implements TokenStream { + /** + * The [TokenSource] from which tokens for this stream are fetched. + */ + TokenSource _tokenSource; + + /** + * A collection of all tokens fetched from the token source. The list is + * considered a complete view of the input once {@link #fetchedEOF} is set + * to [true]. + */ + List tokens = List(); + + /** + * The index into [tokens] of the current token (next token to [consume]). + * [tokens][p] should be [LT(1)]. + * + *
+ * This field is set to -1 when the stream is first constructed or when + * [tokenSource] is set, indicating that the first token has + * not yet been fetched from the token source. For additional information, + * see the documentation of [IntStream] for a description of + * Initializing Methods.
+ */ + int p = -1; + + /** + * Indicates whether the [Token.EOF] token has been fetched from + * [tokenSource] and added to [tokens]. This field improves + * performance for the following cases: + * + *