diff --git a/doc/javascript-target.md b/doc/javascript-target.md index 144aa5f81..3473e95b5 100644 --- a/doc/javascript-target.md +++ b/doc/javascript-target.md @@ -96,7 +96,7 @@ Now a fully functioning script might look like the following: ```javascript var input = "your text to parse here" - var chars = CharStreams.fromString(input); + var chars = new antlr4.InputStream(input); var lexer = new MyGrammarLexer.MyGrammarLexer(chars); var tokens = new antlr4.CommonTokenStream(lexer); var parser = new MyGrammarParser.MyGrammarParser(tokens); diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerExecDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerExecDescriptors.java index 435bf4299..60cb86cb4 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerExecDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerExecDescriptors.java @@ -73,6 +73,29 @@ public class LexerExecDescriptors { } + /* regression test for antlr/antlr4#1925 */ + public static class UnicodeCharSet extends BaseLexerTestDescriptor { + public String input = "均"; + /** + [@0,0:0='均',<1>,1:0] + [@1,1:0='',<-1>,1:1] + */ + @CommentHasStringValue + public String output; + + public String errors = null; + public String startRule = ""; + public String grammarName = "L"; + + /** + lexer grammar L; + ID : ([A-Z_]|'\u0100'..'\uFFFE') ([A-Z_0-9]|'\u0100'..'\uFFFE')*; + */ + @CommentHasStringValue + public String grammar; + + } + public static class CharSetInSet extends BaseLexerTestDescriptor { public String input = "a x"; /** diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/BasePython2Test.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/BasePython2Test.java index 34c60a4ce..aa0eea7e2 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/BasePython2Test.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/python2/BasePython2Test.java @@ -38,7 +38,7 @@ public class BasePython2Test extends BasePythonTest { + " lexer = (input, output)\n" + " stream = CommonTokenStream(lexer)\n" + " stream.fill()\n" - + " [ print(t, file=output) for t in stream.tokens ]\n" + + " [ print(unicode(t), file=output) for t in stream.tokens ]\n" + (showDFA ? " print(lexer._interp.decisionToDFA[Lexer.DEFAULT_MODE].toLexerString(), end='', file=output)\n" : "") + "\n" + "if __name__ == '__main__':\n" + " main(sys.argv)\n" + "\n"); diff --git a/runtime/CSharp/README.md b/runtime/CSharp/README.md index 1e6e243ba..f60be7e23 100644 --- a/runtime/CSharp/README.md +++ b/runtime/CSharp/README.md @@ -51,7 +51,7 @@ This is just a quick start. The tool has many useful options to control generati The Antlr 4 standard runtime for C# is now available from NuGet. We trust that you know how to do add NuGet references to your project :-). -The package id is Antlr.4.Runtime. We do not support other packages. +The package id is Antlr4.Runtime.Standard. We do not support other packages. ### Step 6: You're done! diff --git a/runtime/Go/antlr/atn_deserializer.go b/runtime/Go/antlr/atn_deserializer.go index 2ac6953a3..884d39cf7 100644 --- a/runtime/Go/antlr/atn_deserializer.go +++ b/runtime/Go/antlr/atn_deserializer.go @@ -125,8 +125,10 @@ func (a *ATNDeserializer) reset(data []rune) { // Don't adjust the first value since that's the version number if i == 0 { temp[i] = c - } else { + } else if c > 1 { temp[i] = c - 2 + } else { + temp[i] = c + 65533 } } diff --git a/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js b/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js index 0b2228c34..3d71d4242 100644 --- a/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js +++ b/runtime/JavaScript/src/antlr4/atn/ATNDeserializer.js @@ -138,7 +138,7 @@ ATNDeserializer.prototype.deserialize = function(data) { ATNDeserializer.prototype.reset = function(data) { var adjust = function(c) { var v = c.charCodeAt(0); - return v>1 ? v-2 : -1; + return v>1 ? v-2 : v + 65533; }; var temp = data.split("").map(adjust); // don't adjust the first value since that's the version number diff --git a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py index 96b5db31f..6f9c0c6cf 100644 --- a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py +++ b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py @@ -86,7 +86,7 @@ class ATNDeserializer (object): def reset(self, data): def adjust(c): v = ord(c) - return v-2 if v>1 else -1 + return v-2 if v>1 else v + 65533 temp = [ adjust(c) for c in data ] # don't adjust the first value since that's the version number temp[0] = ord(data[0]) diff --git a/runtime/Python3/src/antlr4/atn/ATNDeserializer.py b/runtime/Python3/src/antlr4/atn/ATNDeserializer.py index 9fa18900f..cd0bb661a 100644 --- a/runtime/Python3/src/antlr4/atn/ATNDeserializer.py +++ b/runtime/Python3/src/antlr4/atn/ATNDeserializer.py @@ -86,7 +86,7 @@ class ATNDeserializer (object): def reset(self, data:str): def adjust(c): v = ord(c) - return v-2 if v>1 else -1 + return v-2 if v>1 else v + 65533 temp = [ adjust(c) for c in data ] # don't adjust the first value since that's the version number temp[0] = ord(data[0]) diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg index 43309bd84..6342b660e 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg @@ -1024,7 +1024,7 @@ ContextRuleListIndexedGetterDecl(r) ::= << >> -LexerRuleContext() ::= "RuleContext" +LexerRuleContext() ::= "antlr4::RuleContext" // The rule context name is the rule followed by a suffix; e.g. r becomes rContext. RuleContextNameSuffix() ::= "Context"