commit 0c4473ea1a (forked from jasder/antlr)

@@ -149,3 +149,4 @@ YYYY/MM/DD, github id, Full name, email
 2017/05/26, waf, Will Fuqua, wafuqua@gmail.com
 2017/05/29, kosak, Corey Kosak, kosak@kosak.com
 2017/06/10, jm-mikkelsen, Jan Martin Mikkelsen, janm@transactionware.com
+2017/06/25, alimg, Alim Gökkaya, alim.gokkaya@gmail.com

@@ -96,7 +96,7 @@ Now a fully functioning script might look like the following:

 ```javascript
 var input = "your text to parse here"
-var chars = CharStreams.fromString(input);
+var chars = new antlr4.InputStream(input);
 var lexer = new MyGrammarLexer.MyGrammarLexer(chars);
 var tokens = new antlr4.CommonTokenStream(lexer);
 var parser = new MyGrammarParser.MyGrammarParser(tokens);

@@ -73,6 +73,29 @@ public class LexerExecDescriptors {

 	}

+	/* regression test for antlr/antlr4#1925 */
+	public static class UnicodeCharSet extends BaseLexerTestDescriptor {
+		public String input = "均";
+		/**
+		[@0,0:0='均',<1>,1:0]
+		[@1,1:0='<EOF>',<-1>,1:1]
+		*/
+		@CommentHasStringValue
+		public String output;
+
+		public String errors = null;
+		public String startRule = "";
+		public String grammarName = "L";
+
+		/**
+		lexer grammar L;
+		ID : ([A-Z_]|'\u0100'..'\uFFFE') ([A-Z_0-9]|'\u0100'..'\uFFFE')*;
+		*/
+		@CommentHasStringValue
+		public String grammar;
+
+	}
+
 	public static class CharSetInSet extends BaseLexerTestDescriptor {
 		public String input = "a x";
 		/**

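The regression test above exercises a lexer char set whose upper bound is '\uFFFE': '均' (U+5747) falls inside the '\u0100'..'\uFFFE' range used in the ID rule, and antlr/antlr4#1925 reported that such sets were corrupted by ATN serialization (the deserializer changes appear further down in this commit). The expected-output lines use ANTLR's default token formatting, [@index,start:stop='text',<type>,line:column]. A rough Python sketch of that notation, not ANTLR code, just to make the two expected lines easier to read:

```python
# Rough sketch of ANTLR's default token formatting used in the expected output:
# [@index,start:stop='text',<type>,line:column]
def token_line(index, start, stop, text, ttype, line, column):
    return "[@%d,%d:%d='%s',<%d>,%d:%d]" % (index, start, stop, text, ttype, line, column)

# Lexing the single character '均' should produce one ID token (type 1)
# followed by the EOF token (type -1):
print(token_line(0, 0, 0, "均", 1, 1, 0))      # [@0,0:0='均',<1>,1:0]
print(token_line(1, 1, 0, "<EOF>", -1, 1, 1))  # [@1,1:0='<EOF>',<-1>,1:1]
```
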
@@ -581,4 +581,41 @@ public class ParserErrorsDescriptors {
 		public String grammar;

 	}
+
+	public static class TokenMismatch3 extends BaseParserTestDescriptor {
+		public String input = "";
+		public String output = null;
+		public String errors = "line 1:0 mismatched input '<EOF>' expecting {'(', BOOLEAN_LITERAL, ID, '$'}\n";
+		public String startRule = "expression";
+		public String grammarName = "T";
+
+		/**
+		grammar T;
+
+		expression
+			: value
+			| expression op=AND expression
+			| expression op=OR expression
+			;
+		value
+			: BOOLEAN_LITERAL
+			| ID
+			| ID1
+			| '(' expression ')'
+			;
+
+		AND : '&&';
+		OR : '||';
+
+		BOOLEAN_LITERAL : 'true' | 'false';
+
+		ID : [a-z]+;
+		ID1 : '$';
+
+		WS : [ \t\r\n]+ -> skip ;
+		*/
+		@CommentHasStringValue
+		public String grammar;
+
+	}
 }

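The expected error lists exactly the tokens that can begin an expression: on empty input the parser reaches EOF while it still needs the first token of the start rule, and every alternative of expression begins, directly or through its left-recursive operands, with a value. A small Python sketch of that reasoning, not ANTLR's implementation, just a FIRST-set computation over this grammar:

```python
# Not ANTLR's implementation; just a sketch of why the "expecting" set in the
# error above is exactly {'(', BOOLEAN_LITERAL, ID, '$'}.
RULES = {
    "expression": [["value"], ["expression"], ["expression"]],  # left-recursive alts
    "value": [["BOOLEAN_LITERAL"], ["ID"], ["'$'"], ["'('"]],   # '$' is ID1's literal
}
TOKENS = {"BOOLEAN_LITERAL", "ID", "'$'", "'('"}

def first(symbol, seen=frozenset()):
    """Terminals that can start a derivation of `symbol`."""
    if symbol in TOKENS:
        return {symbol}
    if symbol in seen:                        # guard against left recursion
        return set()
    result = set()
    for alternative in RULES[symbol]:
        result |= first(alternative[0], seen | {symbol})
    return result

print(sorted(first("expression")))  # ["'$'", "'('", 'BOOLEAN_LITERAL', 'ID']
```
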
@@ -38,7 +38,7 @@ public class BasePython2Test extends BasePythonTest {
 	+ " lexer = <lexerName>(input, output)\n"
 	+ " stream = CommonTokenStream(lexer)\n"
 	+ " stream.fill()\n"
-	+ " [ print(t, file=output) for t in stream.tokens ]\n"
+	+ " [ print(unicode(t), file=output) for t in stream.tokens ]\n"
 	+ (showDFA ? " print(lexer._interp.decisionToDFA[Lexer.DEFAULT_MODE].toLexerString(), end='', file=output)\n"
 	: "") + "\n" + "if __name__ == '__main__':\n"
 	+ " main(sys.argv)\n" + "\n");

@@ -51,7 +51,7 @@ This is just a quick start. The tool has many useful options to control generation

 The Antlr 4 standard runtime for C# is now available from NuGet.
 We trust that you know how to do add NuGet references to your project :-).
-The package id is Antlr.4.Runtime. We do not support other packages.
+The package id is Antlr4.Runtime.Standard. We do not support other packages.


 ### Step 6: You're done!

@@ -125,8 +125,10 @@ func (a *ATNDeserializer) reset(data []rune) {
 		// Don't adjust the first value since that's the version number
 		if i == 0 {
 			temp[i] = c
-		} else {
+		} else if c > 1 {
 			temp[i] = c - 2
+		} else {
+			temp[i] = c + 65533
 		}
 	}

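The JavaScript and Python deserializers below receive the same change. The serialized ATN is a string of 16-bit values, and the comment about the version number suggests every value except the first was shifted when the ATN was serialized; reset undoes that shift. Under the old code a serialized 0 or 1 came back as -1, while the new code maps them to 0xFFFD (65533) and 0xFFFE (65534), which appears to be what lets char sets reaching up to '\uFFFE' (the antlr/antlr4#1925 regression test above) round-trip. A small Python sketch of the mapping; the serializer-side +2 shift is an assumption inferred from the deserializer, not code from this commit:

```python
# Sketch of the value adjustment in ATNDeserializer.reset (all runtimes).
# Assumption: the serializer stored each 16-bit value shifted by +2 modulo 0xFFFF,
# with the first value (the version number) written unshifted.
def old_adjust(v):
    return v - 2 if v > 1 else -1          # 0 and 1 both collapse to -1

def new_adjust(v):
    return v - 2 if v > 1 else v + 65533   # exact inverse of the +2 shift

for original in (0x5747, 0xFFFD, 0xFFFE):  # '均', U+FFFD, U+FFFE
    serialized = (original + 2) % 0xFFFF   # assumed serializer side
    print(hex(original), "->", hex(serialized),
          "-> old:", old_adjust(serialized),
          "new:", hex(new_adjust(serialized)))
# 0x5747 -> 0x5749 -> old: 22343 new: 0x5747
# 0xfffd -> 0x0 -> old: -1 new: 0xfffd
# 0xfffe -> 0x1 -> old: -1 new: 0xfffe
```
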
@@ -138,7 +138,7 @@ ATNDeserializer.prototype.deserialize = function(data) {
 ATNDeserializer.prototype.reset = function(data) {
 	var adjust = function(c) {
 		var v = c.charCodeAt(0);
-		return v>1 ? v-2 : -1;
+		return v>1 ? v-2 : v + 65533;
 	};
 	var temp = data.split("").map(adjust);
 	// don't adjust the first value since that's the version number

@@ -193,7 +193,7 @@ class IntervalSet(object):
         elif a==Token.EPSILON:
             return u"<EPSILON>"
         else:
-            if a<len(literalNames):
+            if a<len(literalNames) and literalNames[a] != u"<INVALID>":
                 return literalNames[a]
             if a<len(symbolicNames):
                 return symbolicNames[a]

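This change (mirrored for Python 3 below) affects how an IntervalSet of token types is rendered, for example in the "expecting {...}" part of error messages: generated recognizers leave the placeholder "<INVALID>" in literalNames for token types that have no literal form, so the lookup now falls through to the symbolic name instead of printing the placeholder, consistent with the mixed literal and symbolic names in the TokenMismatch3 expectation above. A minimal sketch of the lookup; the function name and data here are illustrative, and the real method also handles EOF and EPSILON as shown in the hunk:

```python
# Minimal sketch of the display-name lookup; names and data are illustrative.
def element_name(a, literalNames, symbolicNames):
    if a < len(literalNames) and literalNames[a] != "<INVALID>":
        return literalNames[a]
    if a < len(symbolicNames):
        return symbolicNames[a]
    return "<UNKNOWN>"

literalNames = ["<INVALID>", "'&&'", "'||'", "<INVALID>", "<INVALID>"]
symbolicNames = ["<INVALID>", "AND", "OR", "BOOLEAN_LITERAL", "ID"]
print(element_name(1, literalNames, symbolicNames))  # '&&'  (literal wins)
print(element_name(4, literalNames, symbolicNames))  # ID    (falls back to symbolic)
```
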
@@ -86,7 +86,7 @@ class ATNDeserializer (object):
     def reset(self, data):
         def adjust(c):
             v = ord(c)
-            return v-2 if v>1 else -1
+            return v-2 if v>1 else v + 65533
         temp = [ adjust(c) for c in data ]
         # don't adjust the first value since that's the version number
         temp[0] = ord(data[0])

@@ -173,7 +173,7 @@ class IntervalSet(object):
         elif a==Token.EPSILON:
             return "<EPSILON>"
         else:
-            if a<len(literalNames):
+            if a<len(literalNames) and literalNames[a] != "<INVALID>":
                 return literalNames[a]
             if a<len(symbolicNames):
                 return symbolicNames[a]

@@ -86,7 +86,7 @@ class ATNDeserializer (object):
     def reset(self, data:str):
         def adjust(c):
             v = ord(c)
-            return v-2 if v>1 else -1
+            return v-2 if v>1 else v + 65533
         temp = [ adjust(c) for c in data ]
         # don't adjust the first value since that's the version number
         temp[0] = ord(data[0])