Emit language-specific Unicode escapes when generating code containing non-ASCII Unicode values

2017-02-10 14:00:33 -08:00 · 2017-02-10 14:00:33 -08:00 · 0049d6d9ae
parent 182f3c4647
commit 0049d6d9ae
11 changed files with 235 additions and 245 deletions
--- a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java
@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+package org.antlr.v4.test.tool;
+
+import org.antlr.v4.codegen.UnicodeEscapes;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestUnicodeEscapes {
+	@Test
+	public void latinJavaEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x0061, sb);
+		assertEquals("\\u0061", sb.toString());
+	}
+
+	@Test
+	public void latinPythonEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x0061, sb);
+		assertEquals("\\u0061", sb.toString());
+	}
+
+	@Test
+	public void latinSwiftEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x0061, sb);
+		assertEquals("\\u{0061}", sb.toString());
+	}
+
+	@Test
+	public void bmpJavaEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendJavaStyleEscapedCodePoint(0xABCD, sb);
+		assertEquals("\\uABCD", sb.toString());
+	}
+
+	@Test
+	public void bmpPythonEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendPythonStyleEscapedCodePoint(0xABCD, sb);
+		assertEquals("\\uABCD", sb.toString());
+	}
+
+	@Test
+	public void bmpSwiftEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0xABCD, sb);
+		assertEquals("\\u{ABCD}", sb.toString());
+	}
+
+	@Test
+	public void smpJavaEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x1F4A9, sb);
+		assertEquals("\\uD83D\\uDCA9", sb.toString());
+	}
+
+	@Test
+	public void smpPythonEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x1F4A9, sb);
+		assertEquals("\\U0001F4A9", sb.toString());
+	}
+
+	@Test
+	public void smpSwiftEscape() {
+		StringBuilder sb = new StringBuilder();
+		UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x1F4A9, sb);
+		assertEquals("\\u{1F4A9}", sb.toString());
+	}
+}
--- a/tool/src/org/antlr/v4/codegen/Target.java
+++ b/tool/src/org/antlr/v4/codegen/Target.java
@ -9,6 +9,7 @@ package org.antlr.v4.codegen;
 import org.antlr.v4.Tool;
 import org.antlr.v4.codegen.model.RuleFunction;
 import org.antlr.v4.codegen.model.SerializedATN;
+import org.antlr.v4.misc.CharSupport;
 import org.antlr.v4.misc.Utils;
 import org.antlr.v4.parse.ANTLRParser;
 import org.antlr.v4.runtime.RuntimeMetaData;
@ -146,17 +147,22 @@ public abstract class Target {
 		if ( quoted ) {
 			buf.append('"');
 		}
-		for (int i=0; i<s.length(); i++) {
-			int c = s.charAt(i);
+		for (int i=0; i<s.length(); ) {
+			int c = s.codePointAt(i);
 			if ( c!='\'' && // don't escape single quotes in strings for java
 				 c<targetCharValueEscape.length &&
 				 targetCharValueEscape[c]!=null )
 			{
 				buf.append(targetCharValueEscape[c]);
 			}
-			else {
-				buf.append((char)c);
+			else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(c)) {
+				appendUnicodeEscapedCodePoint(c, buf); // escape the code point, not its index
 			}
+			else
+			{
+				buf.appendCodePoint(c);
+			}
+			i += Character.charCount(c);
 		}
 		if ( quoted ) {
 			buf.append('"');
@ -164,6 +170,12 @@ public abstract class Target {
 		return buf.toString();
 	}

+	/**
+	 * Escape the Unicode code point appropriately for this language
+	 * and append the escaped value to {@code sb}.
+	 */
+	abstract protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb);
+
 	public String getTargetStringLiteralFromString(String s) {
 		return getTargetStringLiteralFromString(s, true);
 	}
@ -194,15 +206,19 @@ public abstract class Target {

 		if ( addQuotes ) sb.append('"');

-		for (int i = 1; i < is.length() -1; i++) {
-			if  (is.charAt(i) == '\\') {
+		for (int i = 1; i < is.length() -1; ) {
+			int codePoint = is.codePointAt(i);
+			int toAdvance = Character.charCount(codePoint);
+			if  (codePoint == '\\') {
 				// Anything escaped is what it is! We assume that
 				// people know how to escape characters correctly. However
 				// we catch anything that does not need an escape in Java (which
 				// is what the default implementation is dealing with and remove
 				// the escape. The C target does this for instance.
 				//
-				switch (is.charAt(i+1)) {
+				int escapedCodePoint = is.codePointAt(i+toAdvance);
+				toAdvance += Character.charCount(escapedCodePoint); // may be a surrogate pair
+				switch (escapedCodePoint) {
 					// Pass through any escapes that Java also needs
 					//
 					case    '"':
@ -214,32 +230,43 @@ public abstract class Target {
 					case    '\\':
 						// Pass the escape through
 						sb.append('\\');
+						sb.appendCodePoint(escapedCodePoint);
 						break;

-					case    'u':    // Assume unnnn
-						// Pass the escape through as double \\
-						// so that Java leaves as \u0000 string not char
-						sb.append('\\');
-						sb.append('\\');
-						break;
-
-					default:
-						// Remove the escape by virtue of not adding it here
-						// Thus \' becomes ' and so on
-						break;
+					case    'u':    // Either unnnn or u{nnnnnn}
+						if (is.charAt(i+toAdvance) == '{') {
+							while (is.charAt(i+toAdvance) != '}') {
+								toAdvance++;
 							}
-
-				// Go past the \ character
-				i++;
+							toAdvance++;
 						} else {
-				// Characters that don't need \ in ANTLR 'strings' but do in Java
-				if (is.charAt(i) == '"') {
-					// We need to escape " in Java
-					sb.append('\\');
+							toAdvance += 4;
+						}
+						String fullEscape = is.substring(i, i + toAdvance);
+						appendUnicodeEscapedCodePoint(
+								CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
+								sb);
+						break;
+					default:
+						if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {
+							appendUnicodeEscapedCodePoint(escapedCodePoint, sb);
+						} else {
+							sb.appendCodePoint(escapedCodePoint);
+						}
+						break;
+				}
+			} else {
+				if (codePoint == 0x22) {
+					// ANTLR doesn't escape " in literal strings,
+					// but every other language needs to do so.
+					sb.append("\\\"");
+				} else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) {
+					appendUnicodeEscapedCodePoint(codePoint, sb);
+				} else {
+					sb.appendCodePoint(codePoint);
 				}
 			}
-			// Add in the next character, which may have been escaped
-			sb.append(is.charAt(i));
+			i += toAdvance;
 		}

 		if ( addQuotes ) sb.append('"');
@ -247,6 +274,19 @@ public abstract class Target {
 		return sb.toString();
 	}

+	private static boolean shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(int codePoint) {
+		// We don't want anyone passing 0x0A (newline) or 0x22
+		// (double-quote) here because Java treats \\u000A as
+		// a literal newline and \\u0022 as a literal
+		// double-quote, so Unicode escaping doesn't help.
+		assert codePoint != 0x0A && codePoint != 0x22;
+
+		return
+			codePoint < 0x20  || // control characters up to but not including space
+			codePoint == 0x5C || // backslash
+			codePoint >= 0x7F;   // DEL and beyond (keeps source code 7-bit US-ASCII)
+	}
+
 	/** Assume 16-bit char */
 	public String encodeIntAsCharEscape(int v) {
 		if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
--- a/tool/src/org/antlr/v4/codegen/UnicodeEscapes.java
+++ b/tool/src/org/antlr/v4/codegen/UnicodeEscapes.java
@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+package org.antlr.v4.codegen;
+
+/**
+ * Utility class to escape Unicode code points using various
+ * languages' syntaxes.
+ */
+public abstract class UnicodeEscapes {
+	static public void appendJavaStyleEscapedCodePoint(int codePoint, StringBuilder sb) {
+		if (Character.isSupplementaryCodePoint(codePoint)) {
+			// char is not an 'integral' type, so we have to explicitly convert
+			// to int before passing to the %X formatter or else it throws.
+			sb.append(String.format("\\u%04X", (int)Character.highSurrogate(codePoint)));
+			sb.append(String.format("\\u%04X", (int)Character.lowSurrogate(codePoint)));
+		} else {
+			sb.append(String.format("\\u%04X", codePoint));
+		}
+	}
+
+	static public void appendPythonStyleEscapedCodePoint(int codePoint, StringBuilder sb) {
+		if (Character.isSupplementaryCodePoint(codePoint)) {
+			sb.append(String.format("\\U%08X", codePoint));
+		} else {
+			sb.append(String.format("\\u%04X", codePoint));
+		}
+	}
+
+	static public void appendSwiftStyleEscapedCodePoint(int codePoint, StringBuilder sb) {
+		sb.append(String.format("\\u{%04X}", codePoint));
+	}
+}
--- a/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java
+++ b/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java
@ -7,6 +7,7 @@ package org.antlr.v4.codegen.target;

 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.codegen.Target;
+import org.antlr.v4.codegen.UnicodeEscapes;
 import org.antlr.v4.tool.ErrorType;
 import org.antlr.v4.tool.ast.GrammarAST;
 import org.stringtemplate.v4.NumberRenderer;
@ -36,78 +37,16 @@ public class CSharpTarget extends Target {
 			throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
 		}

+		String formatted;
 		if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
-			return targetCharValueEscape[v];
-		}
-
-		if (v >= 0x20 && v < 127 && (v < '0' || v > '9') && (v < 'a' || v > 'f') && (v < 'A' || v > 'F')) {
-			return String.valueOf((char)v);
-		}
-
-		return String.format("\\x%X", v & 0xFFFF);
-	}
-
-	@Override
-	public String getTargetStringLiteralFromANTLRStringLiteral(
-		CodeGenerator generator,
-		String literal, boolean addQuotes)
-	{
-		StringBuilder sb = new StringBuilder();
-		String is = literal;
-
-		if ( addQuotes ) sb.append('"');
-
-		for (int i = 1; i < is.length() -1; i++) {
-			if  (is.charAt(i) == '\\') {
-				// Anything escaped is what it is! We assume that
-				// people know how to escape characters correctly. However
-				// we catch anything that does not need an escape in Java (which
-				// is what the default implementation is dealing with and remove
-				// the escape. The C target does this for instance.
-				//
-				switch (is.charAt(i+1)) {
-					// Pass through any escapes that Java also needs
-					//
-					case    '"':
-					case    'n':
-					case    'r':
-					case    't':
-					case    'b':
-					case    'f':
-					case    '\\':
-						// Pass the escape through
-						sb.append('\\');
-						break;
-
-					case    'u':    // Assume unnnn
-						// Pass the escape through as double \\
-						// so that Java leaves as \u0000 string not char
-						sb.append('\\');
-						sb.append('\\');
-						break;
-
-					default:
-						// Remove the escape by virtue of not adding it here
-						// Thus \' becomes ' and so on
-						break;
-				}
-
-				// Go past the \ character
-				i++;
+			formatted = targetCharValueEscape[v];
+		} else if (v >= 0x20 && v < 127 && (v < '0' || v > '9') && (v < 'a' || v > 'f') && (v < 'A' || v > 'F')) {
+			formatted = Character.toString((char)v);
 		} else {
-				// Characters that don't need \ in ANTLR 'strings' but do in Java
-				if (is.charAt(i) == '"') {
-					// We need to escape " in Java
-					sb.append('\\');
-				}
-			}
-			// Add in the next character, which may have been escaped
-			sb.append(is.charAt(i));
+			formatted = String.format("\\x%X", v & 0xFFFF);
 		}

-		if ( addQuotes ) sb.append('"');
-
-		return sb.toString();
+		return "'" + formatted + "'";
 	}

 	@Override
@ -150,4 +89,9 @@ public class CSharpTarget extends Target {
 		return result;
 	}

+	@Override
+	protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
+		// C# and Python share the same escaping style.
+		UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
+	}
 }
--- a/tool/src/org/antlr/v4/codegen/target/CppTarget.java
+++ b/tool/src/org/antlr/v4/codegen/target/CppTarget.java
@ -6,6 +6,7 @@

 package org.antlr.v4.codegen.target;

+import org.antlr.v4.codegen.UnicodeEscapes;
 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.codegen.Target;
 import org.antlr.v4.tool.ErrorType;
@ -68,81 +69,6 @@ public class CppTarget extends Target {
 		badWords.add("parserRule");
 	}

-	/**
-	 * {@inheritDoc}
-	 * <p/>
-	 * For C++, this is the translation {@code 'a\n"'} &rarr; {@code "a\n\""}.
-	 * Expect single quotes around the incoming literal. Just flip the quotes
-	 * and replace double quotes with {@code \"}.
-	 * <p/>
-	 * Note that we have decided to allow people to use '\"' without penalty, so
-	 * we must build the target string in a loop as {@link String#replace}
-	 * cannot handle both {@code \"} and {@code "} without a lot of messing
-	 * around.
-	 */
-	@Override
-	public String getTargetStringLiteralFromANTLRStringLiteral(
-		CodeGenerator generator,
-		String literal, boolean addQuotes)
-	{
-		StringBuilder sb = new StringBuilder();
-		String is = literal;
-
-		if ( addQuotes ) sb.append('"');
-
-		for (int i = 1; i < is.length() -1; i++) {
-			if  (is.charAt(i) == '\\') {
-				// Anything escaped is what it is! We assume that
-				// people know how to escape characters correctly. However
-				// we catch anything that does not need an escape in Java (which
-				// is what the default implementation is dealing with and remove
-				// the escape. The C target does this for instance.
-				//
-				switch (is.charAt(i+1)) {
-					// Pass through any escapes that Java also needs
-					//
-					case    '"':
-					case    'n':
-					case    'r':
-					case    't':
-					case    'b':
-					case    'f':
-					case    '\\':
-						// Pass the escape through
-						sb.append('\\');
-						break;
-
-					case    'u':    // Assume unnnn
-						// Pass the escape through as double \\
-						// so that Java leaves as \u0000 string not char
-						sb.append('\\');
-						sb.append('\\');
-						break;
-
-					default:
-						// Remove the escape by virtue of not adding it here
-						// Thus \' becomes ' and so on
-						break;
-				}
-
-				// Go past the \ character
-				i++;
-			} else {
-				// Characters that don't need \ in ANTLR 'strings' but do in Java
-				if (is.charAt(i) == '"') {
-					// We need to escape " in Java
-					sb.append('\\');
-				}
-			}
-			// Add in the next character, which may have been escaped
-			sb.append(is.charAt(i));
-		}
-
-		if ( addQuotes ) sb.append('"');
-
-		return sb.toString();
-	}
-
 	@Override
 	public String encodeIntAsCharEscape(int v) {
 		return "0x" + Integer.toHexString(v) + ", ";
@ -232,4 +158,10 @@ public class CppTarget extends Target {

 		return result;
 	}
+
+	@Override
+	protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
+		// C99 and Python share the same escaping style.
+		UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
+	}
 }
--- a/tool/src/org/antlr/v4/codegen/target/GoTarget.java
+++ b/tool/src/org/antlr/v4/codegen/target/GoTarget.java
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;

 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.codegen.Target;
+import org.antlr.v4.codegen.UnicodeEscapes;
 import org.antlr.v4.parse.ANTLRParser;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.ast.GrammarAST;
@ -214,5 +215,10 @@ public class GoTarget extends Target {
 		}

 	}
-}

+	@Override
+	protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
+		// Go and Python share the same escaping style.
+		UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
+	}
+}
--- a/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java
+++ b/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;

 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.codegen.Target;
+import org.antlr.v4.codegen.UnicodeEscapes;
 import org.antlr.v4.tool.ast.GrammarAST;
 import org.stringtemplate.v4.STGroup;
 import org.stringtemplate.v4.StringRenderer;
@ -67,81 +68,6 @@ public class JavaScriptTarget extends Target {
 		badWords.add("parserRule");
 	}

-	/**
-	 * {@inheritDoc}
-	 * <p>
-	 * For Java, this is the translation {@code 'a\n"'} &rarr; {@code "a\n\""}.
-	 * Expect single quotes around the incoming literal. Just flip the quotes
-	 * and replace double quotes with {@code \"}.
-	 * <p>
-	 * Note that we have decided to allow people to use '\"' without penalty, so
-	 * we must build the target string in a loop as {@link String#replace}
-	 * cannot handle both {@code \"} and {@code "} without a lot of messing
-	 * around.
-	 */
-	@Override
-	public String getTargetStringLiteralFromANTLRStringLiteral(
-		CodeGenerator generator,
-		String literal, boolean addQuotes)
-	{
-		StringBuilder sb = new StringBuilder();
-		String is = literal;
-
-		if ( addQuotes ) sb.append('"');
-
-		for (int i = 1; i < is.length() -1; i++) {
-			if  (is.charAt(i) == '\\') {
-				// Anything escaped is what it is! We assume that
-				// people know how to escape characters correctly. However
-				// we catch anything that does not need an escape in Java (which
-				// is what the default implementation is dealing with and remove
-				// the escape. The C target does this for instance.
-				//
-				switch (is.charAt(i+1)) {
-					// Pass through any escapes that Java also needs
-					//
-					case    '"':
-					case    'n':
-					case    'r':
-					case    't':
-					case    'b':
-					case    'f':
-					case    '\\':
-						// Pass the escape through
-						sb.append('\\');
-						break;
-
-					case    'u':    // Assume unnnn
-						// Pass the escape through as double \\
-						// so that Java leaves as \u0000 string not char
-						sb.append('\\');
-						sb.append('\\');
-						break;
-
-					default:
-						// Remove the escape by virtue of not adding it here
-						// Thus \' becomes ' and so on
-						break;
-				}
-
-				// Go past the \ character
-				i++;
-			} else {
-				// Characters that don't need \ in ANTLR 'strings' but do in Java
-				if (is.charAt(i) == '"') {
-					// We need to escape " in Java
-					sb.append('\\');
-				}
-			}
-			// Add in the next character, which may have been escaped
-			sb.append(is.charAt(i));
-		}
-
-		if ( addQuotes ) sb.append('"');
-
-		return sb.toString();
-	}
-
 	@Override
 	public String encodeIntAsCharEscape(int v) {
 		if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
@ -210,4 +136,10 @@ public class JavaScriptTarget extends Target {
 	public boolean supportsOverloadedMethods() {
 		return false;
 	}
+
+	@Override
+	protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
+		// JavaScript and Java share the same escaping style.
+		UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb);
+	}
 }
--- a/tool/src/org/antlr/v4/codegen/target/JavaTarget.java
+++ b/tool/src/org/antlr/v4/codegen/target/JavaTarget.java
@ -9,6 +9,7 @@ package org.antlr.v4.codegen.target;
 import org.antlr.v4.Tool;
 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.codegen.Target;
+import org.antlr.v4.codegen.UnicodeEscapes;
 import org.antlr.v4.tool.ast.GrammarAST;
 import org.stringtemplate.v4.STGroup;
 import org.stringtemplate.v4.StringRenderer;
@ -99,4 +100,9 @@ public class JavaTarget extends Target {
 		}

 	}
+
+	@Override
+	protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
+		UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb);
+	}
 }
--- a/tool/src/org/antlr/v4/codegen/target/Python2Target.java
+++ b/tool/src/org/antlr/v4/codegen/target/Python2Target.java
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;

 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.codegen.Target;
+import org.antlr.v4.codegen.UnicodeEscapes;
 import org.antlr.v4.tool.ast.GrammarAST;
 import org.stringtemplate.v4.STGroup;
 import org.stringtemplate.v4.StringRenderer;
@ -109,4 +110,9 @@ public class Python2Target extends Target {
 		badWords.add("rule");
 		badWords.add("parserRule");
 	}
+
+	@Override
+	protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
+		UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
+	}
 }
--- a/tool/src/org/antlr/v4/codegen/target/Python3Target.java
+++ b/tool/src/org/antlr/v4/codegen/target/Python3Target.java
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;

 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.codegen.Target;
+import org.antlr.v4.codegen.UnicodeEscapes;
 import org.antlr.v4.tool.ast.GrammarAST;
 import org.stringtemplate.v4.STGroup;
 import org.stringtemplate.v4.StringRenderer;
@ -115,5 +116,8 @@ public class Python3Target extends Target {
 		badWords.add("parserRule");
 	}

-
+	@Override
+	protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
+		UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
+	}
 }
--- a/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java
+++ b/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;

 import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.codegen.Target;
+import org.antlr.v4.codegen.UnicodeEscapes;
 import org.antlr.v4.runtime.Token;
 import org.antlr.v4.runtime.atn.ATN;
 import org.antlr.v4.runtime.atn.ATNDeserializer;
@ -550,4 +551,9 @@ public class SwiftTarget extends Target {
        }

    }
+
+	@Override
+	protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
+		UnicodeEscapes.appendSwiftStyleEscapedCodePoint(codePoint, sb);
+	}
 }