forked from jasder/antlr
Emit language-specific Unicode escapes when generating code containing non-ASCII Unicode values
This commit is contained in:
parent
182f3c4647
commit
0049d6d9ae
|
@ -0,0 +1,78 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
||||||
|
* Use of this file is governed by the BSD 3-clause license that
|
||||||
|
* can be found in the LICENSE.txt file in the project root.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.antlr.v4.test.tool;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
public class TestUnicodeEscapes {
|
||||||
|
@Test
|
||||||
|
public void latinJavaEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x0061, sb);
|
||||||
|
assertEquals("\\u0061", sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void latinPythonEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x0061, sb);
|
||||||
|
assertEquals("\\u0061", sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void latinSwiftEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x0061, sb);
|
||||||
|
assertEquals("\\u{0061}", sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void bmpJavaEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendJavaStyleEscapedCodePoint(0xABCD, sb);
|
||||||
|
assertEquals("\\uABCD", sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void bmpPythonEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendPythonStyleEscapedCodePoint(0xABCD, sb);
|
||||||
|
assertEquals("\\uABCD", sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void bmpSwiftEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0xABCD, sb);
|
||||||
|
assertEquals("\\u{ABCD}", sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void smpJavaEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x1F4A9, sb);
|
||||||
|
assertEquals("\\uD83D\\uDCA9", sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void smpPythonEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x1F4A9, sb);
|
||||||
|
assertEquals("\\U0001F4A9", sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void smpSwiftEscape() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x1F4A9, sb);
|
||||||
|
assertEquals("\\u{1F4A9}", sb.toString());
|
||||||
|
}
|
||||||
|
}
|
|
@ -9,6 +9,7 @@ package org.antlr.v4.codegen;
|
||||||
import org.antlr.v4.Tool;
|
import org.antlr.v4.Tool;
|
||||||
import org.antlr.v4.codegen.model.RuleFunction;
|
import org.antlr.v4.codegen.model.RuleFunction;
|
||||||
import org.antlr.v4.codegen.model.SerializedATN;
|
import org.antlr.v4.codegen.model.SerializedATN;
|
||||||
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.misc.Utils;
|
import org.antlr.v4.misc.Utils;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.runtime.RuntimeMetaData;
|
import org.antlr.v4.runtime.RuntimeMetaData;
|
||||||
|
@ -146,17 +147,22 @@ public abstract class Target {
|
||||||
if ( quoted ) {
|
if ( quoted ) {
|
||||||
buf.append('"');
|
buf.append('"');
|
||||||
}
|
}
|
||||||
for (int i=0; i<s.length(); i++) {
|
for (int i=0; i<s.length(); ) {
|
||||||
int c = s.charAt(i);
|
int c = s.codePointAt(i);
|
||||||
if ( c!='\'' && // don't escape single quotes in strings for java
|
if ( c!='\'' && // don't escape single quotes in strings for java
|
||||||
c<targetCharValueEscape.length &&
|
c<targetCharValueEscape.length &&
|
||||||
targetCharValueEscape[c]!=null )
|
targetCharValueEscape[c]!=null )
|
||||||
{
|
{
|
||||||
buf.append(targetCharValueEscape[c]);
|
buf.append(targetCharValueEscape[c]);
|
||||||
}
|
}
|
||||||
else {
|
else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(c)) {
|
||||||
buf.append((char)c);
|
// Escape the code point itself; `i` is only its index within `s`.
appendUnicodeEscapedCodePoint(c, buf);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buf.appendCodePoint(c);
|
||||||
|
}
|
||||||
|
i += Character.charCount(c);
|
||||||
}
|
}
|
||||||
if ( quoted ) {
|
if ( quoted ) {
|
||||||
buf.append('"');
|
buf.append('"');
|
||||||
|
@ -164,6 +170,12 @@ public abstract class Target {
|
||||||
return buf.toString();
|
return buf.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Escape the Unicode code point appropriately for this language
|
||||||
|
* and append the escaped value to {@code sb}.
|
||||||
|
*/
|
||||||
|
abstract protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb);
|
||||||
|
|
||||||
public String getTargetStringLiteralFromString(String s) {
|
public String getTargetStringLiteralFromString(String s) {
|
||||||
return getTargetStringLiteralFromString(s, true);
|
return getTargetStringLiteralFromString(s, true);
|
||||||
}
|
}
|
||||||
|
@ -194,15 +206,19 @@ public abstract class Target {
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
if ( addQuotes ) sb.append('"');
|
||||||
|
|
||||||
for (int i = 1; i < is.length() -1; i++) {
|
for (int i = 1; i < is.length() -1; ) {
|
||||||
if (is.charAt(i) == '\\') {
|
int codePoint = is.codePointAt(i);
|
||||||
|
int toAdvance = Character.charCount(codePoint);
|
||||||
|
if (codePoint == '\\') {
|
||||||
// Anything escaped is what it is! We assume that
|
// Anything escaped is what it is! We assume that
|
||||||
// people know how to escape characters correctly. However
|
// people know how to escape characters correctly. However
|
||||||
// we catch anything that does not need an escape in Java (which
|
// we catch anything that does not need an escape in Java (which
|
||||||
// is what the default implementation is dealing with and remove
|
// is what the default implementation is dealing with and remove
|
||||||
// the escape. The C target does this for instance.
|
// the escape. The C target does this for instance.
|
||||||
//
|
//
|
||||||
switch (is.charAt(i+1)) {
|
int escapedCodePoint = is.codePointAt(i+toAdvance);
|
||||||
|
// Skip every UTF-16 unit of the escaped code point, not just one; otherwise
// a supplementary character after '\' leaves the index on its low surrogate,
// which the next iteration would emit again as a stray escape.
toAdvance += Character.charCount(escapedCodePoint);
|
||||||
|
switch (escapedCodePoint) {
|
||||||
// Pass through any escapes that Java also needs
|
// Pass through any escapes that Java also needs
|
||||||
//
|
//
|
||||||
case '"':
|
case '"':
|
||||||
|
@ -214,32 +230,43 @@ public abstract class Target {
|
||||||
case '\\':
|
case '\\':
|
||||||
// Pass the escape through
|
// Pass the escape through
|
||||||
sb.append('\\');
|
sb.append('\\');
|
||||||
|
sb.appendCodePoint(escapedCodePoint);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'u': // Assume unnnn
|
case 'u': // Either unnnn or u{nnnnnn}
|
||||||
// Pass the escape through as double \\
|
if (is.charAt(i+toAdvance) == '{') {
|
||||||
// so that Java leaves as \u0000 string not char
|
while (is.charAt(i+toAdvance) != '}') {
|
||||||
sb.append('\\');
|
toAdvance++;
|
||||||
sb.append('\\');
|
}
|
||||||
|
toAdvance++;
|
||||||
|
} else {
|
||||||
|
toAdvance += 4;
|
||||||
|
}
|
||||||
|
String fullEscape = is.substring(i, i + toAdvance);
|
||||||
|
appendUnicodeEscapedCodePoint(
|
||||||
|
CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
|
||||||
|
sb);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Remove the escape by virtue of not adding it here
|
if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {
|
||||||
// Thus \' becomes ' and so on
|
appendUnicodeEscapedCodePoint(escapedCodePoint, sb);
|
||||||
|
} else {
|
||||||
|
sb.appendCodePoint(escapedCodePoint);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Go past the \ character
|
|
||||||
i++;
|
|
||||||
} else {
|
} else {
|
||||||
// Characters that don't need \ in ANTLR 'strings' but do in Java
|
if (codePoint == 0x22) {
|
||||||
if (is.charAt(i) == '"') {
|
// ANTLR doesn't escape " in literal strings,
|
||||||
// We need to escape " in Java
|
// but every other language needs to do so.
|
||||||
sb.append('\\');
|
sb.append("\\\"");
|
||||||
|
} else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) {
|
||||||
|
appendUnicodeEscapedCodePoint(codePoint, sb);
|
||||||
|
} else {
|
||||||
|
sb.appendCodePoint(codePoint);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Add in the next character, which may have been escaped
|
i += toAdvance;
|
||||||
sb.append(is.charAt(i));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
if ( addQuotes ) sb.append('"');
|
||||||
|
@ -247,6 +274,19 @@ public abstract class Target {
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(int codePoint) {
|
||||||
|
// We don't want anyone passing 0x0A (newline) or 0x22
|
||||||
|
// (double-quote) here because Java treats \\u000A as
|
||||||
|
// a literal newline and \\u0022 as a literal
|
||||||
|
// double-quote, so Unicode escaping doesn't help.
|
||||||
|
assert codePoint != 0x0A && codePoint != 0x22;
|
||||||
|
|
||||||
|
return
|
||||||
|
codePoint < 0x20 || // control characters up to but not including space
|
||||||
|
codePoint == 0x5C || // backslash
|
||||||
|
codePoint >= 0x7F; // DEL and beyond (keeps source code 7-bit US-ASCII)
|
||||||
|
}
|
||||||
|
|
||||||
/** Assume 16-bit char */
|
/** Assume 16-bit char */
|
||||||
public String encodeIntAsCharEscape(int v) {
|
public String encodeIntAsCharEscape(int v) {
|
||||||
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
|
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
|
||||||
|
* Use of this file is governed by the BSD 3-clause license that
|
||||||
|
* can be found in the LICENSE.txt file in the project root.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
|
/**
 * Utility class to escape Unicode code points using various
 * languages' syntaxes.
 *
 * <p>Every method appends the escaped form of a single code point to the
 * supplied {@link StringBuilder}. Hex digits are upper-case and zero-padded
 * to at least four digits. All methods reject values that are not valid
 * Unicode code points instead of emitting a malformed escape.
 */
public abstract class UnicodeEscapes {
	/**
	 * Appends {@code codePoint} as one {@code \}{@code uXXXX} escape, or as a
	 * surrogate pair of two such escapes when the code point lies above
	 * {@code U+FFFF}.
	 *
	 * @param codePoint the Unicode code point to escape
	 * @param sb the buffer receiving the escape
	 * @throws IllegalArgumentException if {@code codePoint} is not a valid
	 *         Unicode code point
	 */
	public static void appendJavaStyleEscapedCodePoint(int codePoint, StringBuilder sb) {
		checkCodePoint(codePoint);
		if (Character.isSupplementaryCodePoint(codePoint)) {
			// char is not an 'integral' type, so we have to explicitly convert
			// to int before passing to the %X formatter or else it throws.
			sb.append(String.format("\\u%04X", (int)Character.highSurrogate(codePoint)));
			sb.append(String.format("\\u%04X", (int)Character.lowSurrogate(codePoint)));
		}
		else {
			sb.append(String.format("\\u%04X", codePoint));
		}
	}

	/**
	 * Appends {@code codePoint} as {@code \}{@code uXXXX} when it fits in
	 * 16 bits, or as the eight-digit {@code \UXXXXXXXX} form otherwise.
	 *
	 * @param codePoint the Unicode code point to escape
	 * @param sb the buffer receiving the escape
	 * @throws IllegalArgumentException if {@code codePoint} is not a valid
	 *         Unicode code point
	 */
	public static void appendPythonStyleEscapedCodePoint(int codePoint, StringBuilder sb) {
		checkCodePoint(codePoint);
		if (Character.isSupplementaryCodePoint(codePoint)) {
			sb.append(String.format("\\U%08X", codePoint));
		}
		else {
			sb.append(String.format("\\u%04X", codePoint));
		}
	}

	/**
	 * Appends {@code codePoint} in the braced {@code \}{@code u{XXXX}} form,
	 * which covers the whole code point range with a single escape.
	 *
	 * @param codePoint the Unicode code point to escape
	 * @param sb the buffer receiving the escape
	 * @throws IllegalArgumentException if {@code codePoint} is not a valid
	 *         Unicode code point
	 */
	public static void appendSwiftStyleEscapedCodePoint(int codePoint, StringBuilder sb) {
		checkCodePoint(codePoint);
		sb.append(String.format("\\u{%04X}", codePoint));
	}

	/**
	 * Rejects values outside {@code U+0000..U+10FFFF}. Without this check a
	 * negative value would be formatted as {@code FFFFFFFF} and a value above
	 * {@code U+10FFFF} as an over-long {@code \}{@code u} escape.
	 */
	private static void checkCodePoint(int codePoint) {
		if (!Character.isValidCodePoint(codePoint)) {
			throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", codePoint));
		}
	}
}
|
|
@ -7,6 +7,7 @@ package org.antlr.v4.codegen.target;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.codegen.Target;
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
import org.antlr.v4.tool.ErrorType;
|
import org.antlr.v4.tool.ErrorType;
|
||||||
import org.antlr.v4.tool.ast.GrammarAST;
|
import org.antlr.v4.tool.ast.GrammarAST;
|
||||||
import org.stringtemplate.v4.NumberRenderer;
|
import org.stringtemplate.v4.NumberRenderer;
|
||||||
|
@ -36,78 +37,16 @@ public class CSharpTarget extends Target {
|
||||||
throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
|
throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String formatted;
|
||||||
if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
|
if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
|
||||||
return targetCharValueEscape[v];
|
formatted = targetCharValueEscape[v];
|
||||||
|
} else if (v >= 0x20 && v < 127 && (v < '0' || v > '9') && (v < 'a' || v > 'f') && (v < 'A' || v > 'F')) {
|
||||||
|
formatted = Character.toString((char)v);
|
||||||
|
} else {
|
||||||
|
formatted = String.format("\\x%X", v & 0xFFFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (v >= 0x20 && v < 127 && (v < '0' || v > '9') && (v < 'a' || v > 'f') && (v < 'A' || v > 'F')) {
|
return "'" + formatted + "'";
|
||||||
return String.valueOf((char)v);
|
|
||||||
}
|
|
||||||
|
|
||||||
return String.format("\\x%X", v & 0xFFFF);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getTargetStringLiteralFromANTLRStringLiteral(
|
|
||||||
CodeGenerator generator,
|
|
||||||
String literal, boolean addQuotes)
|
|
||||||
{
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
String is = literal;
|
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
|
||||||
|
|
||||||
for (int i = 1; i < is.length() -1; i++) {
|
|
||||||
if (is.charAt(i) == '\\') {
|
|
||||||
// Anything escaped is what it is! We assume that
|
|
||||||
// people know how to escape characters correctly. However
|
|
||||||
// we catch anything that does not need an escape in Java (which
|
|
||||||
// is what the default implementation is dealing with and remove
|
|
||||||
// the escape. The C target does this for instance.
|
|
||||||
//
|
|
||||||
switch (is.charAt(i+1)) {
|
|
||||||
// Pass through any escapes that Java also needs
|
|
||||||
//
|
|
||||||
case '"':
|
|
||||||
case 'n':
|
|
||||||
case 'r':
|
|
||||||
case 't':
|
|
||||||
case 'b':
|
|
||||||
case 'f':
|
|
||||||
case '\\':
|
|
||||||
// Pass the escape through
|
|
||||||
sb.append('\\');
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 'u': // Assume unnnn
|
|
||||||
// Pass the escape through as double \\
|
|
||||||
// so that Java leaves as \u0000 string not char
|
|
||||||
sb.append('\\');
|
|
||||||
sb.append('\\');
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Remove the escape by virtue of not adding it here
|
|
||||||
// Thus \' becomes ' and so on
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Go past the \ character
|
|
||||||
i++;
|
|
||||||
} else {
|
|
||||||
// Characters that don't need \ in ANTLR 'strings' but do in Java
|
|
||||||
if (is.charAt(i) == '"') {
|
|
||||||
// We need to escape " in Java
|
|
||||||
sb.append('\\');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add in the next character, which may have been escaped
|
|
||||||
sb.append(is.charAt(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
|
||||||
|
|
||||||
return sb.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -150,4 +89,9 @@ public class CSharpTarget extends Target {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
|
||||||
|
// C# and Python share the same escaping style.
|
||||||
|
UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
package org.antlr.v4.codegen.target;
|
package org.antlr.v4.codegen.target;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.codegen.Target;
|
||||||
import org.antlr.v4.tool.ErrorType;
|
import org.antlr.v4.tool.ErrorType;
|
||||||
|
@ -68,81 +69,6 @@ public class CppTarget extends Target {
|
||||||
badWords.add("parserRule");
|
badWords.add("parserRule");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* <p/>
|
|
||||||
* For C++, this is the translation {@code 'a\n"'} → {@code "a\n\""}.
|
|
||||||
* Expect single quotes around the incoming literal. Just flip the quotes
|
|
||||||
* and replace double quotes with {@code \"}.
|
|
||||||
* <p/>
|
|
||||||
* Note that we have decided to allow people to use '\"' without penalty, so
|
|
||||||
* we must build the target string in a loop as {@link String#replace}
|
|
||||||
* cannot handle both {@code \"} and {@code "} without a lot of messing
|
|
||||||
* around.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getTargetStringLiteralFromANTLRStringLiteral(
|
|
||||||
CodeGenerator generator,
|
|
||||||
String literal, boolean addQuotes)
|
|
||||||
{
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
String is = literal;
|
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
|
||||||
|
|
||||||
for (int i = 1; i < is.length() -1; i++) {
|
|
||||||
if (is.charAt(i) == '\\') {
|
|
||||||
// Anything escaped is what it is! We assume that
|
|
||||||
// people know how to escape characters correctly. However
|
|
||||||
// we catch anything that does not need an escape in Java (which
|
|
||||||
// is what the default implementation is dealing with and remove
|
|
||||||
// the escape. The C target does this for instance.
|
|
||||||
//
|
|
||||||
switch (is.charAt(i+1)) {
|
|
||||||
// Pass through any escapes that Java also needs
|
|
||||||
//
|
|
||||||
case '"':
|
|
||||||
case 'n':
|
|
||||||
case 'r':
|
|
||||||
case 't':
|
|
||||||
case 'b':
|
|
||||||
case 'f':
|
|
||||||
case '\\':
|
|
||||||
// Pass the escape through
|
|
||||||
sb.append('\\');
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 'u': // Assume unnnn
|
|
||||||
// Pass the escape through as double \\
|
|
||||||
// so that Java leaves as \u0000 string not char
|
|
||||||
sb.append('\\');
|
|
||||||
sb.append('\\');
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Remove the escape by virtue of not adding it here
|
|
||||||
// Thus \' becomes ' and so on
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Go past the \ character
|
|
||||||
i++;
|
|
||||||
} else {
|
|
||||||
// Characters that don't need \ in ANTLR 'strings' but do in Java
|
|
||||||
if (is.charAt(i) == '"') {
|
|
||||||
// We need to escape " in Java
|
|
||||||
sb.append('\\');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add in the next character, which may have been escaped
|
|
||||||
sb.append(is.charAt(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
|
||||||
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String encodeIntAsCharEscape(int v) {
|
public String encodeIntAsCharEscape(int v) {
|
||||||
return "0x" + Integer.toHexString(v) + ", ";
|
return "0x" + Integer.toHexString(v) + ", ";
|
||||||
|
@ -232,4 +158,10 @@ public class CppTarget extends Target {
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
|
||||||
|
// C99 and Python share the same escaping style.
|
||||||
|
UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.codegen.Target;
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.Grammar;
|
||||||
import org.antlr.v4.tool.ast.GrammarAST;
|
import org.antlr.v4.tool.ast.GrammarAST;
|
||||||
|
@ -214,5 +215,10 @@ public class GoTarget extends Target {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
|
||||||
|
// Go and Python share the same escaping style.
|
||||||
|
UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.codegen.Target;
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
import org.antlr.v4.tool.ast.GrammarAST;
|
import org.antlr.v4.tool.ast.GrammarAST;
|
||||||
import org.stringtemplate.v4.STGroup;
|
import org.stringtemplate.v4.STGroup;
|
||||||
import org.stringtemplate.v4.StringRenderer;
|
import org.stringtemplate.v4.StringRenderer;
|
||||||
|
@ -67,81 +68,6 @@ public class JavaScriptTarget extends Target {
|
||||||
badWords.add("parserRule");
|
badWords.add("parserRule");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* <p>
|
|
||||||
* For Java, this is the translation {@code 'a\n"'} → {@code "a\n\""}.
|
|
||||||
* Expect single quotes around the incoming literal. Just flip the quotes
|
|
||||||
* and replace double quotes with {@code \"}.
|
|
||||||
* <p>
|
|
||||||
* Note that we have decided to allow people to use '\"' without penalty, so
|
|
||||||
* we must build the target string in a loop as {@link String#replace}
|
|
||||||
* cannot handle both {@code \"} and {@code "} without a lot of messing
|
|
||||||
* around.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getTargetStringLiteralFromANTLRStringLiteral(
|
|
||||||
CodeGenerator generator,
|
|
||||||
String literal, boolean addQuotes)
|
|
||||||
{
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
String is = literal;
|
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
|
||||||
|
|
||||||
for (int i = 1; i < is.length() -1; i++) {
|
|
||||||
if (is.charAt(i) == '\\') {
|
|
||||||
// Anything escaped is what it is! We assume that
|
|
||||||
// people know how to escape characters correctly. However
|
|
||||||
// we catch anything that does not need an escape in Java (which
|
|
||||||
// is what the default implementation is dealing with and remove
|
|
||||||
// the escape. The C target does this for instance.
|
|
||||||
//
|
|
||||||
switch (is.charAt(i+1)) {
|
|
||||||
// Pass through any escapes that Java also needs
|
|
||||||
//
|
|
||||||
case '"':
|
|
||||||
case 'n':
|
|
||||||
case 'r':
|
|
||||||
case 't':
|
|
||||||
case 'b':
|
|
||||||
case 'f':
|
|
||||||
case '\\':
|
|
||||||
// Pass the escape through
|
|
||||||
sb.append('\\');
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 'u': // Assume unnnn
|
|
||||||
// Pass the escape through as double \\
|
|
||||||
// so that Java leaves as \u0000 string not char
|
|
||||||
sb.append('\\');
|
|
||||||
sb.append('\\');
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Remove the escape by virtue of not adding it here
|
|
||||||
// Thus \' becomes ' and so on
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Go past the \ character
|
|
||||||
i++;
|
|
||||||
} else {
|
|
||||||
// Characters that don't need \ in ANTLR 'strings' but do in Java
|
|
||||||
if (is.charAt(i) == '"') {
|
|
||||||
// We need to escape " in Java
|
|
||||||
sb.append('\\');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add in the next character, which may have been escaped
|
|
||||||
sb.append(is.charAt(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
|
||||||
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String encodeIntAsCharEscape(int v) {
|
public String encodeIntAsCharEscape(int v) {
|
||||||
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
|
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
|
||||||
|
@ -210,4 +136,10 @@ public class JavaScriptTarget extends Target {
|
||||||
public boolean supportsOverloadedMethods() {
|
public boolean supportsOverloadedMethods() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
|
||||||
|
// JavaScript and Java share the same escaping style.
|
||||||
|
UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@ package org.antlr.v4.codegen.target;
|
||||||
import org.antlr.v4.Tool;
|
import org.antlr.v4.Tool;
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.codegen.Target;
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
import org.antlr.v4.tool.ast.GrammarAST;
|
import org.antlr.v4.tool.ast.GrammarAST;
|
||||||
import org.stringtemplate.v4.STGroup;
|
import org.stringtemplate.v4.STGroup;
|
||||||
import org.stringtemplate.v4.StringRenderer;
|
import org.stringtemplate.v4.StringRenderer;
|
||||||
|
@ -99,4 +100,9 @@ public class JavaTarget extends Target {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
|
||||||
|
UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.codegen.Target;
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
import org.antlr.v4.tool.ast.GrammarAST;
|
import org.antlr.v4.tool.ast.GrammarAST;
|
||||||
import org.stringtemplate.v4.STGroup;
|
import org.stringtemplate.v4.STGroup;
|
||||||
import org.stringtemplate.v4.StringRenderer;
|
import org.stringtemplate.v4.StringRenderer;
|
||||||
|
@ -109,4 +110,9 @@ public class Python2Target extends Target {
|
||||||
badWords.add("rule");
|
badWords.add("rule");
|
||||||
badWords.add("parserRule");
|
badWords.add("parserRule");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
|
||||||
|
UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.codegen.Target;
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
import org.antlr.v4.tool.ast.GrammarAST;
|
import org.antlr.v4.tool.ast.GrammarAST;
|
||||||
import org.stringtemplate.v4.STGroup;
|
import org.stringtemplate.v4.STGroup;
|
||||||
import org.stringtemplate.v4.StringRenderer;
|
import org.stringtemplate.v4.StringRenderer;
|
||||||
|
@ -115,5 +116,8 @@ public class Python3Target extends Target {
|
||||||
badWords.add("parserRule");
|
badWords.add("parserRule");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
|
||||||
|
UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ package org.antlr.v4.codegen.target;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.codegen.Target;
|
||||||
|
import org.antlr.v4.codegen.UnicodeEscapes;
|
||||||
import org.antlr.v4.runtime.Token;
|
import org.antlr.v4.runtime.Token;
|
||||||
import org.antlr.v4.runtime.atn.ATN;
|
import org.antlr.v4.runtime.atn.ATN;
|
||||||
import org.antlr.v4.runtime.atn.ATNDeserializer;
|
import org.antlr.v4.runtime.atn.ATNDeserializer;
|
||||||
|
@ -550,4 +551,9 @@ public class SwiftTarget extends Target {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
|
||||||
|
UnicodeEscapes.appendSwiftStyleEscapedCodePoint(codePoint, sb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue