move common char encoding up to Target; same works for Python.

This commit is contained in:
Terence Parr 2014-07-03 10:54:49 -07:00
parent 0b59fd5f33
commit e1ba264d93
2 changed files with 95 additions and 102 deletions

View File

@ -79,104 +79,6 @@ public class JavaTarget extends Target {
badWords.add("parserRule");
}
/**
* {@inheritDoc}
* <p/>
* For Java, this is the translation {@code 'a\n"'} &rarr; {@code "a\n\""}.
* Expect single quotes around the incoming literal. Just flip the quotes
* and replace double quotes with {@code \"}.
* <p/>
* Note that we have decided to allow people to use '\"' without penalty, so
* we must build the target string in a loop as {@link String#replace}
* cannot handle both {@code \"} and {@code "} without a lot of messing
* around.
*/
@Override
public String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator,
String literal, boolean addQuotes)
{
StringBuilder sb = new StringBuilder();
String is = literal;
if ( addQuotes ) sb.append('"');
for (int i = 1; i < is.length() -1; i++) {
if (is.charAt(i) == '\\') {
// Anything escaped is what it is! We assume that
// people know how to escape characters correctly. However
// we catch anything that does not need an escape in Java (which
// is what the default implementation is dealing with and remove
// the escape. The C target does this for instance.
//
switch (is.charAt(i+1)) {
// Pass through any escapes that Java also needs
//
case '"':
case 'n':
case 'r':
case 't':
case 'b':
case 'f':
case '\\':
// Pass the escape through
sb.append('\\');
break;
case 'u': // Assume unnnn
// Pass the escape through as double \\
// so that Java leaves as \u0000 string not char
sb.append('\\');
sb.append('\\');
break;
default:
// Remove the escape by virtue of not adding it here
// Thus \' becomes ' and so on
break;
}
// Go past the \ character
i++;
} else {
// Characters that don't need \ in ANTLR 'strings' but do in Java
if (is.charAt(i) == '"') {
// We need to escape " in Java
sb.append('\\');
}
}
// Add in the next character, which may have been escaped
sb.append(is.charAt(i));
}
if ( addQuotes ) sb.append('"');
return sb.toString();
}
@Override
public String encodeIntAsCharEscape(int v) {
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
}
if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
return targetCharValueEscape[v];
}
if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
return String.valueOf((char)v);
}
if ( v>=0 && v<=127 ) {
String oct = Integer.toOctalString(v);
return "\\"+ oct;
}
String hex = Integer.toHexString(v|0x10000).substring(1,5);
return "\\u"+hex;
}
@Override
public int getSerializedATNSegmentLimit() {
// 65535 is the class file format byte limit for a UTF-8 encoded string literal

View File

@ -211,15 +211,106 @@ public abstract class Target {
}
/**
* Convert from an ANTLR string literal found in a grammar file to an
* <p>Convert from an ANTLR string literal found in a grammar file to an
* equivalent string literal in the target language.
*</p>
* <p>
* For Java, this is the translation {@code 'a\n"'} &rarr; {@code "a\n\""}.
* Expect single quotes around the incoming literal. Just flip the quotes
* and replace double quotes with {@code \"}.
* </p>
* <p>
* Note that we have decided to allow people to use '\"' without penalty, so
* we must build the target string in a loop as {@link String#replace}
* cannot handle both {@code \"} and {@code "} without a lot of messing
* around.
* </p>
*/
public abstract String getTargetStringLiteralFromANTLRStringLiteral(
public String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator,
String literal, boolean addQuotes);
String literal,
boolean addQuotes)
{
StringBuilder sb = new StringBuilder();
String is = literal;
if ( addQuotes ) sb.append('"');
for (int i = 1; i < is.length() -1; i++) {
if (is.charAt(i) == '\\') {
// Anything escaped is what it is! We assume that
// people know how to escape characters correctly. However
// we catch anything that does not need an escape in Java (which
// is what the default implementation is dealing with and remove
// the escape. The C target does this for instance.
//
switch (is.charAt(i+1)) {
// Pass through any escapes that Java also needs
//
case '"':
case 'n':
case 'r':
case 't':
case 'b':
case 'f':
case '\\':
// Pass the escape through
sb.append('\\');
break;
case 'u': // Assume unnnn
// Pass the escape through as double \\
// so that Java leaves as \u0000 string not char
sb.append('\\');
sb.append('\\');
break;
default:
// Remove the escape by virtue of not adding it here
// Thus \' becomes ' and so on
break;
}
// Go past the \ character
i++;
} else {
// Characters that don't need \ in ANTLR 'strings' but do in Java
if (is.charAt(i) == '"') {
// We need to escape " in Java
sb.append('\\');
}
}
// Add in the next character, which may have been escaped
sb.append(is.charAt(i));
}
if ( addQuotes ) sb.append('"');
return sb.toString();
}
/** Assume 16-bit char */
public abstract String encodeIntAsCharEscape(int v);
public String encodeIntAsCharEscape(int v) {
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
}
if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
return targetCharValueEscape[v];
}
if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
return String.valueOf((char)v);
}
if ( v>=0 && v<=127 ) {
String oct = Integer.toOctalString(v);
return "\\"+ oct;
}
String hex = Integer.toHexString(v|0x10000).substring(1,5);
return "\\u"+hex;
}
public String getLoopLabel(GrammarAST ast) {
return "loop"+ ast.token.getTokenIndex();