forked from jasder/antlr
move common char encoding up to Target; same works for Python.
This commit is contained in:
parent
0b59fd5f33
commit
e1ba264d93
|
@ -79,104 +79,6 @@ public class JavaTarget extends Target {
|
||||||
badWords.add("parserRule");
|
badWords.add("parserRule");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* <p/>
|
|
||||||
* For Java, this is the translation {@code 'a\n"'} → {@code "a\n\""}.
|
|
||||||
* Expect single quotes around the incoming literal. Just flip the quotes
|
|
||||||
* and replace double quotes with {@code \"}.
|
|
||||||
* <p/>
|
|
||||||
* Note that we have decided to allow people to use '\"' without penalty, so
|
|
||||||
* we must build the target string in a loop as {@link String#replace}
|
|
||||||
* cannot handle both {@code \"} and {@code "} without a lot of messing
|
|
||||||
* around.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getTargetStringLiteralFromANTLRStringLiteral(
|
|
||||||
CodeGenerator generator,
|
|
||||||
String literal, boolean addQuotes)
|
|
||||||
{
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
String is = literal;
|
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
|
||||||
|
|
||||||
for (int i = 1; i < is.length() -1; i++) {
|
|
||||||
if (is.charAt(i) == '\\') {
|
|
||||||
// Anything escaped is what it is! We assume that
|
|
||||||
// people know how to escape characters correctly. However
|
|
||||||
// we catch anything that does not need an escape in Java (which
|
|
||||||
// is what the default implementation is dealing with) and remove
|
|
||||||
// the escape. The C target does this for instance.
|
|
||||||
//
|
|
||||||
switch (is.charAt(i+1)) {
|
|
||||||
// Pass through any escapes that Java also needs
|
|
||||||
//
|
|
||||||
case '"':
|
|
||||||
case 'n':
|
|
||||||
case 'r':
|
|
||||||
case 't':
|
|
||||||
case 'b':
|
|
||||||
case 'f':
|
|
||||||
case '\\':
|
|
||||||
// Pass the escape through
|
|
||||||
sb.append('\\');
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 'u': // Assume unnnn
|
|
||||||
// Pass the escape through as double \\
|
|
||||||
// so that Java leaves it as a \u0000 string, not a char
|
|
||||||
sb.append('\\');
|
|
||||||
sb.append('\\');
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Remove the escape by virtue of not adding it here
|
|
||||||
// Thus \' becomes ' and so on
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Go past the \ character
|
|
||||||
i++;
|
|
||||||
} else {
|
|
||||||
// Characters that don't need \ in ANTLR 'strings' but do in Java
|
|
||||||
if (is.charAt(i) == '"') {
|
|
||||||
// We need to escape " in Java
|
|
||||||
sb.append('\\');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add in the next character, which may have been escaped
|
|
||||||
sb.append(is.charAt(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( addQuotes ) sb.append('"');
|
|
||||||
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String encodeIntAsCharEscape(int v) {
|
|
||||||
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
|
|
||||||
throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
|
|
||||||
return targetCharValueEscape[v];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
|
|
||||||
return String.valueOf((char)v);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( v>=0 && v<=127 ) {
|
|
||||||
String oct = Integer.toOctalString(v);
|
|
||||||
return "\\"+ oct;
|
|
||||||
}
|
|
||||||
|
|
||||||
String hex = Integer.toHexString(v|0x10000).substring(1,5);
|
|
||||||
return "\\u"+hex;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getSerializedATNSegmentLimit() {
|
public int getSerializedATNSegmentLimit() {
|
||||||
// 65535 is the class file format byte limit for a UTF-8 encoded string literal
|
// 65535 is the class file format byte limit for a UTF-8 encoded string literal
|
||||||
|
|
|
@ -211,15 +211,106 @@ public abstract class Target {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert from an ANTLR string literal found in a grammar file to an
|
* <p>Convert from an ANTLR string literal found in a grammar file to an
|
||||||
* equivalent string literal in the target language.
|
* equivalent string literal in the target language.
|
||||||
|
*</p>
|
||||||
|
* <p>
|
||||||
|
* For Java, this is the translation {@code 'a\n"'} → {@code "a\n\""}.
|
||||||
|
* Expect single quotes around the incoming literal. Just flip the quotes
|
||||||
|
* and replace double quotes with {@code \"}.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Note that we have decided to allow people to use '\"' without penalty, so
|
||||||
|
* we must build the target string in a loop as {@link String#replace}
|
||||||
|
* cannot handle both {@code \"} and {@code "} without a lot of messing
|
||||||
|
* around.
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public abstract String getTargetStringLiteralFromANTLRStringLiteral(
|
public String getTargetStringLiteralFromANTLRStringLiteral(
|
||||||
CodeGenerator generator,
|
CodeGenerator generator,
|
||||||
String literal, boolean addQuotes);
|
String literal,
|
||||||
|
boolean addQuotes)
|
||||||
|
{
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
String is = literal;
|
||||||
|
|
||||||
|
if ( addQuotes ) sb.append('"');
|
||||||
|
|
||||||
|
for (int i = 1; i < is.length() -1; i++) {
|
||||||
|
if (is.charAt(i) == '\\') {
|
||||||
|
// Anything escaped is what it is! We assume that
|
||||||
|
// people know how to escape characters correctly. However
|
||||||
|
// we catch anything that does not need an escape in Java (which
|
||||||
|
// is what the default implementation is dealing with) and remove
|
||||||
|
// the escape. The C target does this for instance.
|
||||||
|
//
|
||||||
|
switch (is.charAt(i+1)) {
|
||||||
|
// Pass through any escapes that Java also needs
|
||||||
|
//
|
||||||
|
case '"':
|
||||||
|
case 'n':
|
||||||
|
case 'r':
|
||||||
|
case 't':
|
||||||
|
case 'b':
|
||||||
|
case 'f':
|
||||||
|
case '\\':
|
||||||
|
// Pass the escape through
|
||||||
|
sb.append('\\');
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'u': // Assume unnnn
|
||||||
|
// Pass the escape through as double \\
|
||||||
|
// so that Java leaves it as a \u0000 string, not a char
|
||||||
|
sb.append('\\');
|
||||||
|
sb.append('\\');
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
// Remove the escape by virtue of not adding it here
|
||||||
|
// Thus \' becomes ' and so on
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Go past the \ character
|
||||||
|
i++;
|
||||||
|
} else {
|
||||||
|
// Characters that don't need \ in ANTLR 'strings' but do in Java
|
||||||
|
if (is.charAt(i) == '"') {
|
||||||
|
// We need to escape " in Java
|
||||||
|
sb.append('\\');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Add in the next character, which may have been escaped
|
||||||
|
sb.append(is.charAt(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( addQuotes ) sb.append('"');
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
/** Assume 16-bit char */
|
/** Assume 16-bit char */
|
||||||
public abstract String encodeIntAsCharEscape(int v);
|
public String encodeIntAsCharEscape(int v) {
|
||||||
|
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
|
||||||
|
throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
|
||||||
|
return targetCharValueEscape[v];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
|
||||||
|
return String.valueOf((char)v);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( v>=0 && v<=127 ) {
|
||||||
|
String oct = Integer.toOctalString(v);
|
||||||
|
return "\\"+ oct;
|
||||||
|
}
|
||||||
|
|
||||||
|
String hex = Integer.toHexString(v|0x10000).substring(1,5);
|
||||||
|
return "\\u"+hex;
|
||||||
|
}
|
||||||
|
|
||||||
public String getLoopLabel(GrammarAST ast) {
|
public String getLoopLabel(GrammarAST ast) {
|
||||||
return "loop"+ ast.token.getTokenIndex();
|
return "loop"+ ast.token.getTokenIndex();
|
||||||
|
|
Loading…
Reference in New Issue