move common char encoding up to Target; same works for Python.

2014-07-03 10:54:49 -07:00 · 2014-07-03 10:54:49 -07:00 · e1ba264d93
parent 0b59fd5f33
commit e1ba264d93
2 changed files with 95 additions and 102 deletions
--- a/tool/src/org/antlr/v4/codegen/JavaTarget.java
+++ b/tool/src/org/antlr/v4/codegen/JavaTarget.java
@ -79,104 +79,6 @@ public class JavaTarget extends Target {
 		badWords.add("parserRule");
 	}
 	/**
 	 * {@inheritDoc}
 	 * <p/>
 	 * Java flavour of the conversion: {@code 'a\n"'} becomes {@code "a\n\""}.
 	 * The incoming literal is expected to carry single quotes at both ends;
 	 * the first and last characters are dropped, and double quotes are
 	 * emitted in their place when {@code addQuotes} is set.
 	 * <p/>
 	 * Both {@code \"} and a bare {@code "} are accepted in the grammar literal
 	 * and both come out as {@code \"}. That is why the result is assembled
 	 * one character at a time instead of with {@link String#replace}.
 	 */
 	@Override
 	public String getTargetStringLiteralFromANTLRStringLiteral(
 		CodeGenerator generator,
 		String literal, boolean addQuotes)
 	{
 		final StringBuilder out = new StringBuilder();
 		if ( addQuotes ) out.append('"');

 		// Index 0 and index length-1 hold the surrounding quotes; walk
 		// only what lies between them.
 		final int closingQuote = literal.length() - 1;
 		int pos = 1;
 		while ( pos < closingQuote ) {
 			char c = literal.charAt(pos);
 			if ( c == '\\' ) {
 				// Step onto the escaped character and decide what to do
 				// with the backslash that introduced it.
 				pos++;
 				c = literal.charAt(pos);
 				if ( c == 'u' ) {
 					// Unicode escape: emit a doubled backslash so the
 					// generated Java keeps the escape as string text
 					// rather than having it turned into a char.
 					out.append('\\');
 					out.append('\\');
 				}
 				else if ( "\"nrtbf\\".indexOf(c) >= 0 ) {
 					// Escapes Java understands as well: keep the backslash.
 					out.append('\\');
 				}
 				// Any other escape loses its backslash; an escaped single
 				// quote becomes a plain single quote, and so on.
 			}
 			else if ( c == '"' ) {
 				// Legal unescaped in an ANTLR literal, but not in Java.
 				out.append('\\');
 			}
 			// Emit the character itself (possibly the escaped one).
 			out.append(c);
 			pos++;
 		}

 		if ( addQuotes ) out.append('"');
 		return out.toString();
 	}
 	/**
 	 * Render {@code v} as text suitable for a Java string or char literal.
 	 * Tried in order: an entry from {@code targetCharValueEscape}, the
 	 * character itself, an octal escape, and finally a four-digit hex
 	 * Unicode escape.
 	 *
 	 * @param v the value to encode; must lie within
 	 *          {@link Character#MIN_VALUE}..{@link Character#MAX_VALUE}
 	 * @return the encoded form of {@code v}
 	 * @throws IllegalArgumentException if {@code v} is outside the char range
 	 */
 	@Override
 	public String encodeIntAsCharEscape(int v) {
 		final boolean fitsInChar = v >= Character.MIN_VALUE && v <= Character.MAX_VALUE;
 		if ( !fitsInChar ) {
 			throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
 		}

 		// v is known to be non-negative from here on.
 		// A non-null table entry wins over every other encoding.
 		if ( v < targetCharValueEscape.length ) {
 			final String fromTable = targetCharValueEscape[v];
 			if ( fromTable != null ) {
 				return fromTable;
 			}
 		}

 		// Values 0x20..126 are emitted as themselves, except the digits
 		// 0-7, which fall through to the octal form below.
 		final boolean lowDigit = Character.isDigit(v) && v != '8' && v != '9';
 		if ( v >= 0x20 && v < 127 && !lowDigit ) {
 			return String.valueOf((char)v);
 		}

 		if ( v <= 127 ) {
 			return "\\" + Integer.toOctalString(v);
 		}

 		// OR-ing in 0x10000 guarantees five hex digits; dropping the first
 		// leaves exactly four, zero-padded on the left.
 		final String fiveDigits = Integer.toHexString(v | 0x10000);
 		return "\\u" + fiveDigits.substring(1, 5);
 	}
 	@Override
 	public int getSerializedATNSegmentLimit() {
 		// 65535 is the class file format byte limit for a UTF-8 encoded string literal
--- a/tool/src/org/antlr/v4/codegen/Target.java
+++ b/tool/src/org/antlr/v4/codegen/Target.java
@ -211,15 +211,106 @@ public abstract class Target {
 	}
 	/**
-	 * Convert from an ANTLR string literal found in a grammar file to an
+	 * <p>Convert from an ANTLR string literal found in a grammar file to an
 	 * equivalent string literal in the target language.
 	 *</p>
 	 * <p>
 	 * For Java, this is the translation {@code 'a\n"'} &rarr; {@code "a\n\""}.
 	 * Expect single quotes around the incoming literal. Just flip the quotes
 	 * and replace double quotes with {@code \"}.
 	 * </p>
 	 * <p>
 	 * Note that we have decided to allow people to use '\"' without penalty, so
 	 * we must build the target string in a loop as {@link String#replace}
 	 * cannot handle both {@code \"} and {@code "} without a lot of messing
 	 * around.
 	 * </p>
 	 *
 	 * @param generator the code generator; not referenced by this implementation
 	 * @param literal the grammar literal including its delimiters; the first
 	 *        and last characters are skipped without being inspected
 	 * @param addQuotes if {@code true}, the result is wrapped in double quotes
 	 * @return the translated literal text
 	 */
-	public abstract String getTargetStringLiteralFromANTLRStringLiteral(
+	public String getTargetStringLiteralFromANTLRStringLiteral(
 		CodeGenerator generator,
-		String literal, boolean addQuotes);
+		String literal,
 		boolean addQuotes)
 	{
 		StringBuilder sb = new StringBuilder();
 		String is = literal;
 		if ( addQuotes ) sb.append('"');
 		// Start at 1 and stop before length-1 so the delimiters at both
 		// ends of the incoming literal are left out.
 		for (int i = 1; i < is.length() -1; i++) {
 			if  (is.charAt(i) == '\\') {
 				// Anything escaped is what it is! We assume that
 				// people know how to escape characters correctly. However,
 				// we catch anything that does not need an escape in Java
 				// (which is what this default implementation deals with)
 				// and remove the escape. The C target does this for instance.
 				//
 				// i+1 is always a valid index here because the loop bound
 				// keeps i below length-1.
 				switch (is.charAt(i+1)) {
 					// Pass through any escapes that Java also needs
 					//
 					case    '"':
 					case    'n':
 					case    'r':
 					case    't':
 					case    'b':
 					case    'f':
 					case    '\\':
 						// Pass the escape through
 						sb.append('\\');
 						break;
 					case    'u':    // Assume unnnn
 						// Pass the escape through as double \\
 						// so that Java leaves as \u0000 string not char
 						sb.append('\\');
 						sb.append('\\');
 						break;
 					default:
 						// Remove the escape by virtue of not adding it here
 						// Thus \' becomes ' and so on
 						break;
 				}
 				// Go past the \ character
 				i++;
 			} else {
 				// Characters that don't need \ in ANTLR 'strings' but do in Java
 				if (is.charAt(i) == '"') {
 					// We need to escape " in Java
 					sb.append('\\');
 				}
 			}
 			// Add in the next character, which may have been escaped
 			sb.append(is.charAt(i));
 		}
 		if ( addQuotes ) sb.append('"');
 		return sb.toString();
 	}
 	/**
 	 * Encode {@code v} as text for a target-language string or char literal.
 	 * Assume 16-bit char.
 	 * <p>
 	 * Tried in order: a non-null entry of {@code targetCharValueEscape}, the
 	 * character itself, a backslash followed by the octal value, and finally
 	 * a backslash-u escape with exactly four hex digits.
 	 * </p>
 	 *
 	 * @param v the value to encode
 	 * @return the encoded text for {@code v}
 	 * @throws IllegalArgumentException if {@code v} is outside
 	 *         {@link Character#MIN_VALUE}..{@link Character#MAX_VALUE}
 	 */
-	public abstract String encodeIntAsCharEscape(int v);
+	public String encodeIntAsCharEscape(int v) {
 		if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
 			throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
 		}
 		// A non-null table entry takes priority over every other form.
 		if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
 			return targetCharValueEscape[v];
 		}
 		// Values 0x20..126 are emitted as themselves, except the digits 0-7,
 		// which drop to the octal branch below.
 		// NOTE(review): presumably so a literal digit cannot be read as part
 		// of a preceding octal escape -- confirm.
 		if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
 			return String.valueOf((char)v);
 		}
 		if ( v>=0 && v<=127 ) {
 			String oct = Integer.toOctalString(v);
 			return "\\"+ oct;
 		}
 		// OR-ing in 0x10000 forces a five-digit hex string; substring(1,5)
 		// drops the leading 1, leaving four zero-padded digits.
 		String hex = Integer.toHexString(v|0x10000).substring(1,5);
 		return "\\u"+hex;
 	}
 	public String getLoopLabel(GrammarAST ast) {
 		return "loop"+ ast.token.getTokenIndex();