template names = class names; reorg'd char stuff; got bitsets defined.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6850]
This commit is contained in:
parrt 2010-05-09 12:11:50 -08:00
parent cab4d7d197
commit 3e1f43117e
16 changed files with 301 additions and 174 deletions

View File

@ -1,40 +1,45 @@
// args must be <object-model-object>, <fields-resulting-in-STs>
parserFile(f, parser) ::= <<
ParserFile(f, parser, dfaDefs, bitSetDefs) ::= <<
// $ANTLR ANTLRVersion> <f.fileName> generatedTimestamp>
import org.antlr.runtime.*;
<parser>
>>
parser(p,funcs,dfaDefs) ::= <<
Parser(p, funcs) ::= <<
public class <p.name> {
<funcs>
<dfaDefs>
<funcs; separator="\n">
<dfaDefs; separator="\n">
<bitSetDefs; separator="\n">
}
>>
DFA(dfa) ::= <<
DFADef(dfa) ::= <<
// define <dfa.name>
>>
parserFunction(f,code) ::= <<
BitSetDef(b) ::= <<
// define <b.name>
>>
RuleFunction(f,code) ::= <<
<f.modifiers:{f | <f> }>void <f.name>(<f.args>) {
<code>
}
>>
codeBlock(c, ops) ::= <<
<ops>
CodeBlock(c, ops) ::= <<
<ops; separator="\n">
>>
switch(c, alts) ::= <<
LL1Choice(c, alts) ::= <<
switch ( input.LA(1) ) {
<alts>
<alts; separator="\n">
}
>>
matchToken(m) ::= <<
match(<m.name>);
MatchToken(m) ::= <<
match(<m.name>, <m.follow.name>);
>>
codeFileExtension() ::= ".java"

View File

@ -1,6 +1,6 @@
package org.antlr.v4.automata;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
@ -45,8 +45,8 @@ public class LexerNFAFactory extends ParserNFAFactory {
public Handle range(GrammarAST a, GrammarAST b) {
BasicState left = newState(a);
BasicState right = newState(b);
int t1 = Target.getCharValueFromGrammarCharLiteral(a.getText());
int t2 = Target.getCharValueFromGrammarCharLiteral(b.getText());
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
left.transition = new RangeTransition(t1, t2, right);
a.nfaState = left;
b.nfaState = left;

View File

@ -3,7 +3,7 @@ package org.antlr.v4.automata;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
@ -103,7 +103,7 @@ public class ParserNFAFactory implements NFAFactory {
GrammarAST ast = A.left.ast;
int ttype = 0;
if ( g.isLexer() ) {
ttype = Target.getCharValueFromGrammarCharLiteral(ast.getText());
ttype = CharSupport.getCharValueFromGrammarCharLiteral(ast.getText());
}
else {
ttype = g.getTokenType(ast.getText());

View File

@ -1,6 +1,6 @@
package org.antlr.v4.automata;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.IntervalSet;
public class RangeTransition extends Transition {
@ -21,7 +21,7 @@ public class RangeTransition extends Transition {
@Override
public String toString() {
return Target.getANTLRCharLiteralForChar(from)+".."+
Target.getANTLRCharLiteralForChar(to);
return CharSupport.getANTLRCharLiteralForChar(from)+".."+
CharSupport.getANTLRCharLiteralForChar(to);
}
}

View File

@ -1,8 +1,13 @@
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.src.BitSetDef;
import org.antlr.v4.codegen.src.OutputModelObject;
import org.antlr.v4.codegen.src.ParserFile;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.GrammarAST;
import org.stringtemplate.v4.*;
import java.io.IOException;
@ -19,6 +24,7 @@ public abstract class CodeGenerator {
public Grammar g;
public Target target;
public STGroup templates;
public ParserFile outputModel;
public int lineWidth = 72;
@ -72,8 +78,7 @@ public abstract class CodeGenerator {
public void write() {
OutputModelObject root = buildOutputModel();
OutputModelWalker walker = new OutputModelWalker(g.tool, templates,
ParserGenerator.modelToTemplateMap);
OutputModelWalker walker = new OutputModelWalker(g.tool, templates);
ST outputFileST = walker.walk(root);
// WRITE FILES
@ -130,4 +135,16 @@ public abstract class CodeGenerator {
// }
return g.name+VOCAB_FILE_EXTENSION;
}
/** Create a named follow-set definition for the grammar element at
 *  {@code ast}, add it to the output model's bit set list, and return it.
 *
 *  The name has the form {@code FOLLOW_<element>_in_<rule><tokenIndex>},
 *  where the token index of the element keeps names distinct when the same
 *  element is referenced more than once in a rule.
 *
 *  @param ast    the element reference; its {@code nfaState} and
 *                {@code token} are read, so both must be set
 *  @param follow the set to store in the definition
 *  @return the definition that was added to {@code outputModel.bitSetDefs}
 */
public BitSetDef defineBitSet(GrammarAST ast, IntSet follow) {
    String inRuleName = ast.nfaState.rule.name;
    String elementName = ast.getText(); // assume rule ref
    if ( ast.getType() == ANTLRParser.TOKEN_REF ) {
        // Token refs are named by their target label. The label lookup
        // needs the grammar's token type for this name, not the AST node
        // type (which is always TOKEN_REF here), and its result must be
        // kept rather than discarded.
        elementName = target.getTokenTypeAsTargetLabel(g, g.getTokenType(elementName));
    }
    String name = "FOLLOW_"+elementName+"_in_"+inRuleName+ast.token.getTokenIndex();
    BitSetDef b = new BitSetDef(this, name, follow);
    outputModel.bitSetDefs.add(b);
    return b;
}
}

View File

@ -5,6 +5,7 @@ import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.TreeNodeStream;
import org.antlr.v4.codegen.nfa.*;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.DoubleKeyMap;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
@ -89,7 +90,7 @@ public class NFABytecodeGenerator extends TreeParser {
}
public void emitString(Token t) {
String chars = Target.getStringFromGrammarStringLiteral(t.getText());
String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
for (char c : chars.toCharArray()) {
emit(new MatchInstr(t, c));
}

View File

@ -15,20 +15,19 @@ import java.util.*;
public class OutputModelWalker {
Tool tool;
STGroup templates;
Map<Class, String> modelToTemplateMap;
//Map<Class, String> modelToTemplateMap;
public OutputModelWalker(Tool tool,
STGroup templates,
Map<Class, String> modelToTemplateMap)
STGroup templates)
{
this.tool = tool;
this.templates = templates;
this.modelToTemplateMap = modelToTemplateMap;
//this.modelToTemplateMap = modelToTemplateMap;
}
public ST walk(OutputModelObject omo) {
// CREATE TEMPLATE FOR THIS OUTPUT OBJECT
String templateName = modelToTemplateMap.get(omo.getClass());
String templateName = omo.getClass().getSimpleName();
if ( templateName == null ) {
tool.errMgr.toolError(ErrorType.NO_MODEL_TO_TEMPLATE_MAPPING, omo.getClass().getSimpleName());
return new BlankST();

View File

@ -1,30 +1,31 @@
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.src.*;
import org.antlr.v4.codegen.src.OutputModelObject;
import org.antlr.v4.codegen.src.Parser;
import org.antlr.v4.codegen.src.ParserFile;
import org.antlr.v4.tool.Grammar;
import java.util.HashMap;
import java.util.Map;
/** */
public class ParserGenerator extends CodeGenerator {
public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>() {{
put(ParserFile.class, "parserFile");
put(Parser.class, "parser");
put(RuleFunction.class, "parserFunction");
put(DFADef.class, "DFA");
put(CodeBlock.class, "codeBlock");
put(LL1Choice.class, "switch");
put(MatchToken.class, "matchToken");
}};
// public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>() {{
// put(ParserFile.class, "parserFile");
// put(Parser.class, "parser");
// put(RuleFunction.class, "parserFunction");
// put(DFADef.class, "DFA");
// put(CodeBlock.class, "codeBlock");
// put(LL1Choice.class, "switch");
// put(MatchToken.class, "matchToken");
// }};
public ParserGenerator(Grammar g) {
super(g);
}
public OutputModelObject buildOutputModel() {
Parser p = new Parser(this);
return new ParserFile(this, p, getRecognizerFileName());
ParserFile pf = new ParserFile(this, getRecognizerFileName());
outputModel = pf;
pf.parser = new Parser(this, pf); // side-effect: fills pf dfa and bitset defs
// at this point, model is built
return outputModel;
}
}

View File

@ -1,6 +1,7 @@
package org.antlr.v4.codegen;
import org.antlr.v4.automata.Label;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.tool.Grammar;
import org.stringtemplate.v4.ST;
@ -8,31 +9,28 @@ import java.io.IOException;
/** */
public class Target {
/** When converting ANTLR char and string literals, here is the
* value set of escape chars.
/** For pure strings of Java 16-bit unicode char, how can we display
* it in the target language as a literal. Useful for dumping
* predicates and such that may refer to chars that need to be escaped
* when represented as strings. Also, templates need to be escaped so
* that the target language can hold them as a string.
*
* I have defined (via the constructor) the set of typical escapes,
* but your Target subclass is free to alter the translated chars or
* add more definitions. This is nonstatic so each target can have
* a different set in memory at same time.
*/
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
protected String[] targetCharValueEscape = new String[255];
/** Given a char, we need to be able to show as an ANTLR literal.
*/
public static String ANTLRLiteralCharValueEscape[] = new String[255];
static {
ANTLRLiteralEscapedCharValue['n'] = '\n';
ANTLRLiteralEscapedCharValue['r'] = '\r';
ANTLRLiteralEscapedCharValue['t'] = '\t';
ANTLRLiteralEscapedCharValue['b'] = '\b';
ANTLRLiteralEscapedCharValue['f'] = '\f';
ANTLRLiteralEscapedCharValue['\\'] = '\\';
ANTLRLiteralEscapedCharValue['\''] = '\'';
ANTLRLiteralEscapedCharValue['"'] = '"';
ANTLRLiteralCharValueEscape['\n'] = "\\n";
ANTLRLiteralCharValueEscape['\r'] = "\\r";
ANTLRLiteralCharValueEscape['\t'] = "\\t";
ANTLRLiteralCharValueEscape['\b'] = "\\b";
ANTLRLiteralCharValueEscape['\f'] = "\\f";
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
ANTLRLiteralCharValueEscape['\''] = "\\'";
public Target() {
targetCharValueEscape['\n'] = "\\n";
targetCharValueEscape['\r'] = "\\r";
targetCharValueEscape['\t'] = "\\t";
targetCharValueEscape['\b'] = "\\b";
targetCharValueEscape['\f'] = "\\f";
targetCharValueEscape['\\'] = "\\\\";
targetCharValueEscape['\''] = "\\'";
targetCharValueEscape['"'] = "\\\"";
}
protected void genRecognizerFile(CodeGenerator generator,
@ -52,96 +50,66 @@ public class Target {
{
// no header file by default
}
/** Given a literal like (the 3 char sequence with single quotes) 'a',
* return the int value of 'a'. Convert escape sequences here also.
/** Get a meaningful name for a token type useful during code generation.
* Literals without associated names are converted to the string equivalent
* of their integer values. Used to generate x==ID and x==34 type comparisons
* etc... Essentially we are looking for the most obvious way to refer
* to a token type in the generated code. If in the lexer, return the
* char literal translated to the target language. For example, ttype=10
* will yield '\n' from the getTokenDisplayName method. That must
* be converted to the target languages literals. For most C-derived
* languages no translation is needed.
*/
public static int getCharValueFromGrammarCharLiteral(String literal) {
switch ( literal.length() ) {
case 3 :
// 'x'
return literal.charAt(1); // no escape char
case 4 :
// '\x' (antlr lexer will catch invalid char)
if ( Character.isDigit(literal.charAt(2)) ) {
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
// "invalid char literal: "+literal);
return -1;
}
int escChar = literal.charAt(2);
int charVal = ANTLRLiteralEscapedCharValue[escChar];
if ( charVal==0 ) {
// Unnecessary escapes like '\{' should just yield {
return escChar;
}
return charVal;
case 8 :
// '\u1234'
String unicodeChars = literal.substring(3,literal.length()-1);
return Integer.parseInt(unicodeChars, 16);
default :
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
// "invalid char literal: "+literal);
return -1;
public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
if ( g.getType() == ANTLRParser.LEXER ) {
// String name = g.getTokenDisplayName(ttype);
// return getTargetCharLiteralFromANTLRCharLiteral(this,name);
}
String name = g.getTokenDisplayName(ttype);
// If name is a literal, return the token type instead
if ( name.charAt(0)=='\'' ) {
return String.valueOf(ttype);
}
return name;
}
public static String getStringFromGrammarStringLiteral(String literal) {
StringBuilder buf = new StringBuilder();
int n = literal.length();
int i = 1; // skip first quote
while ( i < (n-1) ) { // scan all but last quote
switch ( literal.charAt(i) ) {
case '\\' :
i++;
if ( literal.charAt(i)=='u' ) { // '\u1234'
i++;
String unicodeChars = literal.substring(3,literal.length()-1);
buf.append((char)Integer.parseInt(unicodeChars, 16));
}
else {
char escChar = literal.charAt(i);
int charVal = ANTLRLiteralEscapedCharValue[escChar];
if ( charVal==0 ) buf.append(escChar); // Unnecessary escapes like '\{' should just yield {
else buf.append((char)charVal);
}
break;
default :
buf.append(literal.charAt(i));
i++;
break;
}
/** Convert from an ANTLR char literal found in a grammar file to
* an equivalent char literal in the target language. For most
* languages, this means leaving 'x' as 'x'. Actually, we need
* to escape '\u000A' so that it doesn't get converted to \n by
* the compiler. Convert the literal to the char value and then
* to an appropriate target char literal.
*
* Expect single quotes around the incoming literal.
*/
public String getTargetCharLiteralCharValue(int c) {
StringBuffer buf = new StringBuffer();
buf.append('\'');
if ( c<Label.MIN_CHAR_VALUE ) return "'\u0000'";
if ( c<targetCharValueEscape.length &&
targetCharValueEscape[c]!=null )
{
buf.append(targetCharValueEscape[c]);
}
else if ( Character.UnicodeBlock.of((char)c)==
Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char)c) )
{
// normal char
buf.append((char)c);
}
else {
// must be something unprintable...use \\uXXXX
// turn on the bit above max "\\uFFFF" value so that we pad with zeros
// then only take last 4 digits
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
buf.append("\\u");
buf.append(hex);
}
buf.append('\'');
return buf.toString();
}
/** Return a string representing the escaped char for code c. E.g., If c
* has value 0x100, you will get "\u0100". ASCII gets the usual
* char (non-hex) representation. Control characters are spit out
* as unicode. While this is specially set up for returning Java strings,
* it can be used by any language target that has the same syntax. :)
*/
public static String getANTLRCharLiteralForChar(int c) {
if ( c< Label.MIN_CHAR_VALUE ) {
return "'<INVALID>'";
}
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
}
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char)c) ) {
if ( c=='\\' ) {
return "'\\\\'";
}
if ( c=='\'') {
return "'\\''";
}
return '\''+Character.toString((char)c)+'\'';
}
// turn on the bit above max "\uFFFF" value so that we pad with zeros
// then only take last 4 digits
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
String unicodeStr = "'\\u"+hex+"'";
return unicodeStr;
}
}

View File

@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.runtime.nfa.Bytecode;
/** */
@ -12,8 +12,8 @@ public class RangeInstr extends Instr {
public RangeInstr(Token start, Token stop) {
this.start = start;
this.stop = stop;
a = (char) Target.getCharValueFromGrammarCharLiteral(start.getText());
b = (char)Target.getCharValueFromGrammarCharLiteral(stop.getText());
a = (char)CharSupport.getCharValueFromGrammarCharLiteral(start.getText());
b = (char)CharSupport.getCharValueFromGrammarCharLiteral(stop.getText());
}
public short opcode() { return charSize(a, b)==1? Bytecode.RANGE8:Bytecode.RANGE16; };
public int nBytes() { return 1+2*charSize(a, b); }

View File

@ -1,9 +1,15 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.misc.IntSet;
/** */
public class BitSetDef extends OutputModelObject {
String name;
IntSet[] set;
public String name;
public IntSet fset;
public BitSetDef(CodeGenerator gen, String name, IntSet fset) {
this.gen = gen;
this.name = name;
this.fset = fset;
}
}

View File

@ -8,15 +8,15 @@ import org.antlr.v4.tool.TerminalAST;
/** */
public class MatchToken extends SrcOp {
public String name;
public String bitSetName;
public BitSetDef follow;
public MatchToken(CodeGenerator gen, TerminalAST ast) {
this.gen = gen;
name = ast.getText();
LinearApproximator approx = new LinearApproximator(gen.g, -1);
IntervalSet follow = approx.LOOK(ast.nfaState.transition(0).target);
IntervalSet fset = approx.LOOK(ast.nfaState.transition(0).target);
System.out.println("follow="+follow);
//bitSetName = gen.defineBitSet(follow);
follow = gen.defineBitSet(ast, fset);
}
}

View File

@ -1,8 +1,6 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.tool.Rule;
import java.util.ArrayList;
@ -12,22 +10,23 @@ import java.util.List;
public class Parser extends OutputModelObject {
public String name;
public List<RuleFunction> funcs = new ArrayList<RuleFunction>();
public List<DFADef> dfaDefs = new ArrayList<DFADef>();
public List<IntSet> bitsetDefs;
ParserFile file;
public Parser(CodeGenerator gen) {
public Parser(CodeGenerator gen, ParserFile file) {
this.gen = gen;
this.file = file; // who contains us?
name = gen.g.getRecognizerName();
for (Rule r : gen.g.rules.values()) funcs.add( new RuleFunction(gen, r) );
// build DFA, bitset defs
for (DFA dfa : gen.g.decisionDFAs.values()) {
dfaDefs.add( new DFADef("DFA"+dfa.decision, dfa) );
}
// We create dfa and bitsets during rule function construction.
// They get stored in code gen for convenience as we walk rule block tree
// for (DFA dfa : gen.g.decisionDFAs.values()) {
// file.dfaDefs.add( new DFADef("DFA"+dfa.decision, dfa) );
// }
}
@Override
public List<String> getChildren() {
return new ArrayList<String>() {{ add("funcs"); add("dfaDefs"); }};
return new ArrayList<String>() {{ add("funcs"); }};
}
}

View File

@ -9,15 +9,20 @@ import java.util.List;
public class ParserFile extends OutputModelObject {
public String fileName;
public Parser parser;
public List<DFADef> dfaDefs = new ArrayList<DFADef>();
public List<BitSetDef> bitSetDefs = new ArrayList<BitSetDef>();
public ParserFile(CodeGenerator gen, Parser p, String fileName) {
public ParserFile(CodeGenerator gen, String fileName) {
this.gen = gen;
parser = p;
this.fileName = fileName;
}
@Override
public List<String> getChildren() {
return new ArrayList<String>() {{ add("parser"); }};
return new ArrayList<String>() {{
add("parser");
add("dfaDefs");
add("bitSetDefs");
}};
}
}

View File

@ -0,0 +1,126 @@
package org.antlr.v4.misc;
import org.antlr.v4.automata.Label;
/** Static helpers for converting between ANTLR grammar char/string literals
 *  (text such as 'a', '\n' or a backslash-u escape, including the quotes)
 *  and the char values they denote.
 */
public class CharSupport {
    /** When converting ANTLR char and string literals, here is the
     *  value set of escape chars. Indexed by the char that follows the
     *  backslash; 0 means "no special meaning".
     */
    public static int ANTLRLiteralEscapedCharValue[] = new int[255];

    /** Given a char, we need to be able to show as an ANTLR literal.
     *  Indexed by char value; null means "no escape needed".
     */
    public static String ANTLRLiteralCharValueEscape[] = new String[255];

    static {
        ANTLRLiteralEscapedCharValue['n'] = '\n';
        ANTLRLiteralEscapedCharValue['r'] = '\r';
        ANTLRLiteralEscapedCharValue['t'] = '\t';
        ANTLRLiteralEscapedCharValue['b'] = '\b';
        ANTLRLiteralEscapedCharValue['f'] = '\f';
        ANTLRLiteralEscapedCharValue['\\'] = '\\';
        ANTLRLiteralEscapedCharValue['\''] = '\'';
        ANTLRLiteralEscapedCharValue['"'] = '"';
        ANTLRLiteralCharValueEscape['\n'] = "\\n";
        ANTLRLiteralCharValueEscape['\r'] = "\\r";
        ANTLRLiteralCharValueEscape['\t'] = "\\t";
        ANTLRLiteralCharValueEscape['\b'] = "\\b";
        ANTLRLiteralCharValueEscape['\f'] = "\\f";
        ANTLRLiteralCharValueEscape['\\'] = "\\\\";
        ANTLRLiteralCharValueEscape['\''] = "\\'";
    }

    /** Return a single-quoted ANTLR char literal for code c. Chars with an
     *  entry in {@link #ANTLRLiteralCharValueEscape} use that escape,
     *  printable basic-latin chars appear as themselves, and everything
     *  else (control chars included) is written as backslash-u followed by
     *  four uppercase hex digits; e.g. 0x100 gives '\u0100'.
     *
     *  @param c the char value to display
     *  @return the quoted literal, or {@code '<INVALID>'} (quotes included)
     *          when c is below {@code Label.MIN_CHAR_VALUE}
     */
    public static String getANTLRCharLiteralForChar(int c) {
        if ( c< Label.MIN_CHAR_VALUE ) {
            return "'<INVALID>'";
        }
        if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
            return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
        }
        if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
             !Character.isISOControl((char)c) ) {
            // Normally covered by the escape table above; kept because the
            // table is public and may have been altered.
            if ( c=='\\' ) {
                return "'\\\\'";
            }
            if ( c=='\'') {
                return "'\\''";
            }
            return '\''+Character.toString((char)c)+'\'';
        }
        // turn on the bit above max "\uFFFF" value so that we pad with zeros
        // then only take last 4 digits
        String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
        String unicodeStr = "'\\u"+hex+"'";
        return unicodeStr;
    }

    /** Given a literal like (the 3 char sequence with single quotes) 'a',
     *  return the int value of 'a'. Convert escape sequences here also.
     *
     *  @param literal the quoted char literal: 3 chars for a plain char,
     *                 4 for a backslash escape, 8 for a backslash-u escape
     *                 with four hex digits
     *  @return the char value, or -1 if the literal is not one of those
     *          shapes or its unicode escape is malformed
     */
    public static int getCharValueFromGrammarCharLiteral(String literal) {
        switch ( literal.length() ) {
            case 3 :
                // 'x'
                return literal.charAt(1); // no escape char
            case 4 :
                // '\x' (antlr lexer will catch invalid char)
                if ( Character.isDigit(literal.charAt(2)) ) {
                    // ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
                    //                    "invalid char literal: "+literal);
                    return -1;
                }
                return escapedCharValue(literal.charAt(2));
            case 8 :
                // '\u1234'
                if ( literal.charAt(1)!='\\' || literal.charAt(2)!='u' ) {
                    // eight chars long but not a unicode escape
                    return -1;
                }
                return parseHex4(literal, 3); // -1 if the digits are not hex
            default :
                // ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
                //                    "invalid char literal: "+literal);
                return -1;
        }
    }

    /** Strip the surrounding quotes from a grammar string literal and
     *  translate its escape sequences to the chars they denote.
     *
     *  A backslash followed by 'u' and four hex digits yields that UTF-16
     *  code unit. Any other escaped char is looked up in
     *  {@link #ANTLRLiteralEscapedCharValue}; chars with no entry, and a
     *  'u' that is not followed by four hex digits, are unnecessary escapes
     *  and yield the char itself. Every escape is consumed in full, so the
     *  escaped char and the unicode digits are not emitted a second time.
     *
     *  @param literal the literal including its first and last quote char
     *  @return the unescaped content between the quotes
     */
    public static String getStringFromGrammarStringLiteral(String literal) {
        StringBuilder buf = new StringBuilder();
        int last = literal.length()-1; // index of the closing quote
        int i = 1; // skip first quote
        while ( i < last ) { // scan all but last quote
            char c = literal.charAt(i);
            i++;
            if ( c!='\\' ) {
                buf.append(c);
                continue;
            }
            // i <= last here, so there is always a char after the backslash
            char escChar = literal.charAt(i);
            i++;
            if ( escChar=='u' && i+4 <= last ) {
                int codeUnit = parseHex4(literal, i);
                if ( codeUnit >= 0 ) {
                    buf.append((char)codeUnit);
                    i += 4; // skip the four hex digits just consumed
                    continue;
                }
            }
            // Unnecessary escapes like '\{' should just yield {
            buf.append((char)escapedCharValue(escChar));
        }
        return buf.toString();
    }

    /** Value denoted by a backslash followed by escChar: the table entry
     *  if there is one, otherwise escChar itself. Chars beyond the end of
     *  the table have no entry.
     */
    private static int escapedCharValue(int escChar) {
        if ( escChar >= 0 && escChar < ANTLRLiteralEscapedCharValue.length ) {
            int charVal = ANTLRLiteralEscapedCharValue[escChar];
            if ( charVal!=0 ) {
                return charVal;
            }
        }
        return escChar;
    }

    /** Parse exactly four hex digits of s starting at index start.
     *  Returns the value, or -1 if fewer than four chars remain or any of
     *  them is not a hex digit.
     */
    private static int parseHex4(String s, int start) {
        if ( start < 0 || start+4 > s.length() ) {
            return -1;
        }
        int value = 0;
        for (int k = start; k < start+4; k++) {
            int digit = Character.digit(s.charAt(k), 16);
            if ( digit < 0 ) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }
}

View File

@ -6,7 +6,7 @@ import org.antlr.v4.Tool;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.Label;
import org.antlr.v4.automata.NFA;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.misc.Utils;
@ -359,7 +359,7 @@ public class Grammar implements AttributeResolver {
if ( isLexer() &&
ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
{
return Target.getANTLRCharLiteralForChar(ttype);
return CharSupport.getANTLRCharLiteralForChar(ttype);
}
// faux label?
else if ( ttype<0 ) {