Template names now match output-model class names; moved char/string literal helpers out of Target into CharSupport; added bitset definitions.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6850]
This commit is contained in:
parrt 2010-05-09 12:11:50 -08:00
parent cab4d7d197
commit 3e1f43117e
16 changed files with 301 additions and 174 deletions

View File

@ -1,40 +1,45 @@
// args must be <object-model-object>, <fields-resulting-in-STs> // args must be <object-model-object>, <fields-resulting-in-STs>
parserFile(f, parser) ::= << ParserFile(f, parser, dfaDefs, bitSetDefs) ::= <<
// $ANTLR ANTLRVersion> <f.fileName> generatedTimestamp> // $ANTLR ANTLRVersion> <f.fileName> generatedTimestamp>
import org.antlr.runtime.*; import org.antlr.runtime.*;
<parser> <parser>
>> >>
parser(p,funcs,dfaDefs) ::= << Parser(p, funcs) ::= <<
public class <p.name> { public class <p.name> {
<funcs> <funcs; separator="\n">
<dfaDefs> <dfaDefs; separator="\n">
<bitSetDefs; separator="\n">
} }
>> >>
DFA(dfa) ::= << DFADef(dfa) ::= <<
// define <dfa.name> // define <dfa.name>
>> >>
parserFunction(f,code) ::= << BitSetDef(b) ::= <<
// define <b.name>
>>
RuleFunction(f,code) ::= <<
<f.modifiers:{f | <f> }>void <f.name>(<f.args>) { <f.modifiers:{f | <f> }>void <f.name>(<f.args>) {
<code> <code>
} }
>> >>
codeBlock(c, ops) ::= << CodeBlock(c, ops) ::= <<
<ops> <ops; separator="\n">
>> >>
switch(c, alts) ::= << LL1Choice(c, alts) ::= <<
switch ( input.LA(1) ) { switch ( input.LA(1) ) {
<alts> <alts; separator="\n">
} }
>> >>
matchToken(m) ::= << MatchToken(m) ::= <<
match(<m.name>); match(<m.name>, <m.follow.name>);
>> >>
codeFileExtension() ::= ".java" codeFileExtension() ::= ".java"

View File

@ -1,6 +1,6 @@
package org.antlr.v4.automata; package org.antlr.v4.automata;
import org.antlr.v4.codegen.Target; import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.tool.GrammarAST; import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.LexerGrammar; import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.Rule;
@ -45,8 +45,8 @@ public class LexerNFAFactory extends ParserNFAFactory {
public Handle range(GrammarAST a, GrammarAST b) { public Handle range(GrammarAST a, GrammarAST b) {
BasicState left = newState(a); BasicState left = newState(a);
BasicState right = newState(b); BasicState right = newState(b);
int t1 = Target.getCharValueFromGrammarCharLiteral(a.getText()); int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
int t2 = Target.getCharValueFromGrammarCharLiteral(b.getText()); int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
left.transition = new RangeTransition(t1, t2, right); left.transition = new RangeTransition(t1, t2, right);
a.nfaState = left; a.nfaState = left;
b.nfaState = left; b.nfaState = left;

View File

@ -3,7 +3,7 @@ package org.antlr.v4.automata;
import org.antlr.runtime.RecognitionException; import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream; import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.v4.codegen.Target; import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.IntervalSet; import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor; import org.antlr.v4.parse.GrammarASTAdaptor;
@ -103,7 +103,7 @@ public class ParserNFAFactory implements NFAFactory {
GrammarAST ast = A.left.ast; GrammarAST ast = A.left.ast;
int ttype = 0; int ttype = 0;
if ( g.isLexer() ) { if ( g.isLexer() ) {
ttype = Target.getCharValueFromGrammarCharLiteral(ast.getText()); ttype = CharSupport.getCharValueFromGrammarCharLiteral(ast.getText());
} }
else { else {
ttype = g.getTokenType(ast.getText()); ttype = g.getTokenType(ast.getText());

View File

@ -1,6 +1,6 @@
package org.antlr.v4.automata; package org.antlr.v4.automata;
import org.antlr.v4.codegen.Target; import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.IntervalSet; import org.antlr.v4.misc.IntervalSet;
public class RangeTransition extends Transition { public class RangeTransition extends Transition {
@ -21,7 +21,7 @@ public class RangeTransition extends Transition {
@Override @Override
public String toString() { public String toString() {
return Target.getANTLRCharLiteralForChar(from)+".."+ return CharSupport.getANTLRCharLiteralForChar(from)+".."+
Target.getANTLRCharLiteralForChar(to); CharSupport.getANTLRCharLiteralForChar(to);
} }
} }

View File

@ -1,8 +1,13 @@
package org.antlr.v4.codegen; package org.antlr.v4.codegen;
import org.antlr.v4.codegen.src.BitSetDef;
import org.antlr.v4.codegen.src.OutputModelObject; import org.antlr.v4.codegen.src.OutputModelObject;
import org.antlr.v4.codegen.src.ParserFile;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.tool.ErrorType; import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.GrammarAST;
import org.stringtemplate.v4.*; import org.stringtemplate.v4.*;
import java.io.IOException; import java.io.IOException;
@ -19,6 +24,7 @@ public abstract class CodeGenerator {
public Grammar g; public Grammar g;
public Target target; public Target target;
public STGroup templates; public STGroup templates;
public ParserFile outputModel;
public int lineWidth = 72; public int lineWidth = 72;
@ -72,8 +78,7 @@ public abstract class CodeGenerator {
public void write() { public void write() {
OutputModelObject root = buildOutputModel(); OutputModelObject root = buildOutputModel();
OutputModelWalker walker = new OutputModelWalker(g.tool, templates, OutputModelWalker walker = new OutputModelWalker(g.tool, templates);
ParserGenerator.modelToTemplateMap);
ST outputFileST = walker.walk(root); ST outputFileST = walker.walk(root);
// WRITE FILES // WRITE FILES
@ -130,4 +135,16 @@ public abstract class CodeGenerator {
// } // }
return g.name+VOCAB_FILE_EXTENSION; return g.name+VOCAB_FILE_EXTENSION;
} }
public BitSetDef defineBitSet(GrammarAST ast, IntSet follow) {
String inRuleName = ast.nfaState.rule.name;
String elementName = ast.getText(); // assume rule ref
if ( ast.getType() == ANTLRParser.TOKEN_REF ) {
target.getTokenTypeAsTargetLabel(g, ast.getType() );
}
String name = "FOLLOW_"+elementName+"_in_"+inRuleName+ast.token.getTokenIndex();
BitSetDef b = new BitSetDef(this, name, follow);
outputModel.bitSetDefs.add(b);
return b;
}
} }

View File

@ -5,6 +5,7 @@ import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream; import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.TreeNodeStream; import org.antlr.runtime.tree.TreeNodeStream;
import org.antlr.v4.codegen.nfa.*; import org.antlr.v4.codegen.nfa.*;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.DoubleKeyMap; import org.antlr.v4.misc.DoubleKeyMap;
import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor; import org.antlr.v4.parse.GrammarASTAdaptor;
@ -89,7 +90,7 @@ public class NFABytecodeGenerator extends TreeParser {
} }
public void emitString(Token t) { public void emitString(Token t) {
String chars = Target.getStringFromGrammarStringLiteral(t.getText()); String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
for (char c : chars.toCharArray()) { for (char c : chars.toCharArray()) {
emit(new MatchInstr(t, c)); emit(new MatchInstr(t, c));
} }

View File

@ -15,20 +15,19 @@ import java.util.*;
public class OutputModelWalker { public class OutputModelWalker {
Tool tool; Tool tool;
STGroup templates; STGroup templates;
Map<Class, String> modelToTemplateMap; //Map<Class, String> modelToTemplateMap;
public OutputModelWalker(Tool tool, public OutputModelWalker(Tool tool,
STGroup templates, STGroup templates)
Map<Class, String> modelToTemplateMap)
{ {
this.tool = tool; this.tool = tool;
this.templates = templates; this.templates = templates;
this.modelToTemplateMap = modelToTemplateMap; //this.modelToTemplateMap = modelToTemplateMap;
} }
public ST walk(OutputModelObject omo) { public ST walk(OutputModelObject omo) {
// CREATE TEMPLATE FOR THIS OUTPUT OBJECT // CREATE TEMPLATE FOR THIS OUTPUT OBJECT
String templateName = modelToTemplateMap.get(omo.getClass()); String templateName = omo.getClass().getSimpleName();
if ( templateName == null ) { if ( templateName == null ) {
tool.errMgr.toolError(ErrorType.NO_MODEL_TO_TEMPLATE_MAPPING, omo.getClass().getSimpleName()); tool.errMgr.toolError(ErrorType.NO_MODEL_TO_TEMPLATE_MAPPING, omo.getClass().getSimpleName());
return new BlankST(); return new BlankST();

View File

@ -1,30 +1,31 @@
package org.antlr.v4.codegen; package org.antlr.v4.codegen;
import org.antlr.v4.codegen.src.*; import org.antlr.v4.codegen.src.OutputModelObject;
import org.antlr.v4.codegen.src.Parser;
import org.antlr.v4.codegen.src.ParserFile;
import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Grammar;
import java.util.HashMap;
import java.util.Map;
/** */ /** */
public class ParserGenerator extends CodeGenerator { public class ParserGenerator extends CodeGenerator {
public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>() {{ // public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>() {{
put(ParserFile.class, "parserFile"); // put(ParserFile.class, "parserFile");
put(Parser.class, "parser"); // put(Parser.class, "parser");
put(RuleFunction.class, "parserFunction"); // put(RuleFunction.class, "parserFunction");
put(DFADef.class, "DFA"); // put(DFADef.class, "DFA");
put(CodeBlock.class, "codeBlock"); // put(CodeBlock.class, "codeBlock");
put(LL1Choice.class, "switch"); // put(LL1Choice.class, "switch");
put(MatchToken.class, "matchToken"); // put(MatchToken.class, "matchToken");
}}; // }};
public ParserGenerator(Grammar g) { public ParserGenerator(Grammar g) {
super(g); super(g);
} }
public OutputModelObject buildOutputModel() { public OutputModelObject buildOutputModel() {
Parser p = new Parser(this); ParserFile pf = new ParserFile(this, getRecognizerFileName());
return new ParserFile(this, p, getRecognizerFileName()); outputModel = pf;
pf.parser = new Parser(this, pf); // side-effect: fills pf dfa and bitset defs
// at this point, model is built
return outputModel;
} }
} }

View File

@ -1,6 +1,7 @@
package org.antlr.v4.codegen; package org.antlr.v4.codegen;
import org.antlr.v4.automata.Label; import org.antlr.v4.automata.Label;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Grammar;
import org.stringtemplate.v4.ST; import org.stringtemplate.v4.ST;
@ -8,31 +9,28 @@ import java.io.IOException;
/** */ /** */
public class Target { public class Target {
/** When converting ANTLR char and string literals, here is the /** For pure strings of Java 16-bit unicode char, how can we display
* value set of escape chars. * it in the target language as a literal. Useful for dumping
* predicates and such that may refer to chars that need to be escaped
* when represented as strings. Also, templates need to be escaped so
* that the target language can hold them as a string.
*
* I have defined (via the constructor) the set of typical escapes,
* but your Target subclass is free to alter the translated chars or
* add more definitions. This is nonstatic so each target can have
* a different set in memory at same time.
*/ */
public static int ANTLRLiteralEscapedCharValue[] = new int[255]; protected String[] targetCharValueEscape = new String[255];
/** Given a char, we need to be able to show as an ANTLR literal. public Target() {
*/ targetCharValueEscape['\n'] = "\\n";
public static String ANTLRLiteralCharValueEscape[] = new String[255]; targetCharValueEscape['\r'] = "\\r";
targetCharValueEscape['\t'] = "\\t";
static { targetCharValueEscape['\b'] = "\\b";
ANTLRLiteralEscapedCharValue['n'] = '\n'; targetCharValueEscape['\f'] = "\\f";
ANTLRLiteralEscapedCharValue['r'] = '\r'; targetCharValueEscape['\\'] = "\\\\";
ANTLRLiteralEscapedCharValue['t'] = '\t'; targetCharValueEscape['\''] = "\\'";
ANTLRLiteralEscapedCharValue['b'] = '\b'; targetCharValueEscape['"'] = "\\\"";
ANTLRLiteralEscapedCharValue['f'] = '\f';
ANTLRLiteralEscapedCharValue['\\'] = '\\';
ANTLRLiteralEscapedCharValue['\''] = '\'';
ANTLRLiteralEscapedCharValue['"'] = '"';
ANTLRLiteralCharValueEscape['\n'] = "\\n";
ANTLRLiteralCharValueEscape['\r'] = "\\r";
ANTLRLiteralCharValueEscape['\t'] = "\\t";
ANTLRLiteralCharValueEscape['\b'] = "\\b";
ANTLRLiteralCharValueEscape['\f'] = "\\f";
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
ANTLRLiteralCharValueEscape['\''] = "\\'";
} }
protected void genRecognizerFile(CodeGenerator generator, protected void genRecognizerFile(CodeGenerator generator,
@ -53,95 +51,65 @@ public class Target {
// no header file by default // no header file by default
} }
/** Given a literal like (the 3 char sequence with single quotes) 'a', /** Get a meaningful name for a token type useful during code generation.
* return the int value of 'a'. Convert escape sequences here also. * Literals without associated names are converted to the string equivalent
* of their integer values. Used to generate x==ID and x==34 type comparisons
* etc... Essentially we are looking for the most obvious way to refer
* to a token type in the generated code. If in the lexer, return the
* char literal translated to the target language. For example, ttype=10
* will yield '\n' from the getTokenDisplayName method. That must
* be converted to the target languages literals. For most C-derived
* languages no translation is needed.
*/ */
public static int getCharValueFromGrammarCharLiteral(String literal) { public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
switch ( literal.length() ) { if ( g.getType() == ANTLRParser.LEXER ) {
case 3 : // String name = g.getTokenDisplayName(ttype);
// 'x' // return getTargetCharLiteralFromANTLRCharLiteral(this,name);
return literal.charAt(1); // no escape char
case 4 :
// '\x' (antlr lexer will catch invalid char)
if ( Character.isDigit(literal.charAt(2)) ) {
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
// "invalid char literal: "+literal);
return -1;
}
int escChar = literal.charAt(2);
int charVal = ANTLRLiteralEscapedCharValue[escChar];
if ( charVal==0 ) {
// Unnecessary escapes like '\{' should just yield {
return escChar;
}
return charVal;
case 8 :
// '\u1234'
String unicodeChars = literal.substring(3,literal.length()-1);
return Integer.parseInt(unicodeChars, 16);
default :
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
// "invalid char literal: "+literal);
return -1;
} }
String name = g.getTokenDisplayName(ttype);
// If name is a literal, return the token type instead
if ( name.charAt(0)=='\'' ) {
return String.valueOf(ttype);
}
return name;
} }
public static String getStringFromGrammarStringLiteral(String literal) { /** Convert from an ANTLR char literal found in a grammar file to
StringBuilder buf = new StringBuilder(); * an equivalent char literal in the target language. For most
int n = literal.length(); * languages, this means leaving 'x' as 'x'. Actually, we need
int i = 1; // skip first quote * to escape '\u000A' so that it doesn't get converted to \n by
while ( i < (n-1) ) { // scan all but last quote * the compiler. Convert the literal to the char value and then
switch ( literal.charAt(i) ) { * to an appropriate target char literal.
case '\\' : *
i++; * Expect single quotes around the incoming literal.
if ( literal.charAt(i)=='u' ) { // '\u1234' */
i++; public String getTargetCharLiteralCharValue(int c) {
String unicodeChars = literal.substring(3,literal.length()-1); StringBuffer buf = new StringBuffer();
buf.append((char)Integer.parseInt(unicodeChars, 16)); buf.append('\'');
} if ( c<Label.MIN_CHAR_VALUE ) return "'\u0000'";
else { if ( c<targetCharValueEscape.length &&
char escChar = literal.charAt(i); targetCharValueEscape[c]!=null )
int charVal = ANTLRLiteralEscapedCharValue[escChar]; {
if ( charVal==0 ) buf.append(escChar); // Unnecessary escapes like '\{' should just yield { buf.append(targetCharValueEscape[c]);
else buf.append((char)charVal);
}
break;
default :
buf.append(literal.charAt(i));
i++;
break;
}
} }
else if ( Character.UnicodeBlock.of((char)c)==
Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char)c) )
{
// normal char
buf.append((char)c);
}
else {
// must be something unprintable...use \\uXXXX
// turn on the bit above max "\\uFFFF" value so that we pad with zeros
// then only take last 4 digits
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
buf.append("\\u");
buf.append(hex);
}
buf.append('\'');
return buf.toString(); return buf.toString();
} }
/** Return a string representing the escaped char for code c. E.g., If c
* has value 0x100, you will get "\u0100". ASCII gets the usual
* char (non-hex) representation. Control characters are spit out
* as unicode. While this is specially set up for returning Java strings,
* it can be used by any language target that has the same syntax. :)
*/
public static String getANTLRCharLiteralForChar(int c) {
if ( c< Label.MIN_CHAR_VALUE ) {
return "'<INVALID>'";
}
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
}
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char)c) ) {
if ( c=='\\' ) {
return "'\\\\'";
}
if ( c=='\'') {
return "'\\''";
}
return '\''+Character.toString((char)c)+'\'';
}
// turn on the bit above max "\uFFFF" value so that we pad with zeros
// then only take last 4 digits
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
String unicodeStr = "'\\u"+hex+"'";
return unicodeStr;
}
} }

View File

@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token; import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator; import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.codegen.Target; import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.runtime.nfa.Bytecode; import org.antlr.v4.runtime.nfa.Bytecode;
/** */ /** */
@ -12,8 +12,8 @@ public class RangeInstr extends Instr {
public RangeInstr(Token start, Token stop) { public RangeInstr(Token start, Token stop) {
this.start = start; this.start = start;
this.stop = stop; this.stop = stop;
a = (char) Target.getCharValueFromGrammarCharLiteral(start.getText()); a = (char)CharSupport.getCharValueFromGrammarCharLiteral(start.getText());
b = (char)Target.getCharValueFromGrammarCharLiteral(stop.getText()); b = (char)CharSupport.getCharValueFromGrammarCharLiteral(stop.getText());
} }
public short opcode() { return charSize(a, b)==1? Bytecode.RANGE8:Bytecode.RANGE16; }; public short opcode() { return charSize(a, b)==1? Bytecode.RANGE8:Bytecode.RANGE16; };
public int nBytes() { return 1+2*charSize(a, b); } public int nBytes() { return 1+2*charSize(a, b); }

View File

@ -1,9 +1,15 @@
package org.antlr.v4.codegen.src; package org.antlr.v4.codegen.src;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.misc.IntSet; import org.antlr.v4.misc.IntSet;
/** */ /** */
public class BitSetDef extends OutputModelObject { public class BitSetDef extends OutputModelObject {
String name; public String name;
IntSet[] set; public IntSet fset;
public BitSetDef(CodeGenerator gen, String name, IntSet fset) {
this.gen = gen;
this.name = name;
this.fset = fset;
}
} }

View File

@ -8,15 +8,15 @@ import org.antlr.v4.tool.TerminalAST;
/** */ /** */
public class MatchToken extends SrcOp { public class MatchToken extends SrcOp {
public String name; public String name;
public String bitSetName; public BitSetDef follow;
public MatchToken(CodeGenerator gen, TerminalAST ast) { public MatchToken(CodeGenerator gen, TerminalAST ast) {
this.gen = gen; this.gen = gen;
name = ast.getText(); name = ast.getText();
LinearApproximator approx = new LinearApproximator(gen.g, -1); LinearApproximator approx = new LinearApproximator(gen.g, -1);
IntervalSet follow = approx.LOOK(ast.nfaState.transition(0).target); IntervalSet fset = approx.LOOK(ast.nfaState.transition(0).target);
System.out.println("follow="+follow); System.out.println("follow="+follow);
//bitSetName = gen.defineBitSet(follow); follow = gen.defineBitSet(ast, fset);
} }
} }

View File

@ -1,8 +1,6 @@
package org.antlr.v4.codegen.src; package org.antlr.v4.codegen.src;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.Rule;
import java.util.ArrayList; import java.util.ArrayList;
@ -12,22 +10,23 @@ import java.util.List;
public class Parser extends OutputModelObject { public class Parser extends OutputModelObject {
public String name; public String name;
public List<RuleFunction> funcs = new ArrayList<RuleFunction>(); public List<RuleFunction> funcs = new ArrayList<RuleFunction>();
public List<DFADef> dfaDefs = new ArrayList<DFADef>(); ParserFile file;
public List<IntSet> bitsetDefs;
public Parser(CodeGenerator gen) { public Parser(CodeGenerator gen, ParserFile file) {
this.gen = gen; this.gen = gen;
this.file = file; // who contains us?
name = gen.g.getRecognizerName(); name = gen.g.getRecognizerName();
for (Rule r : gen.g.rules.values()) funcs.add( new RuleFunction(gen, r) ); for (Rule r : gen.g.rules.values()) funcs.add( new RuleFunction(gen, r) );
// build DFA, bitset defs // We create dfa and bitsets during rule function construction.
for (DFA dfa : gen.g.decisionDFAs.values()) { // They get stored in code gen for convenience as we walk rule block tree
dfaDefs.add( new DFADef("DFA"+dfa.decision, dfa) ); // for (DFA dfa : gen.g.decisionDFAs.values()) {
} // file.dfaDefs.add( new DFADef("DFA"+dfa.decision, dfa) );
// }
} }
@Override @Override
public List<String> getChildren() { public List<String> getChildren() {
return new ArrayList<String>() {{ add("funcs"); add("dfaDefs"); }}; return new ArrayList<String>() {{ add("funcs"); }};
} }
} }

View File

@ -9,15 +9,20 @@ import java.util.List;
public class ParserFile extends OutputModelObject { public class ParserFile extends OutputModelObject {
public String fileName; public String fileName;
public Parser parser; public Parser parser;
public List<DFADef> dfaDefs = new ArrayList<DFADef>();
public List<BitSetDef> bitSetDefs = new ArrayList<BitSetDef>();
public ParserFile(CodeGenerator gen, Parser p, String fileName) { public ParserFile(CodeGenerator gen, String fileName) {
this.gen = gen; this.gen = gen;
parser = p;
this.fileName = fileName; this.fileName = fileName;
} }
@Override @Override
public List<String> getChildren() { public List<String> getChildren() {
return new ArrayList<String>() {{ add("parser"); }}; return new ArrayList<String>() {{
add("parser");
add("dfaDefs");
add("bitSetDefs");
}};
} }
} }

View File

@ -0,0 +1,126 @@
package org.antlr.v4.misc;
import org.antlr.v4.automata.Label;
/** Static helpers for converting between ANTLR grammar char/string
 *  literals (single-quoted text with backslash escapes) and the char
 *  values they denote.
 */
public class CharSupport {
    /** When converting ANTLR char and string literals, here is the
     *  value set of escape chars. Indexed by the char that follows the
     *  backslash; the entry is the char value it stands for, or 0 when
     *  there is no mapping.
     */
    public static int ANTLRLiteralEscapedCharValue[] = new int[255];

    /** Given a char, we need to be able to show as an ANTLR literal.
     *  Indexed by char value; the entry is the escape sequence text, or
     *  null when the char has no dedicated escape.
     */
    public static String ANTLRLiteralCharValueEscape[] = new String[255];

    static {
        ANTLRLiteralEscapedCharValue['n'] = '\n';
        ANTLRLiteralEscapedCharValue['r'] = '\r';
        ANTLRLiteralEscapedCharValue['t'] = '\t';
        ANTLRLiteralEscapedCharValue['b'] = '\b';
        ANTLRLiteralEscapedCharValue['f'] = '\f';
        ANTLRLiteralEscapedCharValue['\\'] = '\\';
        ANTLRLiteralEscapedCharValue['\''] = '\'';
        ANTLRLiteralEscapedCharValue['"'] = '"';
        ANTLRLiteralCharValueEscape['\n'] = "\\n";
        ANTLRLiteralCharValueEscape['\r'] = "\\r";
        ANTLRLiteralCharValueEscape['\t'] = "\\t";
        ANTLRLiteralCharValueEscape['\b'] = "\\b";
        ANTLRLiteralCharValueEscape['\f'] = "\\f";
        ANTLRLiteralCharValueEscape['\\'] = "\\\\";
        ANTLRLiteralCharValueEscape['\''] = "\\'";
    }

    /** Return a string representing the escaped char for code c, wrapped
     *  in single quotes. E.g., if c has value 0x100, you will get the
     *  quoted unicode escape for 0100. Chars with an entry in
     *  {@link #ANTLRLiteralCharValueEscape} use that escape; other
     *  non-control BASIC_LATIN chars are shown as themselves; everything
     *  else is spit out as a four-hex-digit unicode escape.
     *
     *  @param c the char value to display
     *  @return the quoted literal, or {@code "'<INVALID>'"} when c is
     *          below {@code Label.MIN_CHAR_VALUE}
     */
    public static String getANTLRCharLiteralForChar(int c) {
        if ( c<Label.MIN_CHAR_VALUE ) {
            return "'<INVALID>'";
        }
        if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
            return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
        }
        if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
             !Character.isISOControl((char)c) )
        {
            // Normally handled by the table lookup above; kept as a
            // fallback because the table is public and can be altered.
            if ( c=='\\' ) {
                return "'\\\\'";
            }
            if ( c=='\'') {
                return "'\\''";
            }
            return '\''+Character.toString((char)c)+'\'';
        }
        // turn on the bit above max "\\uFFFF" value so that we pad with zeros
        // then only take last 4 digits
        String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
        return "'\\u"+hex+"'";
    }

    /** Given a literal like (the 3 char sequence with single quotes) 'a',
     *  return the int value of 'a'. Convert escape sequences here also.
     *
     *  @param literal the quoted char literal exactly as written in the grammar
     *  @return the char value, or -1 when the literal is not a valid
     *          plain, backslash-escaped, or four-hex-digit unicode char literal
     */
    public static int getCharValueFromGrammarCharLiteral(String literal) {
        switch ( literal.length() ) {
            case 3 :
                // 'x'
                return literal.charAt(1); // no escape char
            case 4 :
                // '\x' (antlr lexer will catch invalid char)
                if ( Character.isDigit(literal.charAt(2)) ) {
                    return -1;
                }
                return escapedCharValue(literal.charAt(2));
            case 8 :
                // quote, backslash, 'u', four hex digits, quote
                if ( !literal.startsWith("'\\u") ) {
                    return -1;
                }
                return parseHex4(literal, 3, literal.length()-1);
            default :
                return -1;
        }
    }

    /** Strip the surrounding quotes from a grammar string literal and
     *  convert every escape sequence inside it to the char it denotes.
     *  Each escape consumes exactly its own characters: two for a simple
     *  escape and six for a unicode escape (backslash, 'u', four hex
     *  digits), so escapes may appear anywhere in the string.
     *
     *  @param literal the quoted string literal exactly as written in the grammar
     *  @return the decoded string contents without the quotes
     *  @throws NumberFormatException if a unicode escape is not followed
     *          by four hex digits inside the literal
     */
    public static String getStringFromGrammarStringLiteral(String literal) {
        StringBuilder buf = new StringBuilder();
        int last = literal.length()-1; // index of the closing quote
        int i = 1; // skip first quote
        while ( i<last ) { // scan all but last quote
            char c = literal.charAt(i);
            if ( c!='\\' ) {
                buf.append(c);
                i++;
                continue;
            }
            i++; // move to the char after the backslash (i<=last, so in range)
            char escChar = literal.charAt(i);
            if ( escChar=='u' ) {
                int hexStart = i+1;
                int value = parseHex4(literal, hexStart, last);
                if ( value<0 ) {
                    throw new NumberFormatException(
                        "invalid unicode escape in literal: "+literal);
                }
                buf.append((char)value);
                i = hexStart+4; // skip the four hex digits
            }
            else {
                buf.append((char)escapedCharValue(escChar));
                i++; // skip the escape char itself
            }
        }
        return buf.toString();
    }

    /** Map the char following a backslash to the value it denotes.
     *  Unnecessary escapes like '\{' (and chars outside the escape
     *  table) just yield the char itself.
     */
    private static int escapedCharValue(char escChar) {
        if ( escChar<ANTLRLiteralEscapedCharValue.length ) {
            int charVal = ANTLRLiteralEscapedCharValue[escChar];
            if ( charVal!=0 ) {
                return charVal;
            }
        }
        return escChar;
    }

    /** Parse the four hex digits at s[start..start+4), which must end at
     *  or before index limit. Returns the value, or -1 if there are not
     *  four hex digits available there.
     */
    private static int parseHex4(String s, int start, int limit) {
        if ( start+4>limit ) {
            return -1;
        }
        int value = 0;
        for (int k = start; k<start+4; k++) {
            int digit = Character.digit(s.charAt(k), 16);
            if ( digit<0 ) {
                return -1;
            }
            value = (value<<4) | digit;
        }
        return value;
    }
}

View File

@ -6,7 +6,7 @@ import org.antlr.v4.Tool;
import org.antlr.v4.automata.DFA; import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.Label; import org.antlr.v4.automata.Label;
import org.antlr.v4.automata.NFA; import org.antlr.v4.automata.NFA;
import org.antlr.v4.codegen.Target; import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.IntSet; import org.antlr.v4.misc.IntSet;
import org.antlr.v4.misc.IntervalSet; import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.misc.Utils; import org.antlr.v4.misc.Utils;
@ -359,7 +359,7 @@ public class Grammar implements AttributeResolver {
if ( isLexer() && if ( isLexer() &&
ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE ) ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
{ {
return Target.getANTLRCharLiteralForChar(ttype); return CharSupport.getANTLRCharLiteralForChar(ttype);
} }
// faux label? // faux label?
else if ( ttype<0 ) { else if ( ttype<0 ) {