got good start on code generator

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6844]
This commit is contained in:
parrt 2010-05-06 17:54:58 -08:00
parent fbaeb50e5e
commit 07625981fb
26 changed files with 404 additions and 28 deletions

View File

@ -85,6 +85,8 @@ CANNOT_CREATE_TARGET_GENERATOR(arg,exception,stackTrace) ::=
"cannot create target <arg> code generator: <exception>"
CANNOT_COMPUTE_SAMPLE_INPUT_SEQ() ::=
"cannot generate a sample input sequence from lookahead DFA"
CODE_TEMPLATE_ARG_ISSUE(arg,arg2) ::=
"code generation template <arg> has missing, misnamed, or incomplete arg list: <arg2>"
// grammar interpretation errors
/*

View File

@ -13,6 +13,12 @@ public class CodeGenPipeline {
}
/** Dispatch code generation by grammar kind: lexer grammars go to
 *  {@code processLexer()}, parser grammars to {@code processParser()}.
 *  Any other kind of grammar is ignored.
 */
public void process() {
    if ( g.isLexer() ) {
        processLexer();
        return;
    }
    if ( g.isParser() ) {
        processParser();
    }
}
/** Build a {@code ParserGenerator} for the grammar and have it write its output. */
void processParser() {
    new ParserGenerator(g).write();
}
void processLexer() {

View File

@ -0,0 +1,125 @@
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.src.OutputModelObject;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.stringtemplate.v4.*;
import java.io.IOException;
import java.io.Writer;
/** Base class for code generators. A generator is bound to one grammar,
 *  loads the {@link Target} and the template group for the grammar's
 *  "language" option (default "Java"), asks the subclass for an output
 *  model, converts that model to templates with an OutputModelWalker and
 *  writes the rendered result through the target.
 */
public abstract class CodeGenerator {
    public static final String TEMPLATE_ROOT = "org/antlr/v4/tool/templates/codegen";
    public static final String VOCAB_FILE_EXTENSION = ".tokens";
    protected final static String vocabFilePattern =
        "<tokens:{<attr.name>=<attr.type>\n}>" +
        "<literals:{<attr.name>=<attr.type>\n}>";

    Grammar g;
    Target target;
    STGroup templates;
    int lineWidth = 72;

    /** Create a generator for {@code g}, loading the target and templates
     *  named by the grammar's "language" option.
     */
    public CodeGenerator(Grammar g) {
        this.g = g;
        String language = g.getOption("language", "Java");
        loadLanguageTarget(language);
        loadTemplates(language);
    }

    /** Instantiate {@code org.antlr.v4.codegen.<language>Target} reflectively.
     *  If no such class exists the default {@link Target} is used. If the
     *  class exists but cannot be instantiated, a
     *  CANNOT_CREATE_TARGET_GENERATOR error is reported and {@code target}
     *  is left unset.
     */
    void loadLanguageTarget(String language) {
        String targetName = "org.antlr.v4.codegen."+language+"Target";
        try {
            Class<?> c = Class.forName(targetName);
            target = (Target)c.newInstance();
        }
        catch (ClassNotFoundException cnfe) {
            target = new Target(); // use default
        }
        catch (InstantiationException ie) {
            g.tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR,
                                    targetName,
                                    ie);
        }
        catch (IllegalAccessException iae) {
            g.tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR,
                                    targetName,
                                    iae);
        }
    }

    /** Load the template group file
     *  {@code TEMPLATE_ROOT/<language>/<language>.stg} into {@code templates}.
     */
    public void loadTemplates(String language) {
        templates = new STGroupFile(TEMPLATE_ROOT+"/"+language+"/"+language+".stg");
        // TODO: optionally emit template debug start/stop strings
        // (EMIT_TEMPLATE_DELIMITERS), excluding the codeFileExtension and
        // headerFileExtension templates.
    }

    /** Build the root of the output model that {@link #write()} renders. */
    public abstract OutputModelObject buildOutputModel();

    /** Build the output model, convert it to a template tree and hand it to
     *  the target for writing. I/O failures are reported as
     *  CANNOT_WRITE_FILE rather than propagated.
     */
    public void write() {
        OutputModelObject root = buildOutputModel();
        OutputModelWalker walker = new OutputModelWalker(g.tool, templates,
                                                         ParserGenerator.modelToTemplateMap);
        ST outputFileST = walker.walk(root);

        // WRITE FILES
        try {
            target.genRecognizerFile(g.tool, this, g, outputFileST);
            if ( templates.isDefined("headerFile") ) {
                ST extST = templates.getInstanceOf("headerFileExtension");
                // NOTE(review): no header template is built yet, so the
                // target receives null for the header file template.
                ST headerFileST = null;
                target.genRecognizerHeaderFile(g.tool, this, g, headerFileST, extST.render());
            }
            // TODO: write out the vocab interchange (.tokens) file; it is used
            // by antlr and does not change per target (genTokenVocabOutput /
            // getVocabFileName).
        }
        catch (IOException ioe) {
            // Only the recognizer (and optional header) file is written in
            // the try block, so name the recognizer file, not the vocab file.
            g.tool.errMgr.toolError(ErrorType.CANNOT_WRITE_FILE,
                                    getRecognizerFileName(),
                                    ioe);
        }
    }

    /** Render {@code code} into the tool's output file named
     *  {@code fileName}, wrapping at {@code lineWidth} columns, and print the
     *  elapsed render time to stdout.
     *
     *  @throws IOException if the output file cannot be opened or written
     */
    public void write(ST code, String fileName) throws IOException {
        long start = System.currentTimeMillis();
        Writer w = g.tool.getOutputFile(g, fileName);
        try {
            // Render straight into the output file with auto-indentation
            STWriter wr = new AutoIndentWriter(w);
            wr.setLineWidth(lineWidth);
            code.write(wr);
        }
        finally {
            w.close(); // always release the file, even if rendering fails
        }
        long stop = System.currentTimeMillis();
        System.out.println("render time for "+fileName+": "+(int)(stop-start)+"ms");
    }

    /** Generate TParser.java and TLexer.java from T.g if combined, else
     *  just use T.java as output regardless of type. The name is the
     *  grammar's recognizer name followed by the rendered
     *  "codeFileExtension" template.
     */
    public String getRecognizerFileName() {
        ST extST = templates.getInstanceOf("codeFileExtension");
        String recognizerName = g.getRecognizerName();
        return recognizerName+extST.render();
    }

    /** What is the name of the vocab file generated for this grammar?
     *  Currently always the grammar name plus {@link #VOCAB_FILE_EXTENSION}.
     */
    public String getVocabFileName() {
        // TODO: return null (no .tokens file) when g.isBuiltFromString()
        return g.name+VOCAB_FILE_EXTENSION;
    }
}

View File

@ -0,0 +1,76 @@
package org.antlr.v4.codegen;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.src.OutputModelObject;
import org.antlr.v4.tool.ErrorType;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.compiler.FormalArgument;
import java.lang.reflect.Field;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
/** Convert output model tree to template hierarchy. Each model object's
 *  class is mapped to a template name; the object itself is passed as the
 *  template's first formal argument, and every child field (as listed by
 *  {@code getChildren()}) that the template declares as a formal argument
 *  is walked recursively and added under the field's name.
 */
public class OutputModelWalker {
    Tool tool;
    STGroup templates;
    Map<Class, String> modelToTemplateMap;

    public OutputModelWalker(Tool tool,
                             STGroup templates,
                             Map<Class, String> modelToTemplateMap)
    {
        this.tool = tool;
        this.templates = templates;
        this.modelToTemplateMap = modelToTemplateMap;
    }

    /** Create and populate the template for {@code omo} and, recursively,
     *  for its nested model objects.
     *
     *  @return the populated template; the bare template if it declares no
     *          usable formal arguments; or null (after reporting
     *          CODE_TEMPLATE_ARG_ISSUE) when no template exists for the
     *          object's class
     */
    public ST walk(OutputModelObject omo) {
        // CREATE TEMPLATE FOR THIS OUTPUT OBJECT
        String templateName = modelToTemplateMap.get(omo.getClass());
        ST st = templateName!=null ? templates.getInstanceOf(templateName) : null;
        if ( st==null ) {
            // unmapped model class or template missing from the group
            String culprit = templateName!=null ? templateName : omo.getClass().getName();
            tool.errMgr.toolError(ErrorType.CODE_TEMPLATE_ARG_ISSUE, culprit, "<none>");
            return null;
        }
        if ( st.impl.formalArguments==null ||
             st.impl.formalArguments==FormalArgument.UNKNOWN ||
             st.impl.formalArguments.isEmpty() )
        {
            tool.errMgr.toolError(ErrorType.CODE_TEMPLATE_ARG_ISSUE, templateName, "<none>");
            return st;
        }

        // todo: check arg-field mismatch
        Set<String> argNames = st.impl.formalArguments.keySet();
        String arg = argNames.iterator().next(); // first formal arg receives the model object

        // PASS IN OUTPUT OBJECT TO TEMPLATE
        st.add(arg, omo); // set template attribute of correct name

        Collection<String> children = omo.getChildren();
        if ( children==null ) return st; // leaf object: nothing nested to walk

        for (String fieldName : children) {
            // the template has no such arg, so they won't use it; don't compute
            if ( !argNames.contains(fieldName) ) continue;
            try {
                // getField only sees public fields of the model class
                Field fi = omo.getClass().getField(fieldName);
                Object o = fi.get(omo);
                if ( o instanceof OutputModelObject ) {
                    addNested(st, templateName, fieldName, o);
                }
                else if ( o instanceof Collection ) {
                    for (Object element : (Collection<?>)o) {
                        addNested(st, templateName, fieldName, element);
                    }
                }
                else if ( o!=null ) {
                    tool.errMgr.toolError(ErrorType.CODE_TEMPLATE_ARG_ISSUE, templateName, fieldName);
                }
            }
            catch (NoSuchFieldException nsfe) {
                tool.errMgr.toolError(ErrorType.CODE_TEMPLATE_ARG_ISSUE, templateName, fieldName);
            }
            catch (IllegalAccessException iae) {
                tool.errMgr.toolError(ErrorType.CODE_TEMPLATE_ARG_ISSUE, templateName, fieldName);
            }
        }
        return st;
    }

    /** Walk one nested value and add its template to {@code st} under
     *  {@code fieldName}; report an error if the value is not a model object.
     */
    private void addNested(ST st, String templateName, String fieldName, Object nested) {
        if ( !(nested instanceof OutputModelObject) ) {
            tool.errMgr.toolError(ErrorType.CODE_TEMPLATE_ARG_ISSUE, templateName, fieldName);
            return;
        }
        ST nestedST = walk((OutputModelObject)nested);
        if ( nestedST!=null ) st.add(fieldName, nestedST);
    }
}

View File

@ -0,0 +1,31 @@
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.src.OutputModelObject;
import org.antlr.v4.codegen.src.Parser;
import org.antlr.v4.codegen.src.ParserFile;
import org.antlr.v4.codegen.src.RuleFunction;
import org.antlr.v4.tool.Grammar;
import java.util.HashMap;
import java.util.Map;
/** Code generator for parsers: maps the parser output model classes to
 *  their template names and builds a ParserFile model for the grammar.
 */
public class ParserGenerator extends CodeGenerator {
    /** Output model class -> name of the template that renders it. */
    public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>();
    static {
        modelToTemplateMap.put(ParserFile.class, "parserFile");
        modelToTemplateMap.put(Parser.class, "parser");
        modelToTemplateMap.put(RuleFunction.class, "parserFunction");
    }

    public ParserGenerator(Grammar g) {
        super(g);
    }

    /** Wrap a Parser model for the grammar in a ParserFile named after the
     *  recognizer file.
     */
    public OutputModelObject buildOutputModel() {
        return new ParserFile(new Parser(g), getRecognizerFileName());
    }
}

View File

@ -1,7 +1,11 @@
package org.antlr.v4.codegen;
import org.antlr.analysis.Label;
import org.antlr.tool.ErrorManager;
import org.antlr.v4.Tool;
import org.antlr.v4.automata.Label;
import org.antlr.v4.tool.Grammar;
import org.stringtemplate.v4.ST;
import java.io.IOException;
/** */
public class Target {
@ -32,6 +36,26 @@ public class Target {
ANTLRLiteralCharValueEscape['\''] = "\\'";
}
/** Write the recognizer template to the file named by the generator's
 *  {@code getRecognizerFileName()}. The tool and grammar arguments are not
 *  used by this default implementation.
 *
 *  @throws IOException if the generator fails to write the file
 */
protected void genRecognizerFile(Tool tool,
                                 CodeGenerator generator,
                                 Grammar grammar,
                                 ST outputFileST)
    throws IOException
{
    generator.write(outputFileST, generator.getRecognizerFileName());
}
/** Hook for targets that emit a separate header file. This default
 *  implementation does nothing; {@code extName} is the header file
 *  extension, e.g., ".h".
 */
protected void genRecognizerHeaderFile(Tool tool,
CodeGenerator generator,
Grammar grammar,
ST headerFileST,
String extName) // e.g., ".h"
throws IOException
{
// no header file by default
}
/** Given a literal like (the 3 char sequence with single quotes) 'a',
* return the int value of 'a'. Convert escape sequences here also.
*/
@ -43,8 +67,8 @@ public class Target {
case 4 :
// '\x' (antlr lexer will catch invalid char)
if ( Character.isDigit(literal.charAt(2)) ) {
ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
"invalid char literal: "+literal);
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
// "invalid char literal: "+literal);
return -1;
}
int escChar = literal.charAt(2);
@ -59,8 +83,8 @@ public class Target {
String unicodeChars = literal.substring(3,literal.length()-1);
return Integer.parseInt(unicodeChars, 16);
default :
ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
"invalid char literal: "+literal);
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
// "invalid char literal: "+literal);
return -1;
}
}
@ -102,7 +126,6 @@ public class Target {
*/
public static String getANTLRCharLiteralForChar(int c) {
if ( c< Label.MIN_CHAR_VALUE ) {
ErrorManager.internalError("invalid char value "+c);
return "'<INVALID>'";
}
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {

View File

@ -3,7 +3,7 @@ package org.antlr.v4.codegen.src;
import org.antlr.v4.misc.IntSet;
/** */
public class BitSetDef {
public class BitSetDef extends OutputModelObject {
String name;
IntSet[] set;
}

View File

@ -1,12 +1,21 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.tool.Alternative;
import java.util.ArrayList;
import java.util.List;
/** Source operation holding a list of alternative code blocks together
 *  with a DFA and a DFA definition. Only {@code alts} is exposed as a
 *  child for the output model walker.
 */
public class Choice extends SrcOp {
    public DFA dfa;
    // NOTE: the original author marked this field with "???"
    public DFADef dfaDef;
    public List<CodeBlock> alts;

    /** The alternatives are currently not stored; {@code alts} stays unset. */
    public Choice(Alternative[] alts) {
    }

    @Override
    public List<String> getChildren() {
        List<String> children = new ArrayList<String>();
        children.add("alts");
        return children;
    }
}

View File

@ -1,8 +1,14 @@
package org.antlr.v4.codegen.src;
import java.util.ArrayList;
import java.util.List;
/** Source operation that is a sequence of other source operations;
 *  {@code ops} is its only child field for the output model walker.
 */
public class CodeBlock extends SrcOp {
    public List<SrcOp> ops;

    @Override
    public List<String> getChildren() {
        List<String> children = new ArrayList<String>();
        children.add("ops");
        return children;
    }
}

View File

@ -3,7 +3,7 @@ package org.antlr.v4.codegen.src;
import org.antlr.v4.automata.DFA;
/** */
public class DFADef {
public class DFADef extends OutputModelObject {
public String name;
public DFA dfa;

View File

@ -1,5 +1,10 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.tool.Alternative;
/** Optional-block model object that adds nothing to {@link OptionalBlock};
 *  the constructor only forwards the alternatives to the superclass.
 *  NOTE(review): presumably chosen for LL(1) decisions -- confirm against
 *  the code that builds the output model.
 */
public class LL1OptionalBlock extends OptionalBlock {
public LL1OptionalBlock(Alternative[] alts) {
super(alts);
}
}

View File

@ -1,5 +1,10 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.tool.Alternative;
/** Optional-block model object that adds nothing to {@link OptionalBlock};
 *  the constructor only forwards the alternatives to the superclass.
 *  NOTE(review): presumably chosen for LL(1) optional blocks with a single
 *  alternative -- confirm against the code that builds the output model.
 */
public class LL1OptionalBlockSingleAlt extends OptionalBlock {
public LL1OptionalBlockSingleAlt(Alternative[] alts) {
super(alts);
}
}

View File

@ -1,5 +1,10 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.tool.Alternative;
/** Optional-block model object that adds nothing to {@link OptionalBlock};
 *  the constructor only forwards the alternatives to the superclass.
 *  NOTE(review): presumably chosen for LL(*) decisions -- confirm against
 *  the code that builds the output model.
 */
public class LLStarOptionalBlock extends OptionalBlock {
public LLStarOptionalBlock(Alternative[] alts) {
super(alts);
}
}

View File

@ -1,5 +1,10 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.tool.Alternative;
/** Optional-block model object that adds nothing to {@link OptionalBlock};
 *  the constructor only forwards the alternatives to the superclass.
 *  NOTE(review): presumably chosen for LL(k) decisions -- confirm against
 *  the code that builds the output model.
 */
public class LLkOptionalBlock extends OptionalBlock {
public LLkOptionalBlock(Alternative[] alts) {
super(alts);
}
}

View File

@ -1,5 +1,17 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.tool.Alternative;
import java.util.List;
/** A {@link Choice} used for optional blocks. It currently adds no state or
 *  behavior of its own: the constructor forwards to Choice and
 *  getChildren() returns exactly what Choice returns.
 */
public class OptionalBlock extends Choice {
public OptionalBlock(Alternative[] alts) {
super(alts);
}
@Override
public List<String> getChildren() {
// same children as Choice; the override is a pass-through
return super.getChildren();
}
}

View File

@ -0,0 +1,19 @@
package org.antlr.v4.codegen.src;
import java.util.List;
/** Root of the output model class hierarchy. */
public abstract class OutputModelObject {
    /** If the output model object encloses some other model objects,
     *  we need to be able to walk them. Rather than make each class
     *  properly walk any nested objects, I'm going to use a generic
     *  external walker. This method lets me look at the output model
     *  as a homogeneous tree structure. Returns a list of field names
     *  of type OutputModelObject that should be walked to complete model.
     *
     *  @return the child field names; never null. The default is a fresh
     *          empty list, so objects without nested model objects can be
     *          iterated safely.
     */
    public List<String> getChildren() {
        return new java.util.ArrayList<String>();
    }
}

View File

@ -1,11 +1,26 @@
package org.antlr.v4.codegen.src;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import java.util.ArrayList;
import java.util.List;
/** */
public class Parser {
public Grammar g;
public List<RuleFunction> funcs;
public class Parser extends OutputModelObject {
public String name;
public List<RuleFunction> funcs = new ArrayList<RuleFunction>();
public List<DFADef> dfaDefs;
public List<IntSet> bitsetDefs;
public Parser(Grammar g) {
name = g.getRecognizerName();
for (Rule r : g.rules.values()) funcs.add( new RuleFunction(r) );
}
@Override
public List<String> getChildren() {
return new ArrayList<String>() {{ add("funcs"); add("dfaDefs"); }};
}
}

View File

@ -1,6 +1,16 @@
package org.antlr.v4.codegen.src;
import java.util.ArrayList;
import java.util.List;
/** */
public class ParserFile {
public class ParserFile extends OutputModelObject {
public String fileName;
public Parser parser;
public ParserFile(Parser p, String fileName) { parser = p; this.fileName = fileName; }
@Override
public List<String> getChildren() {
return new ArrayList<String>() {{ add("parser"); }};
}
}

View File

@ -10,7 +10,7 @@ import java.util.Collection;
import java.util.List;
/** */
public class RuleFunction {
public class RuleFunction extends OutputModelObject {
public String name;
public List<String> modifiers;
public Collection<Attribute> args;
@ -33,12 +33,17 @@ public class RuleFunction {
}
modifiers = Utils.nodesToStrings(r.modifiers);
args = r.args.attributes.values();
retvals = r.retvals.attributes.values();
ruleScopeDecls = r.scope.attributes.values();
if ( r.args!=null ) args = r.args.attributes.values();
if ( r.retvals!=null ) retvals = r.retvals.attributes.values();
if ( r.scope!=null ) ruleScopeDecls = r.scope.attributes.values();
ruleLabels = r.getLabelNames();
tokenLabels = r.getTokenRefs();
exceptions = Utils.nodesToStrings(r.exceptionActions);
finallyAction = r.finallyAction.getText();
if ( r.finallyAction!=null ) finallyAction = r.finallyAction.getText();
}
@Override
public List<String> getChildren() {
return new ArrayList<String>() {{ add("code"); }};
}
}

View File

@ -3,6 +3,6 @@ package org.antlr.v4.codegen.src;
import org.antlr.v4.tool.GrammarAST;
/** */
public class SrcOp {
public class SrcOp extends OutputModelObject {
public GrammarAST ast;
}

View File

@ -1,5 +1,5 @@
package org.antlr.v4.codegen.src;
/** */
public class Sync {
public class Sync extends SrcOp {
}

View File

@ -1,5 +1,5 @@
package org.antlr.v4.codegen.src;
/** */
public class dbg {
public class dbg extends OutputModelObject {
}

View File

@ -96,6 +96,7 @@ public class Utils {
}
public static <T extends GrammarAST> List<String> nodesToStrings(List<T> nodes) {
if ( nodes == null ) return null;
List<String> a = new ArrayList<String>();
for (T t : nodes) a.add(t.getText());
return a;

View File

@ -55,13 +55,13 @@ public enum ErrorType {
INTERNAL_ERROR(ErrorSeverity.ERROR, true, true),
INTERNAL_WARNING(ErrorSeverity.ERROR, true, true),
ERROR_CREATING_ARTIFICIAL_RULE(ErrorSeverity.ERROR, true, true),
TOKENS_FILE_SYNTAX_ERROR(ErrorSeverity.ERROR, true, true),
CANNOT_GEN_DOT_FILE(ErrorSeverity.ERROR, true, true),
// Code generation errors
MISSING_CODE_GEN_TEMPLATES(ErrorSeverity.ERROR, false, true),
CANNOT_CREATE_TARGET_GENERATOR(ErrorSeverity.ERROR, false, true),
CODE_TEMPLATE_ARG_ISSUE(ErrorSeverity.ERROR, false, true),
// Grammar errors
SYNTAX_ERROR(ErrorSeverity.ERROR, true, true),

View File

@ -536,6 +536,18 @@ public class Grammar implements AttributeResolver {
}
}
/** Look up a grammar option.
 *
 *  @return the value stored under {@code key}, or null when there are no
 *          options at all or the key has no value
 */
public String getOption(String key) {
    return options==null ? null : options.get(key);
}
/** Look up a grammar option, falling back to a default.
 *
 *  @return the value stored under {@code key}, or {@code defaultValue}
 *          when there are no options or the key has no (non-null) value
 */
public String getOption(String key, String defaultValue) {
    if ( options!=null ) {
        String value = options.get(key);
        if ( value!=null ) return value;
    }
    return defaultValue;
}
public static Map<String,String> getStringLiteralAliasesFromLexerRules(GrammarRootAST ast) {
GrammarAST combinedRulesRoot =
(GrammarAST)ast.getFirstChildWithType(ANTLRParser.RULES);

View File

@ -125,14 +125,18 @@ public class Rule implements AttributeResolver {
// TODO: move to code gen InvokeRule function? is only place we ref?
public Set<String> getRuleRefs() {
Set<String> refs = new HashSet<String>();
for (Alternative a : alt) refs.addAll(a.ruleRefs.keySet());
for (int i=1; i<=numberOfAlts; i++) {
refs.addAll(alt[i].ruleRefs.keySet());
}
return refs;
}
public Set<String> getTokenRefs() {
Set<String> refs = new HashSet<String>();
for (Alternative a : alt) refs.addAll(a.tokenRefs.keySet());
return refs;
for (int i=1; i<=numberOfAlts; i++) {
refs.addAll(alt[i].tokenRefs.keySet());
}
return refs;
}
public Set<String> getLabelNames() {