forked from jasder/antlr
template names = class names; reorg'd char stuff; got bitsets defined.
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6850]
This commit is contained in:
parent
cab4d7d197
commit
3e1f43117e
|
@ -1,40 +1,45 @@
|
||||||
// args must be <object-model-object>, <fields-resulting-in-STs>
|
// args must be <object-model-object>, <fields-resulting-in-STs>
|
||||||
parserFile(f, parser) ::= <<
|
ParserFile(f, parser, dfaDefs, bitSetDefs) ::= <<
|
||||||
// $ANTLR ANTLRVersion> <f.fileName> generatedTimestamp>
|
// $ANTLR ANTLRVersion> <f.fileName> generatedTimestamp>
|
||||||
import org.antlr.runtime.*;
|
import org.antlr.runtime.*;
|
||||||
|
|
||||||
<parser>
|
<parser>
|
||||||
>>
|
>>
|
||||||
|
|
||||||
parser(p,funcs,dfaDefs) ::= <<
|
Parser(p, funcs) ::= <<
|
||||||
public class <p.name> {
|
public class <p.name> {
|
||||||
<funcs>
|
<funcs; separator="\n">
|
||||||
<dfaDefs>
|
<dfaDefs; separator="\n">
|
||||||
|
<bitSetDefs; separator="\n">
|
||||||
}
|
}
|
||||||
>>
|
>>
|
||||||
|
|
||||||
DFA(dfa) ::= <<
|
DFADef(dfa) ::= <<
|
||||||
// define <dfa.name>
|
// define <dfa.name>
|
||||||
>>
|
>>
|
||||||
|
|
||||||
parserFunction(f,code) ::= <<
|
BitSetDef(b) ::= <<
|
||||||
|
// define <b.name>
|
||||||
|
>>
|
||||||
|
|
||||||
|
RuleFunction(f,code) ::= <<
|
||||||
<f.modifiers:{f | <f> }>void <f.name>(<f.args>) {
|
<f.modifiers:{f | <f> }>void <f.name>(<f.args>) {
|
||||||
<code>
|
<code>
|
||||||
}
|
}
|
||||||
>>
|
>>
|
||||||
|
|
||||||
codeBlock(c, ops) ::= <<
|
CodeBlock(c, ops) ::= <<
|
||||||
<ops>
|
<ops; separator="\n">
|
||||||
>>
|
>>
|
||||||
|
|
||||||
switch(c, alts) ::= <<
|
LL1Choice(c, alts) ::= <<
|
||||||
switch ( input.LA(1) ) {
|
switch ( input.LA(1) ) {
|
||||||
<alts>
|
<alts; separator="\n">
|
||||||
}
|
}
|
||||||
>>
|
>>
|
||||||
|
|
||||||
matchToken(m) ::= <<
|
MatchToken(m) ::= <<
|
||||||
match(<m.name>);
|
match(<m.name>, <m.follow.name>);
|
||||||
>>
|
>>
|
||||||
|
|
||||||
codeFileExtension() ::= ".java"
|
codeFileExtension() ::= ".java"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
package org.antlr.v4.automata;
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.tool.GrammarAST;
|
import org.antlr.v4.tool.GrammarAST;
|
||||||
import org.antlr.v4.tool.LexerGrammar;
|
import org.antlr.v4.tool.LexerGrammar;
|
||||||
import org.antlr.v4.tool.Rule;
|
import org.antlr.v4.tool.Rule;
|
||||||
|
@ -45,8 +45,8 @@ public class LexerNFAFactory extends ParserNFAFactory {
|
||||||
public Handle range(GrammarAST a, GrammarAST b) {
|
public Handle range(GrammarAST a, GrammarAST b) {
|
||||||
BasicState left = newState(a);
|
BasicState left = newState(a);
|
||||||
BasicState right = newState(b);
|
BasicState right = newState(b);
|
||||||
int t1 = Target.getCharValueFromGrammarCharLiteral(a.getText());
|
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
|
||||||
int t2 = Target.getCharValueFromGrammarCharLiteral(b.getText());
|
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
|
||||||
left.transition = new RangeTransition(t1, t2, right);
|
left.transition = new RangeTransition(t1, t2, right);
|
||||||
a.nfaState = left;
|
a.nfaState = left;
|
||||||
b.nfaState = left;
|
b.nfaState = left;
|
||||||
|
|
|
@ -3,7 +3,7 @@ package org.antlr.v4.automata;
|
||||||
|
|
||||||
import org.antlr.runtime.RecognitionException;
|
import org.antlr.runtime.RecognitionException;
|
||||||
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.misc.IntervalSet;
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||||
|
@ -103,7 +103,7 @@ public class ParserNFAFactory implements NFAFactory {
|
||||||
GrammarAST ast = A.left.ast;
|
GrammarAST ast = A.left.ast;
|
||||||
int ttype = 0;
|
int ttype = 0;
|
||||||
if ( g.isLexer() ) {
|
if ( g.isLexer() ) {
|
||||||
ttype = Target.getCharValueFromGrammarCharLiteral(ast.getText());
|
ttype = CharSupport.getCharValueFromGrammarCharLiteral(ast.getText());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ttype = g.getTokenType(ast.getText());
|
ttype = g.getTokenType(ast.getText());
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
package org.antlr.v4.automata;
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.misc.IntervalSet;
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
|
|
||||||
public class RangeTransition extends Transition {
|
public class RangeTransition extends Transition {
|
||||||
|
@ -21,7 +21,7 @@ public class RangeTransition extends Transition {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return Target.getANTLRCharLiteralForChar(from)+".."+
|
return CharSupport.getANTLRCharLiteralForChar(from)+".."+
|
||||||
Target.getANTLRCharLiteralForChar(to);
|
CharSupport.getANTLRCharLiteralForChar(to);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
package org.antlr.v4.codegen;
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.src.BitSetDef;
|
||||||
import org.antlr.v4.codegen.src.OutputModelObject;
|
import org.antlr.v4.codegen.src.OutputModelObject;
|
||||||
|
import org.antlr.v4.codegen.src.ParserFile;
|
||||||
|
import org.antlr.v4.misc.IntSet;
|
||||||
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.tool.ErrorType;
|
import org.antlr.v4.tool.ErrorType;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
import org.antlr.v4.tool.GrammarAST;
|
||||||
import org.stringtemplate.v4.*;
|
import org.stringtemplate.v4.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -19,6 +24,7 @@ public abstract class CodeGenerator {
|
||||||
public Grammar g;
|
public Grammar g;
|
||||||
public Target target;
|
public Target target;
|
||||||
public STGroup templates;
|
public STGroup templates;
|
||||||
|
public ParserFile outputModel;
|
||||||
|
|
||||||
public int lineWidth = 72;
|
public int lineWidth = 72;
|
||||||
|
|
||||||
|
@ -72,8 +78,7 @@ public abstract class CodeGenerator {
|
||||||
public void write() {
|
public void write() {
|
||||||
OutputModelObject root = buildOutputModel();
|
OutputModelObject root = buildOutputModel();
|
||||||
|
|
||||||
OutputModelWalker walker = new OutputModelWalker(g.tool, templates,
|
OutputModelWalker walker = new OutputModelWalker(g.tool, templates);
|
||||||
ParserGenerator.modelToTemplateMap);
|
|
||||||
ST outputFileST = walker.walk(root);
|
ST outputFileST = walker.walk(root);
|
||||||
|
|
||||||
// WRITE FILES
|
// WRITE FILES
|
||||||
|
@ -130,4 +135,16 @@ public abstract class CodeGenerator {
|
||||||
// }
|
// }
|
||||||
return g.name+VOCAB_FILE_EXTENSION;
|
return g.name+VOCAB_FILE_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public BitSetDef defineBitSet(GrammarAST ast, IntSet follow) {
|
||||||
|
String inRuleName = ast.nfaState.rule.name;
|
||||||
|
String elementName = ast.getText(); // assume rule ref
|
||||||
|
if ( ast.getType() == ANTLRParser.TOKEN_REF ) {
|
||||||
|
target.getTokenTypeAsTargetLabel(g, ast.getType() );
|
||||||
|
}
|
||||||
|
String name = "FOLLOW_"+elementName+"_in_"+inRuleName+ast.token.getTokenIndex();
|
||||||
|
BitSetDef b = new BitSetDef(this, name, follow);
|
||||||
|
outputModel.bitSetDefs.add(b);
|
||||||
|
return b;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ import org.antlr.runtime.Token;
|
||||||
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
||||||
import org.antlr.runtime.tree.TreeNodeStream;
|
import org.antlr.runtime.tree.TreeNodeStream;
|
||||||
import org.antlr.v4.codegen.nfa.*;
|
import org.antlr.v4.codegen.nfa.*;
|
||||||
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.misc.DoubleKeyMap;
|
import org.antlr.v4.misc.DoubleKeyMap;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||||
|
@ -89,7 +90,7 @@ public class NFABytecodeGenerator extends TreeParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void emitString(Token t) {
|
public void emitString(Token t) {
|
||||||
String chars = Target.getStringFromGrammarStringLiteral(t.getText());
|
String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
|
||||||
for (char c : chars.toCharArray()) {
|
for (char c : chars.toCharArray()) {
|
||||||
emit(new MatchInstr(t, c));
|
emit(new MatchInstr(t, c));
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,20 +15,19 @@ import java.util.*;
|
||||||
public class OutputModelWalker {
|
public class OutputModelWalker {
|
||||||
Tool tool;
|
Tool tool;
|
||||||
STGroup templates;
|
STGroup templates;
|
||||||
Map<Class, String> modelToTemplateMap;
|
//Map<Class, String> modelToTemplateMap;
|
||||||
|
|
||||||
public OutputModelWalker(Tool tool,
|
public OutputModelWalker(Tool tool,
|
||||||
STGroup templates,
|
STGroup templates)
|
||||||
Map<Class, String> modelToTemplateMap)
|
|
||||||
{
|
{
|
||||||
this.tool = tool;
|
this.tool = tool;
|
||||||
this.templates = templates;
|
this.templates = templates;
|
||||||
this.modelToTemplateMap = modelToTemplateMap;
|
//this.modelToTemplateMap = modelToTemplateMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ST walk(OutputModelObject omo) {
|
public ST walk(OutputModelObject omo) {
|
||||||
// CREATE TEMPLATE FOR THIS OUTPUT OBJECT
|
// CREATE TEMPLATE FOR THIS OUTPUT OBJECT
|
||||||
String templateName = modelToTemplateMap.get(omo.getClass());
|
String templateName = omo.getClass().getSimpleName();
|
||||||
if ( templateName == null ) {
|
if ( templateName == null ) {
|
||||||
tool.errMgr.toolError(ErrorType.NO_MODEL_TO_TEMPLATE_MAPPING, omo.getClass().getSimpleName());
|
tool.errMgr.toolError(ErrorType.NO_MODEL_TO_TEMPLATE_MAPPING, omo.getClass().getSimpleName());
|
||||||
return new BlankST();
|
return new BlankST();
|
||||||
|
|
|
@ -1,30 +1,31 @@
|
||||||
package org.antlr.v4.codegen;
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
import org.antlr.v4.codegen.src.*;
|
import org.antlr.v4.codegen.src.OutputModelObject;
|
||||||
|
import org.antlr.v4.codegen.src.Parser;
|
||||||
|
import org.antlr.v4.codegen.src.ParserFile;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
public class ParserGenerator extends CodeGenerator {
|
public class ParserGenerator extends CodeGenerator {
|
||||||
public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>() {{
|
// public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>() {{
|
||||||
put(ParserFile.class, "parserFile");
|
// put(ParserFile.class, "parserFile");
|
||||||
put(Parser.class, "parser");
|
// put(Parser.class, "parser");
|
||||||
put(RuleFunction.class, "parserFunction");
|
// put(RuleFunction.class, "parserFunction");
|
||||||
put(DFADef.class, "DFA");
|
// put(DFADef.class, "DFA");
|
||||||
put(CodeBlock.class, "codeBlock");
|
// put(CodeBlock.class, "codeBlock");
|
||||||
put(LL1Choice.class, "switch");
|
// put(LL1Choice.class, "switch");
|
||||||
put(MatchToken.class, "matchToken");
|
// put(MatchToken.class, "matchToken");
|
||||||
}};
|
// }};
|
||||||
|
|
||||||
public ParserGenerator(Grammar g) {
|
public ParserGenerator(Grammar g) {
|
||||||
super(g);
|
super(g);
|
||||||
}
|
}
|
||||||
|
|
||||||
public OutputModelObject buildOutputModel() {
|
public OutputModelObject buildOutputModel() {
|
||||||
Parser p = new Parser(this);
|
ParserFile pf = new ParserFile(this, getRecognizerFileName());
|
||||||
return new ParserFile(this, p, getRecognizerFileName());
|
outputModel = pf;
|
||||||
|
pf.parser = new Parser(this, pf); // side-effect: fills pf dfa and bitset defs
|
||||||
|
// at this point, model is built
|
||||||
|
return outputModel;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package org.antlr.v4.codegen;
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
import org.antlr.v4.automata.Label;
|
import org.antlr.v4.automata.Label;
|
||||||
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.Grammar;
|
||||||
import org.stringtemplate.v4.ST;
|
import org.stringtemplate.v4.ST;
|
||||||
|
|
||||||
|
@ -8,31 +9,28 @@ import java.io.IOException;
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
public class Target {
|
public class Target {
|
||||||
/** When converting ANTLR char and string literals, here is the
|
/** For pure strings of Java 16-bit unicode char, how can we display
|
||||||
* value set of escape chars.
|
* it in the target language as a literal. Useful for dumping
|
||||||
|
* predicates and such that may refer to chars that need to be escaped
|
||||||
|
* when represented as strings. Also, templates need to be escaped so
|
||||||
|
* that the target language can hold them as a string.
|
||||||
|
*
|
||||||
|
* I have defined (via the constructor) the set of typical escapes,
|
||||||
|
* but your Target subclass is free to alter the translated chars or
|
||||||
|
* add more definitions. This is nonstatic so each target can have
|
||||||
|
* a different set in memory at same time.
|
||||||
*/
|
*/
|
||||||
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
|
protected String[] targetCharValueEscape = new String[255];
|
||||||
|
|
||||||
/** Given a char, we need to be able to show as an ANTLR literal.
|
public Target() {
|
||||||
*/
|
targetCharValueEscape['\n'] = "\\n";
|
||||||
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
targetCharValueEscape['\r'] = "\\r";
|
||||||
|
targetCharValueEscape['\t'] = "\\t";
|
||||||
static {
|
targetCharValueEscape['\b'] = "\\b";
|
||||||
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
targetCharValueEscape['\f'] = "\\f";
|
||||||
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
targetCharValueEscape['\\'] = "\\\\";
|
||||||
ANTLRLiteralEscapedCharValue['t'] = '\t';
|
targetCharValueEscape['\''] = "\\'";
|
||||||
ANTLRLiteralEscapedCharValue['b'] = '\b';
|
targetCharValueEscape['"'] = "\\\"";
|
||||||
ANTLRLiteralEscapedCharValue['f'] = '\f';
|
|
||||||
ANTLRLiteralEscapedCharValue['\\'] = '\\';
|
|
||||||
ANTLRLiteralEscapedCharValue['\''] = '\'';
|
|
||||||
ANTLRLiteralEscapedCharValue['"'] = '"';
|
|
||||||
ANTLRLiteralCharValueEscape['\n'] = "\\n";
|
|
||||||
ANTLRLiteralCharValueEscape['\r'] = "\\r";
|
|
||||||
ANTLRLiteralCharValueEscape['\t'] = "\\t";
|
|
||||||
ANTLRLiteralCharValueEscape['\b'] = "\\b";
|
|
||||||
ANTLRLiteralCharValueEscape['\f'] = "\\f";
|
|
||||||
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
|
|
||||||
ANTLRLiteralCharValueEscape['\''] = "\\'";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void genRecognizerFile(CodeGenerator generator,
|
protected void genRecognizerFile(CodeGenerator generator,
|
||||||
|
@ -53,95 +51,65 @@ public class Target {
|
||||||
// no header file by default
|
// no header file by default
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Given a literal like (the 3 char sequence with single quotes) 'a',
|
/** Get a meaningful name for a token type useful during code generation.
|
||||||
* return the int value of 'a'. Convert escape sequences here also.
|
* Literals without associated names are converted to the string equivalent
|
||||||
|
* of their integer values. Used to generate x==ID and x==34 type comparisons
|
||||||
|
* etc... Essentially we are looking for the most obvious way to refer
|
||||||
|
* to a token type in the generated code. If in the lexer, return the
|
||||||
|
* char literal translated to the target language. For example, ttype=10
|
||||||
|
* will yield '\n' from the getTokenDisplayName method. That must
|
||||||
|
* be converted to the target language's literals. For most C-derived
|
||||||
|
* languages no translation is needed.
|
||||||
*/
|
*/
|
||||||
public static int getCharValueFromGrammarCharLiteral(String literal) {
|
public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
|
||||||
switch ( literal.length() ) {
|
if ( g.getType() == ANTLRParser.LEXER ) {
|
||||||
case 3 :
|
// String name = g.getTokenDisplayName(ttype);
|
||||||
// 'x'
|
// return getTargetCharLiteralFromANTLRCharLiteral(this,name);
|
||||||
return literal.charAt(1); // no escape char
|
|
||||||
case 4 :
|
|
||||||
// '\x' (antlr lexer will catch invalid char)
|
|
||||||
if ( Character.isDigit(literal.charAt(2)) ) {
|
|
||||||
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
|
||||||
// "invalid char literal: "+literal);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
int escChar = literal.charAt(2);
|
|
||||||
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
|
||||||
if ( charVal==0 ) {
|
|
||||||
// Unnecessary escapes like '\{' should just yield {
|
|
||||||
return escChar;
|
|
||||||
}
|
|
||||||
return charVal;
|
|
||||||
case 8 :
|
|
||||||
// '\u1234'
|
|
||||||
String unicodeChars = literal.substring(3,literal.length()-1);
|
|
||||||
return Integer.parseInt(unicodeChars, 16);
|
|
||||||
default :
|
|
||||||
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
|
||||||
// "invalid char literal: "+literal);
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
String name = g.getTokenDisplayName(ttype);
|
||||||
|
// If name is a literal, return the token type instead
|
||||||
|
if ( name.charAt(0)=='\'' ) {
|
||||||
|
return String.valueOf(ttype);
|
||||||
|
}
|
||||||
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getStringFromGrammarStringLiteral(String literal) {
|
/** Convert from an ANTLR char literal found in a grammar file to
|
||||||
StringBuilder buf = new StringBuilder();
|
* an equivalent char literal in the target language. For most
|
||||||
int n = literal.length();
|
* languages, this means leaving 'x' as 'x'. Actually, we need
|
||||||
int i = 1; // skip first quote
|
* to escape '\u000A' so that it doesn't get converted to \n by
|
||||||
while ( i < (n-1) ) { // scan all but last quote
|
* the compiler. Convert the literal to the char value and then
|
||||||
switch ( literal.charAt(i) ) {
|
* to an appropriate target char literal.
|
||||||
case '\\' :
|
*
|
||||||
i++;
|
* The returned literal is wrapped in single quotes.
|
||||||
if ( literal.charAt(i)=='u' ) { // '\u1234'
|
*/
|
||||||
i++;
|
public String getTargetCharLiteralCharValue(int c) {
|
||||||
String unicodeChars = literal.substring(3,literal.length()-1);
|
StringBuffer buf = new StringBuffer();
|
||||||
buf.append((char)Integer.parseInt(unicodeChars, 16));
|
buf.append('\'');
|
||||||
}
|
if ( c<Label.MIN_CHAR_VALUE ) return "'\u0000'";
|
||||||
else {
|
if ( c<targetCharValueEscape.length &&
|
||||||
char escChar = literal.charAt(i);
|
targetCharValueEscape[c]!=null )
|
||||||
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
{
|
||||||
if ( charVal==0 ) buf.append(escChar); // Unnecessary escapes like '\{' should just yield {
|
buf.append(targetCharValueEscape[c]);
|
||||||
else buf.append((char)charVal);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default :
|
|
||||||
buf.append(literal.charAt(i));
|
|
||||||
i++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else if ( Character.UnicodeBlock.of((char)c)==
|
||||||
|
Character.UnicodeBlock.BASIC_LATIN &&
|
||||||
|
!Character.isISOControl((char)c) )
|
||||||
|
{
|
||||||
|
// normal char
|
||||||
|
buf.append((char)c);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// must be something unprintable...use \\uXXXX
|
||||||
|
// turn on the bit above max "\\uFFFF" value so that we pad with zeros
|
||||||
|
// then only take last 4 digits
|
||||||
|
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
|
||||||
|
buf.append("\\u");
|
||||||
|
buf.append(hex);
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.append('\'');
|
||||||
return buf.toString();
|
return buf.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return a string representing the escaped char for code c. E.g., If c
|
|
||||||
* has value 0x100, you will get "\u0100". ASCII gets the usual
|
|
||||||
* char (non-hex) representation. Control characters are spit out
|
|
||||||
* as unicode. While this is specially set up for returning Java strings,
|
|
||||||
* it can be used by any language target that has the same syntax. :)
|
|
||||||
*/
|
|
||||||
public static String getANTLRCharLiteralForChar(int c) {
|
|
||||||
if ( c< Label.MIN_CHAR_VALUE ) {
|
|
||||||
return "'<INVALID>'";
|
|
||||||
}
|
|
||||||
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
|
|
||||||
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
|
|
||||||
}
|
|
||||||
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
|
|
||||||
!Character.isISOControl((char)c) ) {
|
|
||||||
if ( c=='\\' ) {
|
|
||||||
return "'\\\\'";
|
|
||||||
}
|
|
||||||
if ( c=='\'') {
|
|
||||||
return "'\\''";
|
|
||||||
}
|
|
||||||
return '\''+Character.toString((char)c)+'\'';
|
|
||||||
}
|
|
||||||
// turn on the bit above max "\uFFFF" value so that we pad with zeros
|
|
||||||
// then only take last 4 digits
|
|
||||||
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
|
|
||||||
String unicodeStr = "'\\u"+hex+"'";
|
|
||||||
return unicodeStr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
|
||||||
|
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.runtime.nfa.Bytecode;
|
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
|
@ -12,8 +12,8 @@ public class RangeInstr extends Instr {
|
||||||
public RangeInstr(Token start, Token stop) {
|
public RangeInstr(Token start, Token stop) {
|
||||||
this.start = start;
|
this.start = start;
|
||||||
this.stop = stop;
|
this.stop = stop;
|
||||||
a = (char) Target.getCharValueFromGrammarCharLiteral(start.getText());
|
a = (char)CharSupport.getCharValueFromGrammarCharLiteral(start.getText());
|
||||||
b = (char)Target.getCharValueFromGrammarCharLiteral(stop.getText());
|
b = (char)CharSupport.getCharValueFromGrammarCharLiteral(stop.getText());
|
||||||
}
|
}
|
||||||
public short opcode() { return charSize(a, b)==1? Bytecode.RANGE8:Bytecode.RANGE16; };
|
public short opcode() { return charSize(a, b)==1? Bytecode.RANGE8:Bytecode.RANGE16; };
|
||||||
public int nBytes() { return 1+2*charSize(a, b); }
|
public int nBytes() { return 1+2*charSize(a, b); }
|
||||||
|
|
|
@ -1,9 +1,15 @@
|
||||||
package org.antlr.v4.codegen.src;
|
package org.antlr.v4.codegen.src;
|
||||||
|
|
||||||
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.misc.IntSet;
|
import org.antlr.v4.misc.IntSet;
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
public class BitSetDef extends OutputModelObject {
|
public class BitSetDef extends OutputModelObject {
|
||||||
String name;
|
public String name;
|
||||||
IntSet[] set;
|
public IntSet fset;
|
||||||
|
public BitSetDef(CodeGenerator gen, String name, IntSet fset) {
|
||||||
|
this.gen = gen;
|
||||||
|
this.name = name;
|
||||||
|
this.fset = fset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,15 +8,15 @@ import org.antlr.v4.tool.TerminalAST;
|
||||||
/** */
|
/** */
|
||||||
public class MatchToken extends SrcOp {
|
public class MatchToken extends SrcOp {
|
||||||
public String name;
|
public String name;
|
||||||
public String bitSetName;
|
public BitSetDef follow;
|
||||||
|
|
||||||
public MatchToken(CodeGenerator gen, TerminalAST ast) {
|
public MatchToken(CodeGenerator gen, TerminalAST ast) {
|
||||||
this.gen = gen;
|
this.gen = gen;
|
||||||
name = ast.getText();
|
name = ast.getText();
|
||||||
|
|
||||||
LinearApproximator approx = new LinearApproximator(gen.g, -1);
|
LinearApproximator approx = new LinearApproximator(gen.g, -1);
|
||||||
IntervalSet follow = approx.LOOK(ast.nfaState.transition(0).target);
|
IntervalSet fset = approx.LOOK(ast.nfaState.transition(0).target);
|
||||||
System.out.println("follow="+follow);
|
System.out.println("follow="+follow);
|
||||||
//bitSetName = gen.defineBitSet(follow);
|
follow = gen.defineBitSet(ast, fset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
package org.antlr.v4.codegen.src;
|
package org.antlr.v4.codegen.src;
|
||||||
|
|
||||||
import org.antlr.v4.automata.DFA;
|
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.misc.IntSet;
|
|
||||||
import org.antlr.v4.tool.Rule;
|
import org.antlr.v4.tool.Rule;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -12,22 +10,23 @@ import java.util.List;
|
||||||
public class Parser extends OutputModelObject {
|
public class Parser extends OutputModelObject {
|
||||||
public String name;
|
public String name;
|
||||||
public List<RuleFunction> funcs = new ArrayList<RuleFunction>();
|
public List<RuleFunction> funcs = new ArrayList<RuleFunction>();
|
||||||
public List<DFADef> dfaDefs = new ArrayList<DFADef>();
|
ParserFile file;
|
||||||
public List<IntSet> bitsetDefs;
|
|
||||||
|
|
||||||
public Parser(CodeGenerator gen) {
|
public Parser(CodeGenerator gen, ParserFile file) {
|
||||||
this.gen = gen;
|
this.gen = gen;
|
||||||
|
this.file = file; // who contains us?
|
||||||
name = gen.g.getRecognizerName();
|
name = gen.g.getRecognizerName();
|
||||||
for (Rule r : gen.g.rules.values()) funcs.add( new RuleFunction(gen, r) );
|
for (Rule r : gen.g.rules.values()) funcs.add( new RuleFunction(gen, r) );
|
||||||
|
|
||||||
// build DFA, bitset defs
|
// We create dfa and bitsets during rule function construction.
|
||||||
for (DFA dfa : gen.g.decisionDFAs.values()) {
|
// They get stored in code gen for convenience as we walk rule block tree
|
||||||
dfaDefs.add( new DFADef("DFA"+dfa.decision, dfa) );
|
// for (DFA dfa : gen.g.decisionDFAs.values()) {
|
||||||
}
|
// file.dfaDefs.add( new DFADef("DFA"+dfa.decision, dfa) );
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getChildren() {
|
public List<String> getChildren() {
|
||||||
return new ArrayList<String>() {{ add("funcs"); add("dfaDefs"); }};
|
return new ArrayList<String>() {{ add("funcs"); }};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,15 +9,20 @@ import java.util.List;
|
||||||
public class ParserFile extends OutputModelObject {
|
public class ParserFile extends OutputModelObject {
|
||||||
public String fileName;
|
public String fileName;
|
||||||
public Parser parser;
|
public Parser parser;
|
||||||
|
public List<DFADef> dfaDefs = new ArrayList<DFADef>();
|
||||||
|
public List<BitSetDef> bitSetDefs = new ArrayList<BitSetDef>();
|
||||||
|
|
||||||
public ParserFile(CodeGenerator gen, Parser p, String fileName) {
|
public ParserFile(CodeGenerator gen, String fileName) {
|
||||||
this.gen = gen;
|
this.gen = gen;
|
||||||
parser = p;
|
|
||||||
this.fileName = fileName;
|
this.fileName = fileName;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getChildren() {
|
public List<String> getChildren() {
|
||||||
return new ArrayList<String>() {{ add("parser"); }};
|
return new ArrayList<String>() {{
|
||||||
|
add("parser");
|
||||||
|
add("dfaDefs");
|
||||||
|
add("bitSetDefs");
|
||||||
|
}};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,126 @@
|
||||||
|
package org.antlr.v4.misc;
|
||||||
|
|
||||||
|
import org.antlr.v4.automata.Label;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class CharSupport {
|
||||||
|
/** When converting ANTLR char and string literals, here is the
|
||||||
|
* value set of escape chars.
|
||||||
|
*/
|
||||||
|
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
|
||||||
|
|
||||||
|
/** Given a char, we need to be able to show as an ANTLR literal.
|
||||||
|
*/
|
||||||
|
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
||||||
|
|
||||||
|
static {
|
||||||
|
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
||||||
|
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
||||||
|
ANTLRLiteralEscapedCharValue['t'] = '\t';
|
||||||
|
ANTLRLiteralEscapedCharValue['b'] = '\b';
|
||||||
|
ANTLRLiteralEscapedCharValue['f'] = '\f';
|
||||||
|
ANTLRLiteralEscapedCharValue['\\'] = '\\';
|
||||||
|
ANTLRLiteralEscapedCharValue['\''] = '\'';
|
||||||
|
ANTLRLiteralEscapedCharValue['"'] = '"';
|
||||||
|
ANTLRLiteralCharValueEscape['\n'] = "\\n";
|
||||||
|
ANTLRLiteralCharValueEscape['\r'] = "\\r";
|
||||||
|
ANTLRLiteralCharValueEscape['\t'] = "\\t";
|
||||||
|
ANTLRLiteralCharValueEscape['\b'] = "\\b";
|
||||||
|
ANTLRLiteralCharValueEscape['\f'] = "\\f";
|
||||||
|
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
|
||||||
|
ANTLRLiteralCharValueEscape['\''] = "\\'";
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return a string representing the escaped char for code c. E.g., If c
|
||||||
|
* has value 0x100, you will get "\u0100". ASCII gets the usual
|
||||||
|
* char (non-hex) representation. Control characters are spit out
|
||||||
|
* as unicode. While this is specially set up for returning Java strings,
|
||||||
|
* it can be used by any language target that has the same syntax. :)
|
||||||
|
*/
|
||||||
|
public static String getANTLRCharLiteralForChar(int c) {
|
||||||
|
if ( c< Label.MIN_CHAR_VALUE ) {
|
||||||
|
return "'<INVALID>'";
|
||||||
|
}
|
||||||
|
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
|
||||||
|
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
|
||||||
|
}
|
||||||
|
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
|
||||||
|
!Character.isISOControl((char)c) ) {
|
||||||
|
if ( c=='\\' ) {
|
||||||
|
return "'\\\\'";
|
||||||
|
}
|
||||||
|
if ( c=='\'') {
|
||||||
|
return "'\\''";
|
||||||
|
}
|
||||||
|
return '\''+Character.toString((char)c)+'\'';
|
||||||
|
}
|
||||||
|
// turn on the bit above max "\uFFFF" value so that we pad with zeros
|
||||||
|
// then only take last 4 digits
|
||||||
|
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
|
||||||
|
String unicodeStr = "'\\u"+hex+"'";
|
||||||
|
return unicodeStr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given a literal like (the 3 char sequence with single quotes) 'a',
|
||||||
|
* return the int value of 'a'. Convert escape sequences here also.
|
||||||
|
*/
|
||||||
|
public static int getCharValueFromGrammarCharLiteral(String literal) {
|
||||||
|
switch ( literal.length() ) {
|
||||||
|
case 3 :
|
||||||
|
// 'x'
|
||||||
|
return literal.charAt(1); // no escape char
|
||||||
|
case 4 :
|
||||||
|
// '\x' (antlr lexer will catch invalid char)
|
||||||
|
if ( Character.isDigit(literal.charAt(2)) ) {
|
||||||
|
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
||||||
|
// "invalid char literal: "+literal);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int escChar = literal.charAt(2);
|
||||||
|
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
||||||
|
if ( charVal==0 ) {
|
||||||
|
// Unnecessary escapes like '\{' should just yield {
|
||||||
|
return escChar;
|
||||||
|
}
|
||||||
|
return charVal;
|
||||||
|
case 8 :
|
||||||
|
// '\u1234'
|
||||||
|
String unicodeChars = literal.substring(3,literal.length()-1);
|
||||||
|
return Integer.parseInt(unicodeChars, 16);
|
||||||
|
default :
|
||||||
|
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
||||||
|
// "invalid char literal: "+literal);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getStringFromGrammarStringLiteral(String literal) {
|
||||||
|
StringBuilder buf = new StringBuilder();
|
||||||
|
int n = literal.length();
|
||||||
|
int i = 1; // skip first quote
|
||||||
|
while ( i < (n-1) ) { // scan all but last quote
|
||||||
|
switch ( literal.charAt(i) ) {
|
||||||
|
case '\\' :
|
||||||
|
i++;
|
||||||
|
if ( literal.charAt(i)=='u' ) { // '\u1234'
|
||||||
|
i++;
|
||||||
|
String unicodeChars = literal.substring(3,literal.length()-1);
|
||||||
|
buf.append((char)Integer.parseInt(unicodeChars, 16));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
char escChar = literal.charAt(i);
|
||||||
|
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
||||||
|
if ( charVal==0 ) buf.append(escChar); // Unnecessary escapes like '\{' should just yield {
|
||||||
|
else buf.append((char)charVal);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default :
|
||||||
|
buf.append(literal.charAt(i));
|
||||||
|
i++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -6,7 +6,7 @@ import org.antlr.v4.Tool;
|
||||||
import org.antlr.v4.automata.DFA;
|
import org.antlr.v4.automata.DFA;
|
||||||
import org.antlr.v4.automata.Label;
|
import org.antlr.v4.automata.Label;
|
||||||
import org.antlr.v4.automata.NFA;
|
import org.antlr.v4.automata.NFA;
|
||||||
import org.antlr.v4.codegen.Target;
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.misc.IntSet;
|
import org.antlr.v4.misc.IntSet;
|
||||||
import org.antlr.v4.misc.IntervalSet;
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
import org.antlr.v4.misc.Utils;
|
import org.antlr.v4.misc.Utils;
|
||||||
|
@ -359,7 +359,7 @@ public class Grammar implements AttributeResolver {
|
||||||
if ( isLexer() &&
|
if ( isLexer() &&
|
||||||
ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
|
ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
|
||||||
{
|
{
|
||||||
return Target.getANTLRCharLiteralForChar(ttype);
|
return CharSupport.getANTLRCharLiteralForChar(ttype);
|
||||||
}
|
}
|
||||||
// faux label?
|
// faux label?
|
||||||
else if ( ttype<0 ) {
|
else if ( ttype<0 ) {
|
||||||
|
|
Loading…
Reference in New Issue