forked from jasder/antlr
template names = class names; reorg'd char stuff; got bitsets defined.
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6850]
This commit is contained in:
parent
cab4d7d197
commit
3e1f43117e
|
@ -1,40 +1,45 @@
|
|||
// args must be <object-model-object>, <fields-resulting-in-STs>
|
||||
parserFile(f, parser) ::= <<
|
||||
ParserFile(f, parser, dfaDefs, bitSetDefs) ::= <<
|
||||
// $ANTLR ANTLRVersion> <f.fileName> generatedTimestamp>
|
||||
import org.antlr.runtime.*;
|
||||
|
||||
<parser>
|
||||
>>
|
||||
|
||||
parser(p,funcs,dfaDefs) ::= <<
|
||||
Parser(p, funcs) ::= <<
|
||||
public class <p.name> {
|
||||
<funcs>
|
||||
<dfaDefs>
|
||||
<funcs; separator="\n">
|
||||
<dfaDefs; separator="\n">
|
||||
<bitSetDefs; separator="\n">
|
||||
}
|
||||
>>
|
||||
|
||||
DFA(dfa) ::= <<
|
||||
DFADef(dfa) ::= <<
|
||||
// define <dfa.name>
|
||||
>>
|
||||
|
||||
parserFunction(f,code) ::= <<
|
||||
BitSetDef(b) ::= <<
|
||||
// define <b.name>
|
||||
>>
|
||||
|
||||
RuleFunction(f,code) ::= <<
|
||||
<f.modifiers:{f | <f> }>void <f.name>(<f.args>) {
|
||||
<code>
|
||||
}
|
||||
>>
|
||||
|
||||
codeBlock(c, ops) ::= <<
|
||||
<ops>
|
||||
CodeBlock(c, ops) ::= <<
|
||||
<ops; separator="\n">
|
||||
>>
|
||||
|
||||
switch(c, alts) ::= <<
|
||||
LL1Choice(c, alts) ::= <<
|
||||
switch ( input.LA(1) ) {
|
||||
<alts>
|
||||
<alts; separator="\n">
|
||||
}
|
||||
>>
|
||||
|
||||
matchToken(m) ::= <<
|
||||
match(<m.name>);
|
||||
MatchToken(m) ::= <<
|
||||
match(<m.name>, <m.follow.name>);
|
||||
>>
|
||||
|
||||
codeFileExtension() ::= ".java"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.codegen.Target;
|
||||
import org.antlr.v4.misc.CharSupport;
|
||||
import org.antlr.v4.tool.GrammarAST;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
import org.antlr.v4.tool.Rule;
|
||||
|
@ -45,8 +45,8 @@ public class LexerNFAFactory extends ParserNFAFactory {
|
|||
public Handle range(GrammarAST a, GrammarAST b) {
|
||||
BasicState left = newState(a);
|
||||
BasicState right = newState(b);
|
||||
int t1 = Target.getCharValueFromGrammarCharLiteral(a.getText());
|
||||
int t2 = Target.getCharValueFromGrammarCharLiteral(b.getText());
|
||||
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
|
||||
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
|
||||
left.transition = new RangeTransition(t1, t2, right);
|
||||
a.nfaState = left;
|
||||
b.nfaState = left;
|
||||
|
|
|
@ -3,7 +3,7 @@ package org.antlr.v4.automata;
|
|||
|
||||
import org.antlr.runtime.RecognitionException;
|
||||
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
||||
import org.antlr.v4.codegen.Target;
|
||||
import org.antlr.v4.misc.CharSupport;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||
|
@ -103,7 +103,7 @@ public class ParserNFAFactory implements NFAFactory {
|
|||
GrammarAST ast = A.left.ast;
|
||||
int ttype = 0;
|
||||
if ( g.isLexer() ) {
|
||||
ttype = Target.getCharValueFromGrammarCharLiteral(ast.getText());
|
||||
ttype = CharSupport.getCharValueFromGrammarCharLiteral(ast.getText());
|
||||
}
|
||||
else {
|
||||
ttype = g.getTokenType(ast.getText());
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.codegen.Target;
|
||||
import org.antlr.v4.misc.CharSupport;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
|
||||
public class RangeTransition extends Transition {
|
||||
|
@ -21,7 +21,7 @@ public class RangeTransition extends Transition {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Target.getANTLRCharLiteralForChar(from)+".."+
|
||||
Target.getANTLRCharLiteralForChar(to);
|
||||
return CharSupport.getANTLRCharLiteralForChar(from)+".."+
|
||||
CharSupport.getANTLRCharLiteralForChar(to);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
package org.antlr.v4.codegen;
|
||||
|
||||
import org.antlr.v4.codegen.src.BitSetDef;
|
||||
import org.antlr.v4.codegen.src.OutputModelObject;
|
||||
import org.antlr.v4.codegen.src.ParserFile;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.tool.ErrorType;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
import org.antlr.v4.tool.GrammarAST;
|
||||
import org.stringtemplate.v4.*;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -19,6 +24,7 @@ public abstract class CodeGenerator {
|
|||
public Grammar g;
|
||||
public Target target;
|
||||
public STGroup templates;
|
||||
public ParserFile outputModel;
|
||||
|
||||
public int lineWidth = 72;
|
||||
|
||||
|
@ -72,8 +78,7 @@ public abstract class CodeGenerator {
|
|||
public void write() {
|
||||
OutputModelObject root = buildOutputModel();
|
||||
|
||||
OutputModelWalker walker = new OutputModelWalker(g.tool, templates,
|
||||
ParserGenerator.modelToTemplateMap);
|
||||
OutputModelWalker walker = new OutputModelWalker(g.tool, templates);
|
||||
ST outputFileST = walker.walk(root);
|
||||
|
||||
// WRITE FILES
|
||||
|
@ -130,4 +135,16 @@ public abstract class CodeGenerator {
|
|||
// }
|
||||
return g.name+VOCAB_FILE_EXTENSION;
|
||||
}
|
||||
|
||||
public BitSetDef defineBitSet(GrammarAST ast, IntSet follow) {
|
||||
String inRuleName = ast.nfaState.rule.name;
|
||||
String elementName = ast.getText(); // assume rule ref
|
||||
if ( ast.getType() == ANTLRParser.TOKEN_REF ) {
|
||||
target.getTokenTypeAsTargetLabel(g, ast.getType() );
|
||||
}
|
||||
String name = "FOLLOW_"+elementName+"_in_"+inRuleName+ast.token.getTokenIndex();
|
||||
BitSetDef b = new BitSetDef(this, name, follow);
|
||||
outputModel.bitSetDefs.add(b);
|
||||
return b;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import org.antlr.runtime.Token;
|
|||
import org.antlr.runtime.tree.CommonTreeNodeStream;
|
||||
import org.antlr.runtime.tree.TreeNodeStream;
|
||||
import org.antlr.v4.codegen.nfa.*;
|
||||
import org.antlr.v4.misc.CharSupport;
|
||||
import org.antlr.v4.misc.DoubleKeyMap;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||
|
@ -89,7 +90,7 @@ public class NFABytecodeGenerator extends TreeParser {
|
|||
}
|
||||
|
||||
public void emitString(Token t) {
|
||||
String chars = Target.getStringFromGrammarStringLiteral(t.getText());
|
||||
String chars = CharSupport.getStringFromGrammarStringLiteral(t.getText());
|
||||
for (char c : chars.toCharArray()) {
|
||||
emit(new MatchInstr(t, c));
|
||||
}
|
||||
|
|
|
@ -15,20 +15,19 @@ import java.util.*;
|
|||
public class OutputModelWalker {
|
||||
Tool tool;
|
||||
STGroup templates;
|
||||
Map<Class, String> modelToTemplateMap;
|
||||
//Map<Class, String> modelToTemplateMap;
|
||||
|
||||
public OutputModelWalker(Tool tool,
|
||||
STGroup templates,
|
||||
Map<Class, String> modelToTemplateMap)
|
||||
STGroup templates)
|
||||
{
|
||||
this.tool = tool;
|
||||
this.templates = templates;
|
||||
this.modelToTemplateMap = modelToTemplateMap;
|
||||
//this.modelToTemplateMap = modelToTemplateMap;
|
||||
}
|
||||
|
||||
public ST walk(OutputModelObject omo) {
|
||||
// CREATE TEMPLATE FOR THIS OUTPUT OBJECT
|
||||
String templateName = modelToTemplateMap.get(omo.getClass());
|
||||
String templateName = omo.getClass().getSimpleName();
|
||||
if ( templateName == null ) {
|
||||
tool.errMgr.toolError(ErrorType.NO_MODEL_TO_TEMPLATE_MAPPING, omo.getClass().getSimpleName());
|
||||
return new BlankST();
|
||||
|
|
|
@ -1,30 +1,31 @@
|
|||
package org.antlr.v4.codegen;
|
||||
|
||||
import org.antlr.v4.codegen.src.*;
|
||||
import org.antlr.v4.codegen.src.OutputModelObject;
|
||||
import org.antlr.v4.codegen.src.Parser;
|
||||
import org.antlr.v4.codegen.src.ParserFile;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/** */
|
||||
public class ParserGenerator extends CodeGenerator {
|
||||
public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>() {{
|
||||
put(ParserFile.class, "parserFile");
|
||||
put(Parser.class, "parser");
|
||||
put(RuleFunction.class, "parserFunction");
|
||||
put(DFADef.class, "DFA");
|
||||
put(CodeBlock.class, "codeBlock");
|
||||
put(LL1Choice.class, "switch");
|
||||
put(MatchToken.class, "matchToken");
|
||||
}};
|
||||
// public static final Map<Class, String> modelToTemplateMap = new HashMap<Class, String>() {{
|
||||
// put(ParserFile.class, "parserFile");
|
||||
// put(Parser.class, "parser");
|
||||
// put(RuleFunction.class, "parserFunction");
|
||||
// put(DFADef.class, "DFA");
|
||||
// put(CodeBlock.class, "codeBlock");
|
||||
// put(LL1Choice.class, "switch");
|
||||
// put(MatchToken.class, "matchToken");
|
||||
// }};
|
||||
|
||||
public ParserGenerator(Grammar g) {
|
||||
super(g);
|
||||
}
|
||||
|
||||
public OutputModelObject buildOutputModel() {
|
||||
Parser p = new Parser(this);
|
||||
return new ParserFile(this, p, getRecognizerFileName());
|
||||
ParserFile pf = new ParserFile(this, getRecognizerFileName());
|
||||
outputModel = pf;
|
||||
pf.parser = new Parser(this, pf); // side-effect: fills pf dfa and bitset defs
|
||||
// at this point, model is built
|
||||
return outputModel;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.antlr.v4.codegen;
|
||||
|
||||
import org.antlr.v4.automata.Label;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
import org.stringtemplate.v4.ST;
|
||||
|
||||
|
@ -8,31 +9,28 @@ import java.io.IOException;
|
|||
|
||||
/** */
|
||||
public class Target {
|
||||
/** When converting ANTLR char and string literals, here is the
|
||||
* value set of escape chars.
|
||||
/** For pure strings of Java 16-bit unicode char, how can we display
|
||||
* it in the target language as a literal. Useful for dumping
|
||||
* predicates and such that may refer to chars that need to be escaped
|
||||
* when represented as strings. Also, templates need to be escaped so
|
||||
* that the target language can hold them as a string.
|
||||
*
|
||||
* I have defined (via the constructor) the set of typical escapes,
|
||||
* but your Target subclass is free to alter the translated chars or
|
||||
* add more definitions. This is nonstatic so each target can have
|
||||
* a different set in memory at same time.
|
||||
*/
|
||||
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
|
||||
protected String[] targetCharValueEscape = new String[255];
|
||||
|
||||
/** Given a char, we need to be able to show as an ANTLR literal.
|
||||
*/
|
||||
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
||||
|
||||
static {
|
||||
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
||||
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
||||
ANTLRLiteralEscapedCharValue['t'] = '\t';
|
||||
ANTLRLiteralEscapedCharValue['b'] = '\b';
|
||||
ANTLRLiteralEscapedCharValue['f'] = '\f';
|
||||
ANTLRLiteralEscapedCharValue['\\'] = '\\';
|
||||
ANTLRLiteralEscapedCharValue['\''] = '\'';
|
||||
ANTLRLiteralEscapedCharValue['"'] = '"';
|
||||
ANTLRLiteralCharValueEscape['\n'] = "\\n";
|
||||
ANTLRLiteralCharValueEscape['\r'] = "\\r";
|
||||
ANTLRLiteralCharValueEscape['\t'] = "\\t";
|
||||
ANTLRLiteralCharValueEscape['\b'] = "\\b";
|
||||
ANTLRLiteralCharValueEscape['\f'] = "\\f";
|
||||
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
|
||||
ANTLRLiteralCharValueEscape['\''] = "\\'";
|
||||
public Target() {
|
||||
targetCharValueEscape['\n'] = "\\n";
|
||||
targetCharValueEscape['\r'] = "\\r";
|
||||
targetCharValueEscape['\t'] = "\\t";
|
||||
targetCharValueEscape['\b'] = "\\b";
|
||||
targetCharValueEscape['\f'] = "\\f";
|
||||
targetCharValueEscape['\\'] = "\\\\";
|
||||
targetCharValueEscape['\''] = "\\'";
|
||||
targetCharValueEscape['"'] = "\\\"";
|
||||
}
|
||||
|
||||
protected void genRecognizerFile(CodeGenerator generator,
|
||||
|
@ -53,95 +51,65 @@ public class Target {
|
|||
// no header file by default
|
||||
}
|
||||
|
||||
/** Given a literal like (the 3 char sequence with single quotes) 'a',
|
||||
* return the int value of 'a'. Convert escape sequences here also.
|
||||
/** Get a meaningful name for a token type useful during code generation.
|
||||
* Literals without associated names are converted to the string equivalent
|
||||
* of their integer values. Used to generate x==ID and x==34 type comparisons
|
||||
* etc... Essentially we are looking for the most obvious way to refer
|
||||
* to a token type in the generated code. If in the lexer, return the
|
||||
* char literal translated to the target language. For example, ttype=10
|
||||
* will yield '\n' from the getTokenDisplayName method. That must
|
||||
* be converted to the target languages literals. For most C-derived
|
||||
* languages no translation is needed.
|
||||
*/
|
||||
public static int getCharValueFromGrammarCharLiteral(String literal) {
|
||||
switch ( literal.length() ) {
|
||||
case 3 :
|
||||
// 'x'
|
||||
return literal.charAt(1); // no escape char
|
||||
case 4 :
|
||||
// '\x' (antlr lexer will catch invalid char)
|
||||
if ( Character.isDigit(literal.charAt(2)) ) {
|
||||
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
||||
// "invalid char literal: "+literal);
|
||||
return -1;
|
||||
public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
|
||||
if ( g.getType() == ANTLRParser.LEXER ) {
|
||||
// String name = g.getTokenDisplayName(ttype);
|
||||
// return getTargetCharLiteralFromANTLRCharLiteral(this,name);
|
||||
}
|
||||
int escChar = literal.charAt(2);
|
||||
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
||||
if ( charVal==0 ) {
|
||||
// Unnecessary escapes like '\{' should just yield {
|
||||
return escChar;
|
||||
}
|
||||
return charVal;
|
||||
case 8 :
|
||||
// '\u1234'
|
||||
String unicodeChars = literal.substring(3,literal.length()-1);
|
||||
return Integer.parseInt(unicodeChars, 16);
|
||||
default :
|
||||
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
||||
// "invalid char literal: "+literal);
|
||||
return -1;
|
||||
String name = g.getTokenDisplayName(ttype);
|
||||
// If name is a literal, return the token type instead
|
||||
if ( name.charAt(0)=='\'' ) {
|
||||
return String.valueOf(ttype);
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
public static String getStringFromGrammarStringLiteral(String literal) {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
int n = literal.length();
|
||||
int i = 1; // skip first quote
|
||||
while ( i < (n-1) ) { // scan all but last quote
|
||||
switch ( literal.charAt(i) ) {
|
||||
case '\\' :
|
||||
i++;
|
||||
if ( literal.charAt(i)=='u' ) { // '\u1234'
|
||||
i++;
|
||||
String unicodeChars = literal.substring(3,literal.length()-1);
|
||||
buf.append((char)Integer.parseInt(unicodeChars, 16));
|
||||
/** Convert from an ANTLR char literal found in a grammar file to
|
||||
* an equivalent char literal in the target language. For most
|
||||
* languages, this means leaving 'x' as 'x'. Actually, we need
|
||||
* to escape '\u000A' so that it doesn't get converted to \n by
|
||||
* the compiler. Convert the literal to the char value and then
|
||||
* to an appropriate target char literal.
|
||||
*
|
||||
* Expect single quotes around the incoming literal.
|
||||
*/
|
||||
public String getTargetCharLiteralCharValue(int c) {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
buf.append('\'');
|
||||
if ( c<Label.MIN_CHAR_VALUE ) return "'\u0000'";
|
||||
if ( c<targetCharValueEscape.length &&
|
||||
targetCharValueEscape[c]!=null )
|
||||
{
|
||||
buf.append(targetCharValueEscape[c]);
|
||||
}
|
||||
else if ( Character.UnicodeBlock.of((char)c)==
|
||||
Character.UnicodeBlock.BASIC_LATIN &&
|
||||
!Character.isISOControl((char)c) )
|
||||
{
|
||||
// normal char
|
||||
buf.append((char)c);
|
||||
}
|
||||
else {
|
||||
char escChar = literal.charAt(i);
|
||||
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
||||
if ( charVal==0 ) buf.append(escChar); // Unnecessary escapes like '\{' should just yield {
|
||||
else buf.append((char)charVal);
|
||||
}
|
||||
break;
|
||||
default :
|
||||
buf.append(literal.charAt(i));
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
// must be something unprintable...use \\uXXXX
|
||||
// turn on the bit above max "\\uFFFF" value so that we pad with zeros
|
||||
// then only take last 4 digits
|
||||
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
|
||||
buf.append("\\u");
|
||||
buf.append(hex);
|
||||
}
|
||||
|
||||
buf.append('\'');
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
/** Return a string representing the escaped char for code c. E.g., If c
|
||||
* has value 0x100, you will get "\u0100". ASCII gets the usual
|
||||
* char (non-hex) representation. Control characters are spit out
|
||||
* as unicode. While this is specially set up for returning Java strings,
|
||||
* it can be used by any language target that has the same syntax. :)
|
||||
*/
|
||||
public static String getANTLRCharLiteralForChar(int c) {
|
||||
if ( c< Label.MIN_CHAR_VALUE ) {
|
||||
return "'<INVALID>'";
|
||||
}
|
||||
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
|
||||
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
|
||||
}
|
||||
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
|
||||
!Character.isISOControl((char)c) ) {
|
||||
if ( c=='\\' ) {
|
||||
return "'\\\\'";
|
||||
}
|
||||
if ( c=='\'') {
|
||||
return "'\\''";
|
||||
}
|
||||
return '\''+Character.toString((char)c)+'\'';
|
||||
}
|
||||
// turn on the bit above max "\uFFFF" value so that we pad with zeros
|
||||
// then only take last 4 digits
|
||||
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
|
||||
String unicodeStr = "'\\u"+hex+"'";
|
||||
return unicodeStr;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
|
|||
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.codegen.NFABytecodeGenerator;
|
||||
import org.antlr.v4.codegen.Target;
|
||||
import org.antlr.v4.misc.CharSupport;
|
||||
import org.antlr.v4.runtime.nfa.Bytecode;
|
||||
|
||||
/** */
|
||||
|
@ -12,8 +12,8 @@ public class RangeInstr extends Instr {
|
|||
public RangeInstr(Token start, Token stop) {
|
||||
this.start = start;
|
||||
this.stop = stop;
|
||||
a = (char) Target.getCharValueFromGrammarCharLiteral(start.getText());
|
||||
b = (char)Target.getCharValueFromGrammarCharLiteral(stop.getText());
|
||||
a = (char)CharSupport.getCharValueFromGrammarCharLiteral(start.getText());
|
||||
b = (char)CharSupport.getCharValueFromGrammarCharLiteral(stop.getText());
|
||||
}
|
||||
public short opcode() { return charSize(a, b)==1? Bytecode.RANGE8:Bytecode.RANGE16; };
|
||||
public int nBytes() { return 1+2*charSize(a, b); }
|
||||
|
|
|
@ -1,9 +1,15 @@
|
|||
package org.antlr.v4.codegen.src;
|
||||
|
||||
import org.antlr.v4.codegen.CodeGenerator;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
|
||||
/** */
|
||||
public class BitSetDef extends OutputModelObject {
|
||||
String name;
|
||||
IntSet[] set;
|
||||
public String name;
|
||||
public IntSet fset;
|
||||
public BitSetDef(CodeGenerator gen, String name, IntSet fset) {
|
||||
this.gen = gen;
|
||||
this.name = name;
|
||||
this.fset = fset;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,15 +8,15 @@ import org.antlr.v4.tool.TerminalAST;
|
|||
/** */
|
||||
public class MatchToken extends SrcOp {
|
||||
public String name;
|
||||
public String bitSetName;
|
||||
public BitSetDef follow;
|
||||
|
||||
public MatchToken(CodeGenerator gen, TerminalAST ast) {
|
||||
this.gen = gen;
|
||||
name = ast.getText();
|
||||
|
||||
LinearApproximator approx = new LinearApproximator(gen.g, -1);
|
||||
IntervalSet follow = approx.LOOK(ast.nfaState.transition(0).target);
|
||||
IntervalSet fset = approx.LOOK(ast.nfaState.transition(0).target);
|
||||
System.out.println("follow="+follow);
|
||||
//bitSetName = gen.defineBitSet(follow);
|
||||
follow = gen.defineBitSet(ast, fset);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
package org.antlr.v4.codegen.src;
|
||||
|
||||
import org.antlr.v4.automata.DFA;
|
||||
import org.antlr.v4.codegen.CodeGenerator;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.tool.Rule;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -12,22 +10,23 @@ import java.util.List;
|
|||
public class Parser extends OutputModelObject {
|
||||
public String name;
|
||||
public List<RuleFunction> funcs = new ArrayList<RuleFunction>();
|
||||
public List<DFADef> dfaDefs = new ArrayList<DFADef>();
|
||||
public List<IntSet> bitsetDefs;
|
||||
ParserFile file;
|
||||
|
||||
public Parser(CodeGenerator gen) {
|
||||
public Parser(CodeGenerator gen, ParserFile file) {
|
||||
this.gen = gen;
|
||||
this.file = file; // who contains us?
|
||||
name = gen.g.getRecognizerName();
|
||||
for (Rule r : gen.g.rules.values()) funcs.add( new RuleFunction(gen, r) );
|
||||
|
||||
// build DFA, bitset defs
|
||||
for (DFA dfa : gen.g.decisionDFAs.values()) {
|
||||
dfaDefs.add( new DFADef("DFA"+dfa.decision, dfa) );
|
||||
}
|
||||
// We create dfa and bitsets during rule function construction.
|
||||
// They get stored in code gen for convenience as we walk rule block tree
|
||||
// for (DFA dfa : gen.g.decisionDFAs.values()) {
|
||||
// file.dfaDefs.add( new DFADef("DFA"+dfa.decision, dfa) );
|
||||
// }
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getChildren() {
|
||||
return new ArrayList<String>() {{ add("funcs"); add("dfaDefs"); }};
|
||||
return new ArrayList<String>() {{ add("funcs"); }};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,15 +9,20 @@ import java.util.List;
|
|||
public class ParserFile extends OutputModelObject {
|
||||
public String fileName;
|
||||
public Parser parser;
|
||||
public List<DFADef> dfaDefs = new ArrayList<DFADef>();
|
||||
public List<BitSetDef> bitSetDefs = new ArrayList<BitSetDef>();
|
||||
|
||||
public ParserFile(CodeGenerator gen, Parser p, String fileName) {
|
||||
public ParserFile(CodeGenerator gen, String fileName) {
|
||||
this.gen = gen;
|
||||
parser = p;
|
||||
this.fileName = fileName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getChildren() {
|
||||
return new ArrayList<String>() {{ add("parser"); }};
|
||||
return new ArrayList<String>() {{
|
||||
add("parser");
|
||||
add("dfaDefs");
|
||||
add("bitSetDefs");
|
||||
}};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,126 @@
|
|||
package org.antlr.v4.misc;
|
||||
|
||||
import org.antlr.v4.automata.Label;
|
||||
|
||||
/** */
|
||||
public class CharSupport {
|
||||
/** When converting ANTLR char and string literals, here is the
|
||||
* value set of escape chars.
|
||||
*/
|
||||
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
|
||||
|
||||
/** Given a char, we need to be able to show as an ANTLR literal.
|
||||
*/
|
||||
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
||||
|
||||
static {
|
||||
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
||||
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
||||
ANTLRLiteralEscapedCharValue['t'] = '\t';
|
||||
ANTLRLiteralEscapedCharValue['b'] = '\b';
|
||||
ANTLRLiteralEscapedCharValue['f'] = '\f';
|
||||
ANTLRLiteralEscapedCharValue['\\'] = '\\';
|
||||
ANTLRLiteralEscapedCharValue['\''] = '\'';
|
||||
ANTLRLiteralEscapedCharValue['"'] = '"';
|
||||
ANTLRLiteralCharValueEscape['\n'] = "\\n";
|
||||
ANTLRLiteralCharValueEscape['\r'] = "\\r";
|
||||
ANTLRLiteralCharValueEscape['\t'] = "\\t";
|
||||
ANTLRLiteralCharValueEscape['\b'] = "\\b";
|
||||
ANTLRLiteralCharValueEscape['\f'] = "\\f";
|
||||
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
|
||||
ANTLRLiteralCharValueEscape['\''] = "\\'";
|
||||
}
|
||||
|
||||
/** Return a string representing the escaped char for code c. E.g., If c
|
||||
* has value 0x100, you will get "\u0100". ASCII gets the usual
|
||||
* char (non-hex) representation. Control characters are spit out
|
||||
* as unicode. While this is specially set up for returning Java strings,
|
||||
* it can be used by any language target that has the same syntax. :)
|
||||
*/
|
||||
public static String getANTLRCharLiteralForChar(int c) {
|
||||
if ( c< Label.MIN_CHAR_VALUE ) {
|
||||
return "'<INVALID>'";
|
||||
}
|
||||
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
|
||||
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
|
||||
}
|
||||
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
|
||||
!Character.isISOControl((char)c) ) {
|
||||
if ( c=='\\' ) {
|
||||
return "'\\\\'";
|
||||
}
|
||||
if ( c=='\'') {
|
||||
return "'\\''";
|
||||
}
|
||||
return '\''+Character.toString((char)c)+'\'';
|
||||
}
|
||||
// turn on the bit above max "\uFFFF" value so that we pad with zeros
|
||||
// then only take last 4 digits
|
||||
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
|
||||
String unicodeStr = "'\\u"+hex+"'";
|
||||
return unicodeStr;
|
||||
}
|
||||
|
||||
/** Given a literal like (the 3 char sequence with single quotes) 'a',
|
||||
* return the int value of 'a'. Convert escape sequences here also.
|
||||
*/
|
||||
public static int getCharValueFromGrammarCharLiteral(String literal) {
|
||||
switch ( literal.length() ) {
|
||||
case 3 :
|
||||
// 'x'
|
||||
return literal.charAt(1); // no escape char
|
||||
case 4 :
|
||||
// '\x' (antlr lexer will catch invalid char)
|
||||
if ( Character.isDigit(literal.charAt(2)) ) {
|
||||
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
||||
// "invalid char literal: "+literal);
|
||||
return -1;
|
||||
}
|
||||
int escChar = literal.charAt(2);
|
||||
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
||||
if ( charVal==0 ) {
|
||||
// Unnecessary escapes like '\{' should just yield {
|
||||
return escChar;
|
||||
}
|
||||
return charVal;
|
||||
case 8 :
|
||||
// '\u1234'
|
||||
String unicodeChars = literal.substring(3,literal.length()-1);
|
||||
return Integer.parseInt(unicodeChars, 16);
|
||||
default :
|
||||
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
||||
// "invalid char literal: "+literal);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
public static String getStringFromGrammarStringLiteral(String literal) {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
int n = literal.length();
|
||||
int i = 1; // skip first quote
|
||||
while ( i < (n-1) ) { // scan all but last quote
|
||||
switch ( literal.charAt(i) ) {
|
||||
case '\\' :
|
||||
i++;
|
||||
if ( literal.charAt(i)=='u' ) { // '\u1234'
|
||||
i++;
|
||||
String unicodeChars = literal.substring(3,literal.length()-1);
|
||||
buf.append((char)Integer.parseInt(unicodeChars, 16));
|
||||
}
|
||||
else {
|
||||
char escChar = literal.charAt(i);
|
||||
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
||||
if ( charVal==0 ) buf.append(escChar); // Unnecessary escapes like '\{' should just yield {
|
||||
else buf.append((char)charVal);
|
||||
}
|
||||
break;
|
||||
default :
|
||||
buf.append(literal.charAt(i));
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
}
|
|
@ -6,7 +6,7 @@ import org.antlr.v4.Tool;
|
|||
import org.antlr.v4.automata.DFA;
|
||||
import org.antlr.v4.automata.Label;
|
||||
import org.antlr.v4.automata.NFA;
|
||||
import org.antlr.v4.codegen.Target;
|
||||
import org.antlr.v4.misc.CharSupport;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
import org.antlr.v4.misc.Utils;
|
||||
|
@ -359,7 +359,7 @@ public class Grammar implements AttributeResolver {
|
|||
if ( isLexer() &&
|
||||
ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
|
||||
{
|
||||
return Target.getANTLRCharLiteralForChar(ttype);
|
||||
return CharSupport.getANTLRCharLiteralForChar(ttype);
|
||||
}
|
||||
// faux label?
|
||||
else if ( ttype<0 ) {
|
||||
|
|
Loading…
Reference in New Issue