forked from jasder/antlr
Return predefined values instead of null for Grammar methods getTokenNames, getTokenDisplayNames, and getRuleNames
Improve consistency of Grammar methods for names of rules and tokens
This commit is contained in:
parent
747fb0ba2e
commit
65b176a36d
|
@ -859,8 +859,7 @@ public class <lexer.name> extends <superClass> {
|
|||
};
|
||||
|
||||
public static final String[] tokenNames = {
|
||||
"\<INVALID>",
|
||||
<lexer.tokenNames:{t | <t>}; separator=", ", wrap, anchor>
|
||||
<lexer.tokenNames:{t | <t>}; null="\"\<INVALID>\"", separator=", ", wrap, anchor>
|
||||
};
|
||||
public static final String[] ruleNames = {
|
||||
<lexer.ruleNames:{r | "<r>"}; separator=", ", wrap, anchor>
|
||||
|
|
|
@ -106,7 +106,7 @@ public class ATNPrinter {
|
|||
buf.append("-").append(not?"~":"").append(st.toString()).append("->").append(getStateString(t.target)).append('\n');
|
||||
}
|
||||
else {
|
||||
buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenNames())).append("->").append(getStateString(t.target)).append('\n');
|
||||
buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenDisplayNames())).append("->").append(getStateString(t.target)).append('\n');
|
||||
}
|
||||
}
|
||||
else if ( t instanceof AtomTransition ) {
|
||||
|
|
|
@ -123,11 +123,12 @@ public abstract class Target {
|
|||
* to a token type in the generated code.
|
||||
*/
|
||||
public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
|
||||
String name = g.getTokenDisplayName(ttype);
|
||||
// If name is a literal, return the token type instead
|
||||
if ( name==null || name.charAt(0)=='\'' ) {
|
||||
String name = g.getTokenName(ttype);
|
||||
// If name is not valid, return the token type instead
|
||||
if ( Grammar.INVALID_TOKEN_NAME.equals(name) ) {
|
||||
return String.valueOf(ttype);
|
||||
}
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
|
|
|
@ -74,6 +74,25 @@ import java.util.Set;
|
|||
|
||||
public class Grammar implements AttributeResolver {
|
||||
public static final String GRAMMAR_FROM_STRING_NAME = "<string>";
|
||||
/**
|
||||
* This value is used in the following situations to indicate that a token
|
||||
* type does not have an associated name which can be directly referenced in
|
||||
* a grammar.
|
||||
*
|
||||
* <ul>
|
||||
* <li>This value is the name and display name for the token with type
|
||||
* {@link Token#INVALID_TYPE}.</li>
|
||||
* <li>This value is the name for tokens with a type not represented by a
|
||||
* named token. The display name for these tokens is simply the string
|
||||
* representation of the token type as an integer.</li>
|
||||
* </ul>
|
||||
*/
|
||||
public static final String INVALID_TOKEN_NAME = "<INVALID>";
|
||||
/**
|
||||
* This value is used as the name for elements in the array returned by
|
||||
* {@link #getRuleNames} for indexes not associated with a rule.
|
||||
*/
|
||||
public static final String INVALID_RULE_NAME = "<invalid>";
|
||||
|
||||
public static final Set<String> parserOptions = new HashSet<String>();
|
||||
static {
|
||||
|
@ -178,23 +197,30 @@ public class Grammar implements AttributeResolver {
|
|||
*/
|
||||
int maxTokenType = Token.MIN_USER_TOKEN_TYPE -1;
|
||||
|
||||
/** Map token like ID (but not literals like "while") to its token type */
|
||||
public Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>();
|
||||
|
||||
/** Map token literals like "while" to its token type. It may be that
|
||||
* WHILE="while"=35, in which case both tokenIDToTypeMap and this
|
||||
* field will have entries both mapped to 35.
|
||||
/**
|
||||
* Map a token name like {@code ID} (but not a literal like {@code 'while'}) to its
|
||||
* token type.
|
||||
*/
|
||||
public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
|
||||
public final Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>();
|
||||
|
||||
/** Reverse index for stringLiteralToTypeMap. Indexed with raw token type.
|
||||
* 0 is invalid. */
|
||||
public List<String> typeToStringLiteralList = new ArrayList<String>();
|
||||
|
||||
/** Map a token type to its token name. Indexed with raw token type.
|
||||
* 0 is invalid.
|
||||
/**
|
||||
* Map token literals like {@code 'while'} to their token types. It may be that
|
||||
* {@code WHILE="while"=35}, in which case both {@link #tokenNameToTypeMap}
|
||||
* and this field will have entries both mapped to 35.
|
||||
*/
|
||||
public List<String> typeToTokenList = new ArrayList<String>();
|
||||
public final Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
|
||||
|
||||
/**
|
||||
* Reverse index for {@link #stringLiteralToTypeMap}. Indexed with raw token
|
||||
* type. 0 is invalid.
|
||||
*/
|
||||
public final List<String> typeToStringLiteralList = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* Map a token type to its token name. Indexed with raw token type. 0 is
|
||||
* invalid.
|
||||
*/
|
||||
public final List<String> typeToTokenList = new ArrayList<String>();
|
||||
|
||||
/** Map a name to an action.
|
||||
* The code generator will use this to fill holes in the output files.
|
||||
|
@ -565,37 +591,78 @@ public class Grammar implements AttributeResolver {
|
|||
* char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
|
||||
*/
|
||||
public String getTokenDisplayName(int ttype) {
|
||||
String tokenName;
|
||||
// inside any target's char range and is lexer grammar?
|
||||
if ( isLexer() &&
|
||||
ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE )
|
||||
{
|
||||
return CharSupport.getANTLRCharLiteralForChar(ttype);
|
||||
}
|
||||
else if ( ttype==Token.EOF ) {
|
||||
tokenName = "EOF";
|
||||
|
||||
if ( ttype==Token.EOF ) {
|
||||
return "EOF";
|
||||
}
|
||||
else {
|
||||
if ( ttype>0 && ttype<typeToTokenList.size() ) {
|
||||
tokenName = typeToTokenList.get(ttype);
|
||||
if ( tokenName!=null &&
|
||||
tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
|
||||
ttype < typeToStringLiteralList.size() &&
|
||||
typeToStringLiteralList.get(ttype)!=null)
|
||||
{
|
||||
tokenName = typeToStringLiteralList.get(ttype);
|
||||
}
|
||||
}
|
||||
else {
|
||||
tokenName = String.valueOf(ttype);
|
||||
}
|
||||
|
||||
if ( ttype==Token.INVALID_TYPE ) {
|
||||
return INVALID_TOKEN_NAME;
|
||||
}
|
||||
// tool.log("grammar", "getTokenDisplayName ttype="+ttype+", name="+tokenName);
|
||||
return tokenName;
|
||||
|
||||
if (ttype >= 0 && ttype < typeToStringLiteralList.size() && typeToStringLiteralList.get(ttype) != null) {
|
||||
return typeToStringLiteralList.get(ttype);
|
||||
}
|
||||
|
||||
if (ttype >= 0 && ttype < typeToTokenList.size() && typeToTokenList.get(ttype) != null) {
|
||||
return typeToTokenList.get(ttype);
|
||||
}
|
||||
|
||||
return String.valueOf(ttype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name by which a token can be referenced in the generated code.
|
||||
* For tokens defined in a {@code tokens{}} block or via a lexer rule, this
|
||||
* is the declared name of the token. For token types generated by the use
|
||||
* of a string literal within a parser rule of a combined grammar, this is
|
||||
* the automatically generated token name, which includes the
|
||||
* {@link #AUTO_GENERATED_TOKEN_NAME_PREFIX} prefix. For types which are not
|
||||
* associated with a defined token, this method returns
|
||||
* {@link #INVALID_TOKEN_NAME}.
|
||||
*
|
||||
* @param ttype The token type.
|
||||
* @return The name of the token with the specified type.
|
||||
*/
|
||||
@NotNull
|
||||
public String getTokenName(int ttype) {
|
||||
// inside any target's char range and is lexer grammar?
|
||||
if ( isLexer() &&
|
||||
ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE )
|
||||
{
|
||||
return CharSupport.getANTLRCharLiteralForChar(ttype);
|
||||
}
|
||||
|
||||
if ( ttype==Token.EOF ) {
|
||||
return "EOF";
|
||||
}
|
||||
|
||||
if (ttype >= 0 && ttype < typeToTokenList.size() && typeToTokenList.get(ttype) != null) {
|
||||
return typeToTokenList.get(ttype);
|
||||
}
|
||||
|
||||
return INVALID_TOKEN_NAME;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an array of rule names for rules defined or imported by the
|
||||
* grammar. The array index is the rule index, and the value is the name of
|
||||
* the rule with the corresponding {@link Rule#index}.
|
||||
*
|
||||
* <p>If no rule is defined with an index for an element of the resulting
|
||||
* array, the value of that element is {@link #INVALID_RULE_NAME}.</p>
|
||||
*
|
||||
* @return The names of all rules defined in the grammar.
|
||||
*/
|
||||
public String[] getRuleNames() {
|
||||
String[] result = new String[rules.size()];
|
||||
Arrays.fill(result, INVALID_RULE_NAME);
|
||||
for (Rule rule : rules.values()) {
|
||||
result[rule.index] = rule.name;
|
||||
}
|
||||
|
@ -603,39 +670,39 @@ public class Grammar implements AttributeResolver {
|
|||
return result;
|
||||
}
|
||||
|
||||
public List<String> getTokenDisplayNames(IntegerList types) {
|
||||
List<String> names = new ArrayList<String>();
|
||||
for (int t : types.toArray()) names.add(getTokenDisplayName(t));
|
||||
return names;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an array of token names for tokens defined or imported by the
|
||||
* grammar. The array index is the token type, and the value is the result
|
||||
* of {@link #getTokenName} for the corresponding token type.
|
||||
*
|
||||
* @see #getTokenName
|
||||
* @return The token names of all tokens defined in the grammar.
|
||||
*/
|
||||
public String[] getTokenNames() {
|
||||
int numTokens = getMaxTokenType();
|
||||
String[] tokenNames = new String[numTokens+1];
|
||||
for (String tokenName : tokenNameToTypeMap.keySet()) {
|
||||
Integer ttype = tokenNameToTypeMap.get(tokenName);
|
||||
if ( tokenName!=null &&
|
||||
tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
|
||||
ttype < typeToStringLiteralList.size() )
|
||||
{
|
||||
tokenName = typeToStringLiteralList.get(ttype);
|
||||
}
|
||||
if ( ttype>0 ) tokenNames[ttype] = tokenName;
|
||||
for (int i = 0; i < tokenNames.length; i++) {
|
||||
tokenNames[i] = getTokenName(i);
|
||||
}
|
||||
|
||||
return tokenNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an array of display names for tokens defined or imported by the
|
||||
* grammar. The array index is the token type, and the value is the result
|
||||
* of {@link #getTokenDisplayName} for the corresponding token type.
|
||||
*
|
||||
* @see #getTokenDisplayName
|
||||
* @return The display names of all tokens defined in the grammar.
|
||||
*/
|
||||
public String[] getTokenDisplayNames() {
|
||||
int numTokens = getMaxTokenType();
|
||||
String[] tokenNames = new String[numTokens+1];
|
||||
for (String t : tokenNameToTypeMap.keySet()) {
|
||||
Integer ttype = tokenNameToTypeMap.get(t);
|
||||
if ( ttype>0 ) tokenNames[ttype] = t;
|
||||
}
|
||||
for (String t : stringLiteralToTypeMap.keySet()) {
|
||||
Integer ttype = stringLiteralToTypeMap.get(t);
|
||||
if ( ttype>0 ) tokenNames[ttype] = t;
|
||||
for (int i = 0; i < tokenNames.length; i++) {
|
||||
tokenNames[i] = getTokenDisplayName(i);
|
||||
}
|
||||
|
||||
return tokenNames;
|
||||
}
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ import org.antlr.v4.tool.LexerGrammar;
|
|||
import org.junit.Test;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Set;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
|
@ -110,10 +110,10 @@ public class TestTokenTypeAssignment extends BaseTest {
|
|||
foundLiterals = g.implicitLexer.stringLiteralToTypeMap.keySet().toString();
|
||||
assertEquals("['x']", foundLiterals); // pushed in lexer from parser
|
||||
|
||||
String[] typeToTokenName = g.getTokenNames();
|
||||
Set<String> tokens = new HashSet<String>();
|
||||
String[] typeToTokenName = g.getTokenDisplayNames();
|
||||
Set<String> tokens = new LinkedHashSet<String>();
|
||||
for (String t : typeToTokenName) if ( t!=null ) tokens.add(t);
|
||||
assertEquals("[E, 'x']", tokens.toString());
|
||||
assertEquals("[<INVALID>, 'x', E]", tokens.toString());
|
||||
}
|
||||
|
||||
@Test public void testPredDoesNotHideNameToLiteralMapInLexer() throws Exception {
|
||||
|
@ -170,7 +170,17 @@ public class TestTokenTypeAssignment extends BaseTest {
|
|||
{
|
||||
String[] typeToTokenName = g.getTokenNames();
|
||||
Set<String> tokens = new HashSet<String>();
|
||||
for (String t : typeToTokenName) if ( t!=null ) tokens.add(t);
|
||||
for (int i = 0; i < typeToTokenName.length; i++) {
|
||||
String t = typeToTokenName[i];
|
||||
if ( t!=null ) {
|
||||
if (t.startsWith(Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX)) {
|
||||
tokens.add(g.getTokenDisplayName(i));
|
||||
}
|
||||
else {
|
||||
tokens.add(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// make sure expected tokens are there
|
||||
StringTokenizer st = new StringTokenizer(allValidTokensStr, ", ");
|
||||
|
|
Loading…
Reference in New Issue