Return predefined values instead of null for Grammar methods getTokenNames, getTokenDisplayNames, and getRuleNames

Improve consistency of Grammar methods for names of rules and tokens
This commit is contained in:
Sam Harwell 2014-04-13 13:35:03 -05:00
parent 747fb0ba2e
commit 65b176a36d
5 changed files with 143 additions and 66 deletions

View File

@ -859,8 +859,7 @@ public class <lexer.name> extends <superClass> {
};
public static final String[] tokenNames = {
"\<INVALID>",
<lexer.tokenNames:{t | <t>}; separator=", ", wrap, anchor>
<lexer.tokenNames:{t | <t>}; null="\"\<INVALID>\"", separator=", ", wrap, anchor>
};
public static final String[] ruleNames = {
<lexer.ruleNames:{r | "<r>"}; separator=", ", wrap, anchor>

View File

@ -106,7 +106,7 @@ public class ATNPrinter {
buf.append("-").append(not?"~":"").append(st.toString()).append("->").append(getStateString(t.target)).append('\n');
}
else {
buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenNames())).append("->").append(getStateString(t.target)).append('\n');
buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenDisplayNames())).append("->").append(getStateString(t.target)).append('\n');
}
}
else if ( t instanceof AtomTransition ) {

View File

@ -123,11 +123,12 @@ public abstract class Target {
* to a token type in the generated code.
*/
public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
String name = g.getTokenDisplayName(ttype);
// If name is a literal, return the token type instead
if ( name==null || name.charAt(0)=='\'' ) {
String name = g.getTokenName(ttype);
// If name is not valid, return the token type instead
if ( Grammar.INVALID_TOKEN_NAME.equals(name) ) {
return String.valueOf(ttype);
}
return name;
}

View File

@ -74,6 +74,25 @@ import java.util.Set;
public class Grammar implements AttributeResolver {
public static final String GRAMMAR_FROM_STRING_NAME = "<string>";
/**
* This value is used in the following situations to indicate that a token
* type does not have an associated name which can be directly referenced in
* a grammar.
*
* <ul>
* <li>This value is the name and display name for the token with type
* {@link Token#INVALID_TYPE}.</li>
* <li>This value is the name for tokens with a type not represented by a
* named token. The display name for these tokens is simply the string
* representation of the token type as an integer.</li>
* </ul>
*/
public static final String INVALID_TOKEN_NAME = "<INVALID>";
/**
* This value is used as the name for elements in the array returned by
* {@link #getRuleNames} for indexes not associated with a rule.
*/
public static final String INVALID_RULE_NAME = "<invalid>";
public static final Set<String> parserOptions = new HashSet<String>();
static {
@ -178,23 +197,30 @@ public class Grammar implements AttributeResolver {
*/
int maxTokenType = Token.MIN_USER_TOKEN_TYPE -1;
/** Map token like ID (but not literals like "while") to its token type */
public Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>();
/** Map token literals like "while" to its token type. It may be that
* WHILE="while"=35, in which case both tokenIDToTypeMap and this
* field will have entries both mapped to 35.
/**
* Map a token name like {@code ID} (but not a literal like {@code 'while'})
* to its token type.
*/
public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
public final Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>();
/** Reverse index for stringLiteralToTypeMap. Indexed with raw token type.
* 0 is invalid. */
public List<String> typeToStringLiteralList = new ArrayList<String>();
/** Map a token type to its token name. Indexed with raw token type.
* 0 is invalid.
/**
* Map a token literal like {@code 'while'} to its token type. It may be that
* {@code WHILE="while"=35}, in which case both {@link #tokenNameToTypeMap}
* and this field will have entries both mapped to 35.
*/
public List<String> typeToTokenList = new ArrayList<String>();
public final Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
/**
* Reverse index for {@link #stringLiteralToTypeMap}. Indexed with raw token
* type. 0 is invalid.
*/
public final List<String> typeToStringLiteralList = new ArrayList<String>();
/**
* Map a token type to its token name. Indexed with raw token type. 0 is
* invalid.
*/
public final List<String> typeToTokenList = new ArrayList<String>();
/** Map a name to an action.
* The code generator will use this to fill holes in the output files.
@ -565,37 +591,78 @@ public class Grammar implements AttributeResolver {
* char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
*/
public String getTokenDisplayName(int ttype) {
String tokenName;
// inside any target's char range and is lexer grammar?
if ( isLexer() &&
ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE )
{
return CharSupport.getANTLRCharLiteralForChar(ttype);
}
else if ( ttype==Token.EOF ) {
tokenName = "EOF";
}
else {
if ( ttype>0 && ttype<typeToTokenList.size() ) {
tokenName = typeToTokenList.get(ttype);
if ( tokenName!=null &&
tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
ttype < typeToStringLiteralList.size() &&
typeToStringLiteralList.get(ttype)!=null)
{
tokenName = typeToStringLiteralList.get(ttype);
}
}
else {
tokenName = String.valueOf(ttype);
}
}
// tool.log("grammar", "getTokenDisplayName ttype="+ttype+", name="+tokenName);
return tokenName;
if ( ttype==Token.EOF ) {
return "EOF";
}
if ( ttype==Token.INVALID_TYPE ) {
return INVALID_TOKEN_NAME;
}
if (ttype >= 0 && ttype < typeToStringLiteralList.size() && typeToStringLiteralList.get(ttype) != null) {
return typeToStringLiteralList.get(ttype);
}
if (ttype >= 0 && ttype < typeToTokenList.size() && typeToTokenList.get(ttype) != null) {
return typeToTokenList.get(ttype);
}
return String.valueOf(ttype);
}
/**
* Returns the name under which a token type can be referenced in the
* generated code.
*
* <p>The name is resolved in the following order:</p>
* <ol>
* <li>In a lexer grammar, a type between {@code Lexer.MIN_CHAR_VALUE} and
* {@code Lexer.MAX_CHAR_VALUE} (inclusive) yields the ANTLR char literal
* for that value.</li>
* <li>{@code Token.EOF} yields {@code "EOF"}.</li>
* <li>A type with a non-null entry in {@link #typeToTokenList} yields that
* entry. This is the declared name for tokens defined in a
* {@code tokens{}} block or via a lexer rule, and the automatically
* generated name (carrying the {@link #AUTO_GENERATED_TOKEN_NAME_PREFIX}
* prefix) for types created from a string literal used in a parser rule of
* a combined grammar.</li>
* <li>Any other type yields {@link #INVALID_TOKEN_NAME}.</li>
* </ol>
*
* @param ttype The token type.
* @return The name of the token with the specified type; never {@code null}.
*/
@NotNull
public String getTokenName(int ttype) {
	// In a lexer grammar, a type inside the char range is named by its char literal.
	boolean inCharRange = Lexer.MIN_CHAR_VALUE <= ttype && ttype <= Lexer.MAX_CHAR_VALUE;
	if ( isLexer() && inCharRange ) {
		return CharSupport.getANTLRCharLiteralForChar(ttype);
	}
	if ( Token.EOF == ttype ) {
		return "EOF";
	}
	// Only consult the list when ttype is a valid index; gaps are stored as null.
	boolean validIndex = 0 <= ttype && ttype < typeToTokenList.size();
	String declaredName = validIndex ? typeToTokenList.get(ttype) : null;
	return declaredName != null ? declaredName : INVALID_TOKEN_NAME;
}
/**
* Gets an array of rule names for rules defined or imported by the
* grammar. The array index is the rule index, and the value is the name of
* the rule with the corresponding {@link Rule#index}.
*
* <p>If no rule is defined with an index for an element of the resulting
* array, the value of that element is {@link #INVALID_RULE_NAME}.</p>
*
* @return The names of all rules defined in the grammar.
*/
public String[] getRuleNames() {
String[] result = new String[rules.size()];
Arrays.fill(result, INVALID_RULE_NAME);
for (Rule rule : rules.values()) {
result[rule.index] = rule.name;
}
@ -603,39 +670,39 @@ public class Grammar implements AttributeResolver {
return result;
}
/**
* Collects the display name of every token type in {@code types}, in the
* order in which the types appear.
*
* @param types The token types to look up.
* @return A new list holding the result of {@link #getTokenDisplayName} for
* each element of {@code types}.
*/
public List<String> getTokenDisplayNames(IntegerList types) {
	List<String> displayNames = new ArrayList<String>();
	for (int ttype : types.toArray()) {
		String displayName = getTokenDisplayName(ttype);
		displayNames.add(displayName);
	}
	return displayNames;
}
/**
* Gets an array of token names for tokens defined or imported by the
* grammar. The array index is the token type, and the value is the result
* of {@link #getTokenName} for the corresponding token type.
*
* @see #getTokenName
* @return The token names of all tokens defined in the grammar.
*/
public String[] getTokenNames() {
int numTokens = getMaxTokenType();
String[] tokenNames = new String[numTokens+1];
for (String tokenName : tokenNameToTypeMap.keySet()) {
Integer ttype = tokenNameToTypeMap.get(tokenName);
if ( tokenName!=null &&
tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
ttype < typeToStringLiteralList.size() )
{
tokenName = typeToStringLiteralList.get(ttype);
}
if ( ttype>0 ) tokenNames[ttype] = tokenName;
for (int i = 0; i < tokenNames.length; i++) {
tokenNames[i] = getTokenName(i);
}
return tokenNames;
}
/**
* Gets an array of display names for tokens defined or imported by the
* grammar. The array index is the token type, and the value is the result
* of {@link #getTokenDisplayName} for the corresponding token type.
*
* @see #getTokenDisplayName
* @return The display names of all tokens defined in the grammar.
*/
public String[] getTokenDisplayNames() {
int numTokens = getMaxTokenType();
String[] tokenNames = new String[numTokens+1];
for (String t : tokenNameToTypeMap.keySet()) {
Integer ttype = tokenNameToTypeMap.get(t);
if ( ttype>0 ) tokenNames[ttype] = t;
}
for (String t : stringLiteralToTypeMap.keySet()) {
Integer ttype = stringLiteralToTypeMap.get(t);
if ( ttype>0 ) tokenNames[ttype] = t;
for (int i = 0; i < tokenNames.length; i++) {
tokenNames[i] = getTokenDisplayName(i);
}
return tokenNames;
}

View File

@ -36,7 +36,7 @@ import org.antlr.v4.tool.LexerGrammar;
import org.junit.Test;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.StringTokenizer;
@ -110,10 +110,10 @@ public class TestTokenTypeAssignment extends BaseTest {
foundLiterals = g.implicitLexer.stringLiteralToTypeMap.keySet().toString();
assertEquals("['x']", foundLiterals); // pushed in lexer from parser
String[] typeToTokenName = g.getTokenNames();
Set<String> tokens = new HashSet<String>();
String[] typeToTokenName = g.getTokenDisplayNames();
Set<String> tokens = new LinkedHashSet<String>();
for (String t : typeToTokenName) if ( t!=null ) tokens.add(t);
assertEquals("[E, 'x']", tokens.toString());
assertEquals("[<INVALID>, 'x', E]", tokens.toString());
}
@Test public void testPredDoesNotHideNameToLiteralMapInLexer() throws Exception {
@ -170,7 +170,17 @@ public class TestTokenTypeAssignment extends BaseTest {
{
String[] typeToTokenName = g.getTokenNames();
Set<String> tokens = new HashSet<String>();
for (String t : typeToTokenName) if ( t!=null ) tokens.add(t);
for (int i = 0; i < typeToTokenName.length; i++) {
String t = typeToTokenName[i];
if ( t!=null ) {
if (t.startsWith(Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX)) {
tokens.add(g.getTokenDisplayName(i));
}
else {
tokens.add(t);
}
}
}
// make sure expected tokens are there
StringTokenizer st = new StringTokenizer(allValidTokensStr, ", ");