From 65b176a36dedc9f8a5012d3a35002493d09f385f Mon Sep 17 00:00:00 2001
From: Sam Harwell <sam@tunnelvisionlabs.com>
Date: Sun, 13 Apr 2014 13:35:03 -0500
Subject: [PATCH] Return predefined values instead of null for Grammar methods
 getTokenNames, getTokenDisplayNames, and getRuleNames

Improve consistency of Grammar methods for names of rules and tokens
---
 .../v4/tool/templates/codegen/Java/Java.stg   |   3 +-
 .../src/org/antlr/v4/automata/ATNPrinter.java |   2 +-
 tool/src/org/antlr/v4/codegen/Target.java     |   7 +-
 tool/src/org/antlr/v4/tool/Grammar.java       | 177 ++++++++++++------
 .../v4/test/TestTokenTypeAssignment.java      |  20 +-
 5 files changed, 143 insertions(+), 66 deletions(-)
diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
index dfba5dc36..6b0e9bc07 100644
--- a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
+++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
@@ -859,8 +859,7 @@ public class <lexer.name> extends <superClass> {
 	};
 
 	public static final String[] tokenNames = {
-		"\<INVALID>",
-		<lexer.tokenNames:{t | <t>}; separator=", ", wrap, anchor>
+		<lexer.tokenNames:{t | <t>}; null="\"\<INVALID>\"", separator=", ", wrap, anchor>
 	};
 	public static final String[] ruleNames = {
 		<lexer.ruleNames:{r | "<r>"}; separator=", ", wrap, anchor>
diff --git a/tool/src/org/antlr/v4/automata/ATNPrinter.java b/tool/src/org/antlr/v4/automata/ATNPrinter.java
index 9ee0b0b73..b3f0f3159 100644
--- a/tool/src/org/antlr/v4/automata/ATNPrinter.java
+++ b/tool/src/org/antlr/v4/automata/ATNPrinter.java
@@ -106,7 +106,7 @@ public class ATNPrinter {
 						buf.append("-").append(not?"~":"").append(st.toString()).append("->").append(getStateString(t.target)).append('\n');
 					}
 					else {
-						buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenNames())).append("->").append(getStateString(t.target)).append('\n');
+						buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenDisplayNames())).append("->").append(getStateString(t.target)).append('\n');
 					}
 				}
 				else if ( t instanceof AtomTransition ) {
diff --git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java
index b65aef07f..dbd7a6026 100644
--- a/tool/src/org/antlr/v4/codegen/Target.java
+++ b/tool/src/org/antlr/v4/codegen/Target.java
@@ -123,11 +123,12 @@ public abstract class Target {
 	 *  to a token type in the generated code.
 	 */
 	public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
-		String name = g.getTokenDisplayName(ttype);
-		// If name is a literal, return the token type instead
-		if ( name==null || name.charAt(0)=='\'' ) {
+		String name = g.getTokenName(ttype);
+		// If name is not valid, return the token type instead
+		if ( Grammar.INVALID_TOKEN_NAME.equals(name) ) {
 			return String.valueOf(ttype);
 		}
+
 		return name;
 	}
 
diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java
index 0cb6f7927..a5f5695d5 100644
--- a/tool/src/org/antlr/v4/tool/Grammar.java
+++ b/tool/src/org/antlr/v4/tool/Grammar.java
@@ -74,6 +74,25 @@ import java.util.Set;
 
 public class Grammar implements AttributeResolver {
 	public static final String GRAMMAR_FROM_STRING_NAME = "<string>";
+	/**
+	 * This value is used in the following situations to indicate that a token
+	 * type does not have an associated name which can be directly referenced in
+	 * a grammar.
+	 *
+	 * <ul>
+	 * <li>This value is the name and display name for the token with type
+	 * {@link Token#INVALID_TYPE}.</li>
+	 * <li>This value is the name for tokens with a type not represented by a
+	 * named token. The display name for these tokens is simply the string
+	 * representation of the token type as an integer.</li>
+	 * </ul>
+	 */
+	public static final String INVALID_TOKEN_NAME = "<INVALID>";
+	/**
+	 * This value is used as the name for elements in the array returned by
+	 * {@link #getRuleNames} for indexes not associated with a rule.
+	 */
+	public static final String INVALID_RULE_NAME = "<invalid>";
 
 	public static final Set<String> parserOptions = new HashSet<String>();
 	static {
@@ -178,23 +197,30 @@ public class Grammar implements AttributeResolver {
 	 */
 	int maxTokenType = Token.MIN_USER_TOKEN_TYPE -1;
 
-	/** Map token like ID (but not literals like "while") to its token type */
-	public Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>();
-
-	/** Map token literals like "while" to its token type.  It may be that
-	 *  WHILE="while"=35, in which case both tokenIDToTypeMap and this
-	 *  field will have entries both mapped to 35.
+	/**
+	 * Map a token name like {@code ID} (but not a literal like {@code 'while'})
+	 * to its token type.
 	 */
-	public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
+	public final Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>();
 
-	/** Reverse index for stringLiteralToTypeMap.  Indexed with raw token type.
-	 *  0 is invalid. */
-	public List<String> typeToStringLiteralList = new ArrayList<String>();
-
-	/** Map a token type to its token name. Indexed with raw token type.
-	 *  0 is invalid.
+	/**
+	 * Map a token literal like {@code 'while'} to its token type. It may be that
+	 * {@code WHILE='while'=35}, in which case both {@link #tokenNameToTypeMap}
+	 * and this field will have entries both mapped to 35.
 	 */
-	public List<String> typeToTokenList = new ArrayList<String>();
+	public final Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
+
+	/**
+	 * Reverse index for {@link #stringLiteralToTypeMap}. Indexed with raw token
+	 * type. 0 is invalid.
+	 */
+	public final List<String> typeToStringLiteralList = new ArrayList<String>();
+
+	/**
+	 * Map a token type to its token name. Indexed with raw token type. 0 is
+	 * invalid.
+	 */
+	public final List<String> typeToTokenList = new ArrayList<String>();
 
     /** Map a name to an action.
      *  The code generator will use this to fill holes in the output files.
@@ -565,37 +591,78 @@ public class Grammar implements AttributeResolver {
 	 *  char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
 	 */
 	public String getTokenDisplayName(int ttype) {
-		String tokenName;
 		// inside any target's char range and is lexer grammar?
 		if ( isLexer() &&
 			 ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE )
 		{
 			return CharSupport.getANTLRCharLiteralForChar(ttype);
 		}
-		else if ( ttype==Token.EOF ) {
-			tokenName = "EOF";
+
+		if ( ttype==Token.EOF ) {
+			return "EOF";
 		}
-		else {
-			if ( ttype>0 && ttype<typeToTokenList.size() ) {
-				tokenName = typeToTokenList.get(ttype);
-				if ( tokenName!=null &&
-					 tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
-					 ttype < typeToStringLiteralList.size() &&
-				     typeToStringLiteralList.get(ttype)!=null)
-				{
-					tokenName = typeToStringLiteralList.get(ttype);
-				}
-			}
-			else {
-				tokenName = String.valueOf(ttype);
-			}
+
+		if ( ttype==Token.INVALID_TYPE ) {
+			return INVALID_TOKEN_NAME;
 		}
-//		tool.log("grammar", "getTokenDisplayName ttype="+ttype+", name="+tokenName);
-		return tokenName;
+
+		if (ttype >= 0 && ttype < typeToStringLiteralList.size() && typeToStringLiteralList.get(ttype) != null) {
+			return typeToStringLiteralList.get(ttype);
+		}
+
+		if (ttype >= 0 && ttype < typeToTokenList.size() && typeToTokenList.get(ttype) != null) {
+			return typeToTokenList.get(ttype);
+		}
+
+		return String.valueOf(ttype);
 	}
 
+	/**
+	 * Gets the name by which a token can be referenced in the generated code.
+	 * For tokens defined in a {@code tokens{}} block or via a lexer rule, this
+	 * is the declared name of the token. For token types generated by the use
+	 * of a string literal within a parser rule of a combined grammar, this is
+	 * the automatically generated token name which includes the
+	 * {@link #AUTO_GENERATED_TOKEN_NAME_PREFIX} prefix. For types which are not
+	 * associated with a defined token, this method returns
+	 * {@link #INVALID_TOKEN_NAME}.
+	 *
+	 * @param ttype The token type.
+	 * @return The name of the token with the specified type.
+	 */
+	@NotNull
+	public String getTokenName(int ttype) {
+		// inside any target's char range and is lexer grammar?
+		if ( isLexer() &&
+			 ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE )
+		{
+			return CharSupport.getANTLRCharLiteralForChar(ttype);
+		}
+
+		if ( ttype==Token.EOF ) {
+			return "EOF";
+		}
+
+		if (ttype >= 0 && ttype < typeToTokenList.size() && typeToTokenList.get(ttype) != null) {
+			return typeToTokenList.get(ttype);
+		}
+
+		return INVALID_TOKEN_NAME;
+	}
+
+	/**
+	 * Gets an array of rule names for rules defined or imported by the
+	 * grammar. The array index is the rule index, and the value is the name of
+	 * the rule with the corresponding {@link Rule#index}.
+	 *
+	 * <p>If no rule is defined with an index for an element of the resulting
+	 * array, the value of that element is {@link #INVALID_RULE_NAME}.</p>
+	 *
+	 * @return The names of all rules defined in the grammar.
+	 */
 	public String[] getRuleNames() {
 		String[] result = new String[rules.size()];
+		Arrays.fill(result, INVALID_RULE_NAME);
 		for (Rule rule : rules.values()) {
 			result[rule.index] = rule.name;
 		}
@@ -603,39 +670,39 @@ public class Grammar implements AttributeResolver {
 		return result;
 	}
 
-	public List<String> getTokenDisplayNames(IntegerList types) {
-		List<String> names = new ArrayList<String>();
-		for (int t : types.toArray()) names.add(getTokenDisplayName(t));
-		return names;
-	}
-
+	/**
+	 * Gets an array of token names for tokens defined or imported by the
+	 * grammar. The array index is the token type, and the value is the result
+	 * of {@link #getTokenName} for the corresponding token type.
+	 *
+	 * @see #getTokenName
+	 * @return The token names of all tokens defined in the grammar.
+	 */
 	public String[] getTokenNames() {
 		int numTokens = getMaxTokenType();
 		String[] tokenNames = new String[numTokens+1];
-		for (String tokenName : tokenNameToTypeMap.keySet()) {
-			Integer ttype = tokenNameToTypeMap.get(tokenName);
-			if ( tokenName!=null &&
-                 tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
-                 ttype < typeToStringLiteralList.size() )
-            {
-				tokenName = typeToStringLiteralList.get(ttype);
-			}
-			if ( ttype>0 ) tokenNames[ttype] = tokenName;
+		for (int i = 0; i < tokenNames.length; i++) {
+			tokenNames[i] = getTokenName(i);
 		}
+
 		return tokenNames;
 	}
 
+	/**
+	 * Gets an array of display names for tokens defined or imported by the
+	 * grammar. The array index is the token type, and the value is the result
+	 * of {@link #getTokenDisplayName} for the corresponding token type.
+	 *
+	 * @see #getTokenDisplayName
+	 * @return The display names of all tokens defined in the grammar.
+	 */
 	public String[] getTokenDisplayNames() {
 		int numTokens = getMaxTokenType();
 		String[] tokenNames = new String[numTokens+1];
-		for (String t : tokenNameToTypeMap.keySet()) {
-			Integer ttype = tokenNameToTypeMap.get(t);
-			if ( ttype>0 ) tokenNames[ttype] = t;
-		}
-		for (String t : stringLiteralToTypeMap.keySet()) {
-			Integer ttype = stringLiteralToTypeMap.get(t);
-			if ( ttype>0 ) tokenNames[ttype] = t;
+		for (int i = 0; i < tokenNames.length; i++) {
+			tokenNames[i] = getTokenDisplayName(i);
 		}
+
 		return tokenNames;
 	}
 
diff --git a/tool/test/org/antlr/v4/test/TestTokenTypeAssignment.java b/tool/test/org/antlr/v4/test/TestTokenTypeAssignment.java
index 603f0739a..a399a1c75 100644
--- a/tool/test/org/antlr/v4/test/TestTokenTypeAssignment.java
+++ b/tool/test/org/antlr/v4/test/TestTokenTypeAssignment.java
@@ -36,7 +36,7 @@ import org.antlr.v4.tool.LexerGrammar;
 import org.junit.Test;
 
 import java.util.HashSet;
-import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.Set;
 import java.util.StringTokenizer;
 
@@ -110,10 +110,10 @@ public class TestTokenTypeAssignment extends BaseTest {
 		foundLiterals = g.implicitLexer.stringLiteralToTypeMap.keySet().toString();
 		assertEquals("['x']", foundLiterals); // pushed in lexer from parser
 
-		String[] typeToTokenName = g.getTokenNames();
-		Set<String> tokens = new HashSet<String>();
+		String[] typeToTokenName = g.getTokenDisplayNames();
+		Set<String> tokens = new LinkedHashSet<String>();
 		for (String t : typeToTokenName) if ( t!=null ) tokens.add(t);
-		assertEquals("[E, 'x']", tokens.toString());
+		assertEquals("[<INVALID>, 'x', E]", tokens.toString());
 	}
 
 	@Test public void testPredDoesNotHideNameToLiteralMapInLexer() throws Exception {
@@ -170,7 +170,17 @@ public class TestTokenTypeAssignment extends BaseTest {
 	{
 		String[] typeToTokenName = g.getTokenNames();
 		Set<String> tokens = new HashSet<String>();
-		for (String t : typeToTokenName) if ( t!=null ) tokens.add(t);
+		for (int i = 0; i < typeToTokenName.length; i++) {
+			String t = typeToTokenName[i];
+			if ( t!=null ) {
+				if (t.startsWith(Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX)) {
+					tokens.add(g.getTokenDisplayName(i));
+				}
+				else {
+					tokens.add(t);
+				}
+			}
+		}
 
 		// make sure expected tokens are there
 		StringTokenizer st = new StringTokenizer(allValidTokensStr, ", ");