From 65b176a36dedc9f8a5012d3a35002493d09f385f Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Sun, 13 Apr 2014 13:35:03 -0500 Subject: [PATCH] Return predefined values instead of null for Grammar methods getTokenNames, getTokenDisplayNames, and getRuleNames Improve consistency of Grammar methods for names of rules and tokens --- .../v4/tool/templates/codegen/Java/Java.stg | 3 +- .../src/org/antlr/v4/automata/ATNPrinter.java | 2 +- tool/src/org/antlr/v4/codegen/Target.java | 7 +- tool/src/org/antlr/v4/tool/Grammar.java | 177 ++++++++++++------ .../v4/test/TestTokenTypeAssignment.java | 20 +- 5 files changed, 143 insertions(+), 66 deletions(-) diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg index dfba5dc36..6b0e9bc07 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg @@ -859,8 +859,7 @@ public class extends { }; public static final String[] tokenNames = { - "\", - }; separator=", ", wrap, anchor> + }; null="\"\\"", separator=", ", wrap, anchor> }; public static final String[] ruleNames = { "}; separator=", ", wrap, anchor> diff --git a/tool/src/org/antlr/v4/automata/ATNPrinter.java b/tool/src/org/antlr/v4/automata/ATNPrinter.java index 9ee0b0b73..b3f0f3159 100644 --- a/tool/src/org/antlr/v4/automata/ATNPrinter.java +++ b/tool/src/org/antlr/v4/automata/ATNPrinter.java @@ -106,7 +106,7 @@ public class ATNPrinter { buf.append("-").append(not?"~":"").append(st.toString()).append("->").append(getStateString(t.target)).append('\n'); } else { - buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenNames())).append("->").append(getStateString(t.target)).append('\n'); + buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenDisplayNames())).append("->").append(getStateString(t.target)).append('\n'); } } else if ( t instanceof AtomTransition ) { diff 
--git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java index b65aef07f..dbd7a6026 100644 --- a/tool/src/org/antlr/v4/codegen/Target.java +++ b/tool/src/org/antlr/v4/codegen/Target.java @@ -123,11 +123,12 @@ public abstract class Target { * to a token type in the generated code. */ public String getTokenTypeAsTargetLabel(Grammar g, int ttype) { - String name = g.getTokenDisplayName(ttype); - // If name is a literal, return the token type instead - if ( name==null || name.charAt(0)=='\'' ) { + String name = g.getTokenName(ttype); + // If name is not valid, return the token type instead + if ( Grammar.INVALID_TOKEN_NAME.equals(name) ) { return String.valueOf(ttype); } + return name; } diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index 0cb6f7927..a5f5695d5 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -74,6 +74,25 @@ import java.util.Set; public class Grammar implements AttributeResolver { public static final String GRAMMAR_FROM_STRING_NAME = ""; + /** + * This value is used in the following situations to indicate that a token + * type does not have an associated name which can be directly referenced in + * a grammar. + * + *
<ul>
+ * <li>This value is the name and display name for the token with type
+ * {@link Token#INVALID_TYPE}.</li>
+ * <li>This value is the name for tokens with a type not represented by a
+ * named token. The display name for these tokens is simply the string
+ * representation of the token type as an integer.</li>
+ * </ul>
+ */ + public static final String INVALID_TOKEN_NAME = "<INVALID>"; + /** + * This value is used as the name for elements in the array returned by + * {@link #getRuleNames} for indexes not associated with a rule. + */ + public static final String INVALID_RULE_NAME = "<invalid>"; public static final Set<String> parserOptions = new HashSet<String>(); static { @@ -178,23 +197,30 @@ public class Grammar implements AttributeResolver { */ int maxTokenType = Token.MIN_USER_TOKEN_TYPE -1; - /** Map token like ID (but not literals like "while") to its token type */ - public Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>(); - - /** Map token literals like "while" to its token type. It may be that - * WHILE="while"=35, in which case both tokenIDToTypeMap and this - * field will have entries both mapped to 35. + /** + * Map token like {@code ID} (but not literals like {@code 'while'}) to its + * token type. */ - public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>(); + public final Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>(); - /** Reverse index for stringLiteralToTypeMap. Indexed with raw token type. - * 0 is invalid. */ - public List<String> typeToStringLiteralList = new ArrayList<String>(); - - /** Map a token type to its token name. Indexed with raw token type. - * 0 is invalid. + /** + * Map token literals like {@code 'while'} to its token type. It may be that + * {@code WHILE="while"=35}, in which case both {@link #tokenNameToTypeMap} + * and this field will have entries both mapped to 35. */ - public List<String> typeToTokenList = new ArrayList<String>(); + public final Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>(); + + /** + * Reverse index for {@link #stringLiteralToTypeMap}. Indexed with raw token + * type. 0 is invalid. + */ + public final List<String> typeToStringLiteralList = new ArrayList<String>(); + + /** + * Map a token type to its token name. Indexed with raw token type. 0 is + * invalid. + */ + public final List<String> typeToTokenList = new ArrayList<String>(); /** Map a name to an action. 
* The code generator will use this to fill holes in the output files. @@ -565,37 +591,78 @@ public class Grammar implements AttributeResolver { * char vocabulary, compute an ANTLR-valid (possibly escaped) char literal. */ public String getTokenDisplayName(int ttype) { - String tokenName; // inside any target's char range and is lexer grammar? if ( isLexer() && ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE ) { return CharSupport.getANTLRCharLiteralForChar(ttype); } - else if ( ttype==Token.EOF ) { - tokenName = "EOF"; + + if ( ttype==Token.EOF ) { + return "EOF"; } - else { - if ( ttype>0 && ttype= 0 && ttype < typeToTokenList.size() && typeToTokenList.get(ttype) != null) { + return typeToTokenList.get(ttype); + } + + return INVALID_TOKEN_NAME; + } + + /** + * Gets an array of rule names for rules defined or imported by the + * grammar. The array index is the rule index, and the value is the name of + * the rule with the corresponding {@link Rule#index}. + * + *

<p>If no rule is defined with an index for an element of the resulting
+ * array, the value of that element is {@link #INVALID_RULE_NAME}.</p>

+ * + * @return The names of all rules defined in the grammar. + */ public String[] getRuleNames() { String[] result = new String[rules.size()]; + Arrays.fill(result, INVALID_RULE_NAME); for (Rule rule : rules.values()) { result[rule.index] = rule.name; } @@ -603,39 +670,39 @@ public class Grammar implements AttributeResolver { return result; } - public List getTokenDisplayNames(IntegerList types) { - List names = new ArrayList(); - for (int t : types.toArray()) names.add(getTokenDisplayName(t)); - return names; - } - + /** + * Gets an array of token names for tokens defined or imported by the + * grammar. The array index is the token type, and the value is the result + * of {@link #getTokenName} for the corresponding token type. + * + * @see #getTokenName + * @return The token names of all tokens defined in the grammar. + */ public String[] getTokenNames() { int numTokens = getMaxTokenType(); String[] tokenNames = new String[numTokens+1]; - for (String tokenName : tokenNameToTypeMap.keySet()) { - Integer ttype = tokenNameToTypeMap.get(tokenName); - if ( tokenName!=null && - tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) && - ttype < typeToStringLiteralList.size() ) - { - tokenName = typeToStringLiteralList.get(ttype); - } - if ( ttype>0 ) tokenNames[ttype] = tokenName; + for (int i = 0; i < tokenNames.length; i++) { + tokenNames[i] = getTokenName(i); } + return tokenNames; } + /** + * Gets an array of display names for tokens defined or imported by the + * grammar. The array index is the token type, and the value is the result + * of {@link #getTokenDisplayName} for the corresponding token type. + * + * @see #getTokenDisplayName + * @return The display names of all tokens defined in the grammar. 
+ */ public String[] getTokenDisplayNames() { int numTokens = getMaxTokenType(); String[] tokenNames = new String[numTokens+1]; - for (String t : tokenNameToTypeMap.keySet()) { - Integer ttype = tokenNameToTypeMap.get(t); - if ( ttype>0 ) tokenNames[ttype] = t; - } - for (String t : stringLiteralToTypeMap.keySet()) { - Integer ttype = stringLiteralToTypeMap.get(t); - if ( ttype>0 ) tokenNames[ttype] = t; + for (int i = 0; i < tokenNames.length; i++) { + tokenNames[i] = getTokenDisplayName(i); } + return tokenNames; } diff --git a/tool/test/org/antlr/v4/test/TestTokenTypeAssignment.java b/tool/test/org/antlr/v4/test/TestTokenTypeAssignment.java index 603f0739a..a399a1c75 100644 --- a/tool/test/org/antlr/v4/test/TestTokenTypeAssignment.java +++ b/tool/test/org/antlr/v4/test/TestTokenTypeAssignment.java @@ -36,7 +36,7 @@ import org.antlr.v4.tool.LexerGrammar; import org.junit.Test; import java.util.HashSet; -import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.Set; import java.util.StringTokenizer; @@ -110,10 +110,10 @@ public class TestTokenTypeAssignment extends BaseTest { foundLiterals = g.implicitLexer.stringLiteralToTypeMap.keySet().toString(); assertEquals("['x']", foundLiterals); // pushed in lexer from parser - String[] typeToTokenName = g.getTokenNames(); - Set<String> tokens = new HashSet<String>(); + String[] typeToTokenName = g.getTokenDisplayNames(); + Set<String> tokens = new LinkedHashSet<String>(); for (String t : typeToTokenName) if ( t!=null ) tokens.add(t); - assertEquals("[E, 'x']", tokens.toString()); + assertEquals("[<INVALID>, 'x', E]", tokens.toString()); } @Test public void testPredDoesNotHideNameToLiteralMapInLexer() throws Exception { @@ -170,7 +170,17 @@ public class TestTokenTypeAssignment extends BaseTest { { String[] typeToTokenName = g.getTokenNames(); Set<String> tokens = new HashSet<String>(); - for (String t : typeToTokenName) if ( t!=null ) tokens.add(t); + for (int i = 0; i < typeToTokenName.length; i++) { + String t = typeToTokenName[i]; + if ( t!=null ) { + if 
(t.startsWith(Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX)) { + tokens.add(g.getTokenDisplayName(i)); + } + else { + tokens.add(t); + } + } + } // make sure expected tokens are there StringTokenizer st = new StringTokenizer(allValidTokensStr, ", ");