Implement token vocabulary (fixes #711)

This commit is contained in:
Sam Harwell 2014-09-25 12:52:12 -05:00 committed by Sam Harwell
parent daa462206a
commit d476d3e5d9
24 changed files with 729 additions and 83 deletions

View File

@ -323,7 +323,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
@NotNull InputMismatchException e)
{
String msg = "mismatched input "+getTokenErrorDisplay(e.getOffendingToken())+
" expecting "+e.getExpectedTokens().toString(recognizer.getTokenNames());
" expecting "+e.getExpectedTokens().toString(recognizer.getVocabulary());
recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e);
}
@ -373,7 +373,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
String tokenName = getTokenErrorDisplay(t);
IntervalSet expecting = getExpectedTokens(recognizer);
String msg = "extraneous input "+tokenName+" expecting "+
expecting.toString(recognizer.getTokenNames());
expecting.toString(recognizer.getVocabulary());
recognizer.notifyErrorListeners(t, msg, null);
}
@ -403,7 +403,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
Token t = recognizer.getCurrentToken();
IntervalSet expecting = getExpectedTokens(recognizer);
String msg = "missing "+expecting.toString(recognizer.getTokenNames())+
String msg = "missing "+expecting.toString(recognizer.getVocabulary())+
" at "+getTokenErrorDisplay(t);
recognizer.notifyErrorListeners(t, msg, null);
@ -581,7 +581,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
int expectedTokenType = expecting.getMinElement(); // get any element
String tokenText;
if ( expectedTokenType== Token.EOF ) tokenText = "<missing EOF>";
else tokenText = "<missing "+recognizer.getTokenNames()[expectedTokenType]+">";
else tokenText = "<missing "+recognizer.getVocabulary().getDisplayName(expectedTokenType)+">";
Token current = currentSymbol;
Token lookback = recognizer.getInputStream().LT(-1);
if ( current.getType() == Token.EOF && lookback!=null ) {

View File

@ -358,6 +358,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
* that overrides this to point to their String[] tokenNames.
*/
@Override
@Deprecated
public String[] getTokenNames() {
return null;
}

View File

@ -35,6 +35,7 @@ import org.antlr.v4.runtime.atn.ATNType;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.PredictionContextCache;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.NotNull;
import java.util.Collection;
@ -42,15 +43,24 @@ public class LexerInterpreter extends Lexer {
protected final String grammarFileName;
protected final ATN atn;
@Deprecated
protected final String[] tokenNames;
protected final String[] ruleNames;
protected final String[] modeNames;
@NotNull
private final Vocabulary vocabulary;
protected final DFA[] _decisionToDFA;
protected final PredictionContextCache _sharedContextCache =
new PredictionContextCache();
@Deprecated
public LexerInterpreter(String grammarFileName, Collection<String> tokenNames, Collection<String> ruleNames, Collection<String> modeNames, ATN atn, CharStream input) {
this(grammarFileName, VocabularyImpl.fromTokenNames(tokenNames.toArray(new String[tokenNames.size()])), ruleNames, modeNames, atn, input);
}
public LexerInterpreter(String grammarFileName, @NotNull Vocabulary vocabulary, Collection<String> ruleNames, Collection<String> modeNames, ATN atn, CharStream input) {
super(input);
if (atn.grammarType != ATNType.LEXER) {
@ -59,9 +69,14 @@ public class LexerInterpreter extends Lexer {
this.grammarFileName = grammarFileName;
this.atn = atn;
this.tokenNames = tokenNames.toArray(new String[tokenNames.size()]);
// Legacy token-name table kept for the deprecated getTokenNames() API.
// Token types are used directly as indexes and range from 0 through
// atn.maxTokenType inclusive, so the array needs maxTokenType + 1 slots;
// sizing it to maxTokenType would silently drop the highest token type.
this.tokenNames = new String[atn.maxTokenType + 1];
for (int i = 0; i < tokenNames.length; i++) {
tokenNames[i] = vocabulary.getDisplayName(i);
}
this.ruleNames = ruleNames.toArray(new String[ruleNames.size()]);
this.modeNames = modeNames.toArray(new String[modeNames.size()]);
this.vocabulary = vocabulary;
this._decisionToDFA = new DFA[atn.getNumberOfDecisions()];
for (int i = 0; i < _decisionToDFA.length; i++) {
@ -81,6 +96,7 @@ public class LexerInterpreter extends Lexer {
}
@Override
@Deprecated
public String[] getTokenNames() {
return tokenNames;
}
@ -94,4 +110,13 @@ public class LexerInterpreter extends Lexer {
public String[] getModeNames() {
return modeNames;
}
/**
 * Returns the vocabulary supplied at construction time, deferring to the
 * superclass implementation only if no vocabulary is set.
 */
@Override
public Vocabulary getVocabulary() {
    return vocabulary != null ? vocabulary : super.getVocabulary();
}
}

View File

@ -857,7 +857,7 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
List<String> s = new ArrayList<String>();
for (int d = 0; d < _interp.decisionToDFA.length; d++) {
DFA dfa = _interp.decisionToDFA[d];
s.add( dfa.toString(getTokenNames()) );
s.add( dfa.toString(getVocabulary()) );
}
return s;
}
@ -872,7 +872,7 @@ public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
if ( !dfa.states.isEmpty() ) {
if ( seenOne ) System.out.println();
System.out.println("Decision " + dfa.decision + ":");
System.out.print(dfa.toString(getTokenNames()));
System.out.print(dfa.toString(getVocabulary()));
seenOne = true;
}
}

View File

@ -45,6 +45,7 @@ import org.antlr.v4.runtime.atn.RuleTransition;
import org.antlr.v4.runtime.atn.StarLoopEntryState;
import org.antlr.v4.runtime.atn.Transition;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Pair;
import java.util.ArrayDeque;
@ -74,19 +75,36 @@ public class ParserInterpreter extends Parser {
protected final PredictionContextCache sharedContextCache =
new PredictionContextCache();
@Deprecated
protected final String[] tokenNames;
protected final String[] ruleNames;
@NotNull
private final Vocabulary vocabulary;
protected final Deque<Pair<ParserRuleContext, Integer>> _parentContextStack = new ArrayDeque<Pair<ParserRuleContext, Integer>>();
/**
* @deprecated Use {@link #ParserInterpreter(String, Vocabulary, Collection, ATN, TokenStream)} instead.
*/
@Deprecated
public ParserInterpreter(String grammarFileName, Collection<String> tokenNames,
Collection<String> ruleNames, ATN atn, TokenStream input) {
this(grammarFileName, VocabularyImpl.fromTokenNames(tokenNames.toArray(new String[tokenNames.size()])), ruleNames, atn, input);
}
public ParserInterpreter(String grammarFileName, @NotNull Vocabulary vocabulary,
Collection<String> ruleNames, ATN atn, TokenStream input)
{
super(input);
this.grammarFileName = grammarFileName;
this.atn = atn;
this.tokenNames = tokenNames.toArray(new String[tokenNames.size()]);
// Legacy token-name table kept for the deprecated getTokenNames() API.
// Token types are used directly as indexes and range from 0 through
// atn.maxTokenType inclusive, so the array needs maxTokenType + 1 slots;
// sizing it to maxTokenType would silently drop the highest token type.
this.tokenNames = new String[atn.maxTokenType + 1];
for (int i = 0; i < tokenNames.length; i++) {
tokenNames[i] = vocabulary.getDisplayName(i);
}
this.ruleNames = ruleNames.toArray(new String[ruleNames.size()]);
this.vocabulary = vocabulary;
this.decisionToDFA = new DFA[atn.getNumberOfDecisions()];
for (int i = 0; i < decisionToDFA.length; i++) {
decisionToDFA[i] = new DFA(atn.getDecisionState(i), i);
@ -116,10 +134,16 @@ public class ParserInterpreter extends Parser {
}
@Override
@Deprecated
public String[] getTokenNames() {
return tokenNames;
}
@Override
public Vocabulary getVocabulary() {
return vocabulary;
}
@Override
public String[] getRuleNames() {
return ruleNames;

View File

@ -64,11 +64,26 @@ public abstract class Recognizer<Symbol, ATNInterpreter extends ATNSimulator> {
/** Used to print out token names like ID during debugging and
* error reporting. The generated parsers implement a method
* that overrides this to point to their String[] tokenNames.
*
* @deprecated Use {@link #getVocabulary()} instead.
*/
@Deprecated
public abstract String[] getTokenNames();
public abstract String[] getRuleNames();
/**
 * Get the vocabulary used by the recognizer.
 *
 * <p>This base implementation adapts the legacy array returned by
 * {@link #getTokenNames()} through
 * {@link VocabularyImpl#fromTokenNames(String[])}.</p>
 *
 * @return A {@link Vocabulary} instance providing information about the
 * vocabulary used by the grammar.
 */
@NotNull
@SuppressWarnings("deprecation")
public Vocabulary getVocabulary() {
    String[] legacyTokenNames = getTokenNames();
    return VocabularyImpl.fromTokenNames(legacyTokenNames);
}
/**
* Get a map from token names to token types.
*

View File

@ -0,0 +1,152 @@
/*
* [The "BSD license"]
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
/**
* This interface provides information about the vocabulary used by a
* recognizer.
*
* @see Recognizer#getVocabulary()
* @author Sam Harwell
*/
public interface Vocabulary {
/**
* Gets the string literal associated with a token type. The string returned
* by this method, when not {@code null}, can be used unaltered in a parser
* grammar to represent this token type.
*
* <p>The following table shows examples of lexer rules and the literal
* names assigned to the corresponding token types.</p>
*
* <table>
* <tr>
* <th>Rule</th>
* <th>Literal Name</th>
* <th>Java String Literal</th>
* </tr>
* <tr>
* <td>{@code THIS : 'this';}</td>
* <td>{@code 'this'}</td>
* <td>{@code "'this'"}</td>
* </tr>
* <tr>
* <td>{@code SQUOTE : '\'';}</td>
* <td>{@code '\''}</td>
* <td>{@code "'\\''"}</td>
* </tr>
* <tr>
* <td>{@code ID : [A-Z]+;}</td>
* <td>n/a</td>
* <td>{@code null}</td>
* </tr>
* </table>
*
* @param tokenType The token type.
*
* @return The string literal associated with the specified token type, or
* {@code null} if no string literal is associated with the type.
*/
@Nullable
String getLiteralName(int tokenType);
/**
* Gets the symbolic name associated with a token type. The string returned
* by this method, when not {@code null}, can be used unaltered in a parser
* grammar to represent this token type.
*
* <p>This method supports token types defined by any of the following
* methods:</p>
*
* <ul>
* <li>Tokens created by lexer rules.</li>
* <li>Tokens defined in a {@code tokens{}} block in a lexer or parser
* grammar.</li>
* <li>The implicitly defined {@code EOF} token, which has the token type
* {@link Token#EOF}.</li>
* </ul>
*
* <p>The following table shows examples of lexer rules and the symbolic
* names assigned to the corresponding token types.</p>
*
* <table>
* <tr>
* <th>Rule</th>
* <th>Symbolic Name</th>
* </tr>
* <tr>
* <td>{@code THIS : 'this';}</td>
* <td>{@code THIS}</td>
* </tr>
* <tr>
* <td>{@code SQUOTE : '\'';}</td>
* <td>{@code SQUOTE}</td>
* </tr>
* <tr>
* <td>{@code ID : [A-Z]+;}</td>
* <td>{@code ID}</td>
* </tr>
* </table>
*
* @param tokenType The token type.
*
* @return The symbolic name associated with the specified token type, or
* {@code null} if no symbolic name is associated with the type.
*/
@Nullable
String getSymbolicName(int tokenType);
/**
* Gets the display name of a token type.
*
* <p>ANTLR provides a default implementation of this method, but
* applications are free to override the behavior in any manner which makes
* sense for the application. The default implementation returns the first
* result from the following list which produces a non-{@code null}
* result.</p>
*
* <ol>
* <li>The result of {@link #getLiteralName}</li>
* <li>The result of {@link #getSymbolicName}</li>
* <li>The result of {@link Integer#toString}</li>
* </ol>
*
* @param tokenType The token type.
*
* @return The display name of the token type, for use in error reporting or
* other user-visible messages which reference specific token types.
*/
@NotNull
String getDisplayName(int tokenType);
}

View File

@ -0,0 +1,196 @@
/*
* [The "BSD license"]
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import java.util.Arrays;
/**
* This class provides a default implementation of the {@link Vocabulary}
* interface.
*
* @author Sam Harwell
*/
public class VocabularyImpl implements Vocabulary {
    private static final String[] EMPTY_NAMES = new String[0];

    /**
     * Gets an empty {@link Vocabulary} instance.
     *
     * <p>
     * No literal or symbol names are assigned to token types, so
     * {@link #getDisplayName(int)} returns the numeric value for all tokens
     * except {@link Token#EOF}.</p>
     */
    @NotNull
    public static final VocabularyImpl EMPTY_VOCABULARY = new VocabularyImpl(EMPTY_NAMES, EMPTY_NAMES, EMPTY_NAMES);

    @NotNull
    private final String[] literalNames;
    @NotNull
    private final String[] symbolicNames;
    @NotNull
    private final String[] displayNames;

    /**
     * Constructs a new instance of {@link VocabularyImpl} from the specified
     * literal and symbolic token names.
     *
     * @param literalNames The literal names assigned to tokens, or {@code null}
     * if no literal names are assigned.
     * @param symbolicNames The symbolic names assigned to tokens, or
     * {@code null} if no symbolic names are assigned.
     *
     * @see #getLiteralName(int)
     * @see #getSymbolicName(int)
     */
    public VocabularyImpl(@Nullable String[] literalNames, @Nullable String[] symbolicNames) {
        this(literalNames, symbolicNames, null);
    }

    /**
     * Constructs a new instance of {@link VocabularyImpl} from the specified
     * literal, symbolic, and display token names.
     *
     * <p>The arrays are copied, so later modifications made by the caller to
     * the arrays passed in do not affect this instance.</p>
     *
     * @param literalNames The literal names assigned to tokens, or {@code null}
     * if no literal names are assigned.
     * @param symbolicNames The symbolic names assigned to tokens, or
     * {@code null} if no symbolic names are assigned.
     * @param displayNames The display names assigned to tokens, or {@code null}
     * to use the values in {@code literalNames} and {@code symbolicNames} as
     * the source of display names, as described in
     * {@link #getDisplayName(int)}.
     *
     * @see #getLiteralName(int)
     * @see #getSymbolicName(int)
     * @see #getDisplayName(int)
     */
    public VocabularyImpl(@Nullable String[] literalNames, @Nullable String[] symbolicNames, @Nullable String[] displayNames) {
        this.literalNames = snapshot(literalNames);
        this.symbolicNames = snapshot(symbolicNames);
        this.displayNames = snapshot(displayNames);
    }

    /**
     * Returns a private copy of {@code names}, or the shared empty array when
     * {@code names} is {@code null} or has no elements.
     */
    @NotNull
    private static String[] snapshot(@Nullable String[] names) {
        if (names == null || names.length == 0) {
            return EMPTY_NAMES;
        }

        return Arrays.copyOf(names, names.length);
    }

    /**
     * Returns a {@link VocabularyImpl} instance from the specified set of token
     * names. This method acts as a compatibility layer for the single
     * {@code tokenNames} array generated by previous releases of ANTLR.
     *
     * <p>Every entry of {@code tokenNames} is used as the display name of the
     * corresponding token type. In addition, literal and symbolic names are
     * derived from the first character of each non-empty entry:</p>
     *
     * <ul>
     * <li>An entry starting with a single quote ({@code '}) is reported by
     * {@link #getLiteralName(int)} and has no symbolic name.</li>
     * <li>An entry starting with an uppercase letter is reported by
     * {@link #getSymbolicName(int)} and has no literal name.</li>
     * <li>Any other entry (including {@code null} and the empty string) has
     * neither a literal nor a symbolic name.</li>
     * </ul>
     *
     * @param tokenNames The token names, or {@code null} if no token names are
     * available.
     * @return A {@link Vocabulary} instance which uses {@code tokenNames} for
     * the display names of tokens.
     */
    public static Vocabulary fromTokenNames(@Nullable String[] tokenNames) {
        if (tokenNames == null || tokenNames.length == 0) {
            return EMPTY_VOCABULARY;
        }

        String[] literalNames = new String[tokenNames.length];
        String[] symbolicNames = new String[tokenNames.length];
        for (int i = 0; i < tokenNames.length; i++) {
            String tokenName = tokenNames[i];
            if (tokenName == null || tokenName.isEmpty()) {
                // neither a literal nor a symbolic name
                continue;
            }

            char firstChar = tokenName.charAt(0);
            if (firstChar == '\'') {
                literalNames[i] = tokenName;
            }
            else if (Character.isUpperCase(firstChar)) {
                symbolicNames[i] = tokenName;
            }
            // anything else (e.g. "<INVALID>") is only a display name
        }

        return new VocabularyImpl(literalNames, symbolicNames, tokenNames);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Token types outside the range of the literal name table yield
     * {@code null}.</p>
     */
    @Override
    @Nullable
    public String getLiteralName(int tokenType) {
        if (tokenType >= 0 && tokenType < literalNames.length) {
            return literalNames[tokenType];
        }

        return null;
    }

    /**
     * {@inheritDoc}
     *
     * <p>{@link Token#EOF} is reported as {@code "EOF"} when it falls outside
     * the symbolic name table; any other out-of-range token type yields
     * {@code null}.</p>
     */
    @Override
    @Nullable
    public String getSymbolicName(int tokenType) {
        if (tokenType >= 0 && tokenType < symbolicNames.length) {
            return symbolicNames[tokenType];
        }

        if (tokenType == Token.EOF) {
            return "EOF";
        }

        return null;
    }

    /**
     * {@inheritDoc}
     *
     * <p>An explicit, non-{@code null} display name takes precedence; otherwise
     * the literal name, then the symbolic name, then the decimal token type
     * is used.</p>
     */
    @Override
    @NotNull
    public String getDisplayName(int tokenType) {
        if (tokenType >= 0 && tokenType < displayNames.length) {
            String displayName = displayNames[tokenType];
            if (displayName != null) {
                return displayName;
            }
        }

        String literalName = getLiteralName(tokenType);
        if (literalName != null) {
            return literalName;
        }

        String symbolicName = getSymbolicName(tokenType);
        if (symbolicName != null) {
            return symbolicName;
        }

        return Integer.toString(tokenType);
    }
}

View File

@ -31,7 +31,6 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.FailedPredicateException;
import org.antlr.v4.runtime.IntStream;
import org.antlr.v4.runtime.NoViableAltException;
@ -40,6 +39,8 @@ import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.VocabularyImpl;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.dfa.DFAState;
import org.antlr.v4.runtime.misc.DoubleKeyMap;
@ -423,7 +424,7 @@ public class ParserATNSimulator extends ATNSimulator {
}
int alt = execATN(dfa, s0, input, index, outerContext);
if ( debug ) System.out.println("DFA after predictATN: "+ dfa.toString(parser.getTokenNames()));
if ( debug ) System.out.println("DFA after predictATN: "+ dfa.toString(parser.getVocabulary()));
return alt;
}
finally {
@ -1852,18 +1853,17 @@ public class ParserATNSimulator extends ATNSimulator {
@NotNull
public String getTokenName(int t) {
if ( t==Token.EOF ) return "EOF";
if ( parser!=null && parser.getTokenNames()!=null ) {
String[] tokensNames = parser.getTokenNames();
if ( t>=tokensNames.length ) {
System.err.println(t+" ttype out of range: "+ Arrays.toString(tokensNames));
System.err.println(((CommonTokenStream)parser.getInputStream()).getTokens());
}
else {
return tokensNames[t]+"<"+t+">";
}
if (t == Token.EOF) {
return "EOF";
}
return String.valueOf(t);
Vocabulary vocabulary = parser != null ? parser.getVocabulary() : VocabularyImpl.EMPTY_VOCABULARY;
String displayName = vocabulary.getDisplayName(t);
if (displayName.equals(Integer.toString(t))) {
return displayName;
}
return displayName + "<" + t + ">";
}
public String getLookaheadName(TokenStream input) {
@ -1966,7 +1966,7 @@ public class ParserATNSimulator extends ATNSimulator {
}
if ( debug ) {
System.out.println("DFA=\n"+dfa.toString(parser!=null?parser.getTokenNames():null));
System.out.println("DFA=\n"+dfa.toString(parser!=null?parser.getVocabulary():VocabularyImpl.EMPTY_VOCABULARY));
}
return to;

View File

@ -29,11 +29,13 @@
*/
package org.antlr.v4.runtime.dfa;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.VocabularyImpl;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.atn.DecisionState;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.Parser;
import java.util.ArrayList;
import java.util.Arrays;
@ -198,14 +200,27 @@ public class DFA {
}
@Override
public String toString() { return toString(null); }
public String toString() { return toString(VocabularyImpl.EMPTY_VOCABULARY); }
/**
* @deprecated Use {@link #toString(Vocabulary)} instead.
*/
@Deprecated
public String toString(@Nullable String[] tokenNames) {
if ( s0==null ) return "";
DFASerializer serializer = new DFASerializer(this,tokenNames);
return serializer.toString();
}
/**
 * Serializes this DFA, labelling edges with names taken from
 * {@code vocabulary}. Returns the empty string when the DFA has no start
 * state.
 */
public String toString(@NotNull Vocabulary vocabulary) {
    return s0 == null ? "" : new DFASerializer(this, vocabulary).toString();
}
public String toLexerString() {
if ( s0==null ) return "";
DFASerializer serializer = new LexerDFASerializer(this);

View File

@ -30,6 +30,8 @@
package org.antlr.v4.runtime.dfa;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.VocabularyImpl;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@ -39,13 +41,21 @@ import java.util.List;
/** A DFA walker that knows how to dump them to serialized strings. */
public class DFASerializer {
@NotNull
final DFA dfa;
@Nullable
final String[] tokenNames;
private final DFA dfa;
@NotNull
private final Vocabulary vocabulary;
/**
* @deprecated Use {@link #DFASerializer(DFA, Vocabulary)} instead.
*/
@Deprecated
public DFASerializer(@NotNull DFA dfa, @Nullable String[] tokenNames) {
this(dfa, VocabularyImpl.fromTokenNames(tokenNames));
}
public DFASerializer(@NotNull DFA dfa, @NotNull Vocabulary vocabulary) {
this.dfa = dfa;
this.tokenNames = tokenNames;
this.vocabulary = vocabulary;
}
@Override
@ -73,11 +83,7 @@ public class DFASerializer {
}
protected String getEdgeLabel(int i) {
String label;
if ( i==0 ) return "EOF";
if ( tokenNames!=null ) label = tokenNames[i-1];
else label = String.valueOf(i-1);
return label;
return vocabulary.getDisplayName(i - 1);
}
@NotNull

View File

@ -30,11 +30,12 @@
package org.antlr.v4.runtime.dfa;
import org.antlr.v4.runtime.VocabularyImpl;
import org.antlr.v4.runtime.misc.NotNull;
public class LexerDFASerializer extends DFASerializer {
public LexerDFASerializer(@NotNull DFA dfa) {
super(dfa, null);
super(dfa, VocabularyImpl.EMPTY_VOCABULARY);
}
@Override

View File

@ -31,6 +31,8 @@ package org.antlr.v4.runtime.misc;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.VocabularyImpl;
import java.util.ArrayList;
import java.util.HashSet;
@ -544,7 +546,15 @@ public class IntervalSet implements IntSet {
return buf.toString();
}
/**
* @deprecated Use {@link #toString(Vocabulary)} instead.
*/
@Deprecated
public String toString(String[] tokenNames) {
return toString(VocabularyImpl.fromTokenNames(tokenNames));
}
public String toString(@NotNull Vocabulary vocabulary) {
StringBuilder buf = new StringBuilder();
if ( this.intervals==null || this.intervals.isEmpty() ) {
return "{}";
@ -558,12 +568,12 @@ public class IntervalSet implements IntSet {
int a = I.a;
int b = I.b;
if ( a==b ) {
buf.append(elementName(tokenNames, a));
buf.append(elementName(vocabulary, a));
}
else {
for (int i=a; i<=b; i++) {
if ( i>a ) buf.append(", ");
buf.append(elementName(tokenNames, i));
buf.append(elementName(vocabulary, i));
}
}
if ( iter.hasNext() ) {
@ -576,12 +586,26 @@ public class IntervalSet implements IntSet {
return buf.toString();
}
protected String elementName(String[] tokenNames, int a) {
if ( a==Token.EOF ) return "<EOF>";
else if ( a==Token.EPSILON ) return "<EPSILON>";
else return tokenNames[a];
/**
* @deprecated Use {@link #elementName(Vocabulary, int)} instead.
*/
@Deprecated
protected String elementName(String[] tokenNames, int a) {
return elementName(VocabularyImpl.fromTokenNames(tokenNames), a);
}
}
/**
 * Returns the text used for element {@code a} when printing this set:
 * fixed markers for {@link Token#EOF} and {@link Token#EPSILON}, otherwise
 * the display name from {@code vocabulary}.
 */
@NotNull
protected String elementName(@NotNull Vocabulary vocabulary, int a) {
    if (a == Token.EOF) {
        return "<EOF>";
    }

    if (a == Token.EPSILON) {
        return "<EPSILON>";
    }

    return vocabulary.getDisplayName(a);
}
@Override
public int size() {

View File

@ -220,7 +220,7 @@ public class ParseTreePatternMatcher {
CommonTokenStream tokens = new CommonTokenStream(tokenSrc);
ParserInterpreter parserInterp = new ParserInterpreter(parser.getGrammarFileName(),
Arrays.asList(parser.getTokenNames()),
parser.getVocabulary(),
Arrays.asList(parser.getRuleNames()),
parser.getATNWithBypassAlts(),
tokens);

View File

@ -238,20 +238,16 @@ public class <parser.name> extends <superClass; null="Parser"> {
public static final int
<parser.tokens:{k | <k>=<parser.tokens.(k)>}; separator=", ", wrap, anchor>;
<endif>
public static final String[] tokenNames = {
<parser.tokenNames:{t | <t>}; null="\"\<INVALID>\"", separator=", ", wrap, anchor>
};
public static final int
<parser.rules:{r | RULE_<r.name> = <r.index>}; separator=", ", wrap, anchor>;
public static final String[] ruleNames = {
<parser.ruleNames:{r | "<r>"}; separator=", ", wrap, anchor>
};
@Override
public String getGrammarFileName() { return "<parser.grammarFileName; format="java-escape">"; }
<vocabulary(parser.literalNames, parser.symbolicNames)>
@Override
public String[] getTokenNames() { return tokenNames; }
public String getGrammarFileName() { return "<parser.grammarFileName; format="java-escape">"; }
@Override
public String[] getRuleNames() { return ruleNames; }
@ -281,6 +277,47 @@ case <f.ruleIndex>: return <f.name>_sempred((<f.ctxType>)_localctx, predIndex);}
}
>>
vocabulary(literalNames, symbolicNames) ::= <<
private static final String[] _LITERAL_NAMES = {
<literalNames:{t | <t>}; null="null", separator=", ", wrap, anchor>
};
private static final String[] _SYMBOLIC_NAMES = {
<symbolicNames:{t | <t>}; null="null", separator=", ", wrap, anchor>
};
public static final Vocabulary VOCABULARY = new VocabularyImpl(_LITERAL_NAMES, _SYMBOLIC_NAMES);
/**
* @deprecated Use {@link #VOCABULARY} instead.
*/
@Deprecated
public static final String[] tokenNames;
static {
tokenNames = new String[_SYMBOLIC_NAMES.length];
for (int i = 0; i \< tokenNames.length; i++) {
tokenNames[i] = VOCABULARY.getLiteralName(i);
if (tokenNames[i] == null) {
tokenNames[i] = VOCABULARY.getSymbolicName(i);
}
if (tokenNames[i] == null) {
tokenNames[i] = "\<INVALID>";
}
}
}
@Override
@Deprecated
public String[] getTokenNames() {
return tokenNames;
}
@Override
@NotNull
public Vocabulary getVocabulary() {
return VOCABULARY;
}
>>
dumpActions(recog, argFuncs, actionFuncs, sempredFuncs) ::= <<
<if(actionFuncs)>
@Override
@ -857,13 +894,12 @@ public class <lexer.name> extends <superClass; null="Lexer"> {
<lexer.modes:{m| "<m>"}; separator=", ", wrap, anchor>
};
public static final String[] tokenNames = {
<lexer.tokenNames:{t | <t>}; null="\"\<INVALID>\"", separator=", ", wrap, anchor>
};
public static final String[] ruleNames = {
<lexer.ruleNames:{r | "<r>"}; separator=", ", wrap, anchor>
};
<vocabulary(lexer.literalNames, lexer.symbolicNames)>
<namedActions.members>
public <lexer.name>(CharStream input) {
@ -874,9 +910,6 @@ public class <lexer.name> extends <superClass; null="Lexer"> {
@Override
public String getGrammarFileName() { return "<lexer.grammarFileName>"; }
@Override
public String[] getTokenNames() { return tokenNames; }
@Override
public String[] getRuleNames() { return ruleNames; }

View File

@ -106,7 +106,7 @@ public class ATNPrinter {
buf.append("-").append(not?"~":"").append(st.toString()).append("->").append(getStateString(t.target)).append('\n');
}
else {
buf.append("-").append(not?"~":"").append(st.label().toString(g.getTokenDisplayNames())).append("->").append(getStateString(t.target)).append('\n');
buf.append("-").append(not?"~":"").append(st.label().toString(g.getVocabulary())).append("->").append(getStateString(t.target)).append('\n');
}
}
else if ( t instanceof AtomTransition ) {

View File

@ -37,6 +37,7 @@ import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
@ -47,7 +48,8 @@ public abstract class Recognizer extends OutputModelObject {
public String grammarName;
public String grammarFileName;
public Map<String,Integer> tokens;
public String[] tokenNames;
public String[] literalNames;
public String[] symbolicNames;
public Set<String> ruleNames;
public Collection<Rule> rules;
@ModelElement public ActionChunk superClass;
@ -71,23 +73,6 @@ public abstract class Recognizer extends OutputModelObject {
}
}
tokenNames = g.getTokenDisplayNames();
for (int i = 0; i < tokenNames.length; i++) {
if ( tokenNames[i]==null ) continue;
CodeGenerator gen = factory.getGenerator();
if ( tokenNames[i].charAt(0)=='\'' ) {
boolean addQuotes = false;
tokenNames[i] =
gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen,
tokenNames[i],
addQuotes);
tokenNames[i] = "\"'"+tokenNames[i]+"'\"";
}
else {
tokenNames[i] = gen.getTarget().getTargetStringLiteralFromString(tokenNames[i], true);
}
}
ruleNames = g.rules.keySet();
rules = g.rules.values();
atn = new SerializedATN(factory, g.atn);
@ -97,5 +82,44 @@ public abstract class Recognizer extends OutputModelObject {
else {
superClass = null;
}
CodeGenerator gen = factory.getGenerator();
literalNames = translateTokenStringsToTarget(g.getTokenLiteralNames(), gen);
symbolicNames = translateTokenStringsToTarget(g.getTokenSymbolicNames(), gen);
}
/**
 * Translates each entry of {@code tokenStrings} with
 * {@link #translateTokenStringToTarget} and returns the results in a new
 * array with any trailing {@code null} entries trimmed off.
 */
protected static String[] translateTokenStringsToTarget(String[] tokenStrings, CodeGenerator gen) {
    String[] translated = new String[tokenStrings.length];
    // one past the index of the last non-null translation seen so far
    int trimmedLength = 0;
    for (int index = 0; index < tokenStrings.length; index++) {
        String targetLiteral = translateTokenStringToTarget(tokenStrings[index], gen);
        translated[index] = targetLiteral;
        if (targetLiteral != null) {
            trimmedLength = index + 1;
        }
    }

    if (trimmedLength == translated.length) {
        return translated;
    }

    return Arrays.copyOf(translated, trimmedLength);
}
/**
 * Converts a single token name into target-language string literal text.
 * A {@code null} name stays {@code null}. A name starting with a single
 * quote is converted with the target's ANTLR-string-literal translation
 * (without added quotes) and wrapped as {@code "'...'"}; any other name is
 * converted with the target's plain-string translation.
 */
protected static String translateTokenStringToTarget(String tokenName, CodeGenerator gen) {
    if (tokenName == null) {
        return null;
    }

    boolean isQuotedLiteral = tokenName.charAt(0) == '\'';
    if (!isQuotedLiteral) {
        return gen.getTarget().getTargetStringLiteralFromString(tokenName, true);
    }

    String escapedLiteral =
        gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen, tokenName, false);
    return "\"'" + escapedLiteral + "'\"";
}
}

View File

@ -189,7 +189,10 @@ public class SemanticPipeline {
for (String lit : conflictingLiterals) {
// Remove literal if repeated across rules so it's not
// found by parser grammar.
G.stringLiteralToTypeMap.remove(lit);
Integer value = G.stringLiteralToTypeMap.remove(lit);
if (value != null && value > 0 && value < G.typeToStringLiteralList.size() && lit.equals(G.typeToStringLiteralList.get(value))) {
G.typeToStringLiteralList.set(value, null);
}
}
}

View File

@ -292,7 +292,7 @@ public class DOTGenerator {
SetTransition set = (SetTransition)edge;
String label = set.label().toString();
if ( isLexer ) label = set.label().toString(true);
else if ( grammar!=null ) label = set.label().toString(grammar.getTokenDisplayNames());
else if ( grammar!=null ) label = set.label().toString(grammar.getVocabulary());
if ( edge instanceof NotSetTransition ) label = "~"+label;
edgeST.add("label", getEdgeLabel(label));
}
@ -301,7 +301,7 @@ public class DOTGenerator {
RangeTransition range = (RangeTransition)edge;
String label = range.label().toString();
if ( isLexer ) label = range.toString();
else if ( grammar!=null ) label = range.label().toString(grammar.getTokenDisplayNames());
else if ( grammar!=null ) label = range.label().toString(grammar.getVocabulary());
edgeST.add("label", getEdgeLabel(label));
}
else {

View File

@ -45,6 +45,8 @@ import org.antlr.v4.runtime.LexerInterpreter;
import org.antlr.v4.runtime.ParserInterpreter;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.VocabularyImpl;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.ATNSerializer;
@ -767,6 +769,53 @@ public class Grammar implements AttributeResolver {
return tokenNames;
}
/**
 * Gets the literal names assigned to tokens in the grammar.
 *
 * @return a new array of length {@code getMaxTokenType() + 1}, indexed by
 * token type; an entry is {@code null} when neither
 * {@code typeToStringLiteralList} nor {@code stringLiteralToTypeMap}
 * supplies a literal for that type
 */
@NotNull
public String[] getTokenLiteralNames() {
	String[] names = new String[getMaxTokenType() + 1];

	// First pass: copy whatever the type-indexed list already knows.
	int listedCount = Math.min(names.length, typeToStringLiteralList.size());
	for (int type = 0; type < listedCount; type++) {
		names[type] = typeToStringLiteralList.get(type);
	}

	// Second pass: fill remaining gaps from the literal-to-type map,
	// never overwriting an entry set by the first pass.
	for (Map.Entry<String, Integer> mapping : stringLiteralToTypeMap.entrySet()) {
		int type = mapping.getValue();
		boolean inRange = type >= 0 && type < names.length;
		if (inRange && names[type] == null) {
			names[type] = mapping.getKey();
		}
	}

	return names;
}
/**
 * Gets the symbolic names assigned to tokens in the grammar.
 *
 * @return a new array of length {@code getMaxTokenType() + 1}, indexed by
 * token type; an entry is {@code null} when {@code typeToTokenList} has no
 * name for that type or the name starts with
 * {@code AUTO_GENERATED_TOKEN_NAME_PREFIX}
 */
@NotNull
public String[] getTokenSymbolicNames() {
	String[] names = new String[getMaxTokenType() + 1];

	int listedCount = Math.min(names.length, typeToTokenList.size());
	for (int type = 0; type < listedCount; type++) {
		String candidate = typeToTokenList.get(type);
		boolean isUserVisibleName =
			candidate != null && !candidate.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX);
		if (isUserVisibleName) {
			names[type] = candidate;
		}
	}

	return names;
}
/**
 * Gets a {@link Vocabulary} instance describing the vocabulary used by the
 * grammar.
 *
 * @return a new {@link VocabularyImpl} built from the current results of
 * {@link #getTokenLiteralNames()} and {@link #getTokenSymbolicNames()}
 */
@NotNull
public Vocabulary getVocabulary() {
	String[] literalNames = getTokenLiteralNames();
	String[] symbolicNames = getTokenSymbolicNames();
	return new VocabularyImpl(literalNames, symbolicNames);
}
/** Given an arbitrarily complex SemanticContext, walk the "tree" and get display string.
* Pull predicates from grammar text.
*/
@ -1251,7 +1300,7 @@ public class Grammar implements AttributeResolver {
char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn);
ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
return new LexerInterpreter(fileName, Arrays.asList(getTokenDisplayNames()), Arrays.asList(getRuleNames()), ((LexerGrammar)this).modes.keySet(), deserialized, input);
return new LexerInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), ((LexerGrammar)this).modes.keySet(), deserialized, input);
}
public ParserInterpreter createParserInterpreter(TokenStream tokenStream) {
@ -1261,6 +1310,6 @@ public class Grammar implements AttributeResolver {
char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn);
ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
return new ParserInterpreter(fileName, Arrays.asList(getTokenDisplayNames()), Arrays.asList(getRuleNames()), deserialized, tokenStream);
return new ParserInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), deserialized, tokenStream);
}
}

View File

@ -73,6 +73,7 @@ public class ParserInterpreterForTesting {
}
@Override
@Deprecated
public String[] getTokenNames() {
return g.getTokenNames();
}

View File

@ -525,7 +525,7 @@ public class TestATNParserPrediction extends BaseTest {
nvae.printStackTrace(System.err);
}
DFA dfa = interp.parser.decisionToDFA[decision];
assertEquals(dfaString[i], dfa.toString(g.getTokenDisplayNames()));
assertEquals(dfaString[i], dfa.toString(g.getVocabulary()));
}
}
}

View File

@ -72,8 +72,6 @@ import org.junit.Test;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.ref.Reference;
import java.lang.ref.SoftReference;
import java.lang.ref.WeakReference;
@ -1237,7 +1235,7 @@ public class TestPerformance extends BaseTest {
if (USE_PARSER_INTERPRETER) {
Parser referenceParser = parserCtor.newInstance(tokens);
parser = new ParserInterpreter(referenceParser.getGrammarFileName(), Arrays.asList(referenceParser.getTokenNames()), Arrays.asList(referenceParser.getRuleNames()), referenceParser.getATN(), tokens);
parser = new ParserInterpreter(referenceParser.getGrammarFileName(), referenceParser.getVocabulary(), Arrays.asList(referenceParser.getRuleNames()), referenceParser.getATN(), tokens);
}
else {
parser = parserCtor.newInstance(tokens);
@ -1318,7 +1316,7 @@ public class TestPerformance extends BaseTest {
if (USE_PARSER_INTERPRETER) {
Parser referenceParser = parserCtor.newInstance(tokens);
parser = new ParserInterpreter(referenceParser.getGrammarFileName(), Arrays.asList(referenceParser.getTokenNames()), Arrays.asList(referenceParser.getRuleNames()), referenceParser.getATN(), tokens);
parser = new ParserInterpreter(referenceParser.getGrammarFileName(), referenceParser.getVocabulary(), Arrays.asList(referenceParser.getRuleNames()), referenceParser.getATN(), tokens);
}
else {
parser = parserCtor.newInstance(tokens);

View File

@ -0,0 +1,79 @@
/*
* [The "BSD license"]
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.test;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.VocabularyImpl;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests for {@link VocabularyImpl}: the shared empty vocabulary and
 * vocabularies created from a single array of token names.
 *
 * @author Sam Harwell
 */
public class TestVocabulary extends BaseTest {

	/** The empty vocabulary exists, names EOF, and displays type 0 as "0". */
	@Test
	public void testEmptyVocabulary() {
		Vocabulary empty = VocabularyImpl.EMPTY_VOCABULARY;
		Assert.assertNotNull(empty);
		Assert.assertEquals("EOF", empty.getSymbolicName(Token.EOF));
		Assert.assertEquals("0", empty.getDisplayName(Token.INVALID_TYPE));
	}

	/**
	 * A vocabulary built from token names reports quoted names as literal
	 * names, upper-case-initial names as symbolic names, and anything else
	 * as neither; the display name is always the original entry.
	 */
	@Test
	public void testVocabularyFromTokenNames() {
		String[] tokenNames = {
			"<INVALID>",
			"TOKEN_REF", "RULE_REF", "'//'", "'/'", "'*'", "'!'", "ID", "STRING"
		};

		Vocabulary vocabulary = VocabularyImpl.fromTokenNames(tokenNames);
		Assert.assertNotNull(vocabulary);
		Assert.assertEquals("EOF", vocabulary.getSymbolicName(Token.EOF));

		for (int type = 0; type < tokenNames.length; type++) {
			String name = tokenNames[type];
			Assert.assertEquals(name, vocabulary.getDisplayName(type));

			boolean expectLiteral = name.startsWith("'");
			boolean expectSymbolic = !expectLiteral && Character.isUpperCase(name.charAt(0));

			if (expectLiteral) {
				Assert.assertEquals(name, vocabulary.getLiteralName(type));
			}
			else {
				Assert.assertNull(vocabulary.getLiteralName(type));
			}

			if (expectSymbolic) {
				Assert.assertEquals(name, vocabulary.getSymbolicName(type));
			}
			else {
				Assert.assertNull(vocabulary.getSymbolicName(type));
			}
		}
	}
}