Implement support for defining custom channels in the channels{} construct in lexer grammars (fixes #309)

This commit is contained in:
Sam Harwell 2014-09-01 13:28:08 -05:00
parent 561383c34c
commit cacd317d02
13 changed files with 358 additions and 5 deletions

View File

@ -57,6 +57,19 @@ public interface Token {
*/
public static final int HIDDEN_CHANNEL = 1;
/**
* This is the minimum constant value which can be assigned to a
* user-defined token channel.
*
* <p>
* The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are
* assigned to the predefined channels {@link #DEFAULT_CHANNEL} and
* {@link #HIDDEN_CHANNEL}.</p>
*
* @see Token#getChannel()
*/
public static final int MIN_USER_CHANNEL_VALUE = 2;
/**
* Get the text of the token.
*/

View File

@ -848,6 +848,10 @@ public class <lexer.name> extends <superClass; null="Lexer"> {
new PredictionContextCache();
public static final int
<lexer.tokens:{k | <k>=<lexer.tokens.(k)>}; separator=", ", wrap, anchor>;
<if(lexer.channels)>
public static final int
<lexer.channels:{k | <k>=<lexer.channels.(k)>}; separator=", ", wrap, anchor>;
<endif>
<rest(lexer.modes):{m| public static final int <m> = <i>;}; separator="\n">
public static String[] modeNames = {
<lexer.modes:{m| "<m>"}; separator=", ", wrap, anchor>

View File

@ -464,6 +464,11 @@ public class LexerATNFactory extends ParserATNFactory {
return tokenType;
}
int channelValue = g.getChannelValue(name);
if (channelValue >= org.antlr.v4.runtime.Token.MIN_USER_CHANNEL_VALUE) {
return channelValue;
}
List<String> modeNames = new ArrayList<String>(((LexerGrammar)g).modes.keySet());
int mode = modeNames.indexOf(name);
if (mode >= 0) {

View File

@ -48,6 +48,7 @@ public class Lexer extends OutputModelObject {
public String name;
public String grammarFileName;
public Map<String,Integer> tokens;
public Map<String,Integer> channels;
public LexerFile file;
public String[] tokenNames;
public Set<String> ruleNames;
@ -67,6 +68,7 @@ public class Lexer extends OutputModelObject {
grammarFileName = new File(g.fileName).getName();
name = g.getRecognizerName();
tokens = new LinkedHashMap<String,Integer>();
channels = new LinkedHashMap<String,Integer>();
LexerGrammar lg = (LexerGrammar)g;
atn = new SerializedATN(factory, lg.atn);
modes = lg.modes.keySet();
@ -76,6 +78,10 @@ public class Lexer extends OutputModelObject {
if ( ttype>0 ) tokens.put(t, ttype);
}
for (Map.Entry<String, Integer> channel : g.channelNameToValueMap.entrySet()) {
channels.put(channel.getKey(), channel.getValue());
}
tokenNames = g.getTokenDisplayNames();
for (int i = 0; i < tokenNames.length; i++) {
if ( tokenNames[i]==null ) continue;

View File

@ -434,12 +434,13 @@ NESTED_ACTION
// keywords used to specify ANTLR v3 grammars. Keywords may not be used as
// labels for rules or in any other context where they would be ambiguous
// with the keyword vs some other identifier
// OPTIONS and TOKENS must also consume the opening brace that captures
// their option block, as this is teh easiest way to parse it separate
// to an ACTION block, despite it usingthe same {} delimiters.
// OPTIONS, TOKENS, and CHANNELS must also consume the opening brace that captures
// their option block, as this is the easiest way to parse it separate
// to an ACTION block, despite it using the same {} delimiters.
//
OPTIONS : 'options' WSNLCHARS* '{' ;
TOKENS_SPEC : 'tokens' WSNLCHARS* '{' ;
OPTIONS : 'options' WSNLCHARS* '{' ;
TOKENS_SPEC : 'tokens' WSNLCHARS* '{' ;
CHANNELS : 'channels' WSNLCHARS* '{' ;
IMPORT : 'import' ;
FRAGMENT : 'fragment' ;

View File

@ -237,6 +237,9 @@ prequelConstruct
// {tree} parser.
tokensSpec
| // A list of custom channels used by the grammar
channelsSpec
| // A declaration of language target implemented constructs. All such
// action sections start with '@' and are given to the language target's
// StringTemplate group. For instance @parser::header and @lexer::header
@ -301,6 +304,10 @@ v3tokenSpec
SEMI
;
// A channels{} block: the CHANNELS token (which already includes the opening
// brace) followed by a comma-separated list of one or more channel names and
// the closing brace. CHANNELS becomes the root of the resulting subtree; the
// commas and the closing brace are left out of the tree.
channelsSpec
: CHANNELS^ id (COMMA! id)* RBRACE!
;
// A declaration of a language target specific section,
// such as @header, @includes and so on. We do not verify these
// sections, they are just passed on to the language target.

View File

@ -125,6 +125,7 @@ public void grammarOption(GrammarAST ID, GrammarAST valueAST) { }
public void ruleOption(GrammarAST ID, GrammarAST valueAST) { }
public void blockOption(GrammarAST ID, GrammarAST valueAST) { }
public void defineToken(GrammarAST ID) { }
/** Called with the ID node of each channel matched by the channelSpec rule; does nothing by default. */
public void defineChannel(GrammarAST ID) { }
public void globalNamedAction(GrammarAST scope, GrammarAST ID, ActionAST action) { }
public void importGrammar(GrammarAST label, GrammarAST ID) { }
@ -189,6 +190,12 @@ protected void exitTokensSpec(GrammarAST tree) { }
protected void enterTokenSpec(GrammarAST tree) { }
protected void exitTokenSpec(GrammarAST tree) { }
// Entry/exit hooks for the channelsSpec and channelSpec tree rules. They are
// called from those rules' @init and @after actions with the rule's start
// node. The default implementations are empty.
protected void enterChannelsSpec(GrammarAST tree) { }
protected void exitChannelsSpec(GrammarAST tree) { }
protected void enterChannelSpec(GrammarAST tree) { }
protected void exitChannelSpec(GrammarAST tree) { }
protected void enterAction(GrammarAST tree) { }
protected void exitAction(GrammarAST tree) { }
@ -366,6 +373,7 @@ prequelConstruct
: optionsSpec
| delegateGrammars
| tokensSpec
| channelsSpec
| action
;
@ -450,6 +458,26 @@ tokenSpec
: ID {defineToken($ID);}
;
// Matches the subtree of a channels{} block: a CHANNELS root node with one or
// more channelSpec children. enterChannelsSpec is called with the start node
// before matching and exitChannelsSpec is called with it afterwards.
channelsSpec
@init {
enterChannelsSpec($start);
}
@after {
exitChannelsSpec($start);
}
: ^(CHANNELS channelSpec+)
;
// Matches a single channel name inside a channels{} block and reports it via
// defineChannel. enterChannelSpec / exitChannelSpec bracket the match and
// receive the rule's start node.
channelSpec
@init {
enterChannelSpec($start);
}
@after {
exitChannelSpec($start);
}
: ID {defineChannel($ID);}
;
action
@init {
enterAction($start);

View File

@ -255,6 +255,21 @@ public class BasicSemanticChecks extends GrammarTreeVisitor {
checkTokenDefinition(ID.token);
}
/**
 * Rejects a {@code channels{}} block that appears in a parser grammar or a
 * combined grammar by reporting the matching grammar error. No error is
 * reported for any other grammar type.
 *
 * @param tree the root node of the {@code channels{}} block; its token is
 * passed to the error manager as the error location
 */
@Override
protected void enterChannelsSpec(GrammarAST tree) {
    // Decide which error applies (if any) first, then report it once.
    ErrorType violation = null;
    if (g.isParser()) {
        violation = ErrorType.CHANNELS_BLOCK_IN_PARSER_GRAMMAR;
    }
    else if (g.isCombined()) {
        violation = ErrorType.CHANNELS_BLOCK_IN_COMBINED_GRAMMAR;
    }

    if (violation != null) {
        g.tool.errMgr.grammarError(violation, g.fileName, tree.token);
    }
}
/**
 * Passes the token of a channel name node to
 * {@link #checkChannelDefinition}.
 *
 * @param ID the AST node holding the channel name
 */
@Override
public void defineChannel(GrammarAST ID) {
checkChannelDefinition(ID.token);
}
@Override
public void elementOption(GrammarASTWithOptions elem, GrammarAST ID, GrammarAST valueAST) {
String v = null;
@ -394,6 +409,9 @@ public class BasicSemanticChecks extends GrammarTreeVisitor {
}
}
/**
 * Validation hook for the token naming a channel. The body is empty, so no
 * checks are performed on the channel name here.
 *
 * @param tokenID the token holding the channel name
 */
void checkChannelDefinition(Token tokenID) {
}
@Override
protected void enterLexerElement(GrammarAST tree) {
}

View File

@ -37,6 +37,7 @@ import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.GrammarAST;
@ -127,6 +128,8 @@ public class SemanticPipeline {
collector.tokenIDRefs, collector.terminals);
}
assignChannelTypes(g, collector.channelDefs);
// CHECK RULE REFS NOW (that we've defined rules in grammar)
symcheck.checkRuleArgs(g, collector.rulerefs);
identifyStartRules(collector);
@ -257,4 +260,38 @@ public class SemanticPipeline {
g.tool.log("semantics", "tokens="+g.tokenNameToTypeMap);
g.tool.log("semantics", "strings="+g.stringLiteralToTypeMap);
}
/**
 * Registers each user-defined channel with the outermost grammar so that it
 * receives a constant value, reporting an error for every channel whose
 * name collides with a token or (in lexer grammars) a mode.
 *
 * <p>
 * A channel is still registered after a conflict has been reported for
 * it.</p>
 *
 * @param g The grammar.
 * @param channelDefs The AST nodes naming the individual channels declared
 * in a {@code channels{}} block of the grammar.
 */
void assignChannelTypes(Grammar g, List<GrammarAST> channelDefs) {
    Grammar outermost = g.getOutermostGrammar();

    // Modes are only looked up when the outermost grammar is a lexer
    // grammar; null means the mode check is skipped.
    LexerGrammar lexerGrammar = null;
    if (outermost instanceof LexerGrammar) {
        lexerGrammar = (LexerGrammar)outermost;
    }

    for (GrammarAST channel : channelDefs) {
        String channelName = channel.getText();

        // Tokens, modes and channels are all referenced as plain constants
        // by lexer commands such as ->channel(NAME), which cannot tell them
        // apart, so a channel may not share a name with a token or a mode.
        // Rule names are not checked because they have no constant value
        // in ANTLR grammar semantics.
        boolean aliasesToken = g.getTokenType(channelName) != Token.INVALID_TYPE;
        if (aliasesToken) {
            g.tool.errMgr.grammarError(ErrorType.CHANNEL_CONFLICTS_WITH_TOKEN, g.fileName, channel.token, channelName);
        }

        if (lexerGrammar != null && lexerGrammar.modes.containsKey(channelName)) {
            g.tool.errMgr.grammarError(ErrorType.CHANNEL_CONFLICTS_WITH_MODE, g.fileName, channel.token, channelName);
        }

        outermost.defineChannelName(channelName);
    }
}
}

View File

@ -65,6 +65,7 @@ public class SymbolCollector extends GrammarTreeVisitor {
public List<GrammarAST> tokenIDRefs = new ArrayList<GrammarAST>();
public Set<String> strings = new HashSet<String>();
public List<GrammarAST> tokensDefs = new ArrayList<GrammarAST>();
public List<GrammarAST> channelDefs = new ArrayList<GrammarAST>();
/** Track action name node in @parser::members {...} or @members {...} */
List<GrammarAST> namedActions = new ArrayList<GrammarAST>();
@ -97,6 +98,11 @@ public class SymbolCollector extends GrammarTreeVisitor {
tokensDefs.add(ID);
}
/**
 * Records a channel name node by appending it to {@code channelDefs}.
 *
 * @param ID the AST node holding the channel name
 */
@Override
public void defineChannel(GrammarAST ID) {
channelDefs.add(ID);
}
@Override
public void discoverRule(RuleAST rule, GrammarAST ID,
List<GrammarAST> modifiers, ActionAST arg,

View File

@ -924,6 +924,30 @@ public enum ErrorType {
* @since 4.2.1
*/
RESERVED_RULE_NAME(159, "cannot declare a rule with reserved name <arg>", ErrorSeverity.ERROR),
/**
* Compiler Error 161.
*
* <p>channel <em>name</em> conflicts with token with same name</p>
*/
CHANNEL_CONFLICTS_WITH_TOKEN(161, "channel <arg> conflicts with token with same name", ErrorSeverity.ERROR),
/**
* Compiler Error 162.
*
* <p>channel <em>name</em> conflicts with mode with same name</p>
*/
CHANNEL_CONFLICTS_WITH_MODE(162, "channel <arg> conflicts with mode with same name", ErrorSeverity.ERROR),
/**
* Compiler Error 163.
*
* <p>custom channels are not supported in parser grammars</p>
*/
CHANNELS_BLOCK_IN_PARSER_GRAMMAR(163, "custom channels are not supported in parser grammars", ErrorSeverity.ERROR),
/**
* Compiler Error 164.
*
* <p>custom channels are not supported in combined grammars</p>
*/
CHANNELS_BLOCK_IN_COMBINED_GRAMMAR(164, "custom channels are not supported in combined grammars", ErrorSeverity.ERROR),
/*
* Backward incompatibility errors

View File

@ -231,6 +231,26 @@ public class Grammar implements AttributeResolver {
*/
public final List<String> typeToTokenList = new ArrayList<String>();
/**
* The maximum channel value which is assigned by this grammar. Values below
* {@link Token#MIN_USER_CHANNEL_VALUE} are assumed to be predefined.
*/
int maxChannelType = Token.MIN_USER_CHANNEL_VALUE - 1;
/**
* Map a channel name like {@code COMMENTS_CHANNEL} to its constant channel value.
* Only user-defined channels are defined in this map.
*/
public final Map<String, Integer> channelNameToValueMap = new LinkedHashMap<String, Integer>();
/**
* Map a constant channel value to its name. Indexed with raw channel value.
* The predefined channels {@link Token#DEFAULT_CHANNEL} and
* {@link Token#HIDDEN_CHANNEL} are not stored in this list, so the values
* at the corresponding indexes are {@code null}.
*/
public final List<String> channelValueToNameList = new ArrayList<String>();
/** Map a name to an action.
* The code generator will use this to fill holes in the output files.
* I track the AST node for the action in case I need the line number
@ -665,6 +685,26 @@ public class Grammar implements AttributeResolver {
return INVALID_TOKEN_NAME;
}
/**
 * Looks up the constant value assigned to a user-defined channel.
 *
 * <p>
 * Only channels recorded in {@link #channelNameToValueMap} are found. The
 * predefined channels {@link Token#DEFAULT_CHANNEL} and
 * {@link Token#HIDDEN_CHANNEL}, as well as channels declared only in code
 * (e.g. in a {@code @members{}} block), are not known to this method.</p>
 *
 * @param channel The channel name.
 * @return The channel value if {@code channel} names a known user-defined
 * token channel; otherwise, -1.
 */
public int getChannelValue(String channel) {
    Integer value = channelNameToValueMap.get(channel);
    if (value == null) {
        return -1;
    }
    return value;
}
/**
* Gets an array of rule names for rules defined or imported by the
* grammar. The array index is the rule index, and the value is the name of
@ -812,6 +852,12 @@ public class Grammar implements AttributeResolver {
return maxTokenType;
}
/**
 * Allocates the next unused value in the channel value space by advancing
 * {@code maxChannelType} by one.
 *
 * @return the newly allocated channel value
 */
public int getNewChannelNumber() {
    return ++maxChannelType;
}
public void importTokensFromTokensFile() {
String vocab = getOptionString("tokenVocab");
if ( vocab!=null ) {
@ -832,6 +878,9 @@ public class Grammar implements AttributeResolver {
for (String tokenName: importG.stringLiteralToTypeMap.keySet()) {
defineStringLiteral(tokenName, importG.stringLiteralToTypeMap.get(tokenName));
}
for (Map.Entry<String, Integer> channel : importG.channelNameToValueMap.entrySet()) {
defineChannelName(channel.getKey(), channel.getValue());
}
// this.tokenNameToTypeMap.putAll( importG.tokenNameToTypeMap );
// this.stringLiteralToTypeMap.putAll( importG.stringLiteralToTypeMap );
int max = Math.max(this.typeToTokenList.size(), importG.typeToTokenList.size());
@ -840,6 +889,13 @@ public class Grammar implements AttributeResolver {
maxTokenType = Math.max(maxTokenType, ttype);
this.typeToTokenList.set(ttype, importG.typeToTokenList.get(ttype));
}
max = Math.max(this.channelValueToNameList.size(), importG.channelValueToNameList.size());
Utils.setSize(channelValueToNameList, max);
for (int channelValue = 0; channelValue < importG.channelValueToNameList.size(); channelValue++) {
maxChannelType = Math.max(maxChannelType, channelValue);
this.channelValueToNameList.set(channelValue, importG.channelValueToNameList.get(channelValue));
}
}
public int defineTokenName(String name) {
@ -903,6 +959,68 @@ public class Grammar implements AttributeResolver {
}
}
/**
 * Defines a token channel with the given name, allocating a fresh channel
 * value for it.
 *
 * <p>
 * When the name is already defined, no new value is allocated and the
 * value assigned earlier is returned.</p>
 *
 * @param name The channel name.
 * @return The constant channel value assigned to the channel.
 */
public int defineChannelName(String name) {
    Integer existing = channelNameToValueMap.get(name);
    if (existing != null) {
        return existing;
    }
    // A new number is only allocated for names that are not defined yet.
    return defineChannelName(name, getNewChannelNumber());
}
/**
 * Defines a token channel with the given name and an explicit constant
 * value.
 *
 * <p>
 * When the name is already defined, the existing mapping is left untouched
 * and its value is returned; {@code value} is ignored in that case.</p>
 *
 * @param name The channel name.
 * @param value The constant channel value to assign to the channel.
 * @return The constant channel value assigned to the channel.
 */
public int defineChannelName(String name, int value) {
    Integer existing = channelNameToValueMap.get(name);
    if (existing == null) {
        channelNameToValueMap.put(name, value);
        setChannelNameForValue(value, name);
        // Keep the high-water mark current so later allocations stay unique.
        if (value > maxChannelType) {
            maxChannelType = value;
        }
        return value;
    }
    return existing;
}
/**
 * Records the name that belongs to a constant channel value.
 *
 * <p>
 * The value-to-name list is grown as needed so that {@code channelValue}
 * is a valid index. A name that was assigned to the value earlier is never
 * overwritten.</p>
 *
 * @param channelValue The constant value for the channel.
 * @param name The channel name.
 */
public void setChannelNameForValue(int channelValue, String name) {
    if (channelValueToNameList.size() <= channelValue) {
        Utils.setSize(channelValueToNameList, channelValue + 1);
    }

    boolean unnamed = channelValueToNameList.get(channelValue) == null;
    if (unnamed) {
        channelValueToNameList.set(channelValue, name);
    }
}
// no isolated attr at grammar action level
@Override
public Attribute resolveToAttribute(String x, ActionAST node) {

View File

@ -567,4 +567,90 @@ public class TestToolSyntaxErrors extends BaseTest {
super.testErrors(pair, true);
}
/**
 * A {@code channels{}} block in a lexer grammar, with both channels used by
 * {@code ->channel(...)} commands, is expected to produce no diagnostics.
 */
@Test public void testChannelDefinitionInLexer() throws Exception {
    String grammarText =
        "lexer grammar T;\n" +
        "\n" +
        "channels {\n" +
        " WHITESPACE_CHANNEL,\n" +
        " COMMENT_CHANNEL\n" +
        "}\n" +
        "\n" +
        "COMMENT: '//' ~[\\n]+ -> channel(COMMENT_CHANNEL);\n" +
        "WHITESPACE: [ \\t]+ -> channel(WHITESPACE_CHANNEL);\n";
    String expectedOutput = "";

    super.testErrors(new String[] { grammarText, expectedOutput }, true);
}
/**
 * A {@code channels{}} block in a parser grammar is expected to be rejected
 * with {@link ErrorType#CHANNELS_BLOCK_IN_PARSER_GRAMMAR}.
 */
@Test public void testChannelDefinitionInParser() throws Exception {
    String grammarText =
        "parser grammar T;\n" +
        "\n" +
        "channels {\n" +
        " WHITESPACE_CHANNEL,\n" +
        " COMMENT_CHANNEL\n" +
        "}\n" +
        "\n" +
        "start : EOF;\n";
    String expectedOutput =
        "error(" + ErrorType.CHANNELS_BLOCK_IN_PARSER_GRAMMAR.code + "): T.g4:3:0: custom channels are not supported in parser grammars\n";

    super.testErrors(new String[] { grammarText, expectedOutput }, true);
}
/**
 * A {@code channels{}} block in a combined grammar is expected to be
 * rejected with {@link ErrorType#CHANNELS_BLOCK_IN_COMBINED_GRAMMAR}. The
 * expected output also contains an unknown-constant warning for each
 * {@code ->channel(...)} command in the grammar.
 */
@Test public void testChannelDefinitionInCombined() throws Exception {
    String grammarText =
        "grammar T;\n" +
        "\n" +
        "channels {\n" +
        " WHITESPACE_CHANNEL,\n" +
        " COMMENT_CHANNEL\n" +
        "}\n" +
        "\n" +
        "start : EOF;\n" +
        "\n" +
        "COMMENT: '//' ~[\\n]+ -> channel(COMMENT_CHANNEL);\n" +
        "WHITESPACE: [ \\t]+ -> channel(WHITESPACE_CHANNEL);\n";
    String expectedOutput =
        "warning(" + ErrorType.UNKNOWN_LEXER_CONSTANT.code + "): T.g4:10:35: rule COMMENT contains a lexer command with an unrecognized constant value; lexer interpreters may produce incorrect output\n" +
        "warning(" + ErrorType.UNKNOWN_LEXER_CONSTANT.code + "): T.g4:11:35: rule WHITESPACE contains a lexer command with an unrecognized constant value; lexer interpreters may produce incorrect output\n" +
        "error(" + ErrorType.CHANNELS_BLOCK_IN_COMBINED_GRAMMAR.code + "): T.g4:3:0: custom channels are not supported in combined grammars\n";

    super.testErrors(new String[] { grammarText, expectedOutput }, true);
}
/**
 * Regression test for antlr/antlr4#497, made possible by the resolution of
 * antlr/antlr4#309: referencing a channel that was never declared must
 * still produce the unknown-constant warning, while declared channels must
 * not.
 * https://github.com/antlr/antlr4/issues/497
 * https://github.com/antlr/antlr4/issues/309
 */
@Test public void testChannelDefinitions() throws Exception {
    String grammarText =
        "lexer grammar T;\n" +
        "\n" +
        "channels {\n" +
        " WHITESPACE_CHANNEL,\n" +
        " COMMENT_CHANNEL\n" +
        "}\n" +
        "\n" +
        "COMMENT: '//' ~[\\n]+ -> channel(COMMENT_CHANNEL);\n" +
        "WHITESPACE: [ \\t]+ -> channel(WHITESPACE_CHANNEL);\n" +
        "NEWLINE: '\\r'? '\\n' -> channel(NEWLINE_CHANNEL);";

    // Only NEWLINE_CHANNEL is undeclared, so only the NEWLINE rule is flagged.
    String expectedOutput =
        "warning(" + ErrorType.UNKNOWN_LEXER_CONSTANT.code + "): T.g4:10:34: rule NEWLINE contains a lexer command with an unrecognized constant value; lexer interpreters may produce incorrect output\n";

    super.testErrors(new String[] { grammarText, expectedOutput }, true);
}
}