Added TokenFactory and CommonTokenFactory. Updated the parser and the lexer with methods to set the factory. Altered the default error strategy and the lexer to use the factory. The parser's set-token-factory method updates the token source (usually the lexer) and the error handling strategy. I had to add the set-token-factory method to the token source as well to make all of this work.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9789]
This commit is contained in:
parrt 2012-01-03 10:58:01 -08:00
parent 3aeeb2b277
commit 5c3c8d6e7a
11 changed files with 185 additions and 69 deletions

View File

@ -31,10 +31,8 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
* TODO: what to do about lexers
*/
public interface ANTLRErrorStrategy {
/** Report any kind of RecognitionException. */
void reportError(@NotNull Parser recognizer,
@Nullable RecognitionException e)
throws RecognitionException;
/** To create missing tokens, we need a factory */
public void setTokenFactory(TokenFactory<?> factory);
/** When matching elements within alternative, use this method
* to recover. The default implementation uses single token
@ -113,6 +111,11 @@ public interface ANTLRErrorStrategy {
*/
void endErrorCondition(@NotNull Parser recognizer);
/** Report any kind of RecognitionException. */
void reportError(@NotNull Parser recognizer,
@Nullable RecognitionException e)
throws RecognitionException;
/** Called when the parser detects a true ambiguity: an input sequence can be matched
* literally by two or more paths through the grammar. ANTLR resolves the ambiguity in
* favor of the alternative appearing first in the grammar. The start and stop index are
@ -124,17 +127,6 @@ public interface ANTLRErrorStrategy {
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs);
/** Called by the parser when it detects an input sequence that can be matched by two paths
* through the grammar. The difference between this and the reportAmbiguity method lies in
* the difference between Strong LL parsing and LL parsing. If we are not parsing with context,
* we can't be sure if a conflict is an ambiguity or simply a weakness in the Strong LL parsing
* strategy. If we are parsing with full context, this method is never called.
*/
// void reportConflict(@NotNull BaseRecognizer recognizer,
// int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
// @NotNull OrderedHashSet<ATNConfig> configs);
void reportAttemptingFullContext(@NotNull Parser recognizer,
@NotNull DFA dfa,
int startIndex, int stopIndex,

View File

@ -0,0 +1,55 @@
/*
[The "BSD license"]
Copyright (c) 2012 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime;
/**
 * A {@link TokenFactory} that builds {@link CommonToken} objects.
 */
public class CommonTokenFactory implements TokenFactory<CommonToken> {
    /** Shared instance; Lexer and DefaultErrorStrategy start out with this factory. */
    public static final TokenFactory<CommonToken> DEFAULT = new CommonTokenFactory();

    /**
     * Build a fully positioned token.
     *
     * When {@code text} is non-null it is installed as the token's text and
     * the start and stop indexes are reset to -1; otherwise the token keeps
     * the {@code start}..{@code stop} range it was constructed with.
     */
    @Override
    public CommonToken create(TokenSource source, int type, String text,
                              int channel, int start, int stop,
                              int line, int charPositionInLine)
    {
        final CommonToken token = new CommonToken(source, type, channel, start, stop);
        token.setLine(line);
        token.setCharPositionInLine(charPositionInLine);
        if ( text==null ) {
            return token;
        }
        // Explicit text override: store it and wipe the start/stop indexes.
        token.setText(text);
        token.setStartIndex(-1);
        token.setStopIndex(-1);
        return token;
    }

    /** Build a token from nothing more than a type and its text. */
    @Override
    public CommonToken create(int type, String text) {
        return new CommonToken(type, text);
    }
}

View File

@ -39,6 +39,9 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
* and tree parsers.
*/
public class DefaultErrorStrategy implements ANTLRErrorStrategy {
/** How to create token objects */
protected TokenFactory<?> _factory = CommonTokenFactory.DEFAULT;
/** This is true after we see an error and before having successfully
* matched a token. Prevents generation of more than one error message
* per error.
@ -55,6 +58,11 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
protected IntervalSet lastErrorStates;
/** Replace the factory this strategy uses when it has to conjure up a missing token. */
@Override
public void setTokenFactory(TokenFactory<?> factory) {
    _factory = factory;
}
@Override
public void beginErrorCondition(Parser recognizer) {
errorRecoveryMode = true;
@ -354,26 +362,20 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
*/
protected Token getMissingSymbol(Parser recognizer) {
Token currentSymbol = recognizer.getCurrentToken();
if (!(currentSymbol instanceof Token)) {
throw new UnsupportedOperationException("This error strategy only supports Token symbols.");
}
IntervalSet expecting = getExpectedTokens(recognizer);
int expectedTokenType = expecting.getMinElement(); // get any element
String tokenText;
if ( expectedTokenType== Token.EOF ) tokenText = "<missing EOF>";
else tokenText = "<missing "+recognizer.getTokenNames()[expectedTokenType]+">";
CommonToken t = new CommonToken(expectedTokenType, tokenText);
Token current = (Token)currentSymbol;
Token current = currentSymbol;
if ( current.getType() == Token.EOF ) {
current = ((TokenStream)recognizer.getInputStream()).LT(-1);
current = recognizer.getInputStream().LT(-1);
}
t.line = current.getLine();
t.charPositionInLine = current.getCharPositionInLine();
t.channel = Token.DEFAULT_CHANNEL;
t.source = current.getTokenSource();
t.index = -1; // indicate we conjured this up because it has no index
return (Token)t;
return
_factory.create(current.getTokenSource(), expectedTokenType, tokenText,
Token.DEFAULT_CHANNEL,
-1, -1,
current.getLine(), current.getCharPositionInLine());
}
public IntervalSet getExpectedTokens(Parser recognizer) {

View File

@ -50,7 +50,10 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
public static final int MIN_CHAR_VALUE = '\u0000';
public static final int MAX_CHAR_VALUE = '\uFFFE';
public CharStream input;
public CharStream _input;
/** How to create token objects */
protected TokenFactory<?> _factory = CommonTokenFactory.DEFAULT;
/** The goal of all lexer rules/methods is to create a token object.
* This is an instance variable as multiple rules may collaborate to
@ -94,13 +97,13 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
public String text;
public Lexer(CharStream input) {
this.input = input;
this._input = input;
}
public void reset() {
// wack Lexer state variables
if ( input!=null ) {
input.seek(0); // rewind the input
if ( _input !=null ) {
_input.seek(0); // rewind the input
}
token = null;
type = Token.INVALID_TYPE;
@ -124,13 +127,13 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
*/
@Override
public Token nextToken() {
if ( hitEOF ) return emitEOF();
if ( hitEOF ) return anEOF();
outer:
while (true) {
token = null;
channel = Token.DEFAULT_CHANNEL;
tokenStartCharIndex = input.index();
tokenStartCharIndex = _input.index();
tokenStartCharPositionInLine = getInterpreter().getCharPositionInLine();
tokenStartLine = getInterpreter().getLine();
text = null;
@ -141,14 +144,14 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
// " at index "+input.index());
int ttype;
try {
ttype = getInterpreter().match(input, mode);
ttype = getInterpreter().match(_input, mode);
}
catch (LexerNoViableAltException e) {
notifyListeners(e); // report error
recover(e);
ttype = SKIP;
}
if ( input.LA(1)==CharStream.EOF ) {
if ( _input.LA(1)==CharStream.EOF ) {
hitEOF = true;
}
if ( type == Token.INVALID_TYPE ) type = ttype;
@ -195,22 +198,27 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
return mode;
}
/** Replace the factory this lexer uses to create the tokens it emits. */
@Override
public void setTokenFactory(TokenFactory<?> factory) {
    _factory = factory;
}
/** Set the char stream and reset the lexer */
@Override
public void setInputStream(IntStream input) {
this.input = null;
this._input = null;
reset();
this.input = (CharStream)input;
this._input = (CharStream)input;
}
@Override
public String getSourceName() {
return input.getSourceName();
return _input.getSourceName();
}
@Override
public CharStream getInputStream() {
return input;
return _input;
}
/** Currently does not support multiple emits per nextToken invocation
@ -228,35 +236,25 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
* outermost lexical rule. The token object should point into the
* char buffer start..stop. If there is a text override in 'text',
* use that to set the token's text. Override this method to emit
* custom Token objects.
*
* If you are building trees, then you should also override
* Parser or TreeParser.getMissingSymbol().
* custom Token objects or provide a new factory.
*/
public Token emit() {
WritableToken t = new CommonToken(this, type,
channel, tokenStartCharIndex,
getCharIndex()-1);
t.setLine(tokenStartLine);
if ( text!=null ) t.setText(text);
t.setCharPositionInLine(tokenStartCharPositionInLine);
Token t = _factory.create(this, type, text, channel, tokenStartCharIndex, getCharIndex()-1,
tokenStartLine, tokenStartCharPositionInLine);
emit(t);
return t;
}
public Token emitEOF() {
WritableToken eof = new CommonToken(this,Token.EOF,
Token.DEFAULT_CHANNEL,
input.index(),input.index()-1);
eof.setLine(getLine());
public Token anEOF() {
int cpos = getCharPositionInLine();
// The character position for EOF is one beyond the position of
// the previous token's last character
int cpos = getCharPositionInLine();
if ( token!=null ) {
int n = token.getStopIndex() - token.getStartIndex() + 1;
cpos = token.getCharPositionInLine()+n;
}
eof.setCharPositionInLine(cpos);
Token eof = _factory.create(this, Token.EOF, null, channel, _input.index(), _input.index()-1,
getLine(), cpos);
return eof;
}
@ -272,7 +270,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
/** What is the index of the current character of lookahead? */
public int getCharIndex() {
return input.index();
return _input.index();
}
/** Return the text matched so far for the current token or any
@ -282,7 +280,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
if ( text!=null ) {
return text;
}
return getInterpreter().getText(input);
return getInterpreter().getText(_input);
// return ((CharStream)input).substring(tokenStartCharIndex,getCharIndex()-1);
}
@ -318,12 +316,12 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
}
public void recover(LexerNoViableAltException e) {
getInterpreter().consume(input); // skip a char and try again
getInterpreter().consume(_input); // skip a char and try again
}
public void notifyListeners(LexerNoViableAltException e) {
String msg = "token recognition error at: '"+
input.substring(tokenStartCharIndex,input.index())+"'";
_input.substring(tokenStartCharIndex, _input.index())+"'";
ANTLRErrorListener<Integer>[] listeners = getListeners();
if ( listeners.length == 0 ) {
System.err.println("line "+tokenStartLine+":"+
@ -364,6 +362,6 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
//System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
//re.printStackTrace();
// TODO: Do we lose character or line position information?
input.consume();
_input.consume();
}
}

View File

@ -159,6 +159,13 @@ public abstract class Parser extends Recognizer<Token, v2ParserATNSimulator<Toke
return syntaxErrors;
}
/** Hand the new token factory to both collaborators that create tokens:
 *  the token source behind our input stream and the error strategy.
 */
@Override
public void setTokenFactory(TokenFactory<?> factory) {
    TokenSource source = _input.getTokenSource();
    source.setTokenFactory(factory);
    _errHandler.setTokenFactory(factory);
}
@Override
public TokenStream getInputStream() { return getTokenStream(); }

View File

@ -134,4 +134,6 @@ public abstract class Recognizer<Symbol, ATNInterpreter extends ATNSimulator> {
public abstract IntStream getInputStream();
public abstract void setInputStream(IntStream input);
/** Set the factory used to create token objects. */
public abstract void setTokenFactory(TokenFactory<?> factory);
}

View File

@ -35,7 +35,7 @@ package org.antlr.v4.runtime;
*/
public interface Token {
public static final int INVALID_TYPE = 0;
public static final Token INVALID_TOKEN = new CommonToken(INVALID_TYPE);
// public static final Token INVALID_TOKEN = new CommonToken(INVALID_TYPE);
public static final int MIN_TOKEN_TYPE = 1;
/** During lookahead operations, this "token" signifies we hit rule end ATN state

View File

@ -0,0 +1,47 @@
/*
[The "BSD license"]
Copyright (c) 2012 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime;
/** The mechanism for creating tokens. A factory is used by the Lexer and by
 * the error handling strategy (to create missing tokens); both start out with
 * CommonTokenFactory.DEFAULT. Notifying the parser of a new factory means that
 * it notifies its token source and error strategy.
 */
public interface TokenFactory<Symbol extends Token> {
/** This is the method used to create tokens in the lexer and in the
 * error handling strategy. If text!=null, then the start and stop positions
 * are wiped to -1 and the text override is set in the CommonToken.
 */
Symbol create(TokenSource source, int type, String text,
int channel, int start, int stop,
int line, int charPositionInLine);
/** Create a token from just a type and its text. Generically useful. */
Symbol create(int type, String text);
}

View File

@ -62,4 +62,7 @@ public interface TokenSource {
* ask lexers input stream.
*/
public String getSourceName();
/** Optional method that lets users set factory in lexer or other source */
public void setTokenFactory(TokenFactory<?> factory);
}

View File

@ -60,6 +60,11 @@ public class LexerInterpreter implements TokenSource {
public String getSourceName() { return g.name; }
/** Accepts a token factory but does nothing with it yet. */
@Override
public void setTokenFactory(TokenFactory<?> factory) {
// TODO: use TokenFactory; until then the supplied factory is ignored
}
public int getCharPositionInLine() {
return 0;
}
@ -79,6 +84,7 @@ public class LexerInterpreter implements TokenSource {
int tokenStartLine = interp.getLine();
int ttype = interp.match(input, Lexer.DEFAULT_MODE);
int stop = input.index()-1;
// TODO: use TokenFactory
WritableToken t = new CommonToken(this, ttype, Token.DEFAULT_CHANNEL, start, stop);
t.setLine(tokenStartLine);
t.setCharPositionInLine(tokenStartCharPositionInLine);

View File

@ -209,6 +209,10 @@ public class TestCommonTokenStream extends BaseTest {
public CharStream getInputStream() {
return null;
}
/** Required by TokenSource; intentionally empty, so this stub ignores the factory. */
@Override
public void setTokenFactory(TokenFactory<?> factory) {
}
};
CommonTokenStream tokens = new CommonTokenStream(lexer);