tokens now have token and char source to draw from. fix and close antlr/antlr4#88

This commit is contained in:
Terence Parr 2012-12-01 17:23:50 -08:00
parent 35202df715
commit 9e3907d573
8 changed files with 49 additions and 26 deletions

View File

@ -14,6 +14,8 @@ December 1, 2012
* label+='foo' wasn't generating good code. It was generating token type as
variable name. Now, I gen "s<ttype>" for implicit labels on string literals.
* tokens now have token and char source to draw from.
November 30, 2012
* Maven updates (cleanup, unification, and specify Java 6 bootstrap classpath)

View File

@ -29,6 +29,8 @@
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Pair;
import java.io.Serializable;
@ -37,8 +39,7 @@ public class CommonToken implements WritableToken, Serializable {
protected int line;
protected int charPositionInLine = -1; // set to invalid position
protected int channel=DEFAULT_CHANNEL;
protected TokenSource source;
// TODO: rm protected transient CharStream input;
protected Pair<TokenSource, CharStream> source;
/** We need to be able to change the text once in a while. If
* this is non-null, then getText should return this. Note that
@ -60,15 +61,15 @@ public class CommonToken implements WritableToken, Serializable {
this.type = type;
}
public CommonToken(TokenSource source, int type, int channel, int start, int stop) {
public CommonToken(@NotNull Pair<TokenSource, CharStream> source, int type, int channel, int start, int stop) {
this.source = source;
this.type = type;
this.channel = channel;
this.start = start;
this.stop = stop;
if (source != null) {
this.line = source.getLine();
this.charPositionInLine = source.getCharPositionInLine();
if (source.a != null) {
this.line = source.a.getLine();
this.charPositionInLine = source.a.getCharPositionInLine();
}
}
@ -85,9 +86,15 @@ public class CommonToken implements WritableToken, Serializable {
index = oldToken.getTokenIndex();
charPositionInLine = oldToken.getCharPositionInLine();
channel = oldToken.getChannel();
source = oldToken.getTokenSource();
start = oldToken.getStartIndex();
stop = oldToken.getStopIndex();
if (oldToken instanceof CommonToken) {
source = ((CommonToken)oldToken).source;
}
else {
source = new Pair<TokenSource, CharStream>(oldToken.getTokenSource(), oldToken.getInputStream());
}
}
@Override
@ -105,9 +112,8 @@ public class CommonToken implements WritableToken, Serializable {
if ( text!=null ) {
return text;
}
TokenSource tokens = getTokenSource();
if ( tokens==null ) return null;
CharStream input = tokens.getInputStream();
CharStream input = getInputStream();
if ( input==null ) return null;
int n = input.size();
if ( start<n && stop<n) {
@ -188,11 +194,12 @@ public class CommonToken implements WritableToken, Serializable {
@Override
public TokenSource getTokenSource() {
return source;
return source.a;
}
@Override
public CharStream getInputStream() {
return source != null ? source.getInputStream() : null;
return source.b;
}
@Override

View File

@ -30,6 +30,7 @@
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.Pair;
public class CommonTokenFactory implements TokenFactory<CommonToken> {
public static final TokenFactory<CommonToken> DEFAULT = new CommonTokenFactory();
@ -48,7 +49,7 @@ public class CommonTokenFactory implements TokenFactory<CommonToken> {
/** No-argument constructor; delegates to the one-argument constructor with {@code false}.
 *  NOTE(review): the flag is presumably the copyText setting read in create() -- confirm. */
public CommonTokenFactory() { this(false); }
@Override
public CommonToken create(TokenSource source, int type, String text,
public CommonToken create(Pair<TokenSource, CharStream> source, int type, String text,
int channel, int start, int stop,
int line, int charPositionInLine)
{
@ -58,12 +59,10 @@ public class CommonTokenFactory implements TokenFactory<CommonToken> {
if ( text!=null ) {
t.setText(text);
}
else {
if ( copyText ) {
CharStream input = source.getInputStream();
t.setText(input.getText(Interval.of(start,stop)));
}
else if ( copyText && source.b != null ) {
t.setText(source.b.getText(Interval.of(start,stop)));
}
return t;
}

View File

@ -39,6 +39,7 @@ import org.antlr.v4.runtime.atn.StarLoopEntryState;
import org.antlr.v4.runtime.atn.StarLoopbackState;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Pair;
/** This is the default error handling mechanism for ANTLR parsers
* and tree parsers.
@ -380,7 +381,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
current = lookback;
}
return
_factory.create(current.getTokenSource(), expectedTokenType, tokenText,
_factory.create(new Pair<TokenSource, CharStream>(current.getTokenSource(), current.getTokenSource().getInputStream()), expectedTokenType, tokenText,
Token.DEFAULT_CHANNEL,
-1, -1,
current.getLine(), current.getCharPositionInLine());

View File

@ -31,6 +31,7 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.misc.IntegerStack;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.Pair;
import java.util.ArrayList;
import java.util.EmptyStackException;
@ -54,6 +55,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
public static final int MAX_CHAR_VALUE = '\uFFFE';
public CharStream _input;
protected Pair<TokenSource, CharStream> _tokenFactorySourcePair;
/** How to create token objects */
protected TokenFactory<?> _factory = CommonTokenFactory.DEFAULT;
@ -103,6 +105,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
/** Creates a lexer that reads characters from {@code input}.
 *  Also builds the (token source, char stream) pair that is later passed
 *  to the token factory whenever a token is created.
 *
 *  @param input the character stream to tokenize
 */
public Lexer(CharStream input) {
this._input = input;
// Built once here so emit() can reuse the same pair object for every token.
this._tokenFactorySourcePair = new Pair<TokenSource, CharStream>(this, input);
}
public void reset() {
@ -228,8 +231,10 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
/** Replaces the lexer's input stream and resets the lexer.
 *  The (token source, char stream) pair handed to the token factory is
 *  rebuilt so that it always matches the current value of {@code _input}.
 *
 *  @param input the new input; it is cast to {@link CharStream}, so passing
 *         any other kind of {@link IntStream} fails with a ClassCastException
 */
@Override
public void setInputStream(IntStream input) {
// Detach the current stream before resetting; the pair is rebuilt with a
// null char stream so it never refers to the stream being replaced.
// NOTE(review): presumably this keeps reset() from touching the old stream -- confirm against reset().
this._input = null;
this._tokenFactorySourcePair = new Pair<TokenSource, CharStream>(this, _input);
reset();
// Install the new stream and rebuild the pair to point at it.
this._input = (CharStream)input;
this._tokenFactorySourcePair = new Pair<TokenSource, CharStream>(this, _input);
}
@Override
@ -259,7 +264,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
* custom Token objects or provide a new factory.
*/
public Token emit() {
Token t = _factory.create(this, _type, _text, _channel, _tokenStartCharIndex, getCharIndex()-1,
Token t = _factory.create(_tokenFactorySourcePair, _type, _text, _channel, _tokenStartCharIndex, getCharIndex()-1,
_tokenStartLine, _tokenStartCharPositionInLine);
emit(t);
return t;
@ -273,7 +278,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
int n = _token.getStopIndex() - _token.getStartIndex() + 1;
cpos = _token.getCharPositionInLine()+n;
}
Token eof = _factory.create(this, Token.EOF, null, Token.DEFAULT_CHANNEL, _input.index(), _input.index()-1,
Token eof = _factory.create(_tokenFactorySourcePair, Token.EOF, null, Token.DEFAULT_CHANNEL, _input.index(), _input.index()-1,
getLine(), cpos);
emit(eof);
return eof;

View File

@ -97,8 +97,12 @@ public interface Token {
*/
int getStopIndex();
/** Where does this token come from? You can get the
* character input stream from the token source.
/** Gets the {@link TokenSource} which created this token.
*/
TokenSource getTokenSource();
/**
* Gets the {@link CharStream} from which this token was derived.
*/
CharStream getInputStream();
}

View File

@ -29,6 +29,8 @@
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Pair;
/** The default mechanism for creating tokens. It's used by default in Lexer and
* the error handling strategy (to create missing tokens). Notifying the parser
* of a new factory means that it notifies its token source and error strategy.
@ -38,7 +40,7 @@ public interface TokenFactory<Symbol extends Token> {
* error handling strategy. If text!=null, then the start and stop positions
* are wiped to -1 and the text override is set in the CommonToken.
*/
Symbol create(TokenSource source, int type, String text,
Symbol create(Pair<TokenSource, CharStream> source, int type, String text,
int channel, int start, int stop,
int line, int charPositionInLine);

View File

@ -40,12 +40,14 @@ import org.antlr.v4.runtime.TokenSource;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.PredictionContextCache;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.tool.LexerGrammar;
public class LexerInterpreter implements TokenSource {
protected LexerGrammar g;
protected LexerATNSimulator interp;
protected CharStream input;
protected Pair<TokenSource, CharStream> tokenFactorySourcePair;
/** How to create token objects */
protected TokenFactory<?> _factory = CommonTokenFactory.DEFAULT;
@ -66,11 +68,12 @@ public class LexerInterpreter implements TokenSource {
}
public void setInput(String inputString) {
input = new ANTLRInputStream(inputString);
setInput(new ANTLRInputStream(inputString));
}
/** Sets the character stream this interpreter reads from.
 *  Also rebuilds the (token source, char stream) pair that is passed to
 *  the token factory when a token is created.
 *
 *  @param input the character stream to tokenize
 */
public void setInput(CharStream input) {
this.input = input;
// Keep the factory pair in sync with the newly assigned stream.
this.tokenFactorySourcePair = new Pair<TokenSource, CharStream>(this, input);
}
@Override
@ -112,7 +115,7 @@ public class LexerInterpreter implements TokenSource {
int ttype = interp.match(input, Lexer.DEFAULT_MODE);
int stop = input.index()-1;
return _factory.create(this, ttype, null, Token.DEFAULT_CHANNEL, start, stop,
return _factory.create(tokenFactorySourcePair, ttype, null, Token.DEFAULT_CHANNEL, start, stop,
tokenStartLine, tokenStartCharPositionInLine);
}
finally {