tokens now have token and char source to draw from. fix and close antlr/antlr4#88
This commit is contained in:
parent
35202df715
commit
9e3907d573
|
@ -14,6 +14,8 @@ December 1, 2012
|
|||
* label+='foo' wasn't generating good code. It was generating token type as
|
||||
variable name. Now, I gen "s<ttype>" for implicit labels on string literals.
|
||||
|
||||
* tokens now have token and char source to draw from.
|
||||
|
||||
November 30, 2012
|
||||
|
||||
* Maven updates (cleanup, unification, and specify Java 6 bootstrap classpath)
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
package org.antlr.v4.runtime;
|
||||
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
import org.antlr.v4.runtime.misc.Pair;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
|
@ -37,8 +39,7 @@ public class CommonToken implements WritableToken, Serializable {
|
|||
protected int line;
|
||||
protected int charPositionInLine = -1; // set to invalid position
|
||||
protected int channel=DEFAULT_CHANNEL;
|
||||
protected TokenSource source;
|
||||
// TODO: rm protected transient CharStream input;
|
||||
protected Pair<TokenSource, CharStream> source;
|
||||
|
||||
/** We need to be able to change the text once in a while. If
|
||||
* this is non-null, then getText should return this. Note that
|
||||
|
@ -60,15 +61,15 @@ public class CommonToken implements WritableToken, Serializable {
|
|||
this.type = type;
|
||||
}
|
||||
|
||||
public CommonToken(TokenSource source, int type, int channel, int start, int stop) {
|
||||
public CommonToken(@NotNull Pair<TokenSource, CharStream> source, int type, int channel, int start, int stop) {
|
||||
this.source = source;
|
||||
this.type = type;
|
||||
this.channel = channel;
|
||||
this.start = start;
|
||||
this.stop = stop;
|
||||
if (source != null) {
|
||||
this.line = source.getLine();
|
||||
this.charPositionInLine = source.getCharPositionInLine();
|
||||
if (source.a != null) {
|
||||
this.line = source.a.getLine();
|
||||
this.charPositionInLine = source.a.getCharPositionInLine();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,9 +86,15 @@ public class CommonToken implements WritableToken, Serializable {
|
|||
index = oldToken.getTokenIndex();
|
||||
charPositionInLine = oldToken.getCharPositionInLine();
|
||||
channel = oldToken.getChannel();
|
||||
source = oldToken.getTokenSource();
|
||||
start = oldToken.getStartIndex();
|
||||
stop = oldToken.getStopIndex();
|
||||
|
||||
if (oldToken instanceof CommonToken) {
|
||||
source = ((CommonToken)oldToken).source;
|
||||
}
|
||||
else {
|
||||
source = new Pair<TokenSource, CharStream>(oldToken.getTokenSource(), oldToken.getInputStream());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -105,9 +112,8 @@ public class CommonToken implements WritableToken, Serializable {
|
|||
if ( text!=null ) {
|
||||
return text;
|
||||
}
|
||||
TokenSource tokens = getTokenSource();
|
||||
if ( tokens==null ) return null;
|
||||
CharStream input = tokens.getInputStream();
|
||||
|
||||
CharStream input = getInputStream();
|
||||
if ( input==null ) return null;
|
||||
int n = input.size();
|
||||
if ( start<n && stop<n) {
|
||||
|
@ -188,11 +194,12 @@ public class CommonToken implements WritableToken, Serializable {
|
|||
|
||||
@Override
|
||||
public TokenSource getTokenSource() {
|
||||
return source;
|
||||
return source.a;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharStream getInputStream() {
|
||||
return source != null ? source.getInputStream() : null;
|
||||
return source.b;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
package org.antlr.v4.runtime;
|
||||
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.runtime.misc.Pair;
|
||||
|
||||
public class CommonTokenFactory implements TokenFactory<CommonToken> {
|
||||
public static final TokenFactory<CommonToken> DEFAULT = new CommonTokenFactory();
|
||||
|
@ -48,7 +49,7 @@ public class CommonTokenFactory implements TokenFactory<CommonToken> {
|
|||
public CommonTokenFactory() { this(false); }
|
||||
|
||||
@Override
|
||||
public CommonToken create(TokenSource source, int type, String text,
|
||||
public CommonToken create(Pair<TokenSource, CharStream> source, int type, String text,
|
||||
int channel, int start, int stop,
|
||||
int line, int charPositionInLine)
|
||||
{
|
||||
|
@ -58,12 +59,10 @@ public class CommonTokenFactory implements TokenFactory<CommonToken> {
|
|||
if ( text!=null ) {
|
||||
t.setText(text);
|
||||
}
|
||||
else {
|
||||
if ( copyText ) {
|
||||
CharStream input = source.getInputStream();
|
||||
t.setText(input.getText(Interval.of(start,stop)));
|
||||
}
|
||||
else if ( copyText && source.b != null ) {
|
||||
t.setText(source.b.getText(Interval.of(start,stop)));
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.antlr.v4.runtime.atn.StarLoopEntryState;
|
|||
import org.antlr.v4.runtime.atn.StarLoopbackState;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
import org.antlr.v4.runtime.misc.Pair;
|
||||
|
||||
/** This is the default error handling mechanism for ANTLR parsers
|
||||
* and tree parsers.
|
||||
|
@ -380,7 +381,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
|
|||
current = lookback;
|
||||
}
|
||||
return
|
||||
_factory.create(current.getTokenSource(), expectedTokenType, tokenText,
|
||||
_factory.create(new Pair<TokenSource, CharStream>(current.getTokenSource(), current.getTokenSource().getInputStream()), expectedTokenType, tokenText,
|
||||
Token.DEFAULT_CHANNEL,
|
||||
-1, -1,
|
||||
current.getLine(), current.getCharPositionInLine());
|
||||
|
|
|
@ -31,6 +31,7 @@ package org.antlr.v4.runtime;
|
|||
import org.antlr.v4.runtime.atn.LexerATNSimulator;
|
||||
import org.antlr.v4.runtime.misc.IntegerStack;
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.runtime.misc.Pair;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.EmptyStackException;
|
||||
|
@ -54,6 +55,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
|
|||
public static final int MAX_CHAR_VALUE = '\uFFFE';
|
||||
|
||||
public CharStream _input;
|
||||
protected Pair<TokenSource, CharStream> _tokenFactorySourcePair;
|
||||
|
||||
/** How to create token objects */
|
||||
protected TokenFactory<?> _factory = CommonTokenFactory.DEFAULT;
|
||||
|
@ -103,6 +105,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
|
|||
|
||||
public Lexer(CharStream input) {
|
||||
this._input = input;
|
||||
this._tokenFactorySourcePair = new Pair<TokenSource, CharStream>(this, input);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
|
@ -228,8 +231,10 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
|
|||
@Override
|
||||
public void setInputStream(IntStream input) {
|
||||
this._input = null;
|
||||
this._tokenFactorySourcePair = new Pair<TokenSource, CharStream>(this, _input);
|
||||
reset();
|
||||
this._input = (CharStream)input;
|
||||
this._tokenFactorySourcePair = new Pair<TokenSource, CharStream>(this, _input);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -259,7 +264,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
|
|||
* custom Token objects or provide a new factory.
|
||||
*/
|
||||
public Token emit() {
|
||||
Token t = _factory.create(this, _type, _text, _channel, _tokenStartCharIndex, getCharIndex()-1,
|
||||
Token t = _factory.create(_tokenFactorySourcePair, _type, _text, _channel, _tokenStartCharIndex, getCharIndex()-1,
|
||||
_tokenStartLine, _tokenStartCharPositionInLine);
|
||||
emit(t);
|
||||
return t;
|
||||
|
@ -273,7 +278,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
|
|||
int n = _token.getStopIndex() - _token.getStartIndex() + 1;
|
||||
cpos = _token.getCharPositionInLine()+n;
|
||||
}
|
||||
Token eof = _factory.create(this, Token.EOF, null, Token.DEFAULT_CHANNEL, _input.index(), _input.index()-1,
|
||||
Token eof = _factory.create(_tokenFactorySourcePair, Token.EOF, null, Token.DEFAULT_CHANNEL, _input.index(), _input.index()-1,
|
||||
getLine(), cpos);
|
||||
emit(eof);
|
||||
return eof;
|
||||
|
|
|
@ -97,8 +97,12 @@ public interface Token {
|
|||
*/
|
||||
int getStopIndex();
|
||||
|
||||
/** Where does this token come from? You can get the
|
||||
* character input stream from the token source.
|
||||
/** Gets the {@link TokenSource} which created this token.
|
||||
*/
|
||||
TokenSource getTokenSource();
|
||||
|
||||
/**
|
||||
* Gets the {@link CharStream} from which this token was derived.
|
||||
*/
|
||||
CharStream getInputStream();
|
||||
}
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
|
||||
package org.antlr.v4.runtime;
|
||||
|
||||
import org.antlr.v4.runtime.misc.Pair;
|
||||
|
||||
/** The default mechanism for creating tokens. It's used by default in Lexer and
|
||||
* the error handling strategy (to create missing tokens). Notifying the parser
|
||||
* of a new factory means that it notifies its token source and error strategy.
|
||||
|
@ -38,7 +40,7 @@ public interface TokenFactory<Symbol extends Token> {
|
|||
* error handling strategy. If text!=null, then the start and stop positions
|
||||
* are wiped to -1 and the text override is set in the CommonToken.
|
||||
*/
|
||||
Symbol create(TokenSource source, int type, String text,
|
||||
Symbol create(Pair<TokenSource, CharStream> source, int type, String text,
|
||||
int channel, int start, int stop,
|
||||
int line, int charPositionInLine);
|
||||
|
||||
|
|
|
@ -40,12 +40,14 @@ import org.antlr.v4.runtime.TokenSource;
|
|||
import org.antlr.v4.runtime.atn.LexerATNSimulator;
|
||||
import org.antlr.v4.runtime.atn.PredictionContextCache;
|
||||
import org.antlr.v4.runtime.dfa.DFA;
|
||||
import org.antlr.v4.runtime.misc.Pair;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
|
||||
public class LexerInterpreter implements TokenSource {
|
||||
protected LexerGrammar g;
|
||||
protected LexerATNSimulator interp;
|
||||
protected CharStream input;
|
||||
protected Pair<TokenSource, CharStream> tokenFactorySourcePair;
|
||||
|
||||
/** How to create token objects */
|
||||
protected TokenFactory<?> _factory = CommonTokenFactory.DEFAULT;
|
||||
|
@ -66,11 +68,12 @@ public class LexerInterpreter implements TokenSource {
|
|||
}
|
||||
|
||||
public void setInput(String inputString) {
|
||||
input = new ANTLRInputStream(inputString);
|
||||
setInput(new ANTLRInputStream(inputString));
|
||||
}
|
||||
|
||||
public void setInput(CharStream input) {
|
||||
this.input = input;
|
||||
this.tokenFactorySourcePair = new Pair<TokenSource, CharStream>(this, input);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -112,7 +115,7 @@ public class LexerInterpreter implements TokenSource {
|
|||
int ttype = interp.match(input, Lexer.DEFAULT_MODE);
|
||||
int stop = input.index()-1;
|
||||
|
||||
return _factory.create(this, ttype, null, Token.DEFAULT_CHANNEL, start, stop,
|
||||
return _factory.create(tokenFactorySourcePair, ttype, null, Token.DEFAULT_CHANNEL, start, stop,
|
||||
tokenStartLine, tokenStartCharPositionInLine);
|
||||
}
|
||||
finally {
|
||||
|
|
Loading…
Reference in New Issue