diff --git a/CHANGES.txt b/CHANGES.txt index c682336a1..af94da081 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -14,6 +14,8 @@ December 1, 2012 * label+='foo' wasn't generating good code. It was generating token type as variable name. Now, I gen "s" for implicit labels on string literals. +* tokens now have token and char source to draw from. + November 30, 2012 * Maven updates (cleanup, unification, and specify Java 6 bootstrap classpath) diff --git a/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java index 26914dbac..b94fcb65c 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java +++ b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java @@ -29,6 +29,8 @@ package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.NotNull; +import org.antlr.v4.runtime.misc.Pair; import java.io.Serializable; @@ -37,8 +39,7 @@ public class CommonToken implements WritableToken, Serializable { protected int line; protected int charPositionInLine = -1; // set to invalid position protected int channel=DEFAULT_CHANNEL; - protected TokenSource source; - // TODO: rm protected transient CharStream input; + protected Pair source; /** We need to be able to change the text once in a while. If * this is non-null, then getText should return this. 
Note that @@ -60,15 +61,15 @@ public class CommonToken implements WritableToken, Serializable { this.type = type; } - public CommonToken(TokenSource source, int type, int channel, int start, int stop) { + public CommonToken(@NotNull Pair source, int type, int channel, int start, int stop) { this.source = source; this.type = type; this.channel = channel; this.start = start; this.stop = stop; - if (source != null) { - this.line = source.getLine(); - this.charPositionInLine = source.getCharPositionInLine(); + if (source.a != null) { + this.line = source.a.getLine(); + this.charPositionInLine = source.a.getCharPositionInLine(); } } @@ -85,9 +86,15 @@ public class CommonToken implements WritableToken, Serializable { index = oldToken.getTokenIndex(); charPositionInLine = oldToken.getCharPositionInLine(); channel = oldToken.getChannel(); - source = oldToken.getTokenSource(); start = oldToken.getStartIndex(); stop = oldToken.getStopIndex(); + + if (oldToken instanceof CommonToken) { + source = ((CommonToken)oldToken).source; + } + else { + source = new Pair(oldToken.getTokenSource(), oldToken.getInputStream()); + } } @Override @@ -105,9 +112,8 @@ public class CommonToken implements WritableToken, Serializable { if ( text!=null ) { return text; } - TokenSource tokens = getTokenSource(); - if ( tokens==null ) return null; - CharStream input = tokens.getInputStream(); + + CharStream input = getInputStream(); if ( input==null ) return null; int n = input.size(); if ( start { public static final TokenFactory DEFAULT = new CommonTokenFactory(); @@ -48,7 +49,7 @@ public class CommonTokenFactory implements TokenFactory { public CommonTokenFactory() { this(false); } @Override - public CommonToken create(TokenSource source, int type, String text, + public CommonToken create(Pair source, int type, String text, int channel, int start, int stop, int line, int charPositionInLine) { @@ -58,12 +59,10 @@ public class CommonTokenFactory implements TokenFactory { if ( text!=null ) { 
t.setText(text); } - else { - if ( copyText ) { - CharStream input = source.getInputStream(); - t.setText(input.getText(Interval.of(start,stop))); - } + else if ( copyText && source.b != null ) { + t.setText(source.b.getText(Interval.of(start,stop))); } + return t; } diff --git a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java index 98796df9a..529b94e0f 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java +++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java @@ -39,6 +39,7 @@ import org.antlr.v4.runtime.atn.StarLoopEntryState; import org.antlr.v4.runtime.atn.StarLoopbackState; import org.antlr.v4.runtime.misc.IntervalSet; import org.antlr.v4.runtime.misc.NotNull; +import org.antlr.v4.runtime.misc.Pair; /** This is the default error handling mechanism for ANTLR parsers * and tree parsers. @@ -380,7 +381,7 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { current = lookback; } return - _factory.create(current.getTokenSource(), expectedTokenType, tokenText, + _factory.create(new Pair(current.getTokenSource(), current.getTokenSource().getInputStream()), expectedTokenType, tokenText, Token.DEFAULT_CHANNEL, -1, -1, current.getLine(), current.getCharPositionInLine()); diff --git a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java index f6c81403e..81cfd30af 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java @@ -31,6 +31,7 @@ package org.antlr.v4.runtime; import org.antlr.v4.runtime.atn.LexerATNSimulator; import org.antlr.v4.runtime.misc.IntegerStack; import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.Pair; import java.util.ArrayList; import java.util.EmptyStackException; @@ -54,6 +55,7 @@ public abstract class Lexer extends Recognizer public static final int MAX_CHAR_VALUE = '\uFFFE'; public 
CharStream _input; + protected Pair _tokenFactorySourcePair; /** How to create token objects */ protected TokenFactory _factory = CommonTokenFactory.DEFAULT; @@ -103,6 +105,7 @@ public abstract class Lexer extends Recognizer public Lexer(CharStream input) { this._input = input; + this._tokenFactorySourcePair = new Pair(this, input); } public void reset() { @@ -228,8 +231,10 @@ public abstract class Lexer extends Recognizer @Override public void setInputStream(IntStream input) { this._input = null; + this._tokenFactorySourcePair = new Pair(this, _input); reset(); this._input = (CharStream)input; + this._tokenFactorySourcePair = new Pair(this, _input); } @Override @@ -259,7 +264,7 @@ public abstract class Lexer extends Recognizer * custom Token objects or provide a new factory. */ public Token emit() { - Token t = _factory.create(this, _type, _text, _channel, _tokenStartCharIndex, getCharIndex()-1, + Token t = _factory.create(_tokenFactorySourcePair, _type, _text, _channel, _tokenStartCharIndex, getCharIndex()-1, _tokenStartLine, _tokenStartCharPositionInLine); emit(t); return t; @@ -273,7 +278,7 @@ public abstract class Lexer extends Recognizer int n = _token.getStopIndex() - _token.getStartIndex() + 1; cpos = _token.getCharPositionInLine()+n; } - Token eof = _factory.create(this, Token.EOF, null, Token.DEFAULT_CHANNEL, _input.index(), _input.index()-1, + Token eof = _factory.create(_tokenFactorySourcePair, Token.EOF, null, Token.DEFAULT_CHANNEL, _input.index(), _input.index()-1, getLine(), cpos); emit(eof); return eof; diff --git a/runtime/Java/src/org/antlr/v4/runtime/Token.java b/runtime/Java/src/org/antlr/v4/runtime/Token.java index dd689a36a..41acfb3b0 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Token.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Token.java @@ -97,8 +97,12 @@ public interface Token { */ int getStopIndex(); - /** Where does this token come from? You can get the - * character input stream from the token source. 
+ /** Gets the {@link TokenSource} which created this token. */ TokenSource getTokenSource(); + + /** + * Gets the {@link CharStream} from which this token was derived. + */ + CharStream getInputStream(); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java b/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java index dbc8de43a..e29fa000a 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java +++ b/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java @@ -29,6 +29,8 @@ package org.antlr.v4.runtime; +import org.antlr.v4.runtime.misc.Pair; + /** The default mechanism for creating tokens. It's used by default in Lexer and * the error handling strategy (to create missing tokens). Notifying the parser * of a new factory means that it notifies its token source and error strategy. @@ -38,7 +40,7 @@ public interface TokenFactory { * error handling strategy. If text!=null, then the start and stop positions * are wiped to -1 and the text override is set in the CommonToken. 
*/ - Symbol create(TokenSource source, int type, String text, + Symbol create(Pair source, int type, String text, int channel, int start, int stop, int line, int charPositionInLine); diff --git a/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java b/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java index bd5561f7c..f125e527a 100644 --- a/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java +++ b/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java @@ -40,12 +40,14 @@ import org.antlr.v4.runtime.TokenSource; import org.antlr.v4.runtime.atn.LexerATNSimulator; import org.antlr.v4.runtime.atn.PredictionContextCache; import org.antlr.v4.runtime.dfa.DFA; +import org.antlr.v4.runtime.misc.Pair; import org.antlr.v4.tool.LexerGrammar; public class LexerInterpreter implements TokenSource { protected LexerGrammar g; protected LexerATNSimulator interp; protected CharStream input; + protected Pair tokenFactorySourcePair; /** How to create token objects */ protected TokenFactory _factory = CommonTokenFactory.DEFAULT; @@ -66,11 +68,12 @@ public class LexerInterpreter implements TokenSource { } public void setInput(String inputString) { - input = new ANTLRInputStream(inputString); + setInput(new ANTLRInputStream(inputString)); } public void setInput(CharStream input) { this.input = input; + this.tokenFactorySourcePair = new Pair(this, input); } @Override @@ -112,7 +115,7 @@ public class LexerInterpreter implements TokenSource { int ttype = interp.match(input, Lexer.DEFAULT_MODE); int stop = input.index()-1; - return _factory.create(this, ttype, null, Token.DEFAULT_CHANNEL, start, stop, + return _factory.create(tokenFactorySourcePair, ttype, null, Token.DEFAULT_CHANNEL, start, stop, tokenStartLine, tokenStartCharPositionInLine); } finally {