Fix a retain cycle between a Token and the TokenSource that it came from.

This was causing all the tokens, streams, and lexers to be retained.  The
primary cycle was because of the backreference at CommonToken.source, and
the fact that the token streams buffer the tokens that they create.

Fix this by replacing the use of a (TokenSource?, CharStream?) pair with
TokenSourceAndStream, which does the same job but references its fields
weakly.  This means that Token.getTokenSource() and Token.getInputStream()
will return valid values as long as you retain the lexer / stream elsewhere,
but a Token won't itself retain those things.
This commit is contained in:
Ewan Mellor 2017-10-23 15:34:10 -07:00
parent 555c14956a
commit cf011b57ee
No known key found for this signature in database
GPG Key ID: 7CE1C6BC9EC8645D
9 changed files with 71 additions and 49 deletions

View File

@ -7,12 +7,6 @@
public class CommonToken: WritableToken {
///
/// An empty _org.antlr.v4.runtime.misc.Pair_ which is used as the default value of
/// _#source_ for tokens that do not have a source.
///
internal static let EMPTY_SOURCE: (TokenSource?, CharStream?) = (nil, nil)
///
/// This is the backing field for _#getType_ and _#setType_.
///
@ -47,7 +41,7 @@ public class CommonToken: WritableToken {
/// _org.antlr.v4.runtime.misc.Pair_ containing these values.
///
internal var source: (TokenSource?, CharStream?)
internal let source: TokenSourceAndStream
///
/// This is the backing field for _#getText_ when the token text is
@ -85,16 +79,16 @@ public class CommonToken: WritableToken {
public init(_ type: Int) {
self.type = type
self.source = CommonToken.EMPTY_SOURCE
self.source = TokenSourceAndStream.EMPTY
}
public init(_ source: (TokenSource?, CharStream?), _ type: Int, _ channel: Int, _ start: Int, _ stop: Int) {
public init(_ source: TokenSourceAndStream, _ type: Int, _ channel: Int, _ start: Int, _ stop: Int) {
self.source = source
self.type = type
self.channel = channel
self.start = start
self.stop = stop
if let tsource = source.0 {
if let tsource = source.tokenSource {
self.line = tsource.getLine()
self.charPositionInLine = tsource.getCharPositionInLine()
}
@ -111,20 +105,12 @@ public class CommonToken: WritableToken {
self.type = type
self.channel = CommonToken.DEFAULT_CHANNEL
self.text = text
self.source = CommonToken.EMPTY_SOURCE
self.source = TokenSourceAndStream.EMPTY
}
///
/// Constructs a new _org.antlr.v4.runtime.CommonToken_ as a copy of another _org.antlr.v4.runtime.Token_.
///
///
/// If `oldToken` is also a _org.antlr.v4.runtime.CommonToken_ instance, the newly
/// constructed token will share a reference to the _#text_ field and
/// the _org.antlr.v4.runtime.misc.Pair_ stored in _#source_. Otherwise, _#text_ will
/// be assigned the result of calling _#getText_, and _#source_
/// will be constructed from the result of _org.antlr.v4.runtime.Token#getTokenSource_ and
/// _org.antlr.v4.runtime.Token#getInputStream_.
///
///
/// - parameter oldToken: The token to copy.
///
public init(_ oldToken: Token) {
@ -135,14 +121,8 @@ public class CommonToken: WritableToken {
channel = oldToken.getChannel()
start = oldToken.getStartIndex()
stop = oldToken.getStopIndex()
if oldToken is CommonToken {
text = (oldToken as! CommonToken).text
source = (oldToken as! CommonToken).source
} else {
text = oldToken.getText()
source = (oldToken.getTokenSource(), oldToken.getInputStream())
}
text = oldToken.getText()
source = oldToken.getTokenSourceAndStream()
}
@ -252,12 +232,16 @@ public class CommonToken: WritableToken {
public func getTokenSource() -> TokenSource? {
return source.0
return source.tokenSource
}
public func getInputStream() -> CharStream? {
return source.1
return source.stream
}
///
/// Returns the shared _TokenSourceAndStream_ holding (weak) references to
/// the _TokenSource_ and _CharStream_ that this token came from. The
/// references are valid only while the lexer / stream are retained elsewhere.
///
public func getTokenSourceAndStream() -> TokenSourceAndStream {
return source
}
public var description: String {

View File

@ -65,7 +65,7 @@ public class CommonTokenFactory: TokenFactory {
}
public func create(_ source: (TokenSource?, CharStream?), _ type: Int, _ text: String?,
public func create(_ source: TokenSourceAndStream, _ type: Int, _ text: String?,
_ channel: Int, _ start: Int, _ stop: Int,
_ line: Int, _ charPositionInLine: Int) -> Token {
let t = CommonToken(source, type, channel, start, stop)
@ -74,7 +74,7 @@ public class CommonTokenFactory: TokenFactory {
if let text = text {
t.setText(text)
}
else if let cStream = source.1, copyText {
else if let cStream = source.stream, copyText {
t.setText(try! cStream.getText(Interval.of(start, stop)))
}

View File

@ -538,10 +538,12 @@ public class DefaultErrorStrategy: ANTLRErrorStrategy {
current = lookback!
}
let token = recognizer.getTokenFactory().create((current.getTokenSource(), current.getTokenSource()!.getInputStream()), expectedTokenType, tokenText,
CommonToken.DEFAULT_CHANNEL,
-1, -1,
current.getLine(), current.getCharPositionInLine())
let token = recognizer.getTokenFactory().create(
current.getTokenSourceAndStream(),
expectedTokenType, tokenText,
CommonToken.DEFAULT_CHANNEL,
-1, -1,
current.getLine(), current.getCharPositionInLine())
return token
}

View File

@ -26,7 +26,7 @@ open class Lexer: Recognizer<LexerATNSimulator>, TokenSource {
public static let MAX_CHAR_VALUE = Character.MAX_VALUE;
public var _input: CharStream?
internal var _tokenFactorySourcePair: (TokenSource?, CharStream?)
internal var _tokenFactorySourcePair: TokenSourceAndStream
///
/// How to create token objects
@ -87,13 +87,17 @@ open class Lexer: Recognizer<LexerATNSimulator>, TokenSource {
public var _text: String?
public override init() {
// Create the holder before super.init(); `self` cannot be referenced
// until after super.init(), so the weak tokenSource back-reference is
// filled in afterwards.
self._tokenFactorySourcePair = TokenSourceAndStream()
super.init()
self._tokenFactorySourcePair.tokenSource = self
}
public init(_ input: CharStream) {
super.init()
self._input = input
self._tokenFactorySourcePair = (self, input)
self._tokenFactorySourcePair = TokenSourceAndStream()
super.init()
self._tokenFactorySourcePair.tokenSource = self
self._tokenFactorySourcePair.stream = input
}
open func reset() throws {
@ -234,10 +238,10 @@ open class Lexer: Recognizer<LexerATNSimulator>, TokenSource {
open override func setInputStream(_ input: IntStream) throws {
self._input = nil
self._tokenFactorySourcePair = (self, _input!)
self._tokenFactorySourcePair = makeTokenSourceAndStream()
try reset()
self._input = input as? CharStream
self._tokenFactorySourcePair = (self, _input!)
self._tokenFactorySourcePair = makeTokenSourceAndStream()
}
@ -449,4 +453,8 @@ open class Lexer: Recognizer<LexerATNSimulator>, TokenSource {
// TODO: Do we lose character or line position information?
try _input!.consume()
}
///
/// Builds a fresh _TokenSourceAndStream_ pairing this lexer with its
/// current `_input` stream (which may be nil). The pair holds only weak
/// references, so it does not retain the lexer or the stream.
///
internal func makeTokenSourceAndStream() -> TokenSourceAndStream {
return TokenSourceAndStream(self, _input)
}
}

View File

@ -45,9 +45,7 @@ public class LexerInterpreter: Lexer {
for i in 0..<_decisionToDFALength {
_decisionToDFA[i] = DFA(atn.getDecisionState(i)!, i)
}
super.init()
self._input = input
self._tokenFactorySourcePair = (self, input)
super.init(input)
self._interp = LexerATNSimulator(self, atn, _decisionToDFA, _sharedContextCache)
if atn.grammarType != ATNType.lexer {

View File

@ -112,7 +112,8 @@ public class ListTokenSource: TokenSource {
}
let stop = max(-1, start - 1)
eofToken = _factory.create((self, getInputStream()!), CommonToken.EOF, "EOF", CommonToken.DEFAULT_CHANNEL, start, stop, getLine(), getCharPositionInLine())
let source = TokenSourceAndStream(self, getInputStream())
eofToken = _factory.create(source, CommonToken.EOF, "EOF", CommonToken.DEFAULT_CHANNEL, start, stop, getLine(), getCharPositionInLine())
}
return eofToken!

View File

@ -98,5 +98,7 @@ public protocol Token: class, CustomStringConvertible {
///
func getInputStream() -> CharStream?
func getTokenSourceAndStream() -> TokenSourceAndStream
var visited: Bool { get set }
}

View File

@ -15,10 +15,33 @@ public protocol TokenFactory {
error handling strategy. If `text` is non-nil, then the start and stop positions
are wiped to -1 and the text override is set in the CommonToken.
///
func create(_ source: (TokenSource?, CharStream?), _ type: Int, _ text: String?,
func create(_ source: TokenSourceAndStream, _ type: Int, _ text: String?,
_ channel: Int, _ start: Int, _ stop: Int,
_ line: Int, _ charPositionInLine: Int) -> Token
/// Generically useful
func create(_ type: Int, _ text: String) -> Token
}
///
/// Bundles weak references to the `TokenSource` and `CharStream` that
/// produced a token.
///
/// A single instance is shared across many tokens to keep the memory
/// footprint down, and both references are weak so that a token never
/// retains its lexer or input stream (avoiding retain cycles).
///
public class TokenSourceAndStream {
    ///
    /// Shared sentinel used as the default value of _#source_ for tokens
    /// that have no source.
    ///
    public static let EMPTY = TokenSourceAndStream()

    /// The lexer (or other token source) that produced the token; weak.
    public weak var tokenSource: TokenSource?
    /// The character stream the token was read from; weak.
    public weak var stream: CharStream?

    /// Creates a pair; both fields default to nil so the empty sentinel
    /// and partially-initialized lexer setups can use the same initializer.
    public init(_ tokenSource: TokenSource? = nil, _ stream: CharStream? = nil) {
        self.tokenSource = tokenSource
        self.stream = stream
    }
}

View File

@ -149,14 +149,18 @@ public class RuleTagToken: Token, CustomStringConvertible {
return nil
}
///
///
/// The implementation for _org.antlr.v4.runtime.tree.pattern.RuleTagToken_ always returns `null`.
///
public func getInputStream() -> CharStream? {
return nil
}
///
/// The implementation for _org.antlr.v4.runtime.tree.pattern.RuleTagToken_
/// always returns the empty sentinel, since rule tag tokens have no source.
public func getTokenSourceAndStream() -> TokenSourceAndStream {
return TokenSourceAndStream.EMPTY
}
///
/// The implementation for _org.antlr.v4.runtime.tree.pattern.RuleTagToken_ returns a string of the form
/// `ruleName:bypassTokenType`.
///