From cf011b57ee94c395eac92ddc4db91d452d8808e8 Mon Sep 17 00:00:00 2001 From: Ewan Mellor Date: Mon, 23 Oct 2017 15:34:10 -0700 Subject: [PATCH] Fix a retain cycle between a Token and the TokenSource that it came from. This was causing all the tokens, streams, and lexers to be retained. The primary cycle was because of the backreference at CommonToken.source, and the fact that the token streams buffer the tokens that they create. Fix this by replacing the use of a (TokenSource?, CharStream?) pair with TokenSourceAndStream, which does the same job but references its fields weakly. This means that Token.getTokenSource() and Token.getInputStream() will return valid values as long as you retain the lexer / stream elsewhere, but a Token won't itself retain those things. --- .../Swift/Sources/Antlr4/CommonToken.swift | 44 ++++++------------- .../Sources/Antlr4/CommonTokenFactory.swift | 4 +- .../Sources/Antlr4/DefaultErrorStrategy.swift | 10 +++-- runtime/Swift/Sources/Antlr4/Lexer.swift | 20 ++++++--- .../Sources/Antlr4/LexerInterpreter.swift | 4 +- .../Sources/Antlr4/ListTokenSource.swift | 3 +- runtime/Swift/Sources/Antlr4/Token.swift | 2 + .../Swift/Sources/Antlr4/TokenFactory.swift | 25 ++++++++++- .../Antlr4/tree/pattern/RuleTagToken.swift | 8 +++- 9 files changed, 71 insertions(+), 49 deletions(-) diff --git a/runtime/Swift/Sources/Antlr4/CommonToken.swift b/runtime/Swift/Sources/Antlr4/CommonToken.swift index 60d8fdd88..cef3973c0 100644 --- a/runtime/Swift/Sources/Antlr4/CommonToken.swift +++ b/runtime/Swift/Sources/Antlr4/CommonToken.swift @@ -7,12 +7,6 @@ public class CommonToken: WritableToken { - /// - /// An empty _org.antlr.v4.runtime.misc.Pair_ which is used as the default value of - /// _#source_ for tokens that do not have a source. - /// - internal static let EMPTY_SOURCE: (TokenSource?, CharStream?) = (nil, nil) - /// /// This is the backing field for _#getType_ and _#setType_. /// @@ -47,7 +41,7 @@ public class CommonToken: WritableToken { /// _org.antlr.v4.runtime.misc.Pair_ containing these values. /// - internal var source: (TokenSource?, CharStream?) + internal let source: TokenSourceAndStream /// /// This is the backing field for _#getText_ when the token text is @@ -85,16 +79,16 @@ public class CommonToken: WritableToken { public init(_ type: Int) { self.type = type - self.source = CommonToken.EMPTY_SOURCE + self.source = TokenSourceAndStream.EMPTY } - public init(_ source: (TokenSource?, CharStream?), _ type: Int, _ channel: Int, _ start: Int, _ stop: Int) { + public init(_ source: TokenSourceAndStream, _ type: Int, _ channel: Int, _ start: Int, _ stop: Int) { self.source = source self.type = type self.channel = channel self.start = start self.stop = stop - if let tsource = source.0 { + if let tsource = source.tokenSource { self.line = tsource.getLine() self.charPositionInLine = tsource.getCharPositionInLine() } @@ -111,20 +105,12 @@ public class CommonToken: WritableToken { self.type = type self.channel = CommonToken.DEFAULT_CHANNEL self.text = text - self.source = CommonToken.EMPTY_SOURCE + self.source = TokenSourceAndStream.EMPTY } /// /// Constructs a new _org.antlr.v4.runtime.CommonToken_ as a copy of another _org.antlr.v4.runtime.Token_. - /// - /// - /// If `oldToken` is also a _org.antlr.v4.runtime.CommonToken_ instance, the newly - /// constructed token will share a reference to the _#text_ field and - /// the _org.antlr.v4.runtime.misc.Pair_ stored in _#source_. Otherwise, _#text_ will - /// be assigned the result of calling _#getText_, and _#source_ - /// will be constructed from the result of _org.antlr.v4.runtime.Token#getTokenSource_ and - /// _org.antlr.v4.runtime.Token#getInputStream_. - /// + /// /// - parameter oldToken: The token to copy. /// public init(_ oldToken: Token) { @@ -135,14 +121,8 @@ public class CommonToken: WritableToken { channel = oldToken.getChannel() start = oldToken.getStartIndex() stop = oldToken.getStopIndex() - - if oldToken is CommonToken { - text = (oldToken as! CommonToken).text - source = (oldToken as! CommonToken).source - } else { - text = oldToken.getText() - source = (oldToken.getTokenSource(), oldToken.getInputStream()) - } + text = oldToken.getText() + source = oldToken.getTokenSourceAndStream() } @@ -252,12 +232,16 @@ public class CommonToken: WritableToken { public func getTokenSource() -> TokenSource? { - return source.0 + return source.tokenSource } public func getInputStream() -> CharStream? { - return source.1 + return source.stream + } + + public func getTokenSourceAndStream() -> TokenSourceAndStream { + return source } public var description: String { diff --git a/runtime/Swift/Sources/Antlr4/CommonTokenFactory.swift b/runtime/Swift/Sources/Antlr4/CommonTokenFactory.swift index 7f5c0df60..d7922802f 100644 --- a/runtime/Swift/Sources/Antlr4/CommonTokenFactory.swift +++ b/runtime/Swift/Sources/Antlr4/CommonTokenFactory.swift @@ -65,7 +65,7 @@ public class CommonTokenFactory: TokenFactory { } - public func create(_ source: (TokenSource?, CharStream?), _ type: Int, _ text: String?, + public func create(_ source: TokenSourceAndStream, _ type: Int, _ text: String?, _ channel: Int, _ start: Int, _ stop: Int, _ line: Int, _ charPositionInLine: Int) -> Token { let t = CommonToken(source, type, channel, start, stop) @@ -74,7 +74,7 @@ public class CommonTokenFactory: TokenFactory { if let text = text { t.setText(text) } - else if let cStream = source.1, copyText { + else if let cStream = source.stream, copyText { t.setText(try! cStream.getText(Interval.of(start, stop))) } diff --git a/runtime/Swift/Sources/Antlr4/DefaultErrorStrategy.swift b/runtime/Swift/Sources/Antlr4/DefaultErrorStrategy.swift index 29c2890c4..fb34ac3c2 100644 --- a/runtime/Swift/Sources/Antlr4/DefaultErrorStrategy.swift +++ b/runtime/Swift/Sources/Antlr4/DefaultErrorStrategy.swift @@ -538,10 +538,12 @@ public class DefaultErrorStrategy: ANTLRErrorStrategy { current = lookback! } - let token = recognizer.getTokenFactory().create((current.getTokenSource(), current.getTokenSource()!.getInputStream()), expectedTokenType, tokenText, - CommonToken.DEFAULT_CHANNEL, - -1, -1, - current.getLine(), current.getCharPositionInLine()) + let token = recognizer.getTokenFactory().create( + current.getTokenSourceAndStream(), + expectedTokenType, tokenText, + CommonToken.DEFAULT_CHANNEL, + -1, -1, + current.getLine(), current.getCharPositionInLine()) return token } diff --git a/runtime/Swift/Sources/Antlr4/Lexer.swift b/runtime/Swift/Sources/Antlr4/Lexer.swift index 084b82733..e47aa1a93 100644 --- a/runtime/Swift/Sources/Antlr4/Lexer.swift +++ b/runtime/Swift/Sources/Antlr4/Lexer.swift @@ -26,7 +26,7 @@ open class Lexer: Recognizer, TokenSource { public static let MAX_CHAR_VALUE = Character.MAX_VALUE; public var _input: CharStream? - internal var _tokenFactorySourcePair: (TokenSource?, CharStream?) + internal var _tokenFactorySourcePair: TokenSourceAndStream /// /// How to create token objects @@ -87,13 +87,17 @@ open class Lexer: Recognizer, TokenSource { public var _text: String? public override init() { + self._tokenFactorySourcePair = TokenSourceAndStream() + super.init() + self._tokenFactorySourcePair.tokenSource = self } public init(_ input: CharStream) { - - super.init() self._input = input - self._tokenFactorySourcePair = (self, input) + self._tokenFactorySourcePair = TokenSourceAndStream() + super.init() + self._tokenFactorySourcePair.tokenSource = self + self._tokenFactorySourcePair.stream = input } open func reset() throws { @@ -234,10 +238,10 @@ open class Lexer: Recognizer, TokenSource { open override func setInputStream(_ input: IntStream) throws { self._input = nil - self._tokenFactorySourcePair = (self, _input!) + self._tokenFactorySourcePair = makeTokenSourceAndStream() try reset() self._input = input as? CharStream - self._tokenFactorySourcePair = (self, _input!) + self._tokenFactorySourcePair = makeTokenSourceAndStream() } @@ -449,4 +453,8 @@ open class Lexer: Recognizer, TokenSource { // TODO: Do we lose character or line position information? try _input!.consume() } + + internal func makeTokenSourceAndStream() -> TokenSourceAndStream { + return TokenSourceAndStream(self, _input) + } } diff --git a/runtime/Swift/Sources/Antlr4/LexerInterpreter.swift b/runtime/Swift/Sources/Antlr4/LexerInterpreter.swift index 55111b351..99d9f72d8 100644 --- a/runtime/Swift/Sources/Antlr4/LexerInterpreter.swift +++ b/runtime/Swift/Sources/Antlr4/LexerInterpreter.swift @@ -45,9 +45,7 @@ public class LexerInterpreter: Lexer { for i in 0..<_decisionToDFALength { _decisionToDFA[i] = DFA(atn.getDecisionState(i)!, i) } - super.init() - self._input = input - self._tokenFactorySourcePair = (self, input) + super.init(input) self._interp = LexerATNSimulator(self, atn, _decisionToDFA, _sharedContextCache) if atn.grammarType != ATNType.lexer { diff --git a/runtime/Swift/Sources/Antlr4/ListTokenSource.swift b/runtime/Swift/Sources/Antlr4/ListTokenSource.swift index d5d601852..0863e16ee 100644 --- a/runtime/Swift/Sources/Antlr4/ListTokenSource.swift +++ b/runtime/Swift/Sources/Antlr4/ListTokenSource.swift @@ -112,7 +112,8 @@ public class ListTokenSource: TokenSource { } let stop = max(-1, start - 1) - eofToken = _factory.create((self, getInputStream()!), CommonToken.EOF, "EOF", CommonToken.DEFAULT_CHANNEL, start, stop, getLine(), getCharPositionInLine()) + let source = TokenSourceAndStream(self, getInputStream()) + eofToken = _factory.create(source, CommonToken.EOF, "EOF", CommonToken.DEFAULT_CHANNEL, start, stop, getLine(), getCharPositionInLine()) } return eofToken! diff --git a/runtime/Swift/Sources/Antlr4/Token.swift b/runtime/Swift/Sources/Antlr4/Token.swift index a249adc2a..264318ddd 100644 --- a/runtime/Swift/Sources/Antlr4/Token.swift +++ b/runtime/Swift/Sources/Antlr4/Token.swift @@ -98,5 +98,7 @@ public protocol Token: class, CustomStringConvertible { /// func getInputStream() -> CharStream? + func getTokenSourceAndStream() -> TokenSourceAndStream + var visited: Bool { get set } } diff --git a/runtime/Swift/Sources/Antlr4/TokenFactory.swift b/runtime/Swift/Sources/Antlr4/TokenFactory.swift index 3531b7343..63fa74b3c 100644 --- a/runtime/Swift/Sources/Antlr4/TokenFactory.swift +++ b/runtime/Swift/Sources/Antlr4/TokenFactory.swift @@ -15,10 +15,33 @@ public protocol TokenFactory { /// error handling strategy. If text!=null, than the start and stop positions /// are wiped to -1 in the text override is set in the CommonToken. /// - func create(_ source: (TokenSource?, CharStream?), _ type: Int, _ text: String?, + func create(_ source: TokenSourceAndStream, _ type: Int, _ text: String?, _ channel: Int, _ start: Int, _ stop: Int, _ line: Int, _ charPositionInLine: Int) -> Token /// Generically useful func create(_ type: Int, _ text: String) -> Token } + + +/** + Holds the references to the TokenSource and CharStream used to create a Token. + These are together to reduce memory footprint by having one instance of + TokenSourceAndStream shared across many tokens. The references here are weak + to avoid retain cycles. + */ +public class TokenSourceAndStream { + /// + /// An empty TokenSourceAndStream which is used as the default value of + /// _#source_ for tokens that do not have a source. + /// + public static let EMPTY = TokenSourceAndStream() + + public weak var tokenSource: TokenSource? + public weak var stream: CharStream? + + public init(_ tokenSource: TokenSource? = nil, _ stream: CharStream? = nil) { + self.tokenSource = tokenSource + self.stream = stream + } +} diff --git a/runtime/Swift/Sources/Antlr4/tree/pattern/RuleTagToken.swift b/runtime/Swift/Sources/Antlr4/tree/pattern/RuleTagToken.swift index d01c4aafa..038efa7fa 100644 --- a/runtime/Swift/Sources/Antlr4/tree/pattern/RuleTagToken.swift +++ b/runtime/Swift/Sources/Antlr4/tree/pattern/RuleTagToken.swift @@ -149,14 +149,18 @@ public class RuleTagToken: Token, CustomStringConvertible { return nil } - /// + /// /// The implementation for _org.antlr.v4.runtime.tree.pattern.RuleTagToken_ always returns `null`. /// public func getInputStream() -> CharStream? { return nil } - /// + public func getTokenSourceAndStream() -> TokenSourceAndStream { + return TokenSourceAndStream.EMPTY + } + + /// /// The implementation for _org.antlr.v4.runtime.tree.pattern.RuleTagToken_ returns a string of the form /// `ruleName:bypassTokenType`. ///