Merge pull request #1505 from janyou/Fixes-1238-994

Swift target Fixes #1238, Fixes #994
2016-12-14 13:25:04 -08:00 · 2016-12-14 13:25:04 -08:00 · 8b75e3fc5b
parent 64a8c32138 c9508b74a4
commit 8b75e3fc5b
2 changed files with 175 additions and 5 deletions
--- a/runtime/Swift/Antlr4/org/antlr/v4/runtime/BufferedTokenStream.swift
+++ b/runtime/Swift/Antlr4/org/antlr/v4/runtime/BufferedTokenStream.swift
@ -473,8 +473,6 @@ public class BufferedTokenStream: TokenStream {


    public func getText() throws -> String {
-        try lazyInit()
-        try fill()
        return try getText(Interval.of(0, size() - 1))
    }

@ -485,7 +483,7 @@ public class BufferedTokenStream: TokenStream {
        if start < 0 || stop < 0 {
            return ""
        }
-        try lazyInit()
+        try fill()
        if stop >= tokens.count {
            stop = tokens.count - 1
        }
--- a/runtime/Swift/Antlr4/org/antlr/v4/runtime/atn/ParserATNSimulator.swift
+++ b/runtime/Swift/Antlr4/org/antlr/v4/runtime/atn/ParserATNSimulator.swift
@ -232,13 +232,20 @@
 * both SLL and LL parsing. Erroneous input will therefore require 2 passes over
 * the input.</p>
 */
+import Foundation

 open class ParserATNSimulator: ATNSimulator {
    public let debug: Bool = false
    public let debug_list_atn_decisions: Bool = false
    public let dfa_debug: Bool = false
    public let retry_debug: Bool = false
-
+    /** Just in case this optimization is bad, add an ENV variable to turn it off */
+    public static let TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT: Bool = {
+        if let value = ProcessInfo.processInfo.environment["TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT"] {
+            return NSString(string: value).boolValue
+        }
+        return false
+    }()
    internal final var parser: Parser

    public final var decisionToDFA: [DFA]
@ -1441,6 +1448,10 @@ open class ParserATNSimulator: ATNSimulator {
            }
            let length = p.getNumberOfTransitions()
            for i in 0..<length {
+                if i == 0 &&
+                    canDropLoopEntryEdgeInLeftRecursiveRule(config) {
+                    continue
+                }
                let t: Transition = p.transition(i)
                let continueCollecting: Bool =
                !(t is ActionTransition) && collectPredicates
@ -1506,7 +1517,168 @@ open class ParserATNSimulator: ATNSimulator {
            //print("That took: "+(finishTime-startTime)+ " ms");
    }

-
+    /** Implements first-edge (loop entry) elimination as an optimization
+     *  during closure operations.  See antlr/antlr4#1398.
+     *
+     * The optimization is to avoid adding the loop entry config when
+     * the exit path can only lead back to the same
+     * StarLoopEntryState after popping context at the rule end state
+     * (traversing only epsilon edges, so we're still in closure, in
+     * this same rule).
+     *
+     * We need to detect any state that can reach loop entry on
+     * epsilon w/o exiting rule. We don't have to look at FOLLOW
+     * links, just ensure that all stack tops for config refer to key
+     * states in LR rule.
+     *
+     * To verify we are in the right situation we must first check
+     * closure is at a StarLoopEntryState generated during LR removal.
+     * Then we check that each stack top of context is a return state
+     * from one of these cases:
+     *
+     *   1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
+     *   2. expr op expr. The return state is the block end of internal block of (...)*
+     *   3. 'between' expr 'and' expr. The return state of 2nd expr reference.
+     *      That state points at block end of internal block of (...)*.
+     *   4. expr '?' expr ':' expr. The return state points at block end,
+     *      which points at loop entry state.
+     *
+     * If any is true for each stack top, then closure does not add a
+     * config to the current config set for edge[0], the loop entry branch.
+     *
+     *  Conditions fail if any context for the current config is:
+     *
+     *   a. empty (we'd fall out of expr to do a global FOLLOW which could
+     *      even be to some weird spot in expr) or,
+     *   b. lies outside of expr or,
+     *   c. lies within expr but at a state not the BlockEndState
+     *   generated during LR removal
+     *
+     * Do we need to evaluate predicates ever in closure for this case?
+     *
+     * No. Predicates, including precedence predicates, are only
+     * evaluated when computing a DFA start state. I.e., only before
+     * the lookahead (but not parser) consumes a token.
+     *
+     * There are no epsilon edges allowed in LR rule alt blocks or in
+     * the "primary" part (ID here). If closure is in
+     * StarLoopEntryState any lookahead operation will have consumed a
+     * token as there are no epsilon-paths that lead to
+     * StarLoopEntryState. We do not have to evaluate predicates
+     * therefore if we are in the generated StarLoopEntryState of a LR
+     * rule. Note that when making a prediction starting at that
+     * decision point, decision d=2, compute-start-state performs
+     * closure starting at edges[0], edges[1] emanating from
+     * StarLoopEntryState. That means it is not performing closure on
+     * StarLoopEntryState during compute-start-state.
+     *
+     * How do we know this always gives same prediction answer?
+     *
+     * Without predicates, loop entry and exit paths are ambiguous
+     * upon remaining input +b (in, say, a+b). Either paths lead to
+     * valid parses. Closure can lead to consuming + immediately or by
+     * falling out of this call to expr back into expr and loop back
+     * again to StarLoopEntryState to match +b. In this special case,
+     * we choose the more efficient path, which is to take the bypass
+     * path.
+     *
+     * The lookahead language has not changed because closure chooses
+     * one path over the other. Both paths lead to consuming the same
+     * remaining input during a lookahead operation. If the next token
+     * is an operator, lookahead will enter the choice block with
+     * operators. If it is not, lookahead will exit expr. Same as if
+     * closure had chosen to enter the choice block immediately.
+     *
+     * Closure is examining one config (some loopentrystate, some alt,
+     * context) which means it is considering exactly one alt. Closure
+     * always copies the same alt to any derived configs.
+     *
+     * How do we know this optimization doesn't mess up precedence in
+     * our parse trees?
+     *
+     * Looking through expr from left edge of stat only has to confirm
+     * that an input, say, a+b+c; begins with any valid interpretation
+     * of an expression. The precedence actually doesn't matter when
+     * making a decision in stat seeing through expr. It is only when
+     * parsing rule expr that we must use the precedence to get the
+     * right interpretation and, hence, parse tree.
+     *
+     * @since 4.6
+     */
+    internal func canDropLoopEntryEdgeInLeftRecursiveRule(_ config: ATNConfig) -> Bool {
+        if ParserATNSimulator.TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT {
+            return false
+        }
+        let p: ATNState = config.state
+        guard let configContext = config.context else {
+            return false
+        }
+        // First check to see if we are in StarLoopEntryState generated during
+        // left-recursion elimination. For efficiency, also check if
+        // the context has an empty stack case. If so, it would mean
+        // global FOLLOW so we can't perform optimization
+        if  p.getStateType() != ATNState.STAR_LOOP_ENTRY ||
+            !( (p as! StarLoopEntryState)).precedenceRuleDecision || // Are we the special loop entry/exit state?
+            configContext.isEmpty() || // If SLL wildcard
+            configContext.hasEmptyPath(){
+            return false
+        }
+        
+        // Require all return states to return back to the same rule
+        // that p is in.
+        let numCtxs: Int = configContext.size()
+        for  i in 0 ..< numCtxs { // for each stack context
+            let returnState: ATNState = atn.states[configContext.getReturnState(i)]!
+            if  returnState.ruleIndex != p.ruleIndex
+            {return false}
+        }
+        
+        let decisionStartState: BlockStartState =  (p.transition(0).target as! BlockStartState)
+        let blockEndStateNum: Int = decisionStartState.endState!.stateNumber
+        let blockEndState: BlockEndState =  (atn.states[blockEndStateNum] as! BlockEndState)
+        
+        // Verify that the top of each stack context leads to loop entry/exit
+        // state through epsilon edges and w/o leaving rule.
+        for  i in 0 ..< numCtxs { // for each stack context
+            let returnStateNumber: Int = configContext.getReturnState(i)
+            let returnState: ATNState = atn.states[returnStateNumber]!
+            // all states must have single outgoing epsilon edge
+            if  returnState.getNumberOfTransitions() != 1 || !returnState.transition(0).isEpsilon(){
+                return false
+            }
+            // Look for prefix op case like 'not expr', (' type ')' expr
+            let returnStateTarget: ATNState = returnState.transition(0).target
+            if  returnState.getStateType() == ATNState.BLOCK_END &&
+                returnStateTarget == p {
+                continue
+            }
+            // Look for 'expr op expr' or case where expr's return state is block end
+            // of (...)* internal block; the block end points to loop back
+            // which points to p but we don't need to check that
+            if  returnState == blockEndState{
+                continue
+            }
+            // Look for ternary expr ? expr : expr. The return state points at block end,
+            // which points at loop entry state
+            if  returnStateTarget == blockEndState{
+                continue
+            }
+            // Look for complex prefix 'between expr and expr' case where 2nd expr's
+            // return state points at block end state of (...)* internal block
+            if  returnStateTarget.getStateType() == ATNState.BLOCK_END &&
+                returnStateTarget.getNumberOfTransitions() == 1 &&
+                returnStateTarget.transition(0).isEpsilon() &&
+                returnStateTarget.transition(0).target == p{
+                continue
+            }
+            
+            // anything else ain't conforming
+            return false
+        }
+        
+        return true
+    }
+    
    open func getRuleName(_ index: Int) -> String {
        if index >= 0  {
            return parser.getRuleNames()[index]