forked from jasder/antlr
Merge pull request #1505 from janyou/Fixes-1238-994
Swift target Fixes #1238, Fixes #994
This commit is contained in:
commit
8b75e3fc5b
|
@ -473,8 +473,6 @@ public class BufferedTokenStream: TokenStream {
|
||||||
|
|
||||||
|
|
||||||
public func getText() throws -> String {
|
public func getText() throws -> String {
|
||||||
try lazyInit()
|
|
||||||
try fill()
|
|
||||||
return try getText(Interval.of(0, size() - 1))
|
return try getText(Interval.of(0, size() - 1))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -485,7 +483,7 @@ public class BufferedTokenStream: TokenStream {
|
||||||
if start < 0 || stop < 0 {
|
if start < 0 || stop < 0 {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
try lazyInit()
|
try fill()
|
||||||
if stop >= tokens.count {
|
if stop >= tokens.count {
|
||||||
stop = tokens.count - 1
|
stop = tokens.count - 1
|
||||||
}
|
}
|
||||||
|
|
|
@ -232,13 +232,20 @@
|
||||||
* both SLL and LL parsing. Erroneous input will therefore require 2 passes over
|
* both SLL and LL parsing. Erroneous input will therefore require 2 passes over
|
||||||
* the input.</p>
|
* the input.</p>
|
||||||
*/
|
*/
|
||||||
|
import Foundation
|
||||||
|
|
||||||
open class ParserATNSimulator: ATNSimulator {
|
open class ParserATNSimulator: ATNSimulator {
|
||||||
public let debug: Bool = false
|
public let debug: Bool = false
|
||||||
public let debug_list_atn_decisions: Bool = false
|
public let debug_list_atn_decisions: Bool = false
|
||||||
public let dfa_debug: Bool = false
|
public let dfa_debug: Bool = false
|
||||||
public let retry_debug: Bool = false
|
public let retry_debug: Bool = false
|
||||||
|
/** Just in case this optimization is bad, add an ENV variable to turn it off */
|
||||||
|
public static let TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT: Bool = {
|
||||||
|
if let value = ProcessInfo.processInfo.environment["TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT"] {
|
||||||
|
return NSString(string: value).boolValue
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}()
|
||||||
internal final var parser: Parser
|
internal final var parser: Parser
|
||||||
|
|
||||||
public final var decisionToDFA: [DFA]
|
public final var decisionToDFA: [DFA]
|
||||||
|
@ -1441,6 +1448,10 @@ open class ParserATNSimulator: ATNSimulator {
|
||||||
}
|
}
|
||||||
let length = p.getNumberOfTransitions()
|
let length = p.getNumberOfTransitions()
|
||||||
for i in 0..<length {
|
for i in 0..<length {
|
||||||
|
if i == 0 &&
|
||||||
|
canDropLoopEntryEdgeInLeftRecursiveRule(config) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
let t: Transition = p.transition(i)
|
let t: Transition = p.transition(i)
|
||||||
let continueCollecting: Bool =
|
let continueCollecting: Bool =
|
||||||
!(t is ActionTransition) && collectPredicates
|
!(t is ActionTransition) && collectPredicates
|
||||||
|
@ -1506,6 +1517,167 @@ open class ParserATNSimulator: ATNSimulator {
|
||||||
//print("That took: "+(finishTime-startTime)+ " ms");
|
//print("That took: "+(finishTime-startTime)+ " ms");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Implements first-edge (loop entry) elimination as an optimization
|
||||||
|
* during closure operations. See antlr/antlr4#1398.
|
||||||
|
*
|
||||||
|
* The optimization is to avoid adding the loop entry config when
|
||||||
|
* the exit path can only lead back to the same
|
||||||
|
* StarLoopEntryState after popping context at the rule end state
|
||||||
|
* (traversing only epsilon edges, so we're still in closure, in
|
||||||
|
* this same rule).
|
||||||
|
*
|
||||||
|
* We need to detect any state that can reach loop entry on
|
||||||
|
* epsilon w/o exiting rule. We don't have to look at FOLLOW
|
||||||
|
* links, just ensure that all stack tops for config refer to key
|
||||||
|
* states in LR rule.
|
||||||
|
*
|
||||||
|
* To verify we are in the right situation we must first check
|
||||||
|
* closure is at a StarLoopEntryState generated during LR removal.
|
||||||
|
* Then we check that each stack top of context is a return state
|
||||||
|
* from one of these cases:
|
||||||
|
*
|
||||||
|
* 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
|
||||||
|
* 2. expr op expr. The return state is the block end of internal block of (...)*
|
||||||
|
* 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
|
||||||
|
* That state points at block end of internal block of (...)*.
|
||||||
|
* 4. expr '?' expr ':' expr. The return state points at block end,
|
||||||
|
* which points at loop entry state.
|
||||||
|
*
|
||||||
|
* If any is true for each stack top, then closure does not add a
|
||||||
|
* config to the current config set for edge[0], the loop entry branch.
|
||||||
|
*
|
||||||
|
* Conditions fail if any context for the current config is:
|
||||||
|
*
|
||||||
|
* a. empty (we'd fall out of expr to do a global FOLLOW which could
|
||||||
|
* even be to some weird spot in expr) or,
|
||||||
|
* b. lies outside of expr or,
|
||||||
|
* c. lies within expr but at a state not the BlockEndState
|
||||||
|
* generated during LR removal
|
||||||
|
*
|
||||||
|
* Do we need to evaluate predicates ever in closure for this case?
|
||||||
|
*
|
||||||
|
* No. Predicates, including precedence predicates, are only
|
||||||
|
* evaluated when computing a DFA start state. I.e., only before
|
||||||
|
* the lookahead (but not parser) consumes a token.
|
||||||
|
*
|
||||||
|
* There are no epsilon edges allowed in LR rule alt blocks or in
|
||||||
|
* the "primary" part (ID here). If closure is in
|
||||||
|
* StarLoopEntryState any lookahead operation will have consumed a
|
||||||
|
* token as there are no epsilon-paths that lead to
|
||||||
|
* StarLoopEntryState. We do not have to evaluate predicates
|
||||||
|
* therefore if we are in the generated StarLoopEntryState of a LR
|
||||||
|
* rule. Note that when making a prediction starting at that
|
||||||
|
* decision point, decision d=2, compute-start-state performs
|
||||||
|
* closure starting at edges[0], edges[1] emanating from
|
||||||
|
* StarLoopEntryState. That means it is not performing closure on
|
||||||
|
* StarLoopEntryState during compute-start-state.
|
||||||
|
*
|
||||||
|
* How do we know this always gives same prediction answer?
|
||||||
|
*
|
||||||
|
* Without predicates, loop entry and exit paths are ambiguous
|
||||||
|
* upon remaining input +b (in, say, a+b). Either paths lead to
|
||||||
|
* valid parses. Closure can lead to consuming + immediately or by
|
||||||
|
* falling out of this call to expr back into expr and loop back
|
||||||
|
* again to StarLoopEntryState to match +b. In this special case,
|
||||||
|
* we choose the more efficient path, which is to take the bypass
|
||||||
|
* path.
|
||||||
|
*
|
||||||
|
* The lookahead language has not changed because closure chooses
|
||||||
|
* one path over the other. Both paths lead to consuming the same
|
||||||
|
* remaining input during a lookahead operation. If the next token
|
||||||
|
* is an operator, lookahead will enter the choice block with
|
||||||
|
* operators. If it is not, lookahead will exit expr. Same as if
|
||||||
|
* closure had chosen to enter the choice block immediately.
|
||||||
|
*
|
||||||
|
* Closure is examining one config (some loopentrystate, some alt,
|
||||||
|
* context) which means it is considering exactly one alt. Closure
|
||||||
|
* always copies the same alt to any derived configs.
|
||||||
|
*
|
||||||
|
* How do we know this optimization doesn't mess up precedence in
|
||||||
|
* our parse trees?
|
||||||
|
*
|
||||||
|
* Looking through expr from left edge of stat only has to confirm
|
||||||
|
* that an input, say, a+b+c; begins with any valid interpretation
|
||||||
|
* of an expression. The precedence actually doesn't matter when
|
||||||
|
* making a decision in stat seeing through expr. It is only when
|
||||||
|
* parsing rule expr that we must use the precedence to get the
|
||||||
|
* right interpretation and, hence, parse tree.
|
||||||
|
*
|
||||||
|
* @since 4.6
|
||||||
|
*/
|
||||||
|
internal func canDropLoopEntryEdgeInLeftRecursiveRule(_ config: ATNConfig) -> Bool {
|
||||||
|
if ParserATNSimulator.TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
let p: ATNState = config.state
|
||||||
|
guard let configContext = config.context else {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// First check to see if we are in StarLoopEntryState generated during
|
||||||
|
// left-recursion elimination. For efficiency, also check if
|
||||||
|
// the context has an empty stack case. If so, it would mean
|
||||||
|
// global FOLLOW so we can't perform optimization
|
||||||
|
if p.getStateType() != ATNState.STAR_LOOP_ENTRY ||
|
||||||
|
!( (p as! StarLoopEntryState)).precedenceRuleDecision || // Are we the special loop entry/exit state?
|
||||||
|
configContext.isEmpty() || // If SLL wildcard
|
||||||
|
configContext.hasEmptyPath(){
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Require all return states to return back to the same rule
|
||||||
|
// that p is in.
|
||||||
|
let numCtxs: Int = configContext.size()
|
||||||
|
for i in 0 ..< numCtxs { // for each stack context
|
||||||
|
let returnState: ATNState = atn.states[configContext.getReturnState(i)]!
|
||||||
|
if returnState.ruleIndex != p.ruleIndex
|
||||||
|
{return false}
|
||||||
|
}
|
||||||
|
|
||||||
|
let decisionStartState: BlockStartState = (p.transition(0).target as! BlockStartState)
|
||||||
|
let blockEndStateNum: Int = decisionStartState.endState!.stateNumber
|
||||||
|
let blockEndState: BlockEndState = (atn.states[blockEndStateNum] as! BlockEndState)
|
||||||
|
|
||||||
|
// Verify that the top of each stack context leads to loop entry/exit
|
||||||
|
// state through epsilon edges and w/o leaving rule.
|
||||||
|
for i in 0 ..< numCtxs { // for each stack context
|
||||||
|
let returnStateNumber: Int = configContext.getReturnState(i)
|
||||||
|
let returnState: ATNState = atn.states[returnStateNumber]!
|
||||||
|
// all states must have single outgoing epsilon edge
|
||||||
|
if returnState.getNumberOfTransitions() != 1 || !returnState.transition(0).isEpsilon(){
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Look for prefix op case like 'not expr', (' type ')' expr
|
||||||
|
let returnStateTarget: ATNState = returnState.transition(0).target
|
||||||
|
if returnState.getStateType() == ATNState.BLOCK_END &&
|
||||||
|
returnStateTarget == p {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Look for 'expr op expr' or case where expr's return state is block end
|
||||||
|
// of (...)* internal block; the block end points to loop back
|
||||||
|
// which points to p but we don't need to check that
|
||||||
|
if returnState == blockEndState{
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Look for ternary expr ? expr : expr. The return state points at block end,
|
||||||
|
// which points at loop entry state
|
||||||
|
if returnStateTarget == blockEndState{
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Look for complex prefix 'between expr and expr' case where 2nd expr's
|
||||||
|
// return state points at block end state of (...)* internal block
|
||||||
|
if returnStateTarget.getStateType() == ATNState.BLOCK_END &&
|
||||||
|
returnStateTarget.getNumberOfTransitions() == 1 &&
|
||||||
|
returnStateTarget.transition(0).isEpsilon() &&
|
||||||
|
returnStateTarget.transition(0).target == p{
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// anything else ain't conforming
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
open func getRuleName(_ index: Int) -> String {
|
open func getRuleName(_ index: Int) -> String {
|
||||||
if index >= 0 {
|
if index >= 0 {
|
||||||
|
|
Loading…
Reference in New Issue