Simplify and document non-greedy behavior in processAcceptConfigs
This commit is contained in:
parent
fb87e4c785
commit
1af9b4c338
|
@ -409,11 +409,39 @@ public class LexerATNSimulator extends ATNSimulator {
|
||||||
reach, prevAccept.config, prevAccept.index);
|
reach, prevAccept.config, prevAccept.index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Non-greedy handling works by removing all non-greedy configurations
|
||||||
|
* from reach when an accept state is reached for the same token. For
|
||||||
|
* example, consider the following two tokens:
|
||||||
|
*
|
||||||
|
* BLOCK : '{' .* '}';
|
||||||
|
* OPTIONAL_BLOCK : '{' .* '}' '?';
|
||||||
|
*
|
||||||
|
* With the following input:
|
||||||
|
*
|
||||||
|
* {stuff}?
|
||||||
|
*
|
||||||
|
* After matching '}', an accept state at the end of BLOCK is reached,
|
||||||
|
* so any configurations inside the non-greedy .* loop in BLOCK will be
|
||||||
|
* removed from reach. The configuration(s) inside the non-greedy .*
|
||||||
|
* loop in OPTIONAL_BLOCK are unaffected by this because no
|
||||||
|
* configuration is in an accept state for OPTIONAL_BLOCK at this input
|
||||||
|
* symbol.
|
||||||
|
*/
|
||||||
BitSet altsAtAcceptState = new BitSet();
|
BitSet altsAtAcceptState = new BitSet();
|
||||||
BitSet nonGreedyAlts = new BitSet();
|
BitSet nonGreedyAlts = new BitSet();
|
||||||
|
LexerATNConfig acceptConfig = null;
|
||||||
for (ATNConfig config : reach) {
|
for (ATNConfig config : reach) {
|
||||||
if (config.state instanceof RuleStopState) {
|
if (config.state instanceof RuleStopState) {
|
||||||
altsAtAcceptState.set(config.alt);
|
altsAtAcceptState.set(config.alt);
|
||||||
|
|
||||||
|
if ( debug ) {
|
||||||
|
System.out.format("processAcceptConfigs: hit accept config %s index %d\n",
|
||||||
|
config, input.index());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (acceptConfig == null) {
|
||||||
|
acceptConfig = (LexerATNConfig)config;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!((LexerATNConfig)config).isGreedy()) {
|
if (!((LexerATNConfig)config).isGreedy()) {
|
||||||
|
@ -423,38 +451,24 @@ public class LexerATNSimulator extends ATNSimulator {
|
||||||
}
|
}
|
||||||
|
|
||||||
nonGreedyAlts.and(altsAtAcceptState);
|
nonGreedyAlts.and(altsAtAcceptState);
|
||||||
|
// this is now "alts with at least one non-greedy config and one accept config"
|
||||||
if (!nonGreedyAlts.isEmpty()) {
|
if (!nonGreedyAlts.isEmpty()) {
|
||||||
reach.removeNonGreedyConfigsInAlts(nonGreedyAlts);
|
reach.removeNonGreedyConfigsInAlts(nonGreedyAlts);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int ci=0; ci<reach.size(); ci++) {
|
// mark the new preferred accept state
|
||||||
LexerATNConfig c = (LexerATNConfig)reach.get(ci);
|
if (acceptConfig != null && input.index() > prevAccept.index) {
|
||||||
if ( c.state instanceof RuleStopState) {
|
if ( debug ) {
|
||||||
if ( debug ) {
|
if ( prevAccept.index>=0 ) {
|
||||||
System.out.format("processAcceptConfigs: hit accept config %s index %d\n",
|
System.out.println("processAcceptConfigs: found longer token");
|
||||||
c, input.index());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int index = input.index();
|
|
||||||
if ( index > prevAccept.index ) {
|
|
||||||
if ( debug ) {
|
|
||||||
if ( prevAccept.index>=0 ) {
|
|
||||||
System.out.println("processAcceptConfigs: found longer token");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// condition > not >= will favor prev accept at same index.
|
|
||||||
// This way, "int" is keyword not ID if listed first.
|
|
||||||
traceAcceptState(c.alt);
|
|
||||||
if ( debug ) {
|
|
||||||
System.out.format("markExecSettings for %s @ index=%d, line %d:%d\n", c, index, prevAccept.line, prevAccept.charPos);
|
|
||||||
}
|
|
||||||
captureSimState(prevAccept, input, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
// move to next char, looking for longer match
|
|
||||||
// (we continue processing if there are states in reach)
|
|
||||||
}
|
}
|
||||||
|
// condition > not >= will favor prev accept at same index.
|
||||||
|
// This way, "int" is keyword not ID if listed first.
|
||||||
|
traceAcceptState(acceptConfig.alt);
|
||||||
|
captureSimState(prevAccept, input, acceptConfig);
|
||||||
}
|
}
|
||||||
|
|
||||||
return reach;
|
return reach;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue