got fast prediction termination with exactAmbig option that forces ANTLR to scan ahead until it identifies the true ambiguity.

This commit is contained in:
Terence Parr 2012-10-13 18:12:33 -07:00
parent ec32b71761
commit e7ece0e90a
2 changed files with 87 additions and 41 deletions

View File

@ -52,6 +52,7 @@ import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -299,6 +300,16 @@ public class ParserATNSimulator extends ATNSimulator {
/** Do only local context prediction (SLL(k) style). */
protected boolean SLL = false;
/** Tell the full LL prediction algorithm to pursue lookahead until
* it has uniquely predicted an alternative without conflict or it's
* certain that it has found an ambiguous input sequence. For speed
* reasons, we terminate the prediction process early when this
* variable is false. When true, the prediction process will
* continue looking for the exact ambiguous sequence even if
* it has already figured out which alternative to predict.
*/
protected boolean exactAmbig = false;
/** Each prediction operation uses a cache for merge of prediction contexts.
* Don't keep around as it wastes huge amounts of memory. DoubleKeyMap
* isn't synchronized but we're ok since two threads shouldn't reuse same
@ -674,7 +685,7 @@ public class ParserATNSimulator extends ATNSimulator {
}
else {
// MORE THAN ONE VIABLE ALTERNATIVE
if ( allSubsetsConflict(altSubSets) ) {
if ( hasConflictingAltSet(altSubSets) && !hasStateAssociatedWithOneAlt(reach) ) {
D.configs.conflictingAlts = getConflictingAlts(reach);
if ( outerContext == ParserRuleContext.EMPTY || // in grammar start rule
!D.configs.dipsIntoOuterContext || // didn't fall out of rule
@ -810,7 +821,6 @@ public class ParserATNSimulator extends ATNSimulator {
}
Collection<BitSet> altSubSets = getConflictingAltSubsets(reach);
if ( debug ) {
System.out.println("LL altSubSets="+altSubSets+
", predict="+getUniqueAlt(altSubSets)+
@ -821,7 +831,17 @@ public class ParserATNSimulator extends ATNSimulator {
// System.out.println("altSubSets: "+altSubSets);
reach.uniqueAlt = getUniqueAlt(altSubSets);
if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) break;
if ( resolvesToJustOneViableAlt(altSubSets) ) break;
if ( exactAmbig ) {
if ( allSubsetsConflict(altSubSets) && allSubsetsEqual(altSubSets) ) {
break;
}
}
else if ( (hasConflictingAltSet(altSubSets) &&
!hasStateAssociatedWithOneAlt(reach)) ||
resolvesToJustOneViableAlt(altSubSets) )
{
break;
}
previous = reach;
input.consume();
t = input.LA(1);
@ -858,7 +878,11 @@ public class ParserATNSimulator extends ATNSimulator {
// We just can't say for sure that there is an ambiguity without
// looking further.
if ( /* TODO: len(all subsets)>1 or input consistent with a subset with len=1 */ true ) {
// if ( /* TODO: len(all subsets)>1 or input consistent with a subset with len=1 */ true ) {
// reportAmbiguity(dfa, D, startIndex, input.index(), getConflictingAlts(reach), reach);
// }
if ( exactAmbig ) {
reportAmbiguity(dfa, D, startIndex, input.index(), getConflictingAlts(reach), reach);
}
@ -1443,18 +1467,37 @@ public class ParserATNSimulator extends ATNSimulator {
return !hasNonConflictingAltSet(altsets);
}
/** return (there exists len(m)==1 for some m in altsets) */
/** return (there exists len(A_i)==1 for some A_i in altsets A) */
public boolean hasNonConflictingAltSet(Collection<BitSet> altsets) {
for (BitSet alts : altsets) {
if ( alts.cardinality()==1 ) { // more than 1 viable alt
// System.out.println("SLL go; found nonconflicting alt: "+alts);
return true; // use more lookahead
if ( alts.cardinality()==1 ) {
return true;
}
}
// System.out.println("SLL stop");
return false; // all sets conflict with len(viable_alts)>1, stop
return false;
}
/** Report whether any alt subset holds more than one alternative.
 *  return (there exists len(A_i)>1 for some A_i in altsets A)
 */
public boolean hasConflictingAltSet(Collection<BitSet> altsets) {
    boolean sawConflict = false;
    for (BitSet candidate : altsets) {
        // two or more bits set means several alts share this subset
        if ( candidate.cardinality()>=2 ) {
            sawConflict = true;
            break;
        }
    }
    return sawConflict;
}
/** Report whether every alt subset is equal to the first one.
 *  return (A_i == A_1 for all A_i in altsets A)
 *
 *  An empty collection is treated as trivially all-equal and yields true.
 *
 *  @param altsets the alt subsets to compare
 *  @return true if there are no subsets or all subsets are equal
 */
public boolean allSubsetsEqual(Collection<BitSet> altsets) {
    Iterator<BitSet> it = altsets.iterator();
    // Without this guard, it.next() throws NoSuchElementException on empty input.
    if ( !it.hasNext() ) return true;
    BitSet first = it.next();
    while ( it.hasNext() ) {
        BitSet next = it.next();
        if ( !next.equals(first) ) return false;
    }
    return true;
}
public int getUniqueAlt(Collection<BitSet> altsets) {
BitSet all = getAlts(altsets);
if ( all.cardinality()==1 ) return all.nextSetBit(0);
@ -1612,7 +1655,7 @@ public class ParserATNSimulator extends ATNSimulator {
}
/** Get the conflicting alt subsets from a configuration set.
* for c in configs:
* for c in configs:
* map[c] U= c.alt # map hash/equals uses s and x, not alt and not pred
*/
public Collection<BitSet> getConflictingAltSubsets(ATNConfigSet configs) {
@ -1628,6 +1671,31 @@ public class ParserATNSimulator extends ATNSimulator {
return configToAlts.values();
}
/** Group the alternatives of a configuration set by ATN state.
 *  Each state seen in {@code configs} maps to a BitSet holding the alt
 *  of every configuration that is in that state:
 *      for c in configs:
 *          map[c.state] U= c.alt
 */
public Map<ATNState, BitSet> getStateToAltMap(ATNConfigSet configs) {
    Map<ATNState, BitSet> stateToAlts = new HashMap<ATNState, BitSet>();
    for (ATNConfig config : configs) {
        BitSet altsForState = stateToAlts.get(config.state);
        if ( altsForState!=null ) {
            // state already seen; just record one more alt for it
            altsForState.set(config.alt);
            continue;
        }
        // first configuration for this state: start a fresh alt set
        altsForState = new BitSet();
        stateToAlts.put(config.state, altsForState);
        altsForState.set(config.alt);
    }
    return stateToAlts;
}
/** Report whether some ATN state in {@code configs} is associated with
 *  exactly one alternative, using the state-to-alts map built by
 *  getStateToAltMap().
 */
public boolean hasStateAssociatedWithOneAlt(ATNConfigSet configs) {
    boolean foundSingleAltState = false;
    for (BitSet altsForState : getStateToAltMap(configs).values()) {
        if ( altsForState.cardinality()==1 ) {
            foundSingleAltState = true;
            break;
        }
    }
    return foundSingleAltState;
}
/** Logical negation of hasMoreThanOneViableAlt() for the same alt subsets. */
public boolean resolvesToJustOneViableAlt(Collection<BitSet> altsets) {
    boolean severalViable = hasMoreThanOneViableAlt(altsets);
    return !severalViable;
}
@ -1667,8 +1735,9 @@ public class ParserATNSimulator extends ATNSimulator {
8 -> (8,1,1), (8,2)
11 -> (11,1,4), (11,2,8), (11,1,8 1)
Walk and find state config lists with > 1 alt. If none, no conflict. return null. Here, states 11
and 8 have lists with both alts 1 and 2. Must check these config lists for conflicting configs.
Walk and find state config lists with > 1 alt. If none, no conflict.
return null. Here, states 11 and 8 have lists with both alts 1 and 2.
Must check these config lists for conflicting configs.
Sam pointed out a problem with the previous definition, v3, of
ambiguous states. If we have another state associated with conflicting
@ -1718,6 +1787,7 @@ public class ParserATNSimulator extends ATNSimulator {
TODO: now we know contexts are merged, can we optimize? Use big int -> config array?
*/
@Nullable
public IntervalSet getConflictingAlts_old(@NotNull ATNConfigSet configs) {
if ( debug ) System.out.println("### check ambiguous "+configs);
@ -2011,4 +2081,8 @@ public class ParserATNSimulator extends ATNSimulator {
/** Enable or disable SLL mode; when true, only local context
 *  prediction (SLL(k) style) is done.
 *
 *  @param SLL the new value of the SLL flag
 */
public void setSLL(boolean SLL) {
this.SLL = SLL;
}
/** Set the exactAmbig flag. When true, full LL prediction keeps
 *  looking for the exact ambiguous sequence even after it knows which
 *  alternative to predict; when false (the default), prediction
 *  terminates early for speed.
 *
 *  @param exactAmbig the new value of the exactAmbig flag
 */
public void setExactAmbig(boolean exactAmbig) {
this.exactAmbig = exactAmbig;
}
}

View File

@ -1,28 +0,0 @@
package org.antlr.v4.test;
import org.antlr.runtime.RecognitionException;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.tool.LexerGrammar;
import org.junit.Test;
import org.stringtemplate.v4.ST;
public class TestLexerAttributes extends BaseTest {
@Test
public void testSetType() throws RecognitionException {
LexerGrammar g = new LexerGrammar(
"lexer grammar T;\n" +
"A : 'a' {#$type=101;#} ;\n"
);
Tool antlr = new Tool();
antlr.process(g,false);
CodeGenerator gen = new CodeGenerator(g);
ST outputFileST = gen.generateLexer();
String output = outputFileST.render();
int start = output.indexOf('#');
int end = output.lastIndexOf('#');
String snippet = output.substring(start+1,end);
assertEquals("_type = 101;", snippet);
}
}