Reworked how I assign label indexes.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6836]
This commit is contained in:
parrt 2010-05-03 16:14:20 -08:00
parent 9c03c08ba0
commit cf7a730b31
8 changed files with 321 additions and 307 deletions

View File

@ -13,22 +13,20 @@ public class NFA {
public byte[] code;
public Map<String, Integer> ruleToAddr;
public int[] tokenTypeToAddr;
public String[] labels; // TODO: need for actions. What is $label?
public CommonToken[] labelValues;
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr,
String[] labels)
{
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
this.code = code;
this.ruleToAddr = ruleToAddr;
this.tokenTypeToAddr = tokenTypeToAddr;
this.labels = labels;
labelValues = new CommonToken[nLabels];
}
public int execThompson(CharStream input) {
return execThompson(input, 0, false, new CommonToken[labels.length]);
return execThompson(input, 0, false);
}
public int execThompson(CharStream input, int ip, boolean doActions, CommonToken[] labelValues) {
public int execThompson(CharStream input, int ip, boolean doActions) {
int c = input.LA(1);
if ( c==Token.EOF ) return Token.EOF;
@ -37,7 +35,6 @@ public class NFA {
ThreadState prevAccept = new ThreadState(Integer.MAX_VALUE, -1, NFAStack.EMPTY);
ThreadState firstAccept = null;
// int maxAlts = closure.size(); // >= number of alts; if no decision, this is 1
int firstCharIndex = input.index(); // use when creating Token
do { // while more work
@ -83,7 +80,6 @@ processOneChar:
case Bytecode.LABEL :
if ( doActions ) {
int labelIndex = getShort(code, ip);
System.out.println("label "+labels[labelIndex]);
labelValues[labelIndex] =
new CommonToken(input, 0, 0, input.index(), -1);
}
@ -91,7 +87,6 @@ processOneChar:
case Bytecode.SAVE :
if ( doActions ) {
int labelIndex = getShort(code, ip);
System.out.println("save "+labels[labelIndex]);
labelValues[labelIndex].setStopIndex(input.index()-1);
}
break;

View File

@ -26,13 +26,16 @@ public class NFABytecodeGenerator extends TreeParser {
public int ip = 0; // where to write next
Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
int[] tokenTypeToAddr;
List<String> labels = new ArrayList<String>();
public NFABytecodeGenerator(LexerGrammar lg, TreeNodeStream input) {
super(input);
this.lg = lg;
tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
}
Map<Rule, Map<String, Integer>> labels = new HashMap<Rule, Map<String, Integer>>();
public Rule currentRule;
/** labels in all rules share single label space
* but we still track labels per rule so we can translate $label
* to an index in an action.
*/
public int numLabels = 0;
public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
super(input, state);
@ -40,6 +43,7 @@ public class NFABytecodeGenerator extends TreeParser {
/** Append an instruction to the output stream: stamp it with its
 *  address and owning rule, record it, and advance the write pointer
 *  by the instruction's encoded width.
 */
public void emit(Instr instr) {
	instr.addr = ip;          // address is the current write position
	instr.rule = currentRule; // needed later to resolve per-rule label names
	instrs.add(instr);
	ip += instr.nBytes();     // next instruction starts after this one
}
@ -55,6 +59,7 @@ public class NFABytecodeGenerator extends TreeParser {
Instr last = instrs.get(instrs.size() - 1);
int size = last.addr + last.nBytes();
byte[] code = new byte[size];
// resolve CALL instruction targets and index labels before generating code
for (Instr I : instrs) {
if ( I instanceof CallInstr ) {
@ -64,12 +69,24 @@ public class NFABytecodeGenerator extends TreeParser {
}
else if ( I instanceof LabelInstr ) {
LabelInstr L = (LabelInstr)I;
L.labelIndex = labels.size();
labels.add(L.token.getText());
Map<String, Integer> ruleLabels = labels.get(I.rule);
if ( ruleLabels==null ) {
ruleLabels = new HashMap<String, Integer>();
labels.put(I.rule, ruleLabels);
}
String labelName = L.token.getText();
if ( ruleLabels.get(labelName)!=null ) {
L.labelIndex = ruleLabels.get(labelName);
}
else {
ruleLabels.put(labelName, numLabels);
L.labelIndex = numLabels++;
}
}
else if ( I instanceof SaveInstr ) {
SaveInstr S = (SaveInstr)I;
S.labelIndex = labels.size()-1;
Map<String, Integer> ruleLabels = labels.get(I.rule);
S.labelIndex = ruleLabels.get(S.token.getText());
}
}
for (Instr I : instrs) {
@ -80,7 +97,9 @@ public class NFABytecodeGenerator extends TreeParser {
public static NFA getBytecode(LexerGrammar lg, String modeName) {
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
NFABytecodeTriggers gen = new NFABytecodeTriggers(lg, null);
NFABytecodeTriggers gen = new NFABytecodeTriggers(null);
gen.lg = lg;
gen.tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
// add split for s0 to hook up rules (fill in operands as we gen rules)
int numRules = lg.modes.get(modeName).size();
@ -89,7 +108,9 @@ public class NFABytecodeGenerator extends TreeParser {
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
gen.emit(s0);
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
gen.currentRule = r;
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
gen.setTreeNodeStream(nodes);
@ -100,7 +121,7 @@ public class NFABytecodeGenerator extends TreeParser {
gen.tokenTypeToAddr[ttype] = gen.ip;
}
try {
((NFABytecodeTriggers)gen).block();
gen.block(); // GEN Instr OBJECTS
int ruleTokenType = lg.getTokenType(r.name);
if ( !r.isFragment() ) {
gen.emit(new AcceptInstr(ruleTokenType));
@ -117,7 +138,7 @@ public class NFABytecodeGenerator extends TreeParser {
System.out.println(Bytecode.disassemble(code));
System.out.println("rule addrs="+gen.ruleToAddr);
return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labels.toArray(new String[0]));
return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.numLabels);
}
/** Write value at index into a byte array highest to lowest byte,

View File

@ -17,22 +17,6 @@ import java.util.Map;
import java.util.HashMap;
}
@members {
public NFABytecodeTriggers(LexerGrammar lg, TreeNodeStream input) {
super(lg, input);
}
}
/*
e1 | e2 | e3:
split 3, L1, L2, L3
L1: e1
jmp END
L2: e2
jmp END
L3: e3
END:
*/
block
: ^( BLOCK (^(OPTIONS .+))?
{

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,11 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.tool.Rule;
/** */
public abstract class Instr {
public int addr;
public Rule rule;
public abstract short opcode();
public abstract int nBytes();
public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); }

View File

@ -132,6 +132,7 @@ public class Rule implements AttributeResolver {
return refs;
}
// TODO: called frequently; make it more efficient
public MultiMap<String, LabelElementPair> getLabelDefs() {
MultiMap<String, LabelElementPair> defs =
new MultiMap<String, LabelElementPair>();

View File

@ -125,6 +125,7 @@ public class TestNFABytecodeGeneration extends BaseTest {
}
@Test public void testLabelIndexes() throws Exception {
// labels indexed from 0 in each rule
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"A : a='a' ;\n" +
@ -145,6 +146,23 @@ public class TestNFABytecodeGeneration extends BaseTest {
checkBytecode(g, expecting);
}
@Test public void testLabelReuseWithinRule() throws Exception {
	// a label name that appears twice in one rule maps to one shared index
	LexerGrammar g = new LexerGrammar(
		"lexer grammar L;\n" +
		"A : a='b' a='c' ;\n");
	String[] lines = {
		"0000:\tsplit 5\n",
		"0005:\tlabel 0\n",
		"0008:\tmatch8 'b'\n",
		"0010:\tsave 0\n",
		"0013:\tlabel 0\n",
		"0016:\tmatch8 'c'\n",
		"0018:\tsave 0\n",
		"0021:\taccept 4\n",
	};
	StringBuilder expecting = new StringBuilder();
	for (String line : lines) expecting.append(line);
	checkBytecode(g, expecting.toString());
}
public void _template() throws Exception {
LexerGrammar g = new LexerGrammar(
"\n");

View File

@ -4,7 +4,6 @@ import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.CommonToken;
import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar;
@ -195,16 +194,15 @@ public class TestNFABytecodeInterp extends BaseTest {
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokenTypes = new ArrayList<Integer>();
CommonToken[] tokens = new CommonToken[nfa.labels.length];
int ttype = 0;
do {
ttype = nfa.execThompson(in, 0, true, tokens);
ttype = nfa.execThompson(in, 0, true);
tokenTypes.add(ttype);
} while ( ttype!= Token.EOF );
assertEquals(expectingTokenTypes, tokenTypes);
if ( expectingTokens!=null ) {
assertEquals(expectingTokens, Arrays.toString(tokens));
assertEquals(expectingTokens, Arrays.toString(nfa.labelValues));
}
}
}