Reworked how I assign label indexes

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6836]
This commit is contained in:
parrt 2010-05-03 16:14:20 -08:00
parent 9c03c08ba0
commit cf7a730b31
8 changed files with 321 additions and 307 deletions

View File

@ -13,22 +13,20 @@ public class NFA {
public byte[] code; public byte[] code;
public Map<String, Integer> ruleToAddr; public Map<String, Integer> ruleToAddr;
public int[] tokenTypeToAddr; public int[] tokenTypeToAddr;
public String[] labels; // TODO: need for actions. What is $label? public CommonToken[] labelValues;
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
String[] labels)
{
this.code = code; this.code = code;
this.ruleToAddr = ruleToAddr; this.ruleToAddr = ruleToAddr;
this.tokenTypeToAddr = tokenTypeToAddr; this.tokenTypeToAddr = tokenTypeToAddr;
this.labels = labels; labelValues = new CommonToken[nLabels];
} }
public int execThompson(CharStream input) { public int execThompson(CharStream input) {
return execThompson(input, 0, false, new CommonToken[labels.length]); return execThompson(input, 0, false);
} }
public int execThompson(CharStream input, int ip, boolean doActions, CommonToken[] labelValues) { public int execThompson(CharStream input, int ip, boolean doActions) {
int c = input.LA(1); int c = input.LA(1);
if ( c==Token.EOF ) return Token.EOF; if ( c==Token.EOF ) return Token.EOF;
@ -37,7 +35,6 @@ public class NFA {
ThreadState prevAccept = new ThreadState(Integer.MAX_VALUE, -1, NFAStack.EMPTY); ThreadState prevAccept = new ThreadState(Integer.MAX_VALUE, -1, NFAStack.EMPTY);
ThreadState firstAccept = null; ThreadState firstAccept = null;
// int maxAlts = closure.size(); // >= number of alts; if no decision, this is 1
int firstCharIndex = input.index(); // use when creating Token int firstCharIndex = input.index(); // use when creating Token
do { // while more work do { // while more work
@ -83,7 +80,6 @@ processOneChar:
case Bytecode.LABEL : case Bytecode.LABEL :
if ( doActions ) { if ( doActions ) {
int labelIndex = getShort(code, ip); int labelIndex = getShort(code, ip);
System.out.println("label "+labels[labelIndex]);
labelValues[labelIndex] = labelValues[labelIndex] =
new CommonToken(input, 0, 0, input.index(), -1); new CommonToken(input, 0, 0, input.index(), -1);
} }
@ -91,7 +87,6 @@ processOneChar:
case Bytecode.SAVE : case Bytecode.SAVE :
if ( doActions ) { if ( doActions ) {
int labelIndex = getShort(code, ip); int labelIndex = getShort(code, ip);
System.out.println("save "+labels[labelIndex]);
labelValues[labelIndex].setStopIndex(input.index()-1); labelValues[labelIndex].setStopIndex(input.index()-1);
} }
break; break;

View File

@ -26,13 +26,16 @@ public class NFABytecodeGenerator extends TreeParser {
public int ip = 0; // where to write next public int ip = 0; // where to write next
Map<String, Integer> ruleToAddr = new HashMap<String, Integer>(); Map<String, Integer> ruleToAddr = new HashMap<String, Integer>();
int[] tokenTypeToAddr; int[] tokenTypeToAddr;
List<String> labels = new ArrayList<String>();
public NFABytecodeGenerator(LexerGrammar lg, TreeNodeStream input) { Map<Rule, Map<String, Integer>> labels = new HashMap<Rule, Map<String, Integer>>();
super(input);
this.lg = lg; public Rule currentRule;
tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
} /** labels in all rules share single label space
* but we still track labels per rule so we can translate $label
* to an index in an action.
*/
public int numLabels = 0;
public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) { public NFABytecodeGenerator(TreeNodeStream input, RecognizerSharedState state) {
super(input, state); super(input, state);
@ -40,6 +43,7 @@ public class NFABytecodeGenerator extends TreeParser {
public void emit(Instr I) { public void emit(Instr I) {
I.addr = ip; I.addr = ip;
I.rule = currentRule;
ip += I.nBytes(); ip += I.nBytes();
instrs.add(I); instrs.add(I);
} }
@ -55,6 +59,7 @@ public class NFABytecodeGenerator extends TreeParser {
Instr last = instrs.get(instrs.size() - 1); Instr last = instrs.get(instrs.size() - 1);
int size = last.addr + last.nBytes(); int size = last.addr + last.nBytes();
byte[] code = new byte[size]; byte[] code = new byte[size];
// resolve CALL instruction targets and index labels before generating code // resolve CALL instruction targets and index labels before generating code
for (Instr I : instrs) { for (Instr I : instrs) {
if ( I instanceof CallInstr ) { if ( I instanceof CallInstr ) {
@ -64,12 +69,24 @@ public class NFABytecodeGenerator extends TreeParser {
} }
else if ( I instanceof LabelInstr ) { else if ( I instanceof LabelInstr ) {
LabelInstr L = (LabelInstr)I; LabelInstr L = (LabelInstr)I;
L.labelIndex = labels.size(); Map<String, Integer> ruleLabels = labels.get(I.rule);
labels.add(L.token.getText()); if ( ruleLabels==null ) {
ruleLabels = new HashMap<String, Integer>();
labels.put(I.rule, ruleLabels);
}
String labelName = L.token.getText();
if ( ruleLabels.get(labelName)!=null ) {
L.labelIndex = ruleLabels.get(labelName);
}
else {
ruleLabels.put(labelName, numLabels);
L.labelIndex = numLabels++;
}
} }
else if ( I instanceof SaveInstr ) { else if ( I instanceof SaveInstr ) {
SaveInstr S = (SaveInstr)I; SaveInstr S = (SaveInstr)I;
S.labelIndex = labels.size()-1; Map<String, Integer> ruleLabels = labels.get(I.rule);
S.labelIndex = ruleLabels.get(S.token.getText());
} }
} }
for (Instr I : instrs) { for (Instr I : instrs) {
@ -80,7 +97,9 @@ public class NFABytecodeGenerator extends TreeParser {
public static NFA getBytecode(LexerGrammar lg, String modeName) { public static NFA getBytecode(LexerGrammar lg, String modeName) {
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(); GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
NFABytecodeTriggers gen = new NFABytecodeTriggers(lg, null); NFABytecodeTriggers gen = new NFABytecodeTriggers(null);
gen.lg = lg;
gen.tokenTypeToAddr = new int[lg.getMaxTokenType()+1];
// add split for s0 to hook up rules (fill in operands as we gen rules) // add split for s0 to hook up rules (fill in operands as we gen rules)
int numRules = lg.modes.get(modeName).size(); int numRules = lg.modes.get(modeName).size();
@ -89,7 +108,9 @@ public class NFABytecodeGenerator extends TreeParser {
SplitInstr s0 = new SplitInstr(numRules - numFragmentRules); SplitInstr s0 = new SplitInstr(numRules - numFragmentRules);
gen.emit(s0); gen.emit(s0);
for (Rule r : lg.modes.get(modeName)) { // for each rule in mode for (Rule r : lg.modes.get(modeName)) { // for each rule in mode
gen.currentRule = r;
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK); GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk); CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
gen.setTreeNodeStream(nodes); gen.setTreeNodeStream(nodes);
@ -100,7 +121,7 @@ public class NFABytecodeGenerator extends TreeParser {
gen.tokenTypeToAddr[ttype] = gen.ip; gen.tokenTypeToAddr[ttype] = gen.ip;
} }
try { try {
((NFABytecodeTriggers)gen).block(); gen.block(); // GEN Instr OBJECTS
int ruleTokenType = lg.getTokenType(r.name); int ruleTokenType = lg.getTokenType(r.name);
if ( !r.isFragment() ) { if ( !r.isFragment() ) {
gen.emit(new AcceptInstr(ruleTokenType)); gen.emit(new AcceptInstr(ruleTokenType));
@ -117,7 +138,7 @@ public class NFABytecodeGenerator extends TreeParser {
System.out.println(Bytecode.disassemble(code)); System.out.println(Bytecode.disassemble(code));
System.out.println("rule addrs="+gen.ruleToAddr); System.out.println("rule addrs="+gen.ruleToAddr);
return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labels.toArray(new String[0])); return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.numLabels);
} }
/** Write value at index into a byte array highest to lowest byte, /** Write value at index into a byte array highest to lowest byte,

View File

@ -17,22 +17,6 @@ import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
} }
@members {
public NFABytecodeTriggers(LexerGrammar lg, TreeNodeStream input) {
super(lg, input);
}
}
/*
e1 | e2 | e3:
split 3, L1, L2, L3
L1: e1
jmp END
L2: e2
jmp END
L3: e3
END:
*/
block block
: ^( BLOCK (^(OPTIONS .+))? : ^( BLOCK (^(OPTIONS .+))?
{ {

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,11 @@
package org.antlr.v4.codegen.nfa; package org.antlr.v4.codegen.nfa;
import org.antlr.v4.tool.Rule;
/** */ /** */
public abstract class Instr { public abstract class Instr {
public int addr; public int addr;
public Rule rule;
public abstract short opcode(); public abstract short opcode();
public abstract int nBytes(); public abstract int nBytes();
public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); } public int charSize(int a, int b) { return Math.max(charSize(a), charSize(b)); }

View File

@ -132,6 +132,7 @@ public class Rule implements AttributeResolver {
return refs; return refs;
} }
// TODO: called frequently; make it more efficient
public MultiMap<String, LabelElementPair> getLabelDefs() { public MultiMap<String, LabelElementPair> getLabelDefs() {
MultiMap<String, LabelElementPair> defs = MultiMap<String, LabelElementPair> defs =
new MultiMap<String, LabelElementPair>(); new MultiMap<String, LabelElementPair>();

View File

@ -125,6 +125,7 @@ public class TestNFABytecodeGeneration extends BaseTest {
} }
@Test public void testLabelIndexes() throws Exception { @Test public void testLabelIndexes() throws Exception {
// labels indexed from 0 in each rule
LexerGrammar g = new LexerGrammar( LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" + "lexer grammar L;\n" +
"A : a='a' ;\n" + "A : a='a' ;\n" +
@ -145,6 +146,23 @@ public class TestNFABytecodeGeneration extends BaseTest {
checkBytecode(g, expecting); checkBytecode(g, expecting);
} }
@Test public void testLabelReuseWithinRule() throws Exception {
// labels indexed from 0 in each rule
LexerGrammar g = new LexerGrammar(
"lexer grammar L;\n" +
"A : a='b' a='c' ;\n");
String expecting =
"0000:\tsplit 5\n" +
"0005:\tlabel 0\n" +
"0008:\tmatch8 'b'\n" +
"0010:\tsave 0\n" +
"0013:\tlabel 0\n" +
"0016:\tmatch8 'c'\n" +
"0018:\tsave 0\n" +
"0021:\taccept 4\n";
checkBytecode(g, expecting);
}
public void _template() throws Exception { public void _template() throws Exception {
LexerGrammar g = new LexerGrammar( LexerGrammar g = new LexerGrammar(
"\n"); "\n");

View File

@ -4,7 +4,6 @@ import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token; import org.antlr.runtime.Token;
import org.antlr.v4.Tool; import org.antlr.v4.Tool;
import org.antlr.v4.codegen.NFABytecodeGenerator; import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.CommonToken;
import org.antlr.v4.runtime.nfa.NFA; import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.semantics.SemanticPipeline; import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Grammar;
@ -195,16 +194,15 @@ public class TestNFABytecodeInterp extends BaseTest {
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME); NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
ANTLRStringStream in = new ANTLRStringStream(input); ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokenTypes = new ArrayList<Integer>(); List<Integer> tokenTypes = new ArrayList<Integer>();
CommonToken[] tokens = new CommonToken[nfa.labels.length];
int ttype = 0; int ttype = 0;
do { do {
ttype = nfa.execThompson(in, 0, true, tokens); ttype = nfa.execThompson(in, 0, true);
tokenTypes.add(ttype); tokenTypes.add(ttype);
} while ( ttype!= Token.EOF ); } while ( ttype!= Token.EOF );
assertEquals(expectingTokenTypes, tokenTypes); assertEquals(expectingTokenTypes, tokenTypes);
if ( expectingTokens!=null ) { if ( expectingTokens!=null ) {
assertEquals(expectingTokens, Arrays.toString(tokens)); assertEquals(expectingTokens, Arrays.toString(nfa.labelValues));
} }
} }
} }