got basic lexers generated

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6871]
This commit is contained in:
parrt 2010-05-16 15:00:56 -08:00
parent 563de77ec4
commit f5cc4efc1b
28 changed files with 169 additions and 128 deletions

View File

@ -46,10 +46,6 @@ public abstract class BaseRecognizer {
public static final int MEMO_RULE_UNKNOWN = -1;
public static final int INITIAL_FOLLOW_STACK_SIZE = 100;
// copies from Token object for convenience in actions
public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
public static final int HIDDEN = Token.HIDDEN_CHANNEL;
public static final String NEXT_TOKEN_RULE_NAME = "nextToken";
public IntStream input;
@ -731,7 +727,7 @@ public abstract class BaseRecognizer {
int i = 0;
for (i=stack.length-1; i>=0; i--) {
StackTraceElement t = stack[i];
if ( t.getClassName().startsWith("org.antlr.runtime.") ) {
if ( t.getClassName().startsWith("org.antlr.v4.runtime.") ) {
continue; // skip support code such as this method
}
if ( t.getMethodName().equals(NEXT_TOKEN_RULE_NAME) ) {

View File

@ -27,7 +27,9 @@
*/
package org.antlr.v4.runtime;
import org.antlr.runtime.*;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenSource;
/** A lexer is a recognizer that draws input symbols from a character stream.
* Lexer grammars result in a subclass of this object. A Lexer object
@ -88,7 +90,7 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
return eof;
}
try {
mTokens();
_nextToken();
if ( state.token==null ) {
emit();
}
@ -119,7 +121,7 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
}
/** This is the lexer entry point that sets instance var 'token' */
public abstract void mTokens() throws RecognitionException;
public abstract void _nextToken() throws RecognitionException;
/** Set the char stream and reset the lexer */
public void setCharStream(CharStream input) {
@ -163,61 +165,6 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
return t;
}
/** Match the characters of {@code s}, in order, against the input stream.
 *  Each matching character is consumed and {@code state.failed} is cleared.
 *  On the first mismatch: while backtracking, {@code state.failed} is set and
 *  the method returns quietly; otherwise a MismatchedTokenException is built,
 *  handed to {@code recover()} and then thrown.
 */
public void match(String s) throws MismatchedTokenException {
    for (int pos = 0; pos < s.length(); pos++) {
        char expected = s.charAt(pos);
        if ( input.LA(1)!=expected ) {
            if ( state.backtracking>0 ) {
                // speculative match: report failure through the shared state only
                state.failed = true;
                return;
            }
            MismatchedTokenException mte =
                new MismatchedTokenException(expected, input);
            recover(mte);
            throw mte;
        }
        input.consume();
        state.failed = false;
    }
}
/** Consume the current input symbol unconditionally; nothing is compared
 *  and {@code state.failed} is not touched.
 */
public void matchAny() {
input.consume();
}
/** Match the single symbol {@code c} against the next input symbol.
 *  On success the symbol is consumed and {@code state.failed} is cleared.
 *  On mismatch: while backtracking, {@code state.failed} is set and the
 *  method returns; otherwise a MismatchedTokenException is passed to
 *  {@code recover()} and then thrown.
 */
public void match(int c) throws MismatchedTokenException {
    if ( input.LA(1)==c ) {
        input.consume();
        state.failed = false;
        return;
    }
    if ( state.backtracking>0 ) {
        state.failed = true;
        return;
    }
    MismatchedTokenException mte = new MismatchedTokenException(c, input);
    recover(mte); // don't really recover; just consume in lexer
    throw mte;
}
/** Match one input symbol that lies in the inclusive range {@code a..b}.
 *  On success the symbol is consumed and {@code state.failed} is cleared.
 *  If the symbol is outside the range: while backtracking,
 *  {@code state.failed} is set and the method returns; otherwise a
 *  MismatchedRangeException is passed to {@code recover()} and then thrown.
 */
public void matchRange(int a, int b)
    throws MismatchedRangeException
{
    if ( input.LA(1)>=a && input.LA(1)<=b ) {
        input.consume();
        state.failed = false;
        return;
    }
    if ( state.backtracking>0 ) {
        state.failed = true;
        return;
    }
    MismatchedRangeException mre = new MismatchedRangeException(a,b,input);
    recover(mre);
    throw mre;
}
/** Return the line number reported by the underlying input stream. */
public int getLine() {
return input.getLine();
}

View File

@ -75,7 +75,7 @@ public class Parser extends BaseRecognizer {
}
t.line = current.getLine();
t.charPositionInLine = current.getCharPositionInLine();
t.channel = DEFAULT_TOKEN_CHANNEL;
t.channel = Token.DEFAULT_CHANNEL;
return t;
}

View File

@ -33,12 +33,13 @@ public class LABitSet {
/** Construction from a static array of longs */
public LABitSet(long[] bits_) {
bits = bits_;
if ( bits_==null || bits_.length==0 ) bits = new long[1];
else bits = bits_;
}
/** Construction from a static array of longs plus an explicit EOF flag */
public LABitSet(long[] bits_, boolean EOF) {
bits = bits_;
this(bits_);
this.EOF = EOF;
}

View File

@ -1,4 +1,4 @@
package org.antlr.v4.runtime.nfa;
package org.antlr.v4.runtime.pda;
import java.util.ArrayList;
import java.util.List;

View File

@ -1,4 +1,4 @@
package org.antlr.v4.runtime.nfa;
package org.antlr.v4.runtime.pda;
/** Identical to ANTLR's static grammar analysis NFAContext object */
public class NFAStack {

View File

@ -1,4 +1,4 @@
package org.antlr.v4.runtime.nfa;
package org.antlr.v4.runtime.pda;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.Token;
@ -9,8 +9,11 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
/** http://swtch.com/~rsc/regexp/regexp2.html */
public class NFA {
/** A (nondeterministic) pushdown bytecode machine for lexing and LL prediction.
* Derived partially from Cox' description of Thompson's 1960s work:
* http://swtch.com/~rsc/regexp/regexp2.html
*/
public class PDA {
public byte[] code;
public Map<String, Integer> ruleToAddr;
public int[] tokenTypeToAddr;
@ -20,8 +23,9 @@ public class NFA {
/** If we hit an action, we'll have to rewind and do the winning rule again */
boolean bypassedAction;
public PDA() {;}
public NFA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
public PDA(byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels) {
this.code = code;
this.ruleToAddr = ruleToAddr;
this.tokenTypeToAddr = tokenTypeToAddr;
@ -29,6 +33,14 @@ public class NFA {
labelValues = new CommonToken[nLabels];
}
/** Build a PDA from raw bytecode and a token-type-to-address table, without
 *  a rule-name-to-address map ({@code ruleToAddr} is not assigned here).
 *  Allocates {@code nLabels} slots for label values.
 */
public PDA(byte[] code, int[] tokenTypeToAddr, int nLabels) {
    // debug trace: dumps the raw bytecode to stdout on every construction
    System.out.println("code="+Arrays.toString(code));
    this.tokenTypeToAddr = tokenTypeToAddr;
    this.code = code;
    this.nLabels = nLabels;
    this.labelValues = new CommonToken[nLabels];
}
public int execThompson(CharStream input) {
int m = input.mark();
Arrays.fill(labelValues, null);

View File

@ -1,4 +1,4 @@
package org.antlr.v4.runtime.nfa;
package org.antlr.v4.runtime.pda;
/** NFA simulation thread state */
public class ThreadState {

View File

@ -11,6 +11,7 @@ javaTypeInitMap ::= [
]
// args must be <object-model-object>, <fields-resulting-in-STs>
ParserFile(file, parser, dfaDecls, bitSetDecls) ::= <<
// $ANTLR ANTLRVersion> <file.fileName> generatedTimestamp>
import org.antlr.v4.runtime.Parser;
@ -215,6 +216,58 @@ BitSetDecl(b) ::= <<
public static final LABitSet <b.name>=new LABitSet(new long[]{<b.fset.bits:{<it>L};separator=",">}<if(b.fset.EOF)>, true<endif>);
>>
/** Outermost template for a generated lexer source file. Emits a header
 *  comment containing fileName, the runtime import statements, and then
 *  the rendered lexer attribute (the class body template).
 */
LexerFile(fileName, lexer) ::= <<
// $ANTLR ANTLRVersion> <fileName> generatedTimestamp>
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.RecognizerSharedState;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.pda.*;
import org.antlr.v4.runtime.misc.*;
import org.antlr.runtime.*;
<lexer>
>>
/** Generated lexer class named lexerName, extending Lexer. Emits:
 *   - a Mode enum with one constant per entry of modes, each constructed
 *     with its 0-based iteration index (i0);
 *   - a _mode field initialised to Mode.DEFAULT_MODE;
 *   - three constructors (no-arg; CharStream; CharStream plus shared state);
 *   - getGrammarFileName();
 *   - _nextToken(), which runs execThompson on the PDA found at
 *     modeToPDA[_mode.ordinal()] and stores the result in state.type;
 *   - the rendered pdas, followed by a static modeToPDA array holding one
 *     new <mode>_PDA() per entry of modes.
 *  NOTE(review): fileName is referenced but is not a declared parameter of
 *  this template; presumably it is resolved from the enclosing LexerFile
 *  template -- confirm.
 */
Lexer(lexerName,modes,pdas) ::= <<
public class <lexerName> extends Lexer {
public static enum Mode { <modes:{m|<m>(<i0>)}; separator=", ">; int mode; Mode(int m) {mode=m;\}}
public Mode _mode = Mode.DEFAULT_MODE;
public <lexerName>() {;}
public <lexerName>(CharStream input) {
this(input, new RecognizerSharedState());
}
public <lexerName>(CharStream input, RecognizerSharedState state) {
super(input,state);
}
public String getGrammarFileName() { return "<fileName>"; }
public void _nextToken() throws RecognitionException {
state.type = modeToPDA[_mode.ordinal()].execThompson(input);
}
<pdas>
public static PDA[] modeToPDA = { <modes:{m | new <m>_PDA()}; separator=", "> };
}
>>
/** Code for a single PDA called name. Emits a static byte array
 *  <name>_code filled from model.code, a static int array
 *  <name>_tokenTypeToAddr filled from model.tokenTypeToAddr, and a nested
 *  class <name>_PDA extending PDA whose no-arg constructor passes those two
 *  arrays and model.nLabels to the superclass constructor.
 */
PDA(name, model) ::= <<
public static final byte[] <name>_code = {
<model.code; separator=", ">
};
public static final int[] <name>_tokenTypeToAddr = {
<model.tokenTypeToAddr; separator=", ">
};
public static final class <name>_PDA extends PDA {
<!byte[] code, Map<String, Integer> ruleToAddr, int[] tokenTypeToAddr, int nLabels!>
public <name>_PDA() {
super(<name>_code, <name>_tokenTypeToAddr, <model.nLabels>);
}
}
>>
/** Using a type to init value map, try to init a type; if not in table
* must be an object, default value is "null".
*/

View File

@ -1,10 +1,7 @@
package org.antlr.v4.codegen;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
import org.stringtemplate.v4.ST;
public class CodeGenPipeline {
Grammar g;
@ -12,27 +9,24 @@ public class CodeGenPipeline {
this.g = g;
}
public void process() {
if ( g.isLexer() ) processLexer();
else if ( g.isParser() ) processParser();
CodeGenerator gen = new CodeGenerator(g);
ST outputFileST = gen.generate();
gen.write(outputFileST);
// if ( g.isLexer() ) processLexer();
// else if ( g.isParser() ) processParser();
}
void processParser() {
CodeGenerator gen = new CodeGenerator(g);
gen.write();
ST outputFileST = gen.generate();
gen.write(outputFileST);
}
void processLexer() {
LexerGrammar lg = (LexerGrammar)g;
for (String modeName : lg.modes.keySet()) { // for each mode
NFA nfa = NFABytecodeGenerator.getBytecode(lg, modeName);
//ANTLRStringStream input = new ANTLRStringStream("32");
ANTLRStringStream input = new ANTLRStringStream("/*x*/!ab");
//ANTLRStringStream input = new ANTLRStringStream("abc32ab");
int ttype = 0;
while ( ttype!= Token.EOF ) {
ttype = nfa.execThompson(input);
System.out.println("ttype="+ttype);
}
}
CodeGenerator gen = new CodeGenerator(g);
ST outputFileST = gen.generate();
gen.write(outputFileST);
}
}

View File

@ -67,15 +67,23 @@ public class CodeGenerator {
// }
}
public void write() {
OutputModelFactory factory = null;
if ( g.isParser() ) factory = new ParserFactory(this);
// ...
OutputModelObject outputModel = factory.buildOutputModel();
public ST generate() {
ST outputFileST = null;
if ( g.isParser() || g.isTreeGrammar() ) {
ParserFactory pf = new ParserFactory(this);
OutputModelObject outputModel = pf.buildOutputModel();
OutputModelWalker walker = new OutputModelWalker(g.tool, templates);
ST outputFileST = walker.walk(outputModel);
outputFileST = walker.walk(outputModel);
}
else if ( g.isLexer() ) {
LexerFactory lf = new LexerFactory(this);
outputFileST = lf.build();
}
return outputFileST;
}
public void write(ST outputFileST) {
// WRITE FILES
try {
target.genRecognizerFile(this,g,outputFileST);

View File

@ -0,0 +1,31 @@
package org.antlr.v4.codegen;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.tool.LexerGrammar;
import org.stringtemplate.v4.ST;
/** Assembles the StringTemplate tree for a generated lexer: a "LexerFile"
 *  template wrapping a "Lexer" template, which in turn collects one "PDA"
 *  template per lexer mode.
 */
public class LexerFactory {
    public CodeGenerator gen;

    public LexerFactory(CodeGenerator gen) {
        this.gen = gen;
    }

    /** Create and fill in the output templates for the lexer grammar held by
     *  {@code gen}. The grammar must be a LexerGrammar (it is cast).
     *
     *  @return the outer "LexerFile" template
     */
    public ST build() {
        LexerGrammar lexerGrammar = (LexerGrammar)gen.g;
        ST outputFile = gen.templates.getInstanceOf("LexerFile");
        ST lexerClass = gen.templates.getInstanceOf("Lexer");

        lexerClass.add("lexerName", gen.g.getRecognizerName());
        lexerClass.add("modes", lexerGrammar.modes.keySet());
        outputFile.add("fileName", gen.getRecognizerFileName());
        outputFile.add("lexer", lexerClass);

        // one PDA template per mode, each backed by that mode's bytecode
        for (String mode : lexerGrammar.modes.keySet()) {
            lexerClass.add("pdas", pdaTemplateFor(lexerGrammar, mode));
        }
        return outputFile;
    }

    /** Generate the bytecode for one mode and wrap it in a "PDA" template. */
    private ST pdaTemplateFor(LexerGrammar lexerGrammar, String mode) {
        PDA machine = NFABytecodeGenerator.getBytecode(lexerGrammar, mode);
        ST pdaTemplate = gen.templates.getInstanceOf("PDA");
        pdaTemplate.add("name", mode);
        pdaTemplate.add("model", machine);
        return pdaTemplate;
    }
}

View File

@ -9,8 +9,8 @@ import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.DoubleKeyMap;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.runtime.tree.TreeParser;
import org.antlr.v4.tool.GrammarAST;
import org.antlr.v4.tool.LexerGrammar;
@ -115,7 +115,7 @@ public class NFABytecodeGenerator extends TreeParser {
return code;
}
public static NFA getBytecode(LexerGrammar lg, String modeName) {
public static PDA getBytecode(LexerGrammar lg, String modeName) {
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
NFABytecodeTriggers gen = new NFABytecodeTriggers(null);
gen.lg = lg;
@ -158,7 +158,7 @@ public class NFABytecodeGenerator extends TreeParser {
System.out.println(Bytecode.disassemble(code));
System.out.println("rule addrs="+gen.ruleToAddr);
return new NFA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labelIndex);
return new PDA(code, gen.ruleToAddr, gen.tokenTypeToAddr, gen.labelIndex);
}
/** Write value at index into a byte array highest to lowest byte,

View File

@ -1,7 +1,7 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class AcceptInstr extends Instr {

View File

@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class ActionInstr extends Instr {

View File

@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class CallInstr extends Instr {

View File

@ -1,7 +1,7 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class JumpInstr extends Instr {

View File

@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class LabelInstr extends Instr {

View File

@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class MatchInstr extends Instr {

View File

@ -3,7 +3,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class RangeInstr extends Instr {

View File

@ -1,6 +1,6 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class RetInstr extends Instr {

View File

@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class SaveInstr extends Instr {

View File

@ -2,7 +2,7 @@ package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class SemPredInstr extends Instr {

View File

@ -1,7 +1,7 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
import java.util.ArrayList;
import java.util.List;

View File

@ -1,7 +1,7 @@
package org.antlr.v4.codegen.nfa;
import org.antlr.runtime.Token;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.pda.Bytecode;
/** */
public class WildcardInstr extends Instr {

View File

@ -9,7 +9,6 @@ public class LexerGrammar extends Grammar {
public static final String DEFAULT_MODE_NAME = "DEFAULT_MODE";
public MultiMap<String, Rule> modes = new MultiMap<String, Rule>();
//public Map<String, Integer> modeToDecision = new HashMap<String, Integer>();
public LexerGrammar(Tool tool, GrammarRootAST ast) {
super(tool, ast);

View File

@ -2,8 +2,8 @@ package org.antlr.v4.test;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.Bytecode;
import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.runtime.pda.Bytecode;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
@ -215,7 +215,7 @@ public class TestNFABytecodeGeneration extends BaseTest {
}
}
}
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
assertEquals(expecting, Bytecode.disassemble(nfa.code));
PDA PDA = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
assertEquals(expecting, Bytecode.disassemble(PDA.code));
}
}

View File

@ -4,7 +4,7 @@ import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.NFABytecodeGenerator;
import org.antlr.v4.runtime.nfa.NFA;
import org.antlr.v4.runtime.pda.PDA;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
@ -202,12 +202,12 @@ public class TestNFABytecodeInterp extends BaseTest {
}
}
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
PDA PDA = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokenTypes = new ArrayList<Integer>();
int ttype = 0;
do {
ttype = nfa.execThompson(in);
ttype = PDA.execThompson(in);
tokenTypes.add(ttype);
} while ( ttype!= Token.EOF );
assertEquals(expectingTokenTypes, tokenTypes);
@ -236,15 +236,15 @@ public class TestNFABytecodeInterp extends BaseTest {
}
}
NFA nfa = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
PDA PDA = NFABytecodeGenerator.getBytecode(g, LexerGrammar.DEFAULT_MODE_NAME);
ANTLRStringStream in = new ANTLRStringStream(input);
List<Integer> tokenTypes = new ArrayList<Integer>();
int ttype = nfa.execThompson(in);
int ttype = PDA.execThompson(in);
tokenTypes.add(ttype);
assertEquals(expectingTokenTypes, tokenTypes);
if ( expectingTokens!=null ) {
assertEquals(expectingTokens, Arrays.toString(nfa.labelValues));
assertEquals(expectingTokens, Arrays.toString(PDA.labelValues));
}
}
}