From 1be2e2c556b22e4caf75f3f5fff72d72b949014f Mon Sep 17 00:00:00 2001 From: sharwell Date: Tue, 29 Nov 2011 20:13:06 -0800 Subject: [PATCH] v4: Initial tracing support in the lexer [git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9490] --- .../Java/src/org/antlr/v4/runtime/Lexer.java | 3 + .../v4/runtime/atn/LexerATNSimulator.java | 269 ++++++++++++++++++ 2 files changed, 272 insertions(+) diff --git a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java index 2163a4dbe..c07634eee 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java @@ -182,6 +182,7 @@ public abstract class Lexer extends Recognizer public void pushMode(int m) { if ( LexerATNSimulator.debug ) System.out.println("pushMode "+m); if ( modeStack==null ) modeStack = new ArrayDeque(); + getInterpreter().tracePushMode(m); modeStack.push(mode); mode(m); } @@ -189,6 +190,7 @@ public abstract class Lexer extends Recognizer public int popMode() { if ( modeStack==null ) throw new EmptyStackException(); if ( LexerATNSimulator.debug ) System.out.println("popMode back to "+modeStack.peek()); + getInterpreter().tracePopMode(); mode( modeStack.pop() ); return mode; } @@ -217,6 +219,7 @@ public abstract class Lexer extends Recognizer * than a single variable as this implementation does). */ public void emit(Token token) { + getInterpreter().traceEmit(token); //System.err.println("emit "+token); this.token = token; } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java index b73231a65..68d3f0e37 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java @@ -36,12 +36,21 @@ import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Nullable; import org.antlr.v4.runtime.misc.OrderedHashSet; +import java.io.IOException; +import java.io.OutputStream; +import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.IntervalSet; + /** "dup" of ParserInterpreter */ public class LexerATNSimulator extends ATNSimulator { public static boolean debug = false; public static boolean dfa_debug = false; public static final int NUM_EDGES = 255; + private boolean trace = false; + private OutputStream traceStream = null; + private boolean traceFailed = false; + /** When we hit an accept state in either the DFA or the ATN, we * have to notify the character stream to start buffering characters * via mark() and record the current state. The current state includes @@ -131,10 +140,17 @@ public class LexerATNSimulator extends ATNSimulator { this.recog = recog; } + public void setTraceStream(OutputStream traceStream) { + this.traceStream = traceStream; + this.trace = traceStream != null; + this.traceFailed = false; + } + public int match(@NotNull CharStream input, int mode) { match_calls++; this.mode = mode; int mark = input.mark(); + traceBeginMatch(input, mode); try { if ( dfa[mode].s0==null ) { return matchATN(input); @@ -144,6 +160,7 @@ public class LexerATNSimulator extends ATNSimulator { } } finally { + traceEndMatch(); input.release(mark); } } @@ -159,6 +176,7 @@ public class LexerATNSimulator extends ATNSimulator { // only called from test code from outside public int matchATN(@NotNull CharStream input) { + traceMatchATN(); startIndex = input.index(); ATNState startState = atn.modeToStartState.get(mode); if ( debug ) System.out.println("mode "+ mode +" start: "+startState); @@ -167,10 +185,12 @@ public class LexerATNSimulator extends ATNSimulator { dfa[mode].s0 = addDFAState(s0_closure); int predict = exec(input, s0_closure); if ( debug ) System.out.println("DFA after matchATN: "+dfa[old_mode].toLexerString()); + tracePredict(predict); return predict; } protected int exec(@NotNull CharStream input, @NotNull DFAState s0) { + traceMatchDFA(); if ( dfa_debug ) System.out.println("DFA[mode "+(recog==null?0:recog.mode)+"] exec LA(1)=="+ (char)input.LA(1)); //System.out.println("DFA start of execDFA: "+dfa[mode].toLexerString()); @@ -178,6 +198,7 @@ public class LexerATNSimulator extends ATNSimulator { dfaPrevAccept.reset(); LexerNoViableAltException atnException = null; DFAState s = s0; + traceLookahead1(); int t = input.LA(1); loop: while ( true ) { @@ -211,6 +232,7 @@ public class LexerATNSimulator extends ATNSimulator { } consume(input); + traceLookahead1(); t = input.LA(1); } if ( dfaPrevAccept.state==null ) { @@ -224,6 +246,7 @@ public class LexerATNSimulator extends ATNSimulator { int ruleIndex = dfaPrevAccept.state.ruleIndex; accept(input, ruleIndex, dfaPrevAccept); + tracePredict(dfaPrevAccept.state.prediction); return dfaPrevAccept.state.prediction; } @@ -238,6 +261,7 @@ public class LexerATNSimulator extends ATNSimulator { OrderedHashSet reach = new OrderedHashSet(); atnPrevAccept.reset(); + traceLookahead1(); int t = input.LA(1); do { // while more work @@ -272,6 +296,7 @@ public class LexerATNSimulator extends ATNSimulator { consume(input); addDFAEdge(closure, t, reach); + traceLookahead1(); t = input.LA(1); // swap to avoid reallocating space @@ -309,6 +334,7 @@ public class LexerATNSimulator extends ATNSimulator { } int index = input.index(); if ( index > atnPrevAccept.index ) { + traceAcceptState(c.alt); // will favor prev accept at same index so "int" is keyword not ID markAcceptState(atnPrevAccept, input); atnPrevAccept.config = c; @@ -340,6 +366,7 @@ public class LexerATNSimulator extends ATNSimulator { if ( actionIndex>=0 && recog!=null ) recog.action(null, ruleIndex, actionIndex); // seek to after last char in token + traceSeek(prevAccept.index); input.seek(prevAccept.index); line = prevAccept.line; charPositionInLine = prevAccept.charPos; @@ -479,6 +506,7 @@ public class LexerATNSimulator extends ATNSimulator { } int failOverToATN(@NotNull CharStream input, @NotNull DFAState s) { + traceFailOverToATN(); if ( dfa_debug ) System.out.println("no edge for "+(char)input.LA(1)); if ( dfa_debug ) { System.out.println("ATN exec upon "+ @@ -493,6 +521,7 @@ public class LexerATNSimulator extends ATNSimulator { } // action already executed by ATN // we've updated DFA, exec'd action, and have our deepest answer + tracePredict(ttype); return ttype; } @@ -610,6 +639,7 @@ public class LexerATNSimulator extends ATNSimulator { charPositionInLine++; } input.consume(); + traceConsume(input, curChar); } @NotNull @@ -619,4 +649,243 @@ public class LexerATNSimulator extends ATNSimulator { return "'"+(char)t+"'"; } + /* + * Trace helpers (API and file format are work in progress) + */ + + public void traceEndMatch() { + if (trace) { + traceSlow(LexerOpCode.EndMatch); + } + } + + public void traceMatchATN() { + if (trace) { + traceSlow(LexerOpCode.MatchATN); + } + } + + public void traceMatchDFA() { + if (trace) { + traceSlow(LexerOpCode.MatchDFA); + } + } + + public void traceLookahead1() { + if (trace) { + traceSlow(LexerOpCode.Lookahead1); + } + } + + public void traceFailOverToATN() { + if (trace) { + traceSlow(LexerOpCode.FailOverToATN); + } + } + + public void tracePredict(int prediction) { + if (trace) { + traceIntSlow(LexerOpCode.Predict, prediction); + } + } + + public void traceAcceptState(int prediction) { + if (trace) { + traceIntSlow(LexerOpCode.AcceptState, prediction); + } + } + + public void traceSeek(int index) { + if (trace) { + traceIntSlow(LexerOpCode.Seek, index); + } + } + + public final void traceBeginMatch(CharStream input, int mode) { + if (trace) { + traceBeginMatchSlow(input, mode); + } + } + + public final void traceConsume(CharStream input, int c) { + if (trace) { + traceConsumeSlow(input, c); + } + } + + public final void tracePushMode(int mode) { + if (trace) { + traceByteSlow(LexerOpCode.PushMode, (byte)mode); + } + } + + public final void tracePopMode() { + if (trace) { + traceSlow(LexerOpCode.PopMode); + } + } + + public final void traceEmit(Token token) { + if (trace) { + traceEmitSlow(token); + } + } + + private void traceSlow(LexerOpCode opcode) { + assert traceStream != null; + assert opcode.getArgumentSize() == 0; + + if (!traceFailed) { + try { + traceStream.write(opcode.ordinal()); + } catch (IOException e) { + e.printStackTrace(); + traceFailed = true; + } + } + } + + private void traceByteSlow(LexerOpCode opcode, byte arg) { + assert traceStream != null; + assert opcode.getArgumentSize() == 1; + + if (!traceFailed) { + try { + traceStream.write(opcode.ordinal()); + traceStream.write(arg); + } catch (IOException e) { + e.printStackTrace(); + traceFailed = true; + } + } + } + + private void traceByteIntSlow(LexerOpCode opcode, byte arg1, int arg2) { + assert traceStream != null; + assert opcode.getArgumentSize() == 5; + + if (!traceFailed) { + try { + traceStream.write(opcode.ordinal()); + traceStream.write(arg1); + traceIntSlow(arg2); + } catch (IOException e) { + e.printStackTrace(); + traceFailed = true; + } + } + } + + private void traceIntSlow(LexerOpCode opcode, int arg) { + assert traceStream != null; + assert opcode.getArgumentSize() == 4; + + if (!traceFailed) { + try { + traceStream.write(opcode.ordinal()); + traceIntSlow(arg); + } catch (IOException e) { + e.printStackTrace(); + traceFailed = true; + } + } + } + + private void traceIntIntSlow(LexerOpCode opcode, int arg1, int arg2) { + assert traceStream != null; + assert opcode.getArgumentSize() == 8; + + if (!traceFailed) { + try { + traceStream.write(opcode.ordinal()); + traceIntSlow(arg1); + traceIntSlow(arg2); + } catch (IOException e) { + e.printStackTrace(); + traceFailed = true; + } + } + } + + private void traceIntIntIntIntSlow(LexerOpCode opcode, int arg1, int arg2, int arg3, int arg4) { + assert traceStream != null; + assert opcode.getArgumentSize() == 16; + + if (!traceFailed) { + try { + traceStream.write(opcode.ordinal()); + traceIntSlow(arg1); + traceIntSlow(arg2); + traceIntSlow(arg3); + traceIntSlow(arg4); + } catch (IOException e) { + e.printStackTrace(); + traceFailed = true; + } + } + } + + private void traceIntSlow(int arg) { + assert traceStream != null; + + if (!traceFailed) { + try { + traceStream.write(arg); + traceStream.write(arg >> 8); + traceStream.write(arg >> 16); + traceStream.write(arg >> 24); + } catch (IOException e) { + e.printStackTrace(); + traceFailed = true; + } + } + } + + private void traceBeginMatchSlow(CharStream input, int mode) { + traceByteIntSlow(LexerOpCode.BeginMatch, (byte)mode, input.index()); + } + + private void traceConsumeSlow(CharStream input, int c) { + assert traceStream != null; + + if (!traceFailed) { + traceIntIntSlow(LexerOpCode.Consume, c, input.index()); + } + } + + private void traceEmitSlow(Token token) { + assert traceStream != null; + + if (token != null && !traceFailed) { + traceIntIntIntIntSlow(LexerOpCode.Emit, token.getStartIndex(), token.getStopIndex(), token.getType(), token.getChannel()); + } + } + + public enum LexerOpCode { + BeginMatch(5), + EndMatch(0), + MatchATN(0), + MatchDFA(0), + FailOverToATN(0), + AcceptState(4), + Predict(4), + + Seek(4), + Consume(8), + Lookahead1(0), + + PushMode(1), + PopMode(0), + Emit(16); + + private final int argumentSize; + + private LexerOpCode(int argumentSize) { + this.argumentSize = argumentSize; + } + + public int getArgumentSize() { + return argumentSize; + } + } }