diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java index 5d07747c5..5d3a515de 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java @@ -31,10 +31,8 @@ import org.antlr.v4.runtime.misc.OrderedHashSet; * TODO: what to do about lexers */ public interface ANTLRErrorStrategy { - /** Report any kind of RecognitionException. */ - void reportError(@NotNull Parser recognizer, - @Nullable RecognitionException e) - throws RecognitionException; + /** To create missing tokens, we need a factory */ + public void setTokenFactory(TokenFactory factory); /** When matching elements within alternative, use this method * to recover. The default implementation uses single token @@ -109,13 +107,18 @@ public interface ANTLRErrorStrategy { /** Reset the error handler. Call this when the parser * matches a valid token (indicating no longer in recovery mode) - * and from its own reset method. - */ - void endErrorCondition(@NotNull Parser recognizer); + * and from its own reset method. + */ + void endErrorCondition(@NotNull Parser recognizer); - /** Called when the parser detects a true ambiguity: an input sequence can be matched - * literally by two or more pass through the grammar. ANTLR resolves the ambiguity in - * favor of the alternative appearing first in the grammar. The start and stop index are + /** Report any kind of RecognitionException. */ + void reportError(@NotNull Parser recognizer, + @Nullable RecognitionException e) + throws RecognitionException; + + /** Called when the parser detects a true ambiguity: an input sequence can be matched + * literally by two or more paths through the grammar. ANTLR resolves the ambiguity in + * favor of the alternative appearing first in the grammar. The start and stop index are * zero-based absolute indices into the token stream. 
ambigAlts is a set of alternative numbers * that can match the input sequence. This method is only called when we are parsing with * full context. @@ -124,17 +127,6 @@ public interface ANTLRErrorStrategy { DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts, @NotNull OrderedHashSet configs); - /** Called by the parser when it detects an input sequence that can be matched by two paths - * through the grammar. The difference between this and the reportAmbiguity method lies in - * the difference between Strong LL parsing and LL parsing. If we are not parsing with context, - * we can't be sure if a conflict is an ambiguity or simply a weakness in the Strong LL parsing - * strategy. If we are parsing with full context, this method is never called. - */ -// void reportConflict(@NotNull BaseRecognizer recognizer, -// int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts, -// @NotNull OrderedHashSet configs); - - void reportAttemptingFullContext(@NotNull Parser recognizer, @NotNull DFA dfa, int startIndex, int stopIndex, diff --git a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java new file mode 100644 index 000000000..e36a9632c --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java @@ -0,0 +1,55 @@ +/* + [The "BSD license"] + Copyright (c) 2012 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.antlr.v4.runtime; + +public class CommonTokenFactory implements TokenFactory { + public static final TokenFactory DEFAULT = new CommonTokenFactory(); + + @Override + public CommonToken create(TokenSource source, int type, String text, + int channel, int start, int stop, + int line, int charPositionInLine) + { + CommonToken t = new CommonToken(source, type, channel, start, stop); + t.setLine(line); + t.setCharPositionInLine(charPositionInLine); + if ( text!=null ) { + t.setText(text); + t.setStartIndex(-1); + t.setStopIndex(-1); + } + return t; + } + + @Override + public CommonToken create(int type, String text) { + return new CommonToken(type, text); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java index bab64fb6e..8de3ce476 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java +++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java @@ -39,6 +39,9 @@ import org.antlr.v4.runtime.misc.OrderedHashSet; * and tree parsers. 
*/ public class DefaultErrorStrategy implements ANTLRErrorStrategy { + /** How to create token objects */ + protected TokenFactory _factory = CommonTokenFactory.DEFAULT; + /** This is true after we see an error and before having successfully * matched a token. Prevents generation of more than one error message * per error. @@ -55,6 +58,11 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { protected IntervalSet lastErrorStates; + @Override + public void setTokenFactory(TokenFactory factory) { + this._factory = factory; + } + @Override public void beginErrorCondition(Parser recognizer) { errorRecoveryMode = true; @@ -354,26 +362,20 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy { */ protected Token getMissingSymbol(Parser recognizer) { Token currentSymbol = recognizer.getCurrentToken(); - if (!(currentSymbol instanceof Token)) { - throw new UnsupportedOperationException("This error strategy only supports Token symbols."); - } - IntervalSet expecting = getExpectedTokens(recognizer); int expectedTokenType = expecting.getMinElement(); // get any element String tokenText; if ( expectedTokenType== Token.EOF ) tokenText = ""; else tokenText = ""; - CommonToken t = new CommonToken(expectedTokenType, tokenText); - Token current = (Token)currentSymbol; + Token current = currentSymbol; if ( current.getType() == Token.EOF ) { - current = ((TokenStream)recognizer.getInputStream()).LT(-1); + current = recognizer.getInputStream().LT(-1); } - t.line = current.getLine(); - t.charPositionInLine = current.getCharPositionInLine(); - t.channel = Token.DEFAULT_CHANNEL; - t.source = current.getTokenSource(); - t.index = -1; // indicate we conjured this up because it has no index - return (Token)t; + return + _factory.create(current.getTokenSource(), expectedTokenType, tokenText, + Token.DEFAULT_CHANNEL, + -1, -1, + current.getLine(), current.getCharPositionInLine()); } public IntervalSet getExpectedTokens(Parser recognizer) { diff --git 
a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java index c07634eee..a9b3c531d 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java @@ -50,7 +50,10 @@ public abstract class Lexer extends Recognizer public static final int MIN_CHAR_VALUE = '\u0000'; public static final int MAX_CHAR_VALUE = '\uFFFE'; - public CharStream input; + public CharStream _input; + + /** How to create token objects */ + protected TokenFactory _factory = CommonTokenFactory.DEFAULT; /** The goal of all lexer rules/methods is to create a token object. * This is an instance variable as multiple rules may collaborate to @@ -94,13 +97,13 @@ public abstract class Lexer extends Recognizer public String text; public Lexer(CharStream input) { - this.input = input; + this._input = input; } public void reset() { // wack Lexer state variables - if ( input!=null ) { - input.seek(0); // rewind the input + if ( _input !=null ) { + _input.seek(0); // rewind the input } token = null; type = Token.INVALID_TYPE; @@ -124,13 +127,13 @@ public abstract class Lexer extends Recognizer */ @Override public Token nextToken() { - if ( hitEOF ) return emitEOF(); + if ( hitEOF ) return anEOF(); outer: while (true) { token = null; channel = Token.DEFAULT_CHANNEL; - tokenStartCharIndex = input.index(); + tokenStartCharIndex = _input.index(); tokenStartCharPositionInLine = getInterpreter().getCharPositionInLine(); tokenStartLine = getInterpreter().getLine(); text = null; @@ -141,14 +144,14 @@ public abstract class Lexer extends Recognizer // " at index "+input.index()); int ttype; try { - ttype = getInterpreter().match(input, mode); + ttype = getInterpreter().match(_input, mode); } catch (LexerNoViableAltException e) { notifyListeners(e); // report error recover(e); ttype = SKIP; } - if ( input.LA(1)==CharStream.EOF ) { + if ( _input.LA(1)==CharStream.EOF ) { hitEOF = true; } if ( type == Token.INVALID_TYPE ) type 
= ttype; @@ -195,22 +198,27 @@ public abstract class Lexer extends Recognizer return mode; } + @Override + public void setTokenFactory(TokenFactory factory) { + this._factory = factory; + } + /** Set the char stream and reset the lexer */ @Override public void setInputStream(IntStream input) { - this.input = null; + this._input = null; reset(); - this.input = (CharStream)input; + this._input = (CharStream)input; } @Override public String getSourceName() { - return input.getSourceName(); + return _input.getSourceName(); } @Override public CharStream getInputStream() { - return input; + return _input; } /** Currently does not support multiple emits per nextToken invocation @@ -228,35 +236,25 @@ public abstract class Lexer extends Recognizer * outermost lexical rule. The token object should point into the * char buffer start..stop. If there is a text override in 'text', * use that to set the token's text. Override this method to emit - * custom Token objects. - * - * If you are building trees, then you should also override - * Parser or TreeParser.getMissingSymbol(). + * custom Token objects or provide a new factory. 
*/ public Token emit() { - WritableToken t = new CommonToken(this, type, - channel, tokenStartCharIndex, - getCharIndex()-1); - t.setLine(tokenStartLine); - if ( text!=null ) t.setText(text); - t.setCharPositionInLine(tokenStartCharPositionInLine); + Token t = _factory.create(this, type, text, channel, tokenStartCharIndex, getCharIndex()-1, + tokenStartLine, tokenStartCharPositionInLine); emit(t); return t; } - public Token emitEOF() { - WritableToken eof = new CommonToken(this,Token.EOF, - Token.DEFAULT_CHANNEL, - input.index(),input.index()-1); - eof.setLine(getLine()); + public Token anEOF() { + int cpos = getCharPositionInLine(); // The character position for EOF is one beyond the position of // the previous token's last character - int cpos = getCharPositionInLine(); if ( token!=null ) { int n = token.getStopIndex() - token.getStartIndex() + 1; cpos = token.getCharPositionInLine()+n; } - eof.setCharPositionInLine(cpos); + Token eof = _factory.create(this, Token.EOF, null, channel, _input.index(), _input.index()-1, + getLine(), cpos); return eof; } @@ -272,7 +270,7 @@ public abstract class Lexer extends Recognizer /** What is the index of the current character of lookahead? 
*/ public int getCharIndex() { - return input.index(); + return _input.index(); } /** Return the text matched so far for the current token or any @@ -282,7 +280,7 @@ public abstract class Lexer extends Recognizer if ( text!=null ) { return text; } - return getInterpreter().getText(input); + return getInterpreter().getText(_input); // return ((CharStream)input).substring(tokenStartCharIndex,getCharIndex()-1); } @@ -318,12 +316,12 @@ public abstract class Lexer extends Recognizer } public void recover(LexerNoViableAltException e) { - getInterpreter().consume(input); // skip a char and try again + getInterpreter().consume(_input); // skip a char and try again } public void notifyListeners(LexerNoViableAltException e) { String msg = "token recognition error at: '"+ - input.substring(tokenStartCharIndex,input.index())+"'"; + _input.substring(tokenStartCharIndex, _input.index())+"'"; ANTLRErrorListener[] listeners = getListeners(); if ( listeners.length == 0 ) { System.err.println("line "+tokenStartLine+":"+ @@ -364,6 +362,6 @@ public abstract class Lexer extends Recognizer //System.out.println("consuming char "+(char)input.LA(1)+" during recovery"); //re.printStackTrace(); // TODO: Do we lose character or line position information? 
- input.consume(); + _input.consume(); } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/Parser.java b/runtime/Java/src/org/antlr/v4/runtime/Parser.java index 348f9b424..3156ca6d5 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Parser.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Parser.java @@ -159,6 +159,13 @@ public abstract class Parser extends Recognizer factory) { + _input.getTokenSource().setTokenFactory(factory); + _errHandler.setTokenFactory(factory); + } + @Override public TokenStream getInputStream() { return getTokenStream(); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java index 1a2d351a1..7f848fde4 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java @@ -134,4 +134,6 @@ public abstract class Recognizer { public abstract IntStream getInputStream(); public abstract void setInputStream(IntStream input); + + public abstract void setTokenFactory(TokenFactory input); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/Token.java b/runtime/Java/src/org/antlr/v4/runtime/Token.java index 52ca12020..a04188047 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Token.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Token.java @@ -35,7 +35,7 @@ package org.antlr.v4.runtime; */ public interface Token { public static final int INVALID_TYPE = 0; - public static final Token INVALID_TOKEN = new CommonToken(INVALID_TYPE); +// public static final Token INVALID_TOKEN = new CommonToken(INVALID_TYPE); public static final int MIN_TOKEN_TYPE = 1; /** During lookahead operations, this "token" signifies we hit rule end ATN state diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java b/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java new file mode 100644 index 000000000..dbc8de43a --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java @@ -0,0 +1,47 @@ +/* + [The "BSD license"] + 
Copyright (c) 2012 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.antlr.v4.runtime; + +/** The default mechanism for creating tokens. It's used by default in Lexer and + * the error handling strategy (to create missing tokens). Notifying the parser + * of a new factory means that it notifies its token source and error strategy. + */ +public interface TokenFactory { + /** This is the method used to create tokens in the lexer and in the + * error handling strategy. If text!=null, then the start and stop positions + * are wiped to -1 and the text override is set in the CommonToken. 
+ */ + Symbol create(TokenSource source, int type, String text, + int channel, int start, int stop, + int line, int charPositionInLine); + + /** Generically useful */ + Symbol create(int type, String text); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenSource.java b/runtime/Java/src/org/antlr/v4/runtime/TokenSource.java index b1117de67..5ff2e9dfd 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/TokenSource.java +++ b/runtime/Java/src/org/antlr/v4/runtime/TokenSource.java @@ -62,4 +62,7 @@ public interface TokenSource { * ask lexers input stream. */ public String getSourceName(); + + /** Optional method that lets users set factory in lexer or other source */ + public void setTokenFactory(TokenFactory factory); } diff --git a/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java b/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java index 6a613cdfd..9bad5fea4 100644 --- a/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java +++ b/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java @@ -60,6 +60,11 @@ public class LexerInterpreter implements TokenSource { public String getSourceName() { return g.name; } + @Override + public void setTokenFactory(TokenFactory factory) { + // TODO: use TokenFactory + } + public int getCharPositionInLine() { return 0; } @@ -79,6 +84,7 @@ public class LexerInterpreter implements TokenSource { int tokenStartLine = interp.getLine(); int ttype = interp.match(input, Lexer.DEFAULT_MODE); int stop = input.index()-1; + // TODO: use TokenFactory WritableToken t = new CommonToken(this, ttype, Token.DEFAULT_CHANNEL, start, stop); t.setLine(tokenStartLine); t.setCharPositionInLine(tokenStartCharPositionInLine); diff --git a/tool/test/org/antlr/v4/test/TestCommonTokenStream.java b/tool/test/org/antlr/v4/test/TestCommonTokenStream.java index dcb0d0ae9..636ca98dc 100644 --- a/tool/test/org/antlr/v4/test/TestCommonTokenStream.java +++ b/tool/test/org/antlr/v4/test/TestCommonTokenStream.java @@ -209,6 +209,10 @@ public class 
TestCommonTokenStream extends BaseTest { public CharStream getInputStream() { return null; } + + @Override + public void setTokenFactory(TokenFactory factory) { + } }; CommonTokenStream tokens = new CommonTokenStream(lexer);