Added TokenFactory and CommonTokenFactory. Updated the parser and the lexer with methods to set the factory. Altered the default error strategy and the lexer to use the factory. The parser's setTokenFactory method updates the token source (usually the lexer) and the error handling strategy. I had to add the setTokenFactory method to TokenSource as well to make all of this work.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9789]
2012-01-03 10:58:01 -08:00 · 2012-01-03 10:58:01 -08:00 · 5c3c8d6e7a
parent 3aeeb2b277
commit 5c3c8d6e7a
11 changed files with 185 additions and 69 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorStrategy.java
@ -31,10 +31,8 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
 *  TODO: what to do about lexers
 */
 public interface ANTLRErrorStrategy {
-	/** Report any kind of RecognitionException. */
+	/** To create missing tokens, we need a factory */
-	void reportError(@NotNull Parser recognizer,
+	public void setTokenFactory(TokenFactory<?> factory);
 					 @Nullable RecognitionException e)
 		throws RecognitionException;
 	/** When matching elements within alternative, use this method
 	 *  to recover. The default implementation uses single token
@ -109,13 +107,18 @@ public interface ANTLRErrorStrategy {
 	/** Reset the error handler. Call this when the parser
 	 *  matches a valid token (indicating no longer in recovery mode)
-     *  and from its own reset method.
+	 *  and from its own reset method.
-     */
+	 */
-    void endErrorCondition(@NotNull Parser recognizer);
+	void endErrorCondition(@NotNull Parser recognizer);
-    /** Called when the parser detects a true ambiguity: an input sequence can be matched
+	/** Report any kind of RecognitionException. */
-     * literally by two or more pass through the grammar. ANTLR resolves the ambiguity in
+	void reportError(@NotNull Parser recognizer,
-     * favor of the alternative appearing first in the grammar. The start and stop index are
+					 @Nullable RecognitionException e)
 	throws RecognitionException;
 	/** Called when the parser detects a true ambiguity: an input sequence can be matched
 	 * literally by two or more pass through the grammar. ANTLR resolves the ambiguity in
 	 * favor of the alternative appearing first in the grammar. The start and stop index are
     * zero-based absolute indices into the token stream. ambigAlts is a set of alternative numbers
     * that can match the input sequence. This method is only called when we are parsing with
     * full context.
@ -124,17 +127,6 @@ public interface ANTLRErrorStrategy {
 						 DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
 						 @NotNull OrderedHashSet<ATNConfig> configs);
    /** Called by the parser when it detects an input sequence that can be matched by two paths
     *  through the grammar. The difference between this and the reportAmbiguity method lies in
     *  the difference between Strong LL parsing and LL parsing. If we are not parsing with context,
     *  we can't be sure if a conflict is an ambiguity or simply a weakness in the Strong LL parsing
     *  strategy. If we are parsing with full context, this method is never called.
     */
 //    void reportConflict(@NotNull BaseRecognizer recognizer,
 //                        int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
 //                        @NotNull OrderedHashSet<ATNConfig> configs);
 	void reportAttemptingFullContext(@NotNull Parser recognizer,
 									 @NotNull DFA dfa,
 									 int startIndex, int stopIndex,
--- a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java
@ -0,0 +1,55 @@
 /*
 [The "BSD license"]
 Copyright (c) 2012 Terence Parr
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 package org.antlr.v4.runtime;
 /** A {@link TokenFactory} implementation that produces {@link CommonToken} objects. */
 public class CommonTokenFactory implements TokenFactory<CommonToken> {
 	/** Shared instance that callers can use instead of constructing their own factory. */
 	public static final TokenFactory<CommonToken> DEFAULT = new CommonTokenFactory();
 	/** Create a CommonToken from source, type, channel, start and stop, then
 	 *  set its line and char position in line. When text is non-null it is
 	 *  installed as the token's text and the start/stop indexes are reset to -1.
 	 *
 	 *  @param source token source handed to the CommonToken constructor
 	 *  @param type token type
 	 *  @param text explicit text for the token, or null for no text override
 	 *  @param channel channel handed to the CommonToken constructor
 	 *  @param start start index; replaced by -1 when text is non-null
 	 *  @param stop stop index; replaced by -1 when text is non-null
 	 *  @param line line to record on the token
 	 *  @param charPositionInLine char position in line to record on the token
 	 *  @return the newly created token
 	 */
 	@Override
 	public CommonToken create(TokenSource source, int type, String text,
 							  int channel, int start, int stop,
 							  int line, int charPositionInLine)
 	{
 		CommonToken t = new CommonToken(source, type, channel, start, stop);
 		t.setLine(line);
 		t.setCharPositionInLine(charPositionInLine);
 		if ( text!=null ) {
 			t.setText(text);
 			// explicit text override: wipe the indexes given to the constructor
 			t.setStartIndex(-1);
 			t.setStopIndex(-1);
 		}
 		return t;
 	}
 	/** Create a CommonToken from just a type and text by calling the
 	 *  CommonToken(type, text) constructor.
 	 */
 	@Override
 	public CommonToken create(int type, String text) {
 		return new CommonToken(type, text);
 	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
@ -39,6 +39,9 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
 *  and tree parsers.
 */
 public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 	/** How to create token objects */
 	protected TokenFactory<?> _factory = CommonTokenFactory.DEFAULT;
 	/** This is true after we see an error and before having successfully
 	 *  matched a token. Prevents generation of more than one error message
 	 *  per error.
@ -55,6 +58,11 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 	protected IntervalSet lastErrorStates;
 	/** Replace the factory stored in _factory, which this strategy uses to
 	 *  create tokens. The argument is stored as-is; no null check is done here.
 	 */
 	@Override
 	public void setTokenFactory(TokenFactory<?> factory) {
 		this._factory = factory;
 	}
 	@Override
 	public void beginErrorCondition(Parser recognizer) {
 		errorRecoveryMode = true;
@ -354,26 +362,20 @@ public class DefaultErrorStrategy implements ANTLRErrorStrategy {
 	 */
 	protected Token getMissingSymbol(Parser recognizer) {
 		Token currentSymbol = recognizer.getCurrentToken();
 		if (!(currentSymbol instanceof Token)) {
 			throw new UnsupportedOperationException("This error strategy only supports Token symbols.");
 		}
 		IntervalSet expecting = getExpectedTokens(recognizer);
 		int expectedTokenType = expecting.getMinElement(); // get any element
 		String tokenText;
 		if ( expectedTokenType== Token.EOF ) tokenText = "<missing EOF>";
 		else tokenText = "<missing "+recognizer.getTokenNames()[expectedTokenType]+">";
-		CommonToken t = new CommonToken(expectedTokenType, tokenText);
+		Token current = currentSymbol;
 		Token current = (Token)currentSymbol;
 		if ( current.getType() == Token.EOF ) {
-			current = ((TokenStream)recognizer.getInputStream()).LT(-1);
+			current = recognizer.getInputStream().LT(-1);
 		}
-		t.line = current.getLine();
+		return
-		t.charPositionInLine = current.getCharPositionInLine();
+			_factory.create(current.getTokenSource(), expectedTokenType, tokenText,
-		t.channel = Token.DEFAULT_CHANNEL;
+							Token.DEFAULT_CHANNEL,
-		t.source = current.getTokenSource();
+							-1, -1,
-		t.index = -1; // indicate we conjured this up because it has no index
+							current.getLine(), current.getCharPositionInLine());
 		return (Token)t;
 	}
 	public IntervalSet getExpectedTokens(Parser recognizer) {
--- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
@ -50,7 +50,10 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 	public static final int MIN_CHAR_VALUE = '\u0000';
 	public static final int MAX_CHAR_VALUE = '\uFFFE';
-	public CharStream input;
+	public CharStream _input;
 	/** How to create token objects */
 	protected TokenFactory<?> _factory = CommonTokenFactory.DEFAULT;
 	/** The goal of all lexer rules/methods is to create a token object.
 	 *  This is an instance variable as multiple rules may collaborate to
@ -94,13 +97,13 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 	public String text;
 	public Lexer(CharStream input) {
-		this.input = input;
+		this._input = input;
 	}
 	public void reset() {
 		// wack Lexer state variables
-		if ( input!=null ) {
+		if ( _input !=null ) {
-			input.seek(0); // rewind the input
+			_input.seek(0); // rewind the input
 		}
 		token = null;
 		type = Token.INVALID_TYPE;
@ -124,13 +127,13 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 	 */
 	@Override
 	public Token nextToken() {
-		if ( hitEOF ) return emitEOF();
+		if ( hitEOF ) return anEOF();
 		outer:
 		while (true) {
 			token = null;
 			channel = Token.DEFAULT_CHANNEL;
-			tokenStartCharIndex = input.index();
+			tokenStartCharIndex = _input.index();
 			tokenStartCharPositionInLine = getInterpreter().getCharPositionInLine();
 			tokenStartLine = getInterpreter().getLine();
 			text = null;
@ -141,14 +144,14 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 //								   " at index "+input.index());
 				int ttype;
 				try {
-					ttype = getInterpreter().match(input, mode);
+					ttype = getInterpreter().match(_input, mode);
 				}
 				catch (LexerNoViableAltException e) {
 					notifyListeners(e);		// report error
 					recover(e);
 					ttype = SKIP;
 				}
-				if ( input.LA(1)==CharStream.EOF ) {
+				if ( _input.LA(1)==CharStream.EOF ) {
 					hitEOF = true;
 				}
 				if ( type == Token.INVALID_TYPE ) type = ttype;
@ -195,22 +198,27 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 		return mode;
 	}
 	/** Replace the factory stored in _factory, which this lexer uses to
 	 *  create token objects. The argument is stored as-is; no null check
 	 *  is done here.
 	 */
 	@Override
 	public void setTokenFactory(TokenFactory<?> factory) {
 		this._factory = factory;
 	}
 	/** Set the char stream and reset the lexer */
 	@Override
 	public void setInputStream(IntStream input) {
-		this.input = null;
+		this._input = null;
 		reset();
-		this.input = (CharStream)input;
+		this._input = (CharStream)input;
 	}
 	@Override
 	public String getSourceName() {
-		return input.getSourceName();
+		return _input.getSourceName();
 	}
 	@Override
 	public CharStream getInputStream() {
-		return input;
+		return _input;
 	}
 	/** Currently does not support multiple emits per nextToken invocation
@ -228,35 +236,25 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 	 *  outermost lexical rule.  The token object should point into the
 	 *  char buffer start..stop.  If there is a text override in 'text',
 	 *  use that to set the token's text.  Override this method to emit
-	 *  custom Token objects.
+	 *  custom Token objects or provide a new factory.
 	 *
 	 *  If you are building trees, then you should also override
 	 *  Parser or TreeParser.getMissingSymbol().
 	 */
 	public Token emit() {
-		WritableToken t = new CommonToken(this, type,
+		Token t = _factory.create(this, type, text, channel, tokenStartCharIndex, getCharIndex()-1,
-										  channel, tokenStartCharIndex,
+								  tokenStartLine, tokenStartCharPositionInLine);
 										  getCharIndex()-1);
 		t.setLine(tokenStartLine);
 		if ( text!=null ) t.setText(text);
 		t.setCharPositionInLine(tokenStartCharPositionInLine);
 		emit(t);
 		return t;
 	}
-	public Token emitEOF() {
+	public Token anEOF() {
-		WritableToken eof = new CommonToken(this,Token.EOF,
+		int cpos = getCharPositionInLine();
 											Token.DEFAULT_CHANNEL,
 											input.index(),input.index()-1);
 		eof.setLine(getLine());
 		// The character position for EOF is one beyond the position of
 		// the previous token's last character
 		int cpos = getCharPositionInLine();
 		if ( token!=null ) {
 			int n = token.getStopIndex() - token.getStartIndex() + 1;
 			cpos = token.getCharPositionInLine()+n;
 		}
-		eof.setCharPositionInLine(cpos);
+		Token eof = _factory.create(this, Token.EOF, null, channel, _input.index(), _input.index()-1,
 									getLine(), cpos);
 		return eof;
 	}
@ -272,7 +270,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 	/** What is the index of the current character of lookahead? */
 	public int getCharIndex() {
-		return input.index();
+		return _input.index();
 	}
 	/** Return the text matched so far for the current token or any
@ -282,7 +280,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 		if ( text!=null ) {
 			return text;
 		}
-		return getInterpreter().getText(input);
+		return getInterpreter().getText(_input);
 //		return ((CharStream)input).substring(tokenStartCharIndex,getCharIndex()-1);
 	}
@ -318,12 +316,12 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 	}
 	public void recover(LexerNoViableAltException e) {
-		getInterpreter().consume(input); // skip a char and try again
+		getInterpreter().consume(_input); // skip a char and try again
 	}
 	public void notifyListeners(LexerNoViableAltException e) {
 		String msg = "token recognition error at: '"+
-			input.substring(tokenStartCharIndex,input.index())+"'";
+			_input.substring(tokenStartCharIndex, _input.index())+"'";
 		ANTLRErrorListener<Integer>[] listeners = getListeners();
 		if ( listeners.length == 0 ) {
 			System.err.println("line "+tokenStartLine+":"+
@ -364,6 +362,6 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 		//System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
 		//re.printStackTrace();
 		// TODO: Do we lose character or line position information?
-		input.consume();
+		_input.consume();
 	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/Parser.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Parser.java
@ -159,6 +159,13 @@ public abstract class Parser extends Recognizer<Token, v2ParserATNSimulator<Toke
 		return syntaxErrors;
 	}
 	/** Tell our token source and error strategy about a new way to create tokens.
 	 *  The factory is forwarded to the token source of _input and to
 	 *  _errHandler; this method keeps no reference to it itself.
 	 */
 	@Override
 	public void setTokenFactory(TokenFactory<?> factory) {
 		_input.getTokenSource().setTokenFactory(factory);
 		_errHandler.setTokenFactory(factory);
 	}
 	@Override
 	public TokenStream getInputStream() { return getTokenStream(); }
--- a/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java
@ -134,4 +134,6 @@ public abstract class Recognizer<Symbol, ATNInterpreter extends ATNSimulator> {
 	public abstract IntStream getInputStream();
 	public abstract void setInputStream(IntStream input);
 	public abstract void setTokenFactory(TokenFactory<?> input);
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/Token.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Token.java
@ -35,7 +35,7 @@ package org.antlr.v4.runtime;
 */
 public interface Token {
 	public static final int INVALID_TYPE = 0;
-	public static final Token INVALID_TOKEN = new CommonToken(INVALID_TYPE);
+//	public static final Token INVALID_TOKEN = new CommonToken(INVALID_TYPE);
 	public static final int MIN_TOKEN_TYPE = 1;
    /** During lookahead operations, this "token" signifies we hit rule end ATN state
--- a/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/TokenFactory.java
@ -0,0 +1,47 @@
 /*
 [The "BSD license"]
 Copyright (c) 2012 Terence Parr
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 package org.antlr.v4.runtime;
 /** The mechanism for creating tokens. A factory is used by Lexer and by
 *  the error handling strategy (to create missing tokens).  Notifying the parser
 *  of a new factory means that it notifies its token source and error strategy.
 */
 public interface TokenFactory<Symbol extends Token> {
 	/** This is the method used to create tokens in the lexer and in the
 	 *  error handling strategy. If text!=null, then the start and stop positions
 	 *  are wiped to -1 and the text override is set in the CommonToken
 	 *  (this describes what CommonTokenFactory does).
 	 */
 	Symbol create(TokenSource source, int type, String text,
 				  int channel, int start, int stop,
 				  int line, int charPositionInLine);
 	/** Generically useful: create a token from just a type and its text. */
 	Symbol create(int type, String text);
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/TokenSource.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/TokenSource.java
@ -62,4 +62,7 @@ public interface TokenSource {
 	 *  ask lexers input stream.
 	 */
 	public String getSourceName();
 	/** Optional method that lets users set factory in lexer or other source */
 	public void setTokenFactory(TokenFactory<?> factory);
 }
--- a/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java
+++ b/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java
@ -60,6 +60,11 @@ public class LexerInterpreter implements TokenSource {
 	public String getSourceName() {	return g.name; }
 	/** Required by TokenSource. Currently a no-op: the factory argument is
 	 *  ignored and not stored.
 	 */
 	@Override
 	public void setTokenFactory(TokenFactory<?> factory) {
 			// TODO: use TokenFactory
 	}
 	public int getCharPositionInLine() {
 		return 0;
 	}
@ -79,6 +84,7 @@ public class LexerInterpreter implements TokenSource {
 		int tokenStartLine = interp.getLine();
 		int ttype = interp.match(input, Lexer.DEFAULT_MODE);
 		int stop = input.index()-1;
 		// TODO: use TokenFactory
 		WritableToken t = new CommonToken(this, ttype, Token.DEFAULT_CHANNEL, start, stop);
 		t.setLine(tokenStartLine);
 		t.setCharPositionInLine(tokenStartCharPositionInLine);
--- a/tool/test/org/antlr/v4/test/TestCommonTokenStream.java
+++ b/tool/test/org/antlr/v4/test/TestCommonTokenStream.java
@ -209,6 +209,10 @@ public class TestCommonTokenStream extends BaseTest {
 				public CharStream getInputStream() {
 					return null;
 				}
 				@Override
 				public void setTokenFactory(TokenFactory<?> factory) {
 				}
 			};
        CommonTokenStream tokens = new CommonTokenStream(lexer);