diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java index d4c305177..de9258849 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java @@ -159,13 +159,13 @@ public class ANTLRInputStream implements CharStream { if ( i<0 ) { i++; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1] if ( (p+i-1) < 0 ) { - return CharStream.EOF; // invalid; no char before first char + return IntStream.EOF; // invalid; no char before first char } } if ( (p+i-1) >= n ) { //System.out.println("char LA("+i+")=EOF; p="+p); - return CharStream.EOF; + return IntStream.EOF; } //System.out.println("char LA("+i+")="+(char)data[p+i-1]+"; p="+p); //System.out.println("LA("+i+"); p="+p+" n="+n+" data.length="+data.length); diff --git a/runtime/Java/src/org/antlr/v4/runtime/CharStream.java b/runtime/Java/src/org/antlr/v4/runtime/CharStream.java index 7c2f691c0..1845cbb5a 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/CharStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/CharStream.java @@ -1,46 +1,73 @@ /* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.NotNull; -/** A source of characters for an ANTLR lexer */ +/** A source of characters for an ANTLR lexer. */ public interface CharStream extends IntStream { - public static final int EOF = -1; - public static final int MIN_CHAR = Character.MIN_VALUE; - public static final int MAX_CHAR = Character.MAX_VALUE-1; // FFFE is max - - /** This is primaril a useful interface for action code. - * Just make sure actions don't use this on streams that don't support it. - * For unbuffered streams, you can't use this except in case - * where interval is in current buffer window. Lexer guarantees - * text of current token at emit() time will be available. + /** + * The minimum allowed value for a character in a {@code CharStream}. */ - public String getText(Interval interval); + public static final int MIN_CHAR = Character.MIN_VALUE; + + /** + * The maximum allowed value for a character in a {@code CharStream}. + *

+ * This value is {@code Character.MAX_VALUE - 1}, which reserves the value + * {@code Character.MAX_VALUE} for special use within an implementing class. + * For some implementations, the data buffers required for supporting the + * marked ranges of {@link IntStream} are stored as {@code char[]} instead + * of {@code int[]}, with {@code Character.MAX_VALUE} being used instead of + * {@code -1} to mark the end of the stream internally. + */ + public static final int MAX_CHAR = Character.MAX_VALUE-1; + + /** + * This method returns the text for a range of characters within this input + * stream. This method is guaranteed to not throw an exception if the + * specified {@code interval} lies entirely within a marked range. For more + * information about marked ranges, see {@link IntStream#mark}. + * + * @param interval an interval within the stream + * @return the text of the specified interval + * + * @throws NullPointerException if {@code interval} is {@code null} + * @throws IllegalArgumentException if {@code interval.a < 0}, or if + * {@code interval.b < interval.a - 1}, or if {@code interval.b} lies at or + * past the end of the stream + * @throws UnsupportedOperationException if the stream does not support + * getting the text of the specified interval + */ + @NotNull + public String getText(@NotNull Interval interval); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/IntStream.java b/runtime/Java/src/org/antlr/v4/runtime/IntStream.java index 143c4dda5..a02b83408 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/IntStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/IntStream.java @@ -1,100 +1,242 @@ /* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. 
Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + package org.antlr.v4.runtime; -/** A simple stream of integers used when all I care about is the char - * or token type sequence (such as interpretation). +import org.antlr.v4.runtime.misc.NotNull; + +/** + * A simple stream of symbols whose values are represented as integers. This + * interface provides marked ranges with support for a minimum level + * of buffering necessary to implement arbitrary lookahead during prediction. + * For more information on marked ranges, see {@link #mark}. + *

+ * Initializing Methods: Some methods in this interface have + * unspecified behavior if no call to an initializing method has occurred after + * the stream was constructed. The following is a list of initializing methods: + * + *

*/ public interface IntStream { + /** + * The value returned by {@link #LA LA()} when the end of the stream is + * reached. + */ + public static final int EOF = -1; + + /** + * The value returned by {@link #getSourceName} when the actual name of the + * underlying source is not known. + */ + public static final String UNKNOWN_SOURCE_NAME = "<unknown>"; + + /** + * Consumes the current symbol in the stream. This method has the following + * effects: + * + * + * + * Note that calling this method does not guarantee that {@code index()} is + * incremented by exactly 1, as that would preclude the ability to implement + * filtering streams (e.g. {@link CommonTokenStream} which distinguishes + * between "on-channel" and "off-channel" tokens). + * + * @throws IllegalStateException if an attempt is made to consume the + * end of the stream (i.e. if {@code LA(1)==}{@link #EOF EOF} before calling + * {@code consume}). + */ void consume(); - /** Get int at current input pointer + i ahead where i=1 is next int. - * Negative indexes are allowed. LA(-1) is previous token (token - * just matched). LA(-i) where i is before first token should - * yield -1, invalid char / EOF. + /** + * Gets the value of the symbol at offset {@code i} from the current + * position. When {@code i==1}, this method returns the value of the current + * symbol in the stream (which is the next symbol to be consumed). When + * {@code i==-1}, this method returns the value of the previously read + * symbol in the stream. It is not valid to call this method with + * {@code i==0}, but the specific behavior is unspecified because this + * method is frequently called from performance-critical code. + *

+ * This method is guaranteed to succeed if any of the following are true: + * + *

+ * + * If {@code i} represents a position at or beyond the end of the stream, + * this method returns {@link #EOF}. + *

+ * The return value is unspecified if {@code i<0} and fewer than {@code -i} + * calls to {@link #consume consume()} have occurred from the beginning of + * the stream before calling this method. + * + * @throws UnsupportedOperationException if the stream does not support + * retrieving the value of the specified symbol */ int LA(int i); - /** Tell the stream to start buffering if it hasn't already. Return - * a marker, usually a function of current input position, index(). - * Calling release(mark()) should not affect the input cursor. - * Can seek to any index between where we were when mark() was called - * and current index() until we release this marker. No mark can appear - * at an index before the first mark. + /** + * A mark provides a guarantee that {@link #seek seek()} operations will be + * valid over a "marked range" extending from the index where {@code mark()} + * was called to the current {@link #index index()}. This allows the use of + * streaming input sources by specifying the minimum buffering requirements + * to support arbitrary lookahead during prediction. + *

+ * The returned mark is an opaque handle (type {@code int}) which is passed + * to {@link #release release()} when the guarantees provided by the marked + * range are no longer necessary. When calls to + * {@code mark()}/{@code release()} are nested, the marks must be released + * in reverse order of which they were obtained. Since marked regions are + * used during performance-critical sections of prediction, the specific + * behavior of invalid usage is unspecified (i.e. a mark is not released, or + * a mark is released twice, or marks are not released in reverse order from + * which they were created). + *

+ * The behavior of this method is unspecified if no call to an + * {@link IntStream initializing method} has occurred after this stream was + * constructed. + *

+ * This method does not change the current position in the input stream. + *

+ * The following example shows the use of {@link #mark mark()}, + * {@link #release release(mark)}, {@link #index index()}, and + * {@link #seek seek(index)} as part of an operation to safely work within a + * marked region, then restore the stream position to its original value and + * release the mark. + *

+	 * IntStream stream = ...;
+	 * int index = -1;
+	 * int mark = stream.mark();
+	 * try {
+	 *   index = stream.index();
+	 *   // perform work here...
+	 * } finally {
+	 *   if (index != -1) {
+	 *     stream.seek(index);
+	 *   }
+	 *   stream.release(mark);
+	 * }
+	 * 
* - * A resource leak may occur if the value returned from a call to - * mark() is not passed to release() afterwards. When calls to mark() - * are nested, release() must be called in reverse order of the calls - * to mark(), otherwise the behavior is unspecified. - * - * @return An opaque marker which should be passed to release - * when the range of symbols from where the marker was dropped - * to the current input symbol is no longer required. - */ + * @return An opaque marker which should be passed to + * {@link #release release()} when the marked range is no longer required. + */ int mark(); - /** Release requirement that stream holds tokens from marked location - * to current index(). Must release in reverse order (like stack) - * of mark() otherwise undefined behavior. + /** + * This method releases a marked range created by a call to + * {@link #mark mark()}. Calls to {@code release()} must appear in the + * reverse order of the corresponding calls to {@code mark()}. If a mark is + * released twice, or if marks are not released in reverse order of the + * corresponding calls to {@code mark()}, the behavior is unspecified. + *

+ * For more information and an example, see {@link #mark}. + * + * @param marker A marker returned by a call to {@code mark()}. + * @see #mark */ void release(int marker); - /** Return the current input symbol index 0..n where n indicates the - * last symbol has been read. The index is the symbol about to be - * read not the most recently read symbol. - */ + /** + * Return the index into the stream of the input symbol referred to by + * {@code LA(1)}. + *

+ * The behavior of this method is unspecified if no call to an + * {@link IntStream initializing method} has occurred after this stream was + * constructed. + */ int index(); - /** Set the input cursor to the position indicated by index. This is - * normally used to rewind the input stream but can move forward as well. - * It's up to the stream implementation to make sure that symbols are - * buffered as necessary to make seek land on a valid symbol. - * Or, they should avoid moving the input cursor. + /** + * Set the input cursor to the position indicated by {@code index}. If the + * specified index lies past the end of the stream, the operation behaves as + * though {@code index} was the index of the EOF symbol. After this method + * returns without throwing an exception, at least one of the following + * will be true. * - * The index is 0..n-1. A seek to position i means that LA(1) will - * return the ith symbol. So, seeking to 0 means LA(1) will return the - * first element in the stream. - * - * For unbuffered streams, index i might not be in buffer. That throws - * index exception. + *

+ * + * This operation is guaranteed to not throw an exception if {@code index} + * lies within a marked region. For more information on marked regions, see + * {@link #mark}. The behavior of this method is unspecified if no call to + * an {@link IntStream initializing method} has occurred after this stream + * was constructed. + * + * @param index The absolute index to seek to. + * + * @throws IllegalArgumentException if {@code index} is less than 0 + * @throws UnsupportedOperationException if the stream does not support + * seeking to the specified index */ void seek(int index); - /** Only makes sense for streams that buffer everything up probably, but - * might be useful to display the entire stream or for testing. This - * value includes a single EOF. + /** + * Returns the total number of symbols in the stream, including a single EOF + * symbol. + * + * @throws UnsupportedOperationException if the size of the stream is + * unknown. */ int size(); - /** Where are you getting symbols from? Normally, implementations will - * pass the buck all the way to the lexer who can ask its input stream - * for the file name or whatever. + /** + * Gets the name of the underlying symbol source. This method returns a + * non-null, non-empty string. If such a name is not known, this method + * returns {@link #UNKNOWN_SOURCE_NAME}. 
*/ + @NotNull public String getSourceName(); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java index 78957dacc..6fb9dd927 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java @@ -165,7 +165,7 @@ public abstract class Lexer extends Recognizer recover(e); ttype = SKIP; } - if ( _input.LA(1)==CharStream.EOF ) { + if ( _input.LA(1)==IntStream.EOF ) { _hitEOF = true; } if ( _type == Token.INVALID_TYPE ) _type = ttype; diff --git a/runtime/Java/src/org/antlr/v4/runtime/Token.java b/runtime/Java/src/org/antlr/v4/runtime/Token.java index 908db2b47..dd689a36a 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/Token.java +++ b/runtime/Java/src/org/antlr/v4/runtime/Token.java @@ -43,7 +43,7 @@ public interface Token { public static final int MIN_USER_TOKEN_TYPE = 1; - public static final int EOF = CharStream.EOF; + public static final int EOF = IntStream.EOF; /** All tokens go to the parser (unless skip() is called in that rule) * on a particular "channel". The parser tunes to a particular channel diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/TokenStream.java index e643535bd..64e5bee71 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/TokenStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/TokenStream.java @@ -1,80 +1,172 @@ /* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.NotNull; -/** A stream of tokens accessing tokens from a TokenSource */ +/** + * An {@link IntStream} whose symbols are {@link Token} instances. + */ public interface TokenStream extends IntStream { - /** Get Token at current input pointer + i ahead where i=1 is next Token. - * i<0 indicates tokens in the past. So -1 is previous token and -2 is - * two tokens ago. LT(0) is undefined. For i>=n, return eof token. - * Return null for LT(0) and any index that results in an absolute address - * that is negative. - * TODO (Sam): Throw exception for invalid k? + /** + * Get the {@link Token} instance associated with the value returned by + * {@link #LA LA(k)}. This method has the same pre- and post-conditions as + * {@link IntStream#LA}. In addition, when the preconditions of this method + * are met, the return value is non-null and the value of + * {@code LT(k).getType()==LA(k)}. + * + * @see IntStream#LA */ - public Token LT(int k); + @NotNull + public Token LT(int k); - /** Get a token at an absolute index i; 0..n-1. This is really only - * needed for profiling and debugging and token stream rewriting. 
- * If you don't want to buffer up tokens, then this method makes no - * sense for you. Naturally you can't use the rewrite stream feature. - * I believe DebugTokenStream can easily be altered to not use - * this method, removing the dependency. + /** + * Gets the {@link Token} at the specified {@code index} in the stream. When + * the preconditions of this method are met, the return value is non-null. + *

+ * The preconditions for this method are the same as the preconditions of + * {@link IntStream#seek}. If the behavior of {@code seek(index)} is + * unspecified for the current state and given {@code index}, then the + * behavior of this method is also unspecified. + *

+ * The symbol referred to by {@code index} differs from {@code seek()} only + * in the case of filtering streams where {@code index} lies before the end + * of the stream. Unlike {@code seek()}, this method does not adjust + * {@code index} to point to a non-ignored symbol. + * + * @throws IllegalArgumentException if {@code index} is less than 0 + * @throws UnsupportedOperationException if the stream does not support + * retrieving the token at the specified index */ - public Token get(int i); + @NotNull + public Token get(int index); - /** Where is this stream pulling tokens from? This is not the name, but - * the object that provides Token objects. + /** + * Gets the underlying {@link TokenSource} which provides tokens for this + * stream. */ + @NotNull public TokenSource getTokenSource(); - /** Return the text of all tokens from within the interval. - * If the stream does not buffer all the tokens then it must - * throw UnsupportedOperationException; - * Users should not access $ruleLabel.text in an action of course in - * that case. - * @param interval + /** + * Return the text of all tokens within the specified {@code interval}. This + * method behaves like the following code (including potential exceptions + * for violating preconditions of {@link #get}), but may be optimized by the + * specific implementation. + * + *

+	 * TokenStream stream = ...;
+	 * String text = "";
+	 * for (int i = interval.a; i <= interval.b; i++) {
+	 *   text += stream.get(i).getText();
+	 * }
+	 * 
+ * + * @param interval The interval of tokens within this stream to get text + * for. + * @return The text of all tokens within the specified interval in this + * stream. + * + * @throws NullPointerException if {@code interval} is {@code null} */ - public String getText(Interval interval); + @NotNull + public String getText(@NotNull Interval interval); + /** + * Return the text of all tokens in the stream. This method behaves like the + * following code, including potential exceptions from the calls to + * {@link IntStream#size} and {@link #getText(Interval)}, but may be + * optimized by the specific implementation. + * + *
+	 * TokenStream stream = ...;
+	 * String text = stream.getText(new Interval(0, stream.size()));
+	 * 
+ * + * @return The text of all tokens in the stream. + */ + @NotNull public String getText(); - public String getText(RuleContext ctx); - - /** Because the user is not required to use a token with an index stored - * in it, we must provide a means for two token objects themselves to - * indicate the start/end location. Most often this will just delegate - * to the other getText(Interval). - * If the stream does not buffer all the tokens then it must - * throw UnsupportedOperationException; + /** + * Return the text of all tokens in the source interval of the specified + * context. This method behaves like the following code, including potential + * exceptions from the call to {@link #getText(Interval)}, but may be + * optimized by the specific implementation. + *

+ * If {@code ctx.getSourceInterval()} does not return a valid interval of + * tokens provided by this stream, the behavior is unspecified. + * + *
+	 * TokenStream stream = ...;
+	 * String text = stream.getText(ctx.getSourceInterval());
+	 * 
+ * + * @param ctx The context providing the source interval of tokens to get + * text for. + * @return The text of all tokens within the source interval of {@code ctx}. */ - public String getText(Token start, Token stop); + @NotNull + public String getText(@NotNull RuleContext ctx); + + /** + * Return the text of all tokens in this stream between {@code start} and + * {@code stop} (inclusive). + *

+ * If the specified {@code start} or {@code stop} token was not provided by + * this stream, or if the {@code stop} occurred before the {@code start} + * token, the behavior is unspecified. + *

+ * For streams which ensure that the {@link Token#getTokenIndex} method is + * accurate for all of its provided tokens, this method behaves like the + * following code. Other streams may implement this method in other ways + * provided the behavior is consistent with this at a high level. + * + *

+	 * TokenStream stream = ...;
+	 * String text = "";
+	 * for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) {
+	 *   text += stream.get(i).getText();
+	 * }
+	 * 
+ * + * @param start The first token in the interval to get text for. + * @param stop The last token in the interval to get text for (inclusive). + * @return The text of all tokens lying between the specified {@code start} + * and {@code stop} tokens. + * + * @throws UnsupportedOperationException if this stream does not support + * this method for the specified tokens + */ + @NotNull + public String getText(@NotNull Token start, @NotNull Token stop); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java index 12783c3c5..5f6193b1a 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java @@ -42,38 +42,58 @@ import java.io.Reader; * that it doesn't buffer all data, not that's it's on demand loading of char. */ public class UnbufferedCharStream implements CharStream { - /** A moving window buffer of the data being scanned. While there's a - * marker, we keep adding to buffer. Otherwise, consume() resets - * so we start filling at index 0 again. + /** + * A moving window buffer of the data being scanned. While there's a marker, + * we keep adding to buffer. Otherwise, {@link #consume consume()} resets so + * we start filling at index 0 again. */ protected char[] data; - /** How many characters are actually in the buffer; this is not - the buffer size, that's data.length. - */ + /** + * The number of characters currently in {@link #data data}. + *

+ * This is not the buffer capacity, that's {@code data.length}. + */ protected int n; - /** 0..n-1 index into data of next char; data[p] is LA(1). - * If p == n, we are out of buffered char. + /** + * 0..n-1 index into {@link #data data} of next character. + *

+ * The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are + * out of buffered characters. */ protected int p=0; - /** Count up with mark() and down with release(). When we release() - * and hit zero, reset buffer to beginning. Copy data[p]..data[n-1] - * to data[0]..data[(n-1)-p]. + /** + * Count up with {@link #mark mark()} and down with + * {@link #release release()}. When we {@code release()} the last mark, + * {@code numMarkers} reaches 0 and we reset the buffer. Copy + * {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}. */ protected int numMarkers = 0; + /** + * This is the {@code LA(-1)} character for the current position. + */ protected int lastChar = -1; - /** Absolute char index. It's the index of the char about to be - * read via LA(1). Goes from 0 to numchar-1 in entire stream. + /** + * When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the + * first character in {@link #data data}. Otherwise, this is unspecified. + */ + protected int lastCharBufferStart; + + /** + * Absolute character index. It's the index of the character about to be + * read via {@code LA(1)}. Goes from 0 to the number of characters in the + * entire stream, although the stream size is unknown before the end is + * reached. */ protected int currentCharIndex = 0; protected Reader input; - /** What is name or source of this char stream? */ + /** The name or source of this char stream. */ public String name; /** Useful for subclasses that pull char from other than this.input. 
*/ @@ -109,39 +129,64 @@ public class UnbufferedCharStream implements CharStream { @Override public void consume() { + if (LA(1) == CharStream.EOF) { + throw new IllegalStateException("cannot consume EOF"); + } + // buf always has at least data[p==0] in this method due to ctor - if ( p==0 ) lastChar = -1; // we're at first char; no LA(-1) - else lastChar = data[p]; // track last char for LA(-1) + lastChar = data[p]; // track last char for LA(-1) + + if (p == n-1 && numMarkers==0) { + n = 0; + p = -1; // p++ will leave this at 0 + lastCharBufferStart = lastChar; + } + p++; currentCharIndex++; -// System.out.println("consume p="+p+", numMarkers="+numMarkers+ -// ", currentCharIndex="+currentCharIndex+", n="+n); sync(1); } - /** Make sure we have 'need' elements from current position p. Last valid - * p index is data.size()-1. p+need-1 is the data index 'need' elements - * ahead. If we need 1 element, (p+1-1)==p must be < data.size(). + /** + * Make sure we have 'need' elements from current position {@link #p p}. + * Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is + * the char index 'need' elements ahead. If we need 1 element, + * {@code (p+1-1)==p} must be less than {@code data.length}. */ protected void sync(int want) { int need = (p+want-1) - n + 1; // how many more elements we need? - if ( need > 0 ) fill(need); // out of elements? - } - - /** add n elements to buffer */ - public void fill(int n) { - for (int i=1; i<=n; i++) { - try { - int c = nextChar(); - add(c); - } - catch (IOException ioe) { - throw new RuntimeException(ioe); - } + if ( need > 0 ) { + fill(need); } } - /** Override to provide different source of characters than this.input */ + /** + * Add {@code n} characters to the buffer. Returns the number of characters + * actually added to the buffer. If the return value is less than {@code n}, + * then EOF was reached before {@code n} characters could be added. 
+ */ + protected int fill(int n) { + for (int i=0; i<n; i++) { + if (this.n > 0 && data[this.n - 1] == CharStream.EOF) { + return i; + } + + try { + int c = nextChar(); + add(c); + } + catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + + return n; + } + + /** + * Override to provide different source of characters than + * {@link #input input}. + */ protected int nextChar() throws IOException { return input.read(); } @@ -161,22 +206,28 @@ public class UnbufferedCharStream implements CharStream { sync(i); int index = p + i - 1; if ( index < 0 ) throw new IndexOutOfBoundsException(); - if ( index > n ) return CharStream.EOF; + if ( index > n ) return IntStream.EOF; int c = data[index]; - if ( c==(char)CharStream.EOF ) return CharStream.EOF; + if ( c==(char)IntStream.EOF ) return IntStream.EOF; return c; } - /** Return a marker that we can release later. Marker happens to be - * index into buffer (not index()). - */ + /** + * Return a marker that we can release later. + *

+ * The specific marker value used for this class allows for some level of + * protection against misuse where {@code seek()} is called on a mark or + * {@code release()} is called in the wrong order. + */ @Override public int mark() { - int m = p; + if (numMarkers == 0) { + lastCharBufferStart = lastChar; + } + + int mark = -numMarkers - 1; numMarkers++; -// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); -// System.out.println(stackTrace[2].getMethodName()+": mark " + m); - return m; + return mark; } /** Decrement number of markers, resetting buffer if we hit 0. @@ -184,19 +235,19 @@ public class UnbufferedCharStream implements CharStream { */ @Override public void release(int marker) { - if ( numMarkers==0 ) { - throw new IllegalStateException("release() called w/o prior matching mark()"); + int expectedMark = -numMarkers; + if ( marker!=expectedMark ) { + throw new IllegalStateException("release() called with an invalid marker."); } -// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); -// System.out.println(stackTrace[2].getMethodName()+": release " + marker); + numMarkers--; - if ( numMarkers==0 ) { // can we release buffer? -// System.out.println("release: shift "+p+".."+(n-1)+" to 0: '"+ new String(data,p,n)+"'"); + if ( numMarkers==0 && p > 0 ) { // release buffer when we can, but don't do unnecessary work // Copy data[p]..data[n-1] to data[0]..data[(n-1)-p], reset ptrs // p is last valid char; move nothing if p==n as we have no valid char System.arraycopy(data, p, data, 0, n - p); // shift n-p char from p to 0 n = n - p; p = 0; + lastCharBufferStart = lastChar; } } @@ -206,19 +257,37 @@ public class UnbufferedCharStream implements CharStream { } /** Seek to absolute character index, which might not be in the current - * sliding window. Move p to index-bufferStartIndex. + * sliding window. Move {@code p} to {@code index-bufferStartIndex}. 
*/ @Override public void seek(int index) { -// System.out.println("seek "+index); + if (index == currentCharIndex) { + return; + } + + if (index > currentCharIndex) { + sync(index - currentCharIndex); + index = Math.min(index, getBufferStartIndex() + n - 1); + } + // index == to bufferStartIndex should set p to 0 int i = index - getBufferStartIndex(); - if ( i < 0 || i >= n ) { + if ( i < 0 ) { + throw new IllegalArgumentException("cannot seek to negative index " + index); + } + else if (i >= n) { throw new UnsupportedOperationException("seek to index outside buffer: "+ index+" not in "+getBufferStartIndex()+".."+(getBufferStartIndex()+n)); } - p = i; + + p = i; currentCharIndex = index; + if (p == 0) { + lastChar = lastCharBufferStart; + } + else { + lastChar = data[p-1]; + } } @Override @@ -233,9 +302,19 @@ public class UnbufferedCharStream implements CharStream { @Override public String getText(Interval interval) { + if (interval.a < 0 || interval.b < interval.a - 1) { + throw new IllegalArgumentException("invalid interval"); + } + int bufferStartIndex = getBufferStartIndex(); + if (n > 0 && data[n - 1] == Character.MAX_VALUE) { + if (interval.a + interval.length() > bufferStartIndex + n) { + throw new IllegalArgumentException("the interval extends past the end of the stream"); + } + } + if (interval.a < bufferStartIndex || interval.b >= bufferStartIndex + n) { - throw new IndexOutOfBoundsException("interval "+interval+" outside buffer: "+ + throw new UnsupportedOperationException("interval "+interval+" outside buffer: "+ bufferStartIndex+".."+(bufferStartIndex+n)); } // convert from absolute to local index @@ -243,23 +322,7 @@ public class UnbufferedCharStream implements CharStream { return new String(data, i, interval.length()); } - /** For testing. What's in moving window into data stream from - * current index, LA(1) or data[p], to end of buffer? 
- */ - public String getRemainingBuffer() { - if ( n==0 ) return null; - return new String(data,p,n-p); - } - - /** For testing. What's in moving window buffer into data stream. - * From 0..p-1 have been consume. - */ - public String getBuffer() { - if ( n==0 ) return null; - return new String(data,0,n); - } - - public int getBufferStartIndex() { + protected final int getBufferStartIndex() { return currentCharIndex - p; } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java index de1381ee7..69989b51d 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java @@ -1,45 +1,91 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.Interval; - -import java.util.Arrays; -import java.util.List; +import org.antlr.v4.runtime.misc.NotNull; public class UnbufferedTokenStream implements TokenStream { protected TokenSource tokenSource; - /** A moving window buffer of the data being scanned. While there's a - * marker, we keep adding to buffer. Otherwise, consume() resets - * so we start filling at index 0 again. + /** + * A moving window buffer of the data being scanned. While there's a marker, + * we keep adding to buffer. Otherwise, {@link #consume consume()} resets so + * we start filling at index 0 again. */ protected Token[] tokens; - /** How many tokens are actually in the buffer; this is not - * the buffer size, that's tokens.length. + /** + * The number of tokens currently in {@link #tokens tokens}. + *

+ * This is not the buffer capacity, that's {@code tokens.length}. */ protected int n; - /** 0..n-1 index into tokens of next token; tokens[p] is LT(1). - * If p == n, we are out of buffered tokens. + /** + * 0..n-1 index into {@link #tokens tokens} of next token. + *

+ * The {@code LT(1)} token is {@code tokens[p]}. If {@code p == n}, we are + * out of buffered tokens. */ protected int p=0; - /** Count up with mark() and down with release(). When we release() - * and hit zero, reset buffer to beginning. Copy data[p]..data[n-1] - * to data[0]..data[(n-1)-p]. + /** + * Count up with {@link #mark mark()} and down with + * {@link #release release()}. When we {@code release()} the last mark, + * {@code numMarkers} reaches 0 and we reset the buffer. Copy + * {@code tokens[p]..tokens[n-1]} to {@code tokens[0]..tokens[(n-1)-p]}. */ protected int numMarkers = 0; + /** + * This is the {@code LT(-1)} token for the current position. + */ protected Token lastToken; - /** Absolute token index. It's the index of the token about to be - * read via LA(1). Goes from 0 to numtokens-1 in entire stream. + /** + * When {@code numMarkers > 0}, this is the {@code LT(-1)} token for the + * first token in {@link #tokens}. Otherwise, this is {@code null}. */ - protected int currentTokenIndex = 0; // simple counter to set token index in tokens + protected Token lastTokenBufferStart; - /** Skip tokens on any channel but this one; this is how we skip whitespace... */ - // TODO: skip off-channel tokens!!! - protected int channel = Token.DEFAULT_CHANNEL; + /** + * Absolute token index. It's the index of the token about to be read via + * {@code LT(1)}. Goes from 0 to the number of tokens in the entire stream, + * although the stream size is unknown before the end is reached. + *

+ * This value is used to set the token indexes if the stream provides tokens + * that implement {@link WritableToken}. + */ + protected int currentTokenIndex = 0; public UnbufferedTokenStream(TokenSource tokenSource) { this(tokenSource, 256); @@ -64,29 +110,32 @@ public class UnbufferedTokenStream implements TokenStream { @Override public Token LT(int i) { - if ( i==-1 ) return lastToken; // special case - sync(i); - int index = p + i - 1; - if ( index < 0 ) throw new IndexOutOfBoundsException("LT("+i+") gives negative index"); - if ( index > n ) { - TokenFactory factory = tokenSource.getTokenFactory(); - int cpos = tokenSource.getCharPositionInLine(); - // The character position for EOF is one beyond the position of - // the previous token's last character - Token eof = factory.create(tokenSource, Token.EOF, null, Token.DEFAULT_CHANNEL, - index(), index()-1, - tokenSource.getLine(), cpos); - return eof; + if ( i==-1 ) { + return lastToken; } - return tokens[index]; + + sync(i); + int index = p + i - 1; + if ( index < 0 ) { + throw new IndexOutOfBoundsException("LT("+i+") gives negative index"); + } + + if ( index >= n ) { + assert n > 0 && tokens[n-1].getType() == Token.EOF; + return tokens[n-1]; + } + + return tokens[index]; } @Override - public int LA(int i) { return LT(i).getType(); } + public int LA(int i) { + return LT(i).getType(); + } @Override public TokenSource getTokenSource() { - return null; + return tokenSource; } @Override @@ -106,80 +155,104 @@ public class UnbufferedTokenStream implements TokenStream { @Override public void consume() { - // buf always has at least data[p==0] in this method due to ctor - if ( p==0 ) lastToken = null; // we're at first token; no LA(-1) - else lastToken = tokens[p]; // track last char for LT(-1) + if (LA(1) == Token.EOF) { + throw new IllegalStateException("cannot consume EOF"); + } + + // buf always has at least tokens[p==0] in this method due to ctor + lastToken = tokens[p]; // track last token for LT(-1) // if 
we're at last token and no markers, opportunity to flush buffer - if ( p == n-1 && numMarkers==0 ) { // can we release buffer? -// System.out.println("consume: reset"); + if ( p == n-1 && numMarkers==0 ) { n = 0; p = -1; // p++ will leave this at 0 + lastTokenBufferStart = lastToken; } p++; currentTokenIndex++; -// System.out.println("consume p="+p+", numMarkers="+numMarkers+ -// ", currentCharIndex="+currentCharIndex+", n="+n); sync(1); } - /** Make sure we have 'need' elements from current position p. Last valid - * p index is tokens.size()-1. p+need-1 is the tokens index 'need' elements - * ahead. If we need 1 element, (p+1-1)==p must be < tokens.size(). + /** Make sure we have 'need' elements from current position {@link #p p}. Last valid + * {@code p} index is {@code tokens.length-1}. {@code p+need-1} is the tokens index 'need' elements + * ahead. If we need 1 element, {@code (p+1-1)==p} must be less than {@code tokens.length}. */ protected void sync(int want) { int need = (p+want-1) - n + 1; // how many more elements we need? - if ( need > 0 ) fill(need); // out of elements? - } - - /** add n elements to buffer */ - public void fill(int n) { - for (int i=1; i<=n; i++) { - Token t = tokenSource.nextToken(); - if ( t instanceof WritableToken ) { - ((WritableToken)t).setTokenIndex(currentTokenIndex); - } - add(t); + if ( need > 0 ) { + fill(need); } } - protected void add(Token t) { + /** + * Add {@code n} elements to the buffer. Returns the number of tokens + * actually added to the buffer. If the return value is less than {@code n}, + * then EOF was reached before {@code n} tokens could be added. 
+ */ + protected int fill(int n) { + for (int i=0; i<n; i++) { + if (this.n > 0 && tokens[this.n-1].getType() == Token.EOF) { + return i; + } + + Token t = tokenSource.nextToken(); + add(t); + } + + return n; + } + + protected void add(@NotNull Token t) { if ( n>=tokens.length ) { Token[] newtokens = new Token[tokens.length*2]; // resize System.arraycopy(tokens, 0, newtokens, 0, tokens.length); tokens = newtokens; } + + if (t instanceof WritableToken) { + ((WritableToken)t).setTokenIndex(getBufferStartIndex() + n); + } + tokens[n++] = t; } - - /** Return a marker that we can release later. Marker happens to be - * index into buffer (not index()). + /** + * Return a marker that we can release later. + *

+ * The specific marker value used for this class allows for some level of + * protection against misuse where {@code seek()} is called on a mark or + * {@code release()} is called in the wrong order. */ @Override public int mark() { - int m = p; + if (numMarkers == 0) { + lastTokenBufferStart = lastToken; + } + + int mark = -numMarkers - 1; numMarkers++; - return m; + return mark; } @Override public void release(int marker) { - if ( numMarkers==0 ) { - throw new IllegalStateException("release() called w/o prior matching mark()"); + int expectedMark = -numMarkers; + if ( marker!=expectedMark ) { + throw new IllegalStateException("release() called with an invalid marker."); } -// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); -// System.out.println(stackTrace[2].getMethodName()+": release " + marker); + numMarkers--; if ( numMarkers==0 ) { // can we release buffer? - System.out.println("release: shift "+p+".."+(n-1)+" to 0: '"+ - Arrays.toString(Arrays.copyOfRange(tokens,p,n))+"'"); - // Copy data[p]..data[n-1] to data[0]..data[(n-1)-p], reset ptrs - // p is last valid token; move nothing if p==n as we have no valid char - System.arraycopy(tokens, p, tokens, 0, n - p); // shift n-p char from p to 0 - n = n - p; - p = 0; + if (p > 0) { + // Copy tokens[p]..tokens[n-1] to tokens[0]..tokens[(n-1)-p], reset ptrs + // p is last valid token; move nothing if p==n as we have no valid char + System.arraycopy(tokens, p, tokens, 0, n - p); // shift n-p tokens from p to 0 + n = n - p; + p = 0; + } + + lastTokenBufferStart = lastToken; } } @@ -190,13 +263,33 @@ public class UnbufferedTokenStream implements TokenStream { @Override public void seek(int index) { // seek to absolute index + if (index == currentTokenIndex) { + return; + } + + if (index > currentTokenIndex) { + sync(index - currentTokenIndex); + index = Math.min(index, getBufferStartIndex() + n - 1); + } + int bufferStartIndex = getBufferStartIndex(); int i = index - bufferStartIndex; - if ( 
i < 0 || i >= n ) { + if ( i < 0 ) { + throw new IllegalArgumentException("cannot seek to negative index " + index); + } + else if (i >= n) { throw new UnsupportedOperationException("seek to index outside buffer: "+ index+" not in "+ bufferStartIndex +".."+(bufferStartIndex +n)); } + p = i; + currentTokenIndex = index; + if (p == 0) { + lastToken = lastTokenBufferStart; + } + else { + lastToken = tokens[p-1]; + } } @Override @@ -233,23 +326,7 @@ public class UnbufferedTokenStream implements TokenStream { return buf.toString(); } - /** For testing. What's in moving window into token stream from - * current index, LT(1) or tokens[p], to end of buffer? - */ - public List getRemainingBuffer() { - if ( n==0 ) return null; - return (List)Arrays.asList(Arrays.copyOfRange(tokens, p, n)); - } - - /** For testing. What's in moving window buffer into data stream. - * From 0..p-1 have been consume. - */ - public List getBuffer() { - if ( n==0 ) return null; - return (List)Arrays.asList(Arrays.copyOfRange(tokens, 0, n)); - } - - public int getBufferStartIndex() { + protected final int getBufferStartIndex() { return currentTokenIndex - p; } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java index b441674ad..e49d61aad 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java @@ -229,7 +229,7 @@ public class LexerATNSimulator extends ATNSimulator { } // if no edge, pop over to ATN interpreter, update DFA and return - if ( s.edges == null || t >= s.edges.length || t <= CharStream.EOF || + if ( s.edges == null || t >= s.edges.length || t <= IntStream.EOF || s.edges[t] == null ) { ATN_failover++; @@ -248,7 +248,7 @@ public class LexerATNSimulator extends ATNSimulator { captureSimState(prevAccept, input, s); // keep going unless we're at EOF; check if something else could match // EOF never in DFA - 
if ( t==CharStream.EOF ) break; + if ( t==IntStream.EOF ) break; } consume(input); @@ -297,7 +297,7 @@ public class LexerATNSimulator extends ATNSimulator { DFAState target = null; ATNConfigSet reach = null; if (s != null) { - if ( s.edges != null && t < s.edges.length && t > CharStream.EOF ) { + if ( s.edges != null && t < s.edges.length && t > IntStream.EOF ) { closure = s.configs; target = s.edges[t]; if (target == ERROR) { @@ -374,7 +374,7 @@ public class LexerATNSimulator extends ATNSimulator { } else { // if no accept and EOF is first char, return EOF - if ( t==CharStream.EOF && input.index()==startIndex ) { + if ( t==IntStream.EOF && input.index()==startIndex ) { return Token.EOF; } @@ -503,7 +503,7 @@ public class LexerATNSimulator extends ATNSimulator { case Transition.NOT_SET: NotSetTransition nst = (NotSetTransition)trans; - if (!nst.set.contains(t) && t!=CharStream.EOF) // ~set doesn't not match EOF + if (!nst.set.contains(t) && t!=IntStream.EOF) // ~set doesn't not match EOF { if ( debug ) { System.out.format("match ~set %s\n", nst.set.toString(true)); @@ -515,7 +515,7 @@ public class LexerATNSimulator extends ATNSimulator { return null; case Transition.WILDCARD: - if (t != CharStream.EOF) { + if (t != IntStream.EOF) { return trans.target; } diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java index 484013dcd..68407e3e8 100644 --- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java +++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java @@ -33,7 +33,7 @@ import org.antlr.runtime.CommonToken; import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.misc.CharSupport; import org.antlr.v4.parse.ANTLRParser; -import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.IntStream; import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.ActionTransition; @@ -271,7 +271,7 @@ public class LexerATNFactory extends 
ParserATNFactory { if ( node.getText().equals("EOF") ) { ATNState left = newState(node); ATNState right = newState(node); - left.addTransition(new AtomTransition(right, CharStream.EOF)); + left.addTransition(new AtomTransition(right, IntStream.EOF)); return new Handle(left, right); } return _ruleRef(node); diff --git a/tool/test/org/antlr/v4/test/BaseTest.java b/tool/test/org/antlr/v4/test/BaseTest.java index a7e541f83..a4bc26e8e 100644 --- a/tool/test/org/antlr/v4/test/BaseTest.java +++ b/tool/test/org/antlr/v4/test/BaseTest.java @@ -39,6 +39,7 @@ import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CommonToken; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.IntStream; import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.RuleContext; import org.antlr.v4.runtime.Token; @@ -232,7 +233,7 @@ public abstract class BaseTest { tokenTypes.add(lg.typeToTokenList.get(ttype)); } - if ( t==CharStream.EOF ) { + if ( t==IntStream.EOF ) { hitEOF = true; } } while ( ttype!=Token.EOF ); diff --git a/tool/test/org/antlr/v4/test/TestUnbufferedCharStream.java b/tool/test/org/antlr/v4/test/TestUnbufferedCharStream.java index d84ecd005..ed0943c8c 100644 --- a/tool/test/org/antlr/v4/test/TestUnbufferedCharStream.java +++ b/tool/test/org/antlr/v4/test/TestUnbufferedCharStream.java @@ -1,30 +1,31 @@ /* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. 
The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.v4.test; @@ -32,78 +33,201 @@ package org.antlr.v4.test; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CommonTokenFactory; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.IntStream; import org.antlr.v4.runtime.UnbufferedCharStream; +import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.tool.LexerGrammar; import org.antlr.v4.tool.interp.LexerInterpreter; import org.junit.Test; +import java.io.Reader; import java.io.StringReader; public class TestUnbufferedCharStream extends BaseTest { @Test public void testNoChar() throws Exception { - CharStream input = new UnbufferedCharStream( - new StringReader("") - ); - assertEquals(CharStream.EOF, input.LA(1)); + CharStream input = createStream(""); + assertEquals(IntStream.EOF, input.LA(1)); + assertEquals(IntStream.EOF, input.LA(2)); + } + + /** + * The {@link IntStream} interface does not specify the behavior when the + * EOF symbol is consumed, but {@link UnbufferedCharStream} handles this + * particular case by throwing an {@link IllegalStateException}. 
+ */ + @Test(expected = IllegalStateException.class) + public void testConsumeEOF() throws Exception { + CharStream input = createStream(""); + assertEquals(IntStream.EOF, input.LA(1)); input.consume(); - assertEquals(CharStream.EOF, input.LA(1)); input.consume(); - assertEquals(CharStream.EOF, input.LA(1)); + } + + @Test(expected = IllegalArgumentException.class) + public void testNegativeSeek() { + CharStream input = createStream(""); + input.seek(-1); + } + + @Test + public void testSeekPastEOF() { + CharStream input = createStream(""); + assertEquals(0, input.index()); + input.seek(1); + assertEquals(0, input.index()); + } + + /** + * The {@link IntStream} interface does not specify the behavior when marks + * are not released in the reversed order they were created, but + * {@link UnbufferedCharStream} handles this case by throwing an + * {@link IllegalStateException}. + */ + @Test(expected = IllegalStateException.class) + public void testMarkReleaseOutOfOrder() { + CharStream input = createStream(""); + int m1 = input.mark(); + int m2 = input.mark(); + input.release(m1); + } + + /** + * The {@link IntStream} interface does not specify the behavior when a mark + * is released twice, but {@link UnbufferedCharStream} handles this case by + * throwing an {@link IllegalStateException}. + */ + @Test(expected = IllegalStateException.class) + public void testMarkReleasedTwice() { + CharStream input = createStream(""); + int m1 = input.mark(); + input.release(m1); + input.release(m1); + } + + /** + * The {@link IntStream} interface does not specify the behavior when a mark + * is released twice, but {@link UnbufferedCharStream} handles this case by + * throwing an {@link IllegalStateException}. 
+ */ + @Test(expected = IllegalStateException.class) + public void testNestedMarkReleasedTwice() { + CharStream input = createStream(""); + int m1 = input.mark(); + int m2 = input.mark(); + input.release(m2); + input.release(m2); + } + + /** + * It is not valid to pass a mark to {@link IntStream#seek}, but + * {@link UnbufferedCharStream} creates marks in such a way that this + * invalid usage results in an {@link IllegalArgumentException}. + */ + @Test(expected = IllegalArgumentException.class) + public void testMarkPassedToSeek() { + CharStream input = createStream(""); + int m1 = input.mark(); + input.seek(m1); + } + + @Test(expected = IllegalArgumentException.class) + public void testSeekBeforeBufferStart() { + CharStream input = createStream("xyz"); + input.consume(); + int m1 = input.mark(); + assertEquals(1, input.index()); + input.consume(); + input.seek(0); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGetTextBeforeBufferStart() { + CharStream input = createStream("xyz"); + input.consume(); + int m1 = input.mark(); + assertEquals(1, input.index()); + input.getText(new Interval(0, 1)); + } + + @Test + public void testGetTextInMarkedRange() { + CharStream input = createStream("xyz"); + input.consume(); + int m1 = input.mark(); + assertEquals(1, input.index()); + input.consume(); + input.consume(); + assertEquals("yz", input.getText(new Interval(1, 2))); + } + + @Test + public void testLastChar() { + CharStream input = createStream("abcdef"); + + input.consume(); + assertEquals('a', input.LA(-1)); + + int m1 = input.mark(); + input.consume(); + input.consume(); + input.consume(); + assertEquals('d', input.LA(-1)); + + input.seek(2); + assertEquals('b', input.LA(-1)); + + input.release(m1); + input.seek(3); + assertEquals('c', input.LA(-1)); + // this special case is not required by the IntStream interface, but + // UnbufferedCharStream allows it so we have to make sure the resulting + // state is consistent + input.seek(2); + 
assertEquals('b', input.LA(-1)); } @Test public void test1Char() throws Exception { - UnbufferedCharStream input = new UnbufferedCharStream( - new StringReader("x") - ); + TestingUnbufferedCharStream input = createStream("x"); assertEquals('x', input.LA(1)); input.consume(); - assertEquals(CharStream.EOF, input.LA(1)); + assertEquals(IntStream.EOF, input.LA(1)); String r = input.getRemainingBuffer(); assertEquals("\uFFFF", r); // shouldn't include x - assertEquals("x\uFFFF", input.getBuffer()); // whole buffer + assertEquals("\uFFFF", input.getBuffer()); // whole buffer } @Test public void test2Char() throws Exception { - UnbufferedCharStream input = new UnbufferedCharStream( - new StringReader("xy") - ); + TestingUnbufferedCharStream input = createStream("xy"); assertEquals('x', input.LA(1)); input.consume(); assertEquals('y', input.LA(1)); assertEquals("y", input.getRemainingBuffer()); // shouldn't include x - assertEquals("xy", input.getBuffer()); + assertEquals("y", input.getBuffer()); input.consume(); - assertEquals(CharStream.EOF, input.LA(1)); + assertEquals(IntStream.EOF, input.LA(1)); + assertEquals("\uFFFF", input.getBuffer()); } @Test public void test2CharAhead() throws Exception { - CharStream input = new UnbufferedCharStream( - new StringReader("xy") - ); + CharStream input = createStream("xy"); assertEquals('x', input.LA(1)); assertEquals('y', input.LA(2)); - assertEquals(CharStream.EOF, input.LA(3)); + assertEquals(IntStream.EOF, input.LA(3)); } @Test public void testBufferExpand() throws Exception { - UnbufferedCharStream input = new UnbufferedCharStream( - new StringReader("01234"), - 2 // buff size 2 - ); + TestingUnbufferedCharStream input = createStream("01234", 2); assertEquals('0', input.LA(1)); assertEquals('1', input.LA(2)); assertEquals('2', input.LA(3)); assertEquals('3', input.LA(4)); assertEquals('4', input.LA(5)); assertEquals("01234", input.getBuffer()); - assertEquals(CharStream.EOF, input.LA(6)); + assertEquals(IntStream.EOF, 
input.LA(6)); } @Test public void testBufferWrapSize1() throws Exception { - CharStream input = new UnbufferedCharStream( - new StringReader("01234"), - 1 // buff size 1 - ); + CharStream input = createStream("01234", 1); assertEquals('0', input.LA(1)); input.consume(); assertEquals('1', input.LA(1)); @@ -114,14 +238,11 @@ public class TestUnbufferedCharStream extends BaseTest { input.consume(); assertEquals('4', input.LA(1)); input.consume(); - assertEquals(CharStream.EOF, input.LA(1)); + assertEquals(IntStream.EOF, input.LA(1)); } @Test public void testBufferWrapSize2() throws Exception { - CharStream input = new UnbufferedCharStream( - new StringReader("01234"), - 2 // buff size 2 - ); + CharStream input = createStream("01234", 2); assertEquals('0', input.LA(1)); input.consume(); assertEquals('1', input.LA(1)); @@ -132,54 +253,45 @@ public class TestUnbufferedCharStream extends BaseTest { input.consume(); assertEquals('4', input.LA(1)); input.consume(); - assertEquals(CharStream.EOF, input.LA(1)); + assertEquals(IntStream.EOF, input.LA(1)); } @Test public void test1Mark() throws Exception { - UnbufferedCharStream input = new UnbufferedCharStream( - new StringReader("xyz") - ); + TestingUnbufferedCharStream input = createStream("xyz"); int m = input.mark(); assertEquals('x', input.LA(1)); assertEquals('y', input.LA(2)); assertEquals('z', input.LA(3)); input.release(m); - assertEquals(CharStream.EOF, input.LA(4)); + assertEquals(IntStream.EOF, input.LA(4)); assertEquals("xyz\uFFFF", input.getBuffer()); } @Test public void test1MarkWithConsumesInSequence() throws Exception { - UnbufferedCharStream input = new UnbufferedCharStream( - new StringReader("xyz") - ); + TestingUnbufferedCharStream input = createStream("xyz"); int m = input.mark(); input.consume(); // x, moves to y input.consume(); // y input.consume(); // z, moves to EOF - assertEquals(CharStream.EOF, input.LA(1)); + assertEquals(IntStream.EOF, input.LA(1)); assertEquals("xyz\uFFFF", input.getBuffer()); 
input.release(m); // wipes buffer assertEquals("\uFFFF", input.getBuffer()); } @Test public void test2Mark() throws Exception { - UnbufferedCharStream input = new UnbufferedCharStream( - new StringReader("xyz"), - 100 - ); + TestingUnbufferedCharStream input = createStream("xyz", 100); assertEquals('x', input.LA(1)); input.consume(); // reset buffer index (p) to 0 int m1 = input.mark(); - assertEquals(1, m1); assertEquals('y', input.LA(1)); input.consume(); int m2 = input.mark(); - assertEquals(2, m2); // 2nd consume leaves p==2 - assertEquals("xyz", input.getBuffer()); + assertEquals("yz", input.getBuffer()); input.release(m2); // drop to 1 marker input.consume(); input.release(m1); // shifts remaining char to beginning - assertEquals(CharStream.EOF, input.LA(1)); + assertEquals(IntStream.EOF, input.LA(1)); assertEquals("\uFFFF", input.getBuffer()); } @@ -195,9 +307,7 @@ public class TestUnbufferedCharStream extends BaseTest { "WS : ' '+;\n"); // Tokens: 012345678901234567 // Input: x = 3 * 0 + 2 * 0; - UnbufferedCharStream input = new UnbufferedCharStream( - new StringReader("x = 302 * 91 + 20234234 * 0;") - ); + TestingUnbufferedCharStream input = createStream("x = 302 * 91 + 20234234 * 0;"); LexerInterpreter lexEngine = new LexerInterpreter(g); // copy text into tokens from char stream lexEngine.setTokenFactory(new CommonTokenFactory(true)); @@ -217,4 +327,40 @@ public class TestUnbufferedCharStream extends BaseTest { " [@17,27:27=';',<3>,1:27], [@18,28:27='',<-1>,1:28]]"; assertEquals(expecting, tokens.getTokens().toString()); } + + protected static TestingUnbufferedCharStream createStream(String text) { + return new TestingUnbufferedCharStream(new StringReader(text)); + } + + protected static TestingUnbufferedCharStream createStream(String text, int bufferSize) { + return new TestingUnbufferedCharStream(new StringReader(text), bufferSize); + } + + protected static class TestingUnbufferedCharStream extends UnbufferedCharStream { + + public 
TestingUnbufferedCharStream(Reader input) { + super(input); + } + + public TestingUnbufferedCharStream(Reader input, int bufferSize) { + super(input, bufferSize); + } + + /** For testing. What's in moving window into data stream from + * current index, LA(1) or data[p], to end of buffer? + */ + public String getRemainingBuffer() { + if ( n==0 ) return ""; + return new String(data,p,n-p); + } + + /** For testing. What's in moving window buffer into data stream. + * From 0..p-1 have been consumed. + */ + public String getBuffer() { + if ( n==0 ) return ""; + return new String(data,0,n); + } + + } } diff --git a/tool/test/org/antlr/v4/test/TestUnbufferedTokenStream.java b/tool/test/org/antlr/v4/test/TestUnbufferedTokenStream.java index c78c265bd..a384e472a 100644 --- a/tool/test/org/antlr/v4/test/TestUnbufferedTokenStream.java +++ b/tool/test/org/antlr/v4/test/TestUnbufferedTokenStream.java @@ -1,8 +1,39 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + package org.antlr.v4.test; import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenSource; import org.antlr.v4.runtime.TokenStream; import org.antlr.v4.runtime.UnbufferedTokenStream; import org.antlr.v4.tool.LexerGrammar; @@ -10,6 +41,9 @@ import org.antlr.v4.tool.interp.LexerInterpreter; import org.junit.Test; import java.io.StringReader; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; public class TestUnbufferedTokenStream extends BaseTest { @Test public void testLookahead() throws Exception { @@ -56,7 +90,7 @@ public class TestUnbufferedTokenStream extends BaseTest { ); LexerInterpreter lexEngine = new LexerInterpreter(g); lexEngine.setInput(input); - UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexEngine); + TestingUnbufferedTokenStream tokens = new TestingUnbufferedTokenStream(lexEngine); assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString()); assertEquals("x", tokens.LT(1).getText()); @@ -94,7 +128,7 @@ public class TestUnbufferedTokenStream extends BaseTest { ); LexerInterpreter lexEngine = new LexerInterpreter(g); lexEngine.setInput(input); - UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexEngine); + TestingUnbufferedTokenStream tokens = new TestingUnbufferedTokenStream(lexEngine); int m = tokens.mark(); assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString()); 
@@ -130,7 +164,7 @@ public class TestUnbufferedTokenStream extends BaseTest { ); LexerInterpreter lexEngine = new LexerInterpreter(g); lexEngine.setInput(input); - UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexEngine); + TestingUnbufferedTokenStream tokens = new TestingUnbufferedTokenStream(lexEngine); int m = tokens.mark(); assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString()); @@ -158,4 +192,34 @@ public class TestUnbufferedTokenStream extends BaseTest { tokens.getBuffer().toString()); tokens.release(m); } + + protected static class TestingUnbufferedTokenStream extends UnbufferedTokenStream { + + public TestingUnbufferedTokenStream(TokenSource tokenSource) { + super(tokenSource); + } + + /** For testing. What's in moving window into token stream from + * current index, LT(1) or tokens[p], to end of buffer? + */ + protected List getRemainingBuffer() { + if ( n==0 ) { + return Collections.emptyList(); + } + + return Arrays.asList(tokens).subList(p, n); + } + + /** For testing. What's in moving window buffer into data stream. + * From 0..p-1 have been consumed. + */ + protected List getBuffer() { + if ( n==0 ) { + return Collections.emptyList(); + } + + return Arrays.asList(tokens).subList(0, n); + } + + } }