diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java
index d4c305177..de9258849 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java
@@ -159,13 +159,13 @@ public class ANTLRInputStream implements CharStream {
if ( i<0 ) {
i++; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
if ( (p+i-1) < 0 ) {
- return CharStream.EOF; // invalid; no char before first char
+ return IntStream.EOF; // invalid; no char before first char
}
}
if ( (p+i-1) >= n ) {
//System.out.println("char LA("+i+")=EOF; p="+p);
- return CharStream.EOF;
+ return IntStream.EOF;
}
//System.out.println("char LA("+i+")="+(char)data[p+i-1]+"; p="+p);
//System.out.println("LA("+i+"); p="+p+" n="+n+" data.length="+data.length);
diff --git a/runtime/Java/src/org/antlr/v4/runtime/CharStream.java b/runtime/Java/src/org/antlr/v4/runtime/CharStream.java
index 7c2f691c0..1845cbb5a 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/CharStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CharStream.java
@@ -1,46 +1,73 @@
/*
- [The "BSD license"]
- Copyright (c) 2011 Terence Parr
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- 3. The name of the author may not be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * [The "BSD license"]
+ * Copyright (c) 2012 Terence Parr
+ * Copyright (c) 2012 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Interval;
+import org.antlr.v4.runtime.misc.NotNull;
-/** A source of characters for an ANTLR lexer */
+/** A source of characters for an ANTLR lexer. */
public interface CharStream extends IntStream {
- public static final int EOF = -1;
- public static final int MIN_CHAR = Character.MIN_VALUE;
- public static final int MAX_CHAR = Character.MAX_VALUE-1; // FFFE is max
-
- /** This is primaril a useful interface for action code.
- * Just make sure actions don't use this on streams that don't support it.
- * For unbuffered streams, you can't use this except in case
- * where interval is in current buffer window. Lexer guarantees
- * text of current token at emit() time will be available.
+ /**
+ * The minimum allowed value for a character in a {@code CharStream}.
*/
- public String getText(Interval interval);
+ public static final int MIN_CHAR = Character.MIN_VALUE;
+
+ /**
+ * The maximum allowed value for a character in a {@code CharStream}.
+ *
+ * This value is {@code Character.MAX_VALUE - 1}, which reserves the value
+ * {@code Character.MAX_VALUE} for special use within an implementing class.
+ * For some implementations, the data buffers required for supporting the
+ * marked ranges of {@link IntStream} are stored as {@code char[]} instead
+ * of {@code int[]}, with {@code Character.MAX_VALUE} being used instead of
+ * {@code -1} to mark the end of the stream internally.
+ */
+ public static final int MAX_CHAR = Character.MAX_VALUE-1;
+
+ /**
+ * This method returns the text for a range of characters within this input
+ * stream. This method is guaranteed to not throw an exception if the
+ * specified {@code interval} lies entirely within a marked range. For more
+ * information about marked ranges, see {@link IntStream#mark}.
+ *
+ * @param interval an interval within the stream
+ * @return the text of the specified interval
+ *
+ * @throws NullPointerException if {@code interval} is {@code null}
+ * @throws IllegalArgumentException if {@code interval.a < 0}, or if
+ * {@code interval.b < interval.a - 1}, or if {@code interval.b} lies at or
+ * past the end of the stream
+ * @throws UnsupportedOperationException if the stream does not support
+ * getting the text of the specified interval
+ */
+ @NotNull
+ public String getText(@NotNull Interval interval);
}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/IntStream.java b/runtime/Java/src/org/antlr/v4/runtime/IntStream.java
index 143c4dda5..a02b83408 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/IntStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/IntStream.java
@@ -1,100 +1,242 @@
/*
- [The "BSD license"]
- Copyright (c) 2011 Terence Parr
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- 3. The name of the author may not be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * [The "BSD license"]
+ * Copyright (c) 2012 Terence Parr
+ * Copyright (c) 2012 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
package org.antlr.v4.runtime;
-/** A simple stream of integers used when all I care about is the char
- * or token type sequence (such as interpretation).
+import org.antlr.v4.runtime.misc.NotNull;
+
+/**
+ * A simple stream of symbols whose values are represented as integers. This
+ * interface provides marked ranges with support for a minimum level
+ * of buffering necessary to implement arbitrary lookahead during prediction.
+ * For more information on marked ranges, see {@link #mark}.
+ *
+ * Initializing Methods: Some methods in this interface have
+ * unspecified behavior if no call to an initializing method has occurred after
+ * the stream was constructed. The following is a list of initializing methods:
+ *
+ *
+ * - {@link #LA}
+ * - {@link #consume}
+ * - {@link #size}
+ *
*/
public interface IntStream {
+ /**
+ * The value returned by {@link #LA LA()} when the end of the stream is
+ * reached.
+ */
+ public static final int EOF = -1;
+
+ /**
+ * The value returned by {@link #getSourceName} when the actual name of the
+ * underlying source is not known.
+ */
+ public static final String UNKNOWN_SOURCE_NAME = "";
+
+ /**
+ * Consumes the current symbol in the stream. This method has the following
+ * effects:
+ *
+ *
+ * - Forward movement: The value of {@link #index index()}
+ * before calling this method is less than the value of {@code index()}
+ * after calling this method.
+ * - Ordered lookahead: The value of {@code LA(1)} before
+ * calling this method becomes the value of {@code LA(-1)} after calling
+ * this method.
+ *
+ *
+ * Note that calling this method does not guarantee that {@code index()} is
+ * incremented by exactly 1, as that would preclude the ability to implement
+ * filtering streams (e.g. {@link CommonTokenStream} which distinguishes
+ * between "on-channel" and "off-channel" tokens).
+ *
+ * @throws IllegalStateException if an attempt is made to consume the
+ * end of the stream (i.e. if {@code LA(1)==}{@link #EOF EOF} before calling
+ * {@code consume}).
+ */
void consume();
- /** Get int at current input pointer + i ahead where i=1 is next int.
- * Negative indexes are allowed. LA(-1) is previous token (token
- * just matched). LA(-i) where i is before first token should
- * yield -1, invalid char / EOF.
+ /**
+ * Gets the value of the symbol at offset {@code i} from the current
+ * position. When {@code i==1}, this method returns the value of the current
+ * symbol in the stream (which is the next symbol to be consumed). When
+ * {@code i==-1}, this method returns the value of the previously read
+ * symbol in the stream. It is not valid to call this method with
+ * {@code i==0}, but the specific behavior is unspecified because this
+ * method is frequently called from performance-critical code.
+ *
+ * This method is guaranteed to succeed if any of the following are true:
+ *
+ *
+ * - {@code i>0}
+ * - {@code i==-1} and {@link #index index()} returns a value greater
+ * than the value of {@code index()} after the stream was constructed
+ * and {@code LA(1)} was called in that order. Specifying the current
+ * {@code index()} relative to the index after the stream was created
+ * allows for filtering implementations that do not return every symbol
+ * from the underlying source. Specifying the call to {@code LA(1)}
+ * allows for lazily initialized streams.
+ * - {@code LA(i)} refers to a symbol consumed within a marked region
+ * that has not yet been released.
+ *
+ *
+ * If {@code i} represents a position at or beyond the end of the stream,
+ * this method returns {@link #EOF}.
+ *
+ * The return value is unspecified if {@code i<0} and fewer than {@code -i}
+ * calls to {@link #consume consume()} have occurred from the beginning of
+ * the stream before calling this method.
+ *
+ * @throws UnsupportedOperationException if the stream does not support
+ * retrieving the value of the specified symbol
*/
int LA(int i);
- /** Tell the stream to start buffering if it hasn't already. Return
- * a marker, usually a function of current input position, index().
- * Calling release(mark()) should not affect the input cursor.
- * Can seek to any index between where we were when mark() was called
- * and current index() until we release this marker. No mark can appear
- * at an index before the first mark.
+ /**
+ * A mark provides a guarantee that {@link #seek seek()} operations will be
+ * valid over a "marked range" extending from the index where {@code mark()}
+ * was called to the current {@link #index index()}. This allows the use of
+ * streaming input sources by specifying the minimum buffering requirements
+ * to support arbitrary lookahead during prediction.
+ *
+ * The returned mark is an opaque handle (type {@code int}) which is passed
+ * to {@link #release release()} when the guarantees provided by the marked
+ * range are no longer necessary. When calls to
+ * {@code mark()}/{@code release()} are nested, the marks must be released
+ * in reverse order of which they were obtained. Since marked regions are
+ * used during performance-critical sections of prediction, the specific
+ * behavior of invalid usage is unspecified (i.e. a mark is not released, or
+ * a mark is released twice, or marks are not released in reverse order from
+ * which they were created).
+ *
+ * The behavior of this method is unspecified if no call to an
+ * {@link IntStream initializing method} has occurred after this stream was
+ * constructed.
+ *
+ * This method does not change the current position in the input stream.
+ *
+ * The following example shows the use of {@link #mark mark()},
+ * {@link #release release(mark)}, {@link #index index()}, and
+ * {@link #seek seek(index)} as part of an operation to safely work within a
+ * marked region, then restore the stream position to its original value and
+ * release the mark.
+ *
+ * IntStream stream = ...;
+ * int index = -1;
+ * int mark = stream.mark();
+ * try {
+ * index = stream.index();
+ * // perform work here...
+ * } finally {
+ * if (index != -1) {
+ * stream.seek(index);
+ * }
+ * stream.release(mark);
+ * }
+ *
*
- * A resource leak may occur if the value returned from a call to
- * mark() is not passed to release() afterwards. When calls to mark()
- * are nested, release() must be called in reverse order of the calls
- * to mark(), otherwise the behavior is unspecified.
- *
- * @return An opaque marker which should be passed to release
- * when the range of symbols from where the marker was dropped
- * to the current input symbol is no longer required.
- */
+ * @return An opaque marker which should be passed to
+ * {@link #release release()} when the marked range is no longer required.
+ */
int mark();
- /** Release requirement that stream holds tokens from marked location
- * to current index(). Must release in reverse order (like stack)
- * of mark() otherwise undefined behavior.
+ /**
+ * This method releases a marked range created by a call to
+ * {@link #mark mark()}. Calls to {@code release()} must appear in the
+ * reverse order of the corresponding calls to {@code mark()}. If a mark is
+ * released twice, or if marks are not released in reverse order of the
+ * corresponding calls to {@code mark()}, the behavior is unspecified.
+ *
+ * For more information and an example, see {@link #mark}.
+ *
+ * @param marker A marker returned by a call to {@code mark()}.
+ * @see #mark
*/
void release(int marker);
- /** Return the current input symbol index 0..n where n indicates the
- * last symbol has been read. The index is the symbol about to be
- * read not the most recently read symbol.
- */
+ /**
+ * Return the index into the stream of the input symbol referred to by
+ * {@code LA(1)}.
+ *
+ * The behavior of this method is unspecified if no call to an
+ * {@link IntStream initializing method} has occurred after this stream was
+ * constructed.
+ */
int index();
- /** Set the input cursor to the position indicated by index. This is
- * normally used to rewind the input stream but can move forward as well.
- * It's up to the stream implementation to make sure that symbols are
- * buffered as necessary to make seek land on a valid symbol.
- * Or, they should avoid moving the input cursor.
+ /**
+ * Set the input cursor to the position indicated by {@code index}. If the
+ * specified index lies past the end of the stream, the operation behaves as
+ * though {@code index} was the index of the EOF symbol. After this method
+ * returns without throwing an exception, at least one of the following
+ * will be true.
*
- * The index is 0..n-1. A seek to position i means that LA(1) will
- * return the ith symbol. So, seeking to 0 means LA(1) will return the
- * first element in the stream.
- *
- * For unbuffered streams, index i might not be in buffer. That throws
- * index exception.
+ *
+ * - {@link #index index()} will return the index of the first symbol
+ * appearing at or after the specified {@code index}. Specifically,
+ * implementations which filter their sources should automatically
+ * adjust {@code index} forward the minimum amount required for the
+ * operation to target a non-ignored symbol.
+ * - {@code LA(1)} returns {@link #EOF}
+ *
+ *
+ * This operation is guaranteed to not throw an exception if {@code index}
+ * lies within a marked region. For more information on marked regions, see
+ * {@link #mark}. The behavior of this method is unspecified if no call to
+ * an {@link IntStream initializing method} has occurred after this stream
+ * was constructed.
+ *
+ * @param index The absolute index to seek to.
+ *
+ * @throws IllegalArgumentException if {@code index} is less than 0
+ * @throws UnsupportedOperationException if the stream does not support
+ * seeking to the specified index
*/
void seek(int index);
- /** Only makes sense for streams that buffer everything up probably, but
- * might be useful to display the entire stream or for testing. This
- * value includes a single EOF.
+ /**
+ * Returns the total number of symbols in the stream, including a single EOF
+ * symbol.
+ *
+ * @throws UnsupportedOperationException if the size of the stream is
+ * unknown.
*/
int size();
- /** Where are you getting symbols from? Normally, implementations will
- * pass the buck all the way to the lexer who can ask its input stream
- * for the file name or whatever.
+ /**
+ * Gets the name of the underlying symbol source. This method returns a
+ * non-null, non-empty string. If such a name is not known, this method
+ * returns {@link #UNKNOWN_SOURCE_NAME}.
*/
+ @NotNull
public String getSourceName();
}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
index 78957dacc..6fb9dd927 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
@@ -165,7 +165,7 @@ public abstract class Lexer extends Recognizer
recover(e);
ttype = SKIP;
}
- if ( _input.LA(1)==CharStream.EOF ) {
+ if ( _input.LA(1)==IntStream.EOF ) {
_hitEOF = true;
}
if ( _type == Token.INVALID_TYPE ) _type = ttype;
diff --git a/runtime/Java/src/org/antlr/v4/runtime/Token.java b/runtime/Java/src/org/antlr/v4/runtime/Token.java
index 908db2b47..dd689a36a 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/Token.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Token.java
@@ -43,7 +43,7 @@ public interface Token {
public static final int MIN_USER_TOKEN_TYPE = 1;
- public static final int EOF = CharStream.EOF;
+ public static final int EOF = IntStream.EOF;
/** All tokens go to the parser (unless skip() is called in that rule)
* on a particular "channel". The parser tunes to a particular channel
diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/TokenStream.java
index e643535bd..64e5bee71 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/TokenStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/TokenStream.java
@@ -1,80 +1,172 @@
/*
- [The "BSD license"]
- Copyright (c) 2011 Terence Parr
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- 3. The name of the author may not be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * [The "BSD license"]
+ * Copyright (c) 2012 Terence Parr
+ * Copyright (c) 2012 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Interval;
+import org.antlr.v4.runtime.misc.NotNull;
-/** A stream of tokens accessing tokens from a TokenSource */
+/**
+ * An {@link IntStream} whose symbols are {@link Token} instances.
+ */
public interface TokenStream extends IntStream {
- /** Get Token at current input pointer + i ahead where i=1 is next Token.
- * i<0 indicates tokens in the past. So -1 is previous token and -2 is
- * two tokens ago. LT(0) is undefined. For i>=n, return eof token.
- * Return null for LT(0) and any index that results in an absolute address
- * that is negative.
- * TODO (Sam): Throw exception for invalid k?
+ /**
+ * Get the {@link Token} instance associated with the value returned by
+ * {@link #LA LA(k)}. This method has the same pre- and post-conditions as
+ * {@link IntStream#LA}. In addition, when the preconditions of this method
+ * are met, the return value is non-null and the value of
+ * {@code LT(k).getType()==LA(k)}.
+ *
+ * @see IntStream#LA
*/
- public Token LT(int k);
+ @NotNull
+ public Token LT(int k);
- /** Get a token at an absolute index i; 0..n-1. This is really only
- * needed for profiling and debugging and token stream rewriting.
- * If you don't want to buffer up tokens, then this method makes no
- * sense for you. Naturally you can't use the rewrite stream feature.
- * I believe DebugTokenStream can easily be altered to not use
- * this method, removing the dependency.
+ /**
+ * Gets the {@link Token} at the specified {@code index} in the stream. When
+ * the preconditions of this method are met, the return value is non-null.
+ *
+ * The preconditions for this method are the same as the preconditions of
+ * {@link IntStream#seek}. If the behavior of {@code seek(index)} is
+ * unspecified for the current state and given {@code index}, then the
+ * behavior of this method is also unspecified.
+ *
+ * The symbol referred to by {@code index} differs from {@code seek()} only
+ * in the case of filtering streams where {@code index} lies before the end
+ * of the stream. Unlike {@code seek()}, this method does not adjust
+ * {@code index} to point to a non-ignored symbol.
+ *
+ * @throws IllegalArgumentException if {@code index} is less than 0
+ * @throws UnsupportedOperationException if the stream does not support
+ * retrieving the token at the specified index
*/
- public Token get(int i);
+ @NotNull
+ public Token get(int index);
- /** Where is this stream pulling tokens from? This is not the name, but
- * the object that provides Token objects.
+ /**
+ * Gets the underlying {@link TokenSource} which provides tokens for this
+ * stream.
*/
+ @NotNull
public TokenSource getTokenSource();
- /** Return the text of all tokens from within the interval.
- * If the stream does not buffer all the tokens then it must
- * throw UnsupportedOperationException;
- * Users should not access $ruleLabel.text in an action of course in
- * that case.
- * @param interval
+ /**
+ * Return the text of all tokens within the specified {@code interval}. This
+ * method behaves like the following code (including potential exceptions
+ * for violating preconditions of {@link #get}), but may be optimized by the
+ * specific implementation.
+ *
+ *
+ * TokenStream stream = ...;
+ * String text = "";
+ * for (int i = interval.a; i <= interval.b; i++) {
+ * text += stream.get(i).getText();
+ * }
+ *
+ *
+ * @param interval The interval of tokens within this stream to get text
+ * for.
+ * @return The text of all tokens within the specified interval in this
+ * stream.
+ *
+ * @throws NullPointerException if {@code interval} is {@code null}
*/
- public String getText(Interval interval);
+ @NotNull
+ public String getText(@NotNull Interval interval);
+ /**
+ * Return the text of all tokens in the stream. This method behaves like the
+ * following code, including potential exceptions from the calls to
+ * {@link IntStream#size} and {@link #getText(Interval)}, but may be
+ * optimized by the specific implementation.
+ *
+ *
+ * TokenStream stream = ...;
+ * String text = stream.getText(new Interval(0, stream.size() - 1));
+ *
+ *
+ * @return The text of all tokens in the stream.
+ */
+ @NotNull
public String getText();
- public String getText(RuleContext ctx);
-
- /** Because the user is not required to use a token with an index stored
- * in it, we must provide a means for two token objects themselves to
- * indicate the start/end location. Most often this will just delegate
- * to the other getText(Interval).
- * If the stream does not buffer all the tokens then it must
- * throw UnsupportedOperationException;
+ /**
+ * Return the text of all tokens in the source interval of the specified
+ * context. This method behaves like the following code, including potential
+ * exceptions from the call to {@link #getText(Interval)}, but may be
+ * optimized by the specific implementation.
+ *
+ * If {@code ctx.getSourceInterval()} does not return a valid interval of
+ * tokens provided by this stream, the behavior is unspecified.
+ *
+ *
+ * TokenStream stream = ...;
+ * String text = stream.getText(ctx.getSourceInterval());
+ *
+ *
+ * @param ctx The context providing the source interval of tokens to get
+ * text for.
+ * @return The text of all tokens within the source interval of {@code ctx}.
*/
- public String getText(Token start, Token stop);
+ @NotNull
+ public String getText(@NotNull RuleContext ctx);
+
+ /**
+ * Return the text of all tokens in this stream between {@code start} and
+ * {@code stop} (inclusive).
+ *
+ * If the specified {@code start} or {@code stop} token was not provided by
+ * this stream, or if the {@code stop} occurred before the {@code start}
+ * token, the behavior is unspecified.
+ *
+ * For streams which ensure that the {@link Token#getTokenIndex} method is
+ * accurate for all of its provided tokens, this method behaves like the
+ * following code. Other streams may implement this method in other ways
+ * provided the behavior is consistent with this at a high level.
+ *
+ *
+ * TokenStream stream = ...;
+ * String text = "";
+ * for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) {
+ * text += stream.get(i).getText();
+ * }
+ *
+ *
+ * @param start The first token in the interval to get text for.
+ * @param stop The last token in the interval to get text for (inclusive).
+ * @return The text of all tokens lying between the specified {@code start}
+ * and {@code stop} tokens.
+ *
+ * @throws UnsupportedOperationException if this stream does not support
+ * this method for the specified tokens
+ */
+ @NotNull
+ public String getText(@NotNull Token start, @NotNull Token stop);
}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java
index 12783c3c5..5f6193b1a 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java
@@ -42,38 +42,58 @@ import java.io.Reader;
* that it doesn't buffer all data, not that's it's on demand loading of char.
*/
public class UnbufferedCharStream implements CharStream {
- /** A moving window buffer of the data being scanned. While there's a
- * marker, we keep adding to buffer. Otherwise, consume() resets
- * so we start filling at index 0 again.
+ /**
+ * A moving window buffer of the data being scanned. While there's a marker,
+ * we keep adding to buffer. Otherwise, {@link #consume consume()} resets so
+ * we start filling at index 0 again.
*/
protected char[] data;
- /** How many characters are actually in the buffer; this is not
- the buffer size, that's data.length.
- */
+ /**
+ * The number of characters currently in {@link #data data}.
+ *
+ * This is not the buffer capacity, that's {@code data.length}.
+ */
protected int n;
- /** 0..n-1 index into data of next char; data[p] is LA(1).
- * If p == n, we are out of buffered char.
+ /**
+ * 0..n-1 index into {@link #data data} of next character.
+ *
+ * The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are
+ * out of buffered characters.
*/
protected int p=0;
- /** Count up with mark() and down with release(). When we release()
- * and hit zero, reset buffer to beginning. Copy data[p]..data[n-1]
- * to data[0]..data[(n-1)-p].
+ /**
+ * Count up with {@link #mark mark()} and down with
+ * {@link #release release()}. When we {@code release()} the last mark,
+ * {@code numMarkers} reaches 0 and we reset the buffer. Copy
+ * {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}.
*/
protected int numMarkers = 0;
+ /**
+ * This is the {@code LA(-1)} character for the current position.
+ */
protected int lastChar = -1;
- /** Absolute char index. It's the index of the char about to be
- * read via LA(1). Goes from 0 to numchar-1 in entire stream.
+ /**
+ * When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the
+ * first character in {@link #data data}. Otherwise, this is unspecified.
+ */
+ protected int lastCharBufferStart;
+
+ /**
+ * Absolute character index. It's the index of the character about to be
+ * read via {@code LA(1)}. Goes from 0 to the number of characters in the
+ * entire stream, although the stream size is unknown before the end is
+ * reached.
*/
protected int currentCharIndex = 0;
protected Reader input;
- /** What is name or source of this char stream? */
+ /** The name or source of this char stream. */
public String name;
/** Useful for subclasses that pull char from other than this.input. */
@@ -109,39 +129,64 @@ public class UnbufferedCharStream implements CharStream {
@Override
public void consume() {
+ if (LA(1) == CharStream.EOF) {
+ throw new IllegalStateException("cannot consume EOF");
+ }
+
// buf always has at least data[p==0] in this method due to ctor
- if ( p==0 ) lastChar = -1; // we're at first char; no LA(-1)
- else lastChar = data[p]; // track last char for LA(-1)
+ lastChar = data[p]; // track last char for LA(-1)
+
+ if (p == n-1 && numMarkers==0) {
+ n = 0;
+ p = -1; // p++ will leave this at 0
+ lastCharBufferStart = lastChar;
+ }
+
p++;
currentCharIndex++;
-// System.out.println("consume p="+p+", numMarkers="+numMarkers+
-// ", currentCharIndex="+currentCharIndex+", n="+n);
sync(1);
}
- /** Make sure we have 'need' elements from current position p. Last valid
- * p index is data.size()-1. p+need-1 is the data index 'need' elements
- * ahead. If we need 1 element, (p+1-1)==p must be < data.size().
+ /**
+ * Make sure we have 'need' elements from current position {@link #p p}.
+ * Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is
+ * the char index 'need' elements ahead. If we need 1 element,
+ * {@code (p+1-1)==p} must be less than {@code data.length}.
*/
protected void sync(int want) {
int need = (p+want-1) - n + 1; // how many more elements we need?
- if ( need > 0 ) fill(need); // out of elements?
- }
-
- /** add n elements to buffer */
- public void fill(int n) {
- for (int i=1; i<=n; i++) {
- try {
- int c = nextChar();
- add(c);
- }
- catch (IOException ioe) {
- throw new RuntimeException(ioe);
- }
+ if ( need > 0 ) {
+ fill(need);
}
}
- /** Override to provide different source of characters than this.input */
+ /**
+ * Add {@code n} characters to the buffer. Returns the number of characters
+ * actually added to the buffer. If the return value is less than {@code n},
+ * then EOF was reached before {@code n} characters could be added.
+ */
+ protected int fill(int n) {
+ for (int i=0; i<n; i++) {
+ if (this.n > 0 && data[this.n - 1] == CharStream.EOF) {
+ return i;
+ }
+
+ try {
+ int c = nextChar();
+ add(c);
+ }
+ catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
+ }
+
+ return n;
+ }
+
+ /**
+ * Override to provide different source of characters than
+ * {@link #input input}.
+ */
protected int nextChar() throws IOException {
return input.read();
}
@@ -161,22 +206,28 @@ public class UnbufferedCharStream implements CharStream {
sync(i);
int index = p + i - 1;
if ( index < 0 ) throw new IndexOutOfBoundsException();
- if ( index > n ) return CharStream.EOF;
+ if ( index > n ) return IntStream.EOF;
int c = data[index];
- if ( c==(char)CharStream.EOF ) return CharStream.EOF;
+ if ( c==(char)IntStream.EOF ) return IntStream.EOF;
return c;
}
- /** Return a marker that we can release later. Marker happens to be
- * index into buffer (not index()).
- */
+ /**
+ * Return a marker that we can release later.
+ *
+ * The specific marker value used for this class allows for some level of
+ * protection against misuse where {@code seek()} is called on a mark or
+ * {@code release()} is called in the wrong order.
+ */
@Override
public int mark() {
- int m = p;
+ if (numMarkers == 0) {
+ lastCharBufferStart = lastChar;
+ }
+
+ int mark = -numMarkers - 1;
numMarkers++;
-// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
-// System.out.println(stackTrace[2].getMethodName()+": mark " + m);
- return m;
+ return mark;
}
/** Decrement number of markers, resetting buffer if we hit 0.
@@ -184,19 +235,19 @@ public class UnbufferedCharStream implements CharStream {
*/
@Override
public void release(int marker) {
- if ( numMarkers==0 ) {
- throw new IllegalStateException("release() called w/o prior matching mark()");
+ int expectedMark = -numMarkers;
+ if ( marker!=expectedMark ) {
+ throw new IllegalStateException("release() called with an invalid marker.");
}
-// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
-// System.out.println(stackTrace[2].getMethodName()+": release " + marker);
+
numMarkers--;
- if ( numMarkers==0 ) { // can we release buffer?
-// System.out.println("release: shift "+p+".."+(n-1)+" to 0: '"+ new String(data,p,n)+"'");
+ if ( numMarkers==0 && p > 0 ) { // release buffer when we can, but don't do unnecessary work
// Copy data[p]..data[n-1] to data[0]..data[(n-1)-p], reset ptrs
// p is last valid char; move nothing if p==n as we have no valid char
System.arraycopy(data, p, data, 0, n - p); // shift n-p char from p to 0
n = n - p;
p = 0;
+ lastCharBufferStart = lastChar;
}
}
@@ -206,19 +257,37 @@ public class UnbufferedCharStream implements CharStream {
}
/** Seek to absolute character index, which might not be in the current
- * sliding window. Move p to index-bufferStartIndex.
+ * sliding window. Move {@code p} to {@code index-bufferStartIndex}.
*/
@Override
public void seek(int index) {
-// System.out.println("seek "+index);
+ if (index == currentCharIndex) {
+ return;
+ }
+
+ if (index > currentCharIndex) {
+ sync(index - currentCharIndex);
+ index = Math.min(index, getBufferStartIndex() + n - 1);
+ }
+
// index == to bufferStartIndex should set p to 0
int i = index - getBufferStartIndex();
- if ( i < 0 || i >= n ) {
+ if ( i < 0 ) {
+ throw new IllegalArgumentException("cannot seek to negative index " + index);
+ }
+ else if (i >= n) {
throw new UnsupportedOperationException("seek to index outside buffer: "+
index+" not in "+getBufferStartIndex()+".."+(getBufferStartIndex()+n));
}
- p = i;
+
+ p = i;
currentCharIndex = index;
+ if (p == 0) {
+ lastChar = lastCharBufferStart;
+ }
+ else {
+ lastChar = data[p-1];
+ }
}
@Override
@@ -233,9 +302,19 @@ public class UnbufferedCharStream implements CharStream {
@Override
public String getText(Interval interval) {
+ if (interval.a < 0 || interval.b < interval.a - 1) {
+ throw new IllegalArgumentException("invalid interval");
+ }
+
int bufferStartIndex = getBufferStartIndex();
+ if (n > 0 && data[n - 1] == Character.MAX_VALUE) {
+ if (interval.a + interval.length() > bufferStartIndex + n) {
+ throw new IllegalArgumentException("the interval extends past the end of the stream");
+ }
+ }
+
if (interval.a < bufferStartIndex || interval.b >= bufferStartIndex + n) {
- throw new IndexOutOfBoundsException("interval "+interval+" outside buffer: "+
+ throw new UnsupportedOperationException("interval "+interval+" outside buffer: "+
bufferStartIndex+".."+(bufferStartIndex+n));
}
// convert from absolute to local index
@@ -243,23 +322,7 @@ public class UnbufferedCharStream implements CharStream {
return new String(data, i, interval.length());
}
- /** For testing. What's in moving window into data stream from
- * current index, LA(1) or data[p], to end of buffer?
- */
- public String getRemainingBuffer() {
- if ( n==0 ) return null;
- return new String(data,p,n-p);
- }
-
- /** For testing. What's in moving window buffer into data stream.
- * From 0..p-1 have been consume.
- */
- public String getBuffer() {
- if ( n==0 ) return null;
- return new String(data,0,n);
- }
-
- public int getBufferStartIndex() {
+ protected final int getBufferStartIndex() {
return currentCharIndex - p;
}
}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java
index de1381ee7..69989b51d 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java
@@ -1,45 +1,91 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2012 Terence Parr
+ * Copyright (c) 2012 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Interval;
-
-import java.util.Arrays;
-import java.util.List;
+import org.antlr.v4.runtime.misc.NotNull;
public class UnbufferedTokenStream implements TokenStream {
protected TokenSource tokenSource;
- /** A moving window buffer of the data being scanned. While there's a
- * marker, we keep adding to buffer. Otherwise, consume() resets
- * so we start filling at index 0 again.
+ /**
+ * A moving window buffer of the data being scanned. While there's a marker,
+ * we keep adding to buffer. Otherwise, {@link #consume consume()} resets so
+ * we start filling at index 0 again.
*/
protected Token[] tokens;
- /** How many tokens are actually in the buffer; this is not
- * the buffer size, that's tokens.length.
+ /**
+ * The number of tokens currently in {@link #tokens tokens}.
+ *
+ * This is not the buffer capacity, that's {@code tokens.length}.
*/
protected int n;
- /** 0..n-1 index into tokens of next token; tokens[p] is LT(1).
- * If p == n, we are out of buffered tokens.
+ /**
+ * 0..n-1 index into {@link #tokens tokens} of next token.
+ *
+ * The {@code LT(1)} token is {@code tokens[p]}. If {@code p == n}, we are
+ * out of buffered tokens.
*/
protected int p=0;
- /** Count up with mark() and down with release(). When we release()
- * and hit zero, reset buffer to beginning. Copy data[p]..data[n-1]
- * to data[0]..data[(n-1)-p].
+ /**
+ * Count up with {@link #mark mark()} and down with
+ * {@link #release release()}. When we {@code release()} the last mark,
+ * {@code numMarkers} reaches 0 and we reset the buffer. Copy
+ * {@code tokens[p]..tokens[n-1]} to {@code tokens[0]..tokens[(n-1)-p]}.
*/
protected int numMarkers = 0;
+ /**
+ * This is the {@code LT(-1)} token for the current position.
+ */
protected Token lastToken;
- /** Absolute token index. It's the index of the token about to be
- * read via LA(1). Goes from 0 to numtokens-1 in entire stream.
+ /**
+ * When {@code numMarkers > 0}, this is the {@code LT(-1)} token for the
+ * first token in {@link #tokens}. Otherwise, this is {@code null}.
*/
- protected int currentTokenIndex = 0; // simple counter to set token index in tokens
+ protected Token lastTokenBufferStart;
- /** Skip tokens on any channel but this one; this is how we skip whitespace... */
- // TODO: skip off-channel tokens!!!
- protected int channel = Token.DEFAULT_CHANNEL;
+ /**
+ * Absolute token index. It's the index of the token about to be read via
+ * {@code LT(1)}. Goes from 0 to the number of tokens in the entire stream,
+ * although the stream size is unknown before the end is reached.
+ *
+ * This value is used to set the token indexes if the stream provides tokens
+ * that implement {@link WritableToken}.
+ */
+ protected int currentTokenIndex = 0;
public UnbufferedTokenStream(TokenSource tokenSource) {
this(tokenSource, 256);
@@ -64,29 +110,32 @@ public class UnbufferedTokenStream implements TokenStream {
@Override
public Token LT(int i) {
- if ( i==-1 ) return lastToken; // special case
- sync(i);
- int index = p + i - 1;
- if ( index < 0 ) throw new IndexOutOfBoundsException("LT("+i+") gives negative index");
- if ( index > n ) {
- TokenFactory<?> factory = tokenSource.getTokenFactory();
- int cpos = tokenSource.getCharPositionInLine();
- // The character position for EOF is one beyond the position of
- // the previous token's last character
- Token eof = factory.create(tokenSource, Token.EOF, null, Token.DEFAULT_CHANNEL,
- index(), index()-1,
- tokenSource.getLine(), cpos);
- return eof;
+ if ( i==-1 ) {
+ return lastToken;
}
- return tokens[index];
+
+ sync(i);
+ int index = p + i - 1;
+ if ( index < 0 ) {
+ throw new IndexOutOfBoundsException("LT("+i+") gives negative index");
+ }
+
+ if ( index >= n ) {
+ assert n > 0 && tokens[n-1].getType() == Token.EOF;
+ return tokens[n-1];
+ }
+
+ return tokens[index];
}
@Override
- public int LA(int i) { return LT(i).getType(); }
+ public int LA(int i) {
+ return LT(i).getType();
+ }
@Override
public TokenSource getTokenSource() {
- return null;
+ return tokenSource;
}
@Override
@@ -106,80 +155,104 @@ public class UnbufferedTokenStream implements TokenStream {
@Override
public void consume() {
- // buf always has at least data[p==0] in this method due to ctor
- if ( p==0 ) lastToken = null; // we're at first token; no LA(-1)
- else lastToken = tokens[p]; // track last char for LT(-1)
+ if (LA(1) == Token.EOF) {
+ throw new IllegalStateException("cannot consume EOF");
+ }
+
+ // buf always has at least tokens[p==0] in this method due to ctor
+ lastToken = tokens[p]; // track last token for LT(-1)
// if we're at last token and no markers, opportunity to flush buffer
- if ( p == n-1 && numMarkers==0 ) { // can we release buffer?
-// System.out.println("consume: reset");
+ if ( p == n-1 && numMarkers==0 ) {
n = 0;
p = -1; // p++ will leave this at 0
+ lastTokenBufferStart = lastToken;
}
p++;
currentTokenIndex++;
-// System.out.println("consume p="+p+", numMarkers="+numMarkers+
-// ", currentCharIndex="+currentCharIndex+", n="+n);
sync(1);
}
- /** Make sure we have 'need' elements from current position p. Last valid
- * p index is tokens.size()-1. p+need-1 is the tokens index 'need' elements
- * ahead. If we need 1 element, (p+1-1)==p must be < tokens.size().
+ /** Make sure we have 'need' elements from current position {@link #p p}. Last valid
+ * {@code p} index is {@code tokens.length-1}. {@code p+need-1} is the tokens index 'need' elements
+ * ahead. If we need 1 element, {@code (p+1-1)==p} must be less than {@code tokens.length}.
*/
protected void sync(int want) {
int need = (p+want-1) - n + 1; // how many more elements we need?
- if ( need > 0 ) fill(need); // out of elements?
- }
-
- /** add n elements to buffer */
- public void fill(int n) {
- for (int i=1; i<=n; i++) {
- Token t = tokenSource.nextToken();
- if ( t instanceof WritableToken ) {
- ((WritableToken)t).setTokenIndex(currentTokenIndex);
- }
- add(t);
+ if ( need > 0 ) {
+ fill(need);
}
}
- protected void add(Token t) {
+ /**
+ * Add {@code n} elements to the buffer. Returns the number of tokens
+ * actually added to the buffer. If the return value is less than {@code n},
+ * then EOF was reached before {@code n} tokens could be added.
+ */
+ protected int fill(int n) {
+ for (int i=0; i<n; i++) {
+ if (this.n > 0 && tokens[this.n-1].getType() == Token.EOF) {
+ return i;
+ }
+
+ Token t = tokenSource.nextToken();
+ add(t);
+ }
+
+ return n;
+ }
+
+ protected void add(@NotNull Token t) {
if ( n>=tokens.length ) {
Token[] newtokens = new Token[tokens.length*2]; // resize
System.arraycopy(tokens, 0, newtokens, 0, tokens.length);
tokens = newtokens;
}
+
+ if (t instanceof WritableToken) {
+ ((WritableToken)t).setTokenIndex(getBufferStartIndex() + n);
+ }
+
tokens[n++] = t;
}
-
- /** Return a marker that we can release later. Marker happens to be
- * index into buffer (not index()).
+ /**
+ * Return a marker that we can release later.
+ *
+ * The specific marker value used for this class allows for some level of
+ * protection against misuse where {@code seek()} is called on a mark or
+ * {@code release()} is called in the wrong order.
*/
@Override
public int mark() {
- int m = p;
+ if (numMarkers == 0) {
+ lastTokenBufferStart = lastToken;
+ }
+
+ int mark = -numMarkers - 1;
numMarkers++;
- return m;
+ return mark;
}
@Override
public void release(int marker) {
- if ( numMarkers==0 ) {
- throw new IllegalStateException("release() called w/o prior matching mark()");
+ int expectedMark = -numMarkers;
+ if ( marker!=expectedMark ) {
+ throw new IllegalStateException("release() called with an invalid marker.");
}
-// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
-// System.out.println(stackTrace[2].getMethodName()+": release " + marker);
+
numMarkers--;
if ( numMarkers==0 ) { // can we release buffer?
- System.out.println("release: shift "+p+".."+(n-1)+" to 0: '"+
- Arrays.toString(Arrays.copyOfRange(tokens,p,n))+"'");
- // Copy data[p]..data[n-1] to data[0]..data[(n-1)-p], reset ptrs
- // p is last valid token; move nothing if p==n as we have no valid char
- System.arraycopy(tokens, p, tokens, 0, n - p); // shift n-p char from p to 0
- n = n - p;
- p = 0;
+ if (p > 0) {
+ // Copy tokens[p]..tokens[n-1] to tokens[0]..tokens[(n-1)-p], reset ptrs
+ // p is last valid token; move nothing if p==n as we have no valid char
+ System.arraycopy(tokens, p, tokens, 0, n - p); // shift n-p tokens from p to 0
+ n = n - p;
+ p = 0;
+ }
+
+ lastTokenBufferStart = lastToken;
}
}
@@ -190,13 +263,33 @@ public class UnbufferedTokenStream implements TokenStream {
@Override
public void seek(int index) { // seek to absolute index
+ if (index == currentTokenIndex) {
+ return;
+ }
+
+ if (index > currentTokenIndex) {
+ sync(index - currentTokenIndex);
+ index = Math.min(index, getBufferStartIndex() + n - 1);
+ }
+
int bufferStartIndex = getBufferStartIndex();
int i = index - bufferStartIndex;
- if ( i < 0 || i >= n ) {
+ if ( i < 0 ) {
+ throw new IllegalArgumentException("cannot seek to negative index " + index);
+ }
+ else if (i >= n) {
throw new UnsupportedOperationException("seek to index outside buffer: "+
index+" not in "+ bufferStartIndex +".."+(bufferStartIndex +n));
}
+
p = i;
+ currentTokenIndex = index;
+ if (p == 0) {
+ lastToken = lastTokenBufferStart;
+ }
+ else {
+ lastToken = tokens[p-1];
+ }
}
@Override
@@ -233,23 +326,7 @@ public class UnbufferedTokenStream implements TokenStream {
return buf.toString();
}
- /** For testing. What's in moving window into token stream from
- * current index, LT(1) or tokens[p], to end of buffer?
- */
- public List getRemainingBuffer() {
- if ( n==0 ) return null;
- return (List)Arrays.asList(Arrays.copyOfRange(tokens, p, n));
- }
-
- /** For testing. What's in moving window buffer into data stream.
- * From 0..p-1 have been consume.
- */
- public List getBuffer() {
- if ( n==0 ) return null;
- return (List)Arrays.asList(Arrays.copyOfRange(tokens, 0, n));
- }
-
- public int getBufferStartIndex() {
+ protected final int getBufferStartIndex() {
return currentTokenIndex - p;
}
}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
index b441674ad..e49d61aad 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
@@ -229,7 +229,7 @@ public class LexerATNSimulator extends ATNSimulator {
}
// if no edge, pop over to ATN interpreter, update DFA and return
- if ( s.edges == null || t >= s.edges.length || t <= CharStream.EOF ||
+ if ( s.edges == null || t >= s.edges.length || t <= IntStream.EOF ||
s.edges[t] == null )
{
ATN_failover++;
@@ -248,7 +248,7 @@ public class LexerATNSimulator extends ATNSimulator {
captureSimState(prevAccept, input, s);
// keep going unless we're at EOF; check if something else could match
// EOF never in DFA
- if ( t==CharStream.EOF ) break;
+ if ( t==IntStream.EOF ) break;
}
consume(input);
@@ -297,7 +297,7 @@ public class LexerATNSimulator extends ATNSimulator {
DFAState target = null;
ATNConfigSet reach = null;
if (s != null) {
- if ( s.edges != null && t < s.edges.length && t > CharStream.EOF ) {
+ if ( s.edges != null && t < s.edges.length && t > IntStream.EOF ) {
closure = s.configs;
target = s.edges[t];
if (target == ERROR) {
@@ -374,7 +374,7 @@ public class LexerATNSimulator extends ATNSimulator {
}
else {
// if no accept and EOF is first char, return EOF
- if ( t==CharStream.EOF && input.index()==startIndex ) {
+ if ( t==IntStream.EOF && input.index()==startIndex ) {
return Token.EOF;
}
@@ -503,7 +503,7 @@ public class LexerATNSimulator extends ATNSimulator {
case Transition.NOT_SET:
NotSetTransition nst = (NotSetTransition)trans;
- if (!nst.set.contains(t) && t!=CharStream.EOF) // ~set doesn't not match EOF
+ if (!nst.set.contains(t) && t!=IntStream.EOF) // ~set doesn't not match EOF
{
if ( debug ) {
System.out.format("match ~set %s\n", nst.set.toString(true));
@@ -515,7 +515,7 @@ public class LexerATNSimulator extends ATNSimulator {
return null;
case Transition.WILDCARD:
- if (t != CharStream.EOF) {
+ if (t != IntStream.EOF) {
return trans.target;
}
diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java
index 484013dcd..68407e3e8 100644
--- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java
+++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java
@@ -33,7 +33,7 @@ import org.antlr.runtime.CommonToken;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.parse.ANTLRParser;
-import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.IntStream;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.ActionTransition;
@@ -271,7 +271,7 @@ public class LexerATNFactory extends ParserATNFactory {
if ( node.getText().equals("EOF") ) {
ATNState left = newState(node);
ATNState right = newState(node);
- left.addTransition(new AtomTransition(right, CharStream.EOF));
+ left.addTransition(new AtomTransition(right, IntStream.EOF));
return new Handle(left, right);
}
return _ruleRef(node);
diff --git a/tool/test/org/antlr/v4/test/BaseTest.java b/tool/test/org/antlr/v4/test/BaseTest.java
index a7e541f83..a4bc26e8e 100644
--- a/tool/test/org/antlr/v4/test/BaseTest.java
+++ b/tool/test/org/antlr/v4/test/BaseTest.java
@@ -39,6 +39,7 @@ import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonToken;
import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.IntStream;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
@@ -232,7 +233,7 @@ public abstract class BaseTest {
tokenTypes.add(lg.typeToTokenList.get(ttype));
}
- if ( t==CharStream.EOF ) {
+ if ( t==IntStream.EOF ) {
hitEOF = true;
}
} while ( ttype!=Token.EOF );
diff --git a/tool/test/org/antlr/v4/test/TestUnbufferedCharStream.java b/tool/test/org/antlr/v4/test/TestUnbufferedCharStream.java
index d84ecd005..ed0943c8c 100644
--- a/tool/test/org/antlr/v4/test/TestUnbufferedCharStream.java
+++ b/tool/test/org/antlr/v4/test/TestUnbufferedCharStream.java
@@ -1,30 +1,31 @@
/*
- [The "BSD license"]
- Copyright (c) 2011 Terence Parr
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- 3. The name of the author may not be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * [The "BSD license"]
+ * Copyright (c) 2012 Terence Parr
+ * Copyright (c) 2012 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.test;
@@ -32,78 +33,201 @@ package org.antlr.v4.test;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonTokenFactory;
import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.IntStream;
import org.antlr.v4.runtime.UnbufferedCharStream;
+import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.interp.LexerInterpreter;
import org.junit.Test;
+import java.io.Reader;
import java.io.StringReader;
public class TestUnbufferedCharStream extends BaseTest {
@Test public void testNoChar() throws Exception {
- CharStream input = new UnbufferedCharStream(
- new StringReader("")
- );
- assertEquals(CharStream.EOF, input.LA(1));
+ CharStream input = createStream("");
+ assertEquals(IntStream.EOF, input.LA(1));
+ assertEquals(IntStream.EOF, input.LA(2));
+ }
+
+ /**
+ * The {@link IntStream} interface does not specify the behavior when the
+ * EOF symbol is consumed, but {@link UnbufferedCharStream} handles this
+ * particular case by throwing an {@link IllegalStateException}.
+ */
+ @Test(expected = IllegalStateException.class)
+ public void testConsumeEOF() throws Exception {
+ CharStream input = createStream("");
+ assertEquals(IntStream.EOF, input.LA(1));
input.consume();
- assertEquals(CharStream.EOF, input.LA(1));
input.consume();
- assertEquals(CharStream.EOF, input.LA(1));
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testNegativeSeek() {
+ CharStream input = createStream("");
+ input.seek(-1);
+ }
+
+ @Test
+ public void testSeekPastEOF() {
+ CharStream input = createStream("");
+ assertEquals(0, input.index());
+ input.seek(1);
+ assertEquals(0, input.index());
+ }
+
+ /**
+ * The {@link IntStream} interface does not specify the behavior when marks
+ * are not released in the reversed order they were created, but
+ * {@link UnbufferedCharStream} handles this case by throwing an
+ * {@link IllegalStateException}.
+ */
+ @Test(expected = IllegalStateException.class)
+ public void testMarkReleaseOutOfOrder() {
+ CharStream input = createStream("");
+ int m1 = input.mark();
+ int m2 = input.mark();
+ input.release(m1);
+ }
+
+ /**
+ * The {@link IntStream} interface does not specify the behavior when a mark
+ * is released twice, but {@link UnbufferedCharStream} handles this case by
+ * throwing an {@link IllegalStateException}.
+ */
+ @Test(expected = IllegalStateException.class)
+ public void testMarkReleasedTwice() {
+ CharStream input = createStream("");
+ int m1 = input.mark();
+ input.release(m1);
+ input.release(m1);
+ }
+
+ /**
+ * The {@link IntStream} interface does not specify the behavior when a mark
+ * is released twice, but {@link UnbufferedCharStream} handles this case by
+ * throwing an {@link IllegalStateException}.
+ */
+ @Test(expected = IllegalStateException.class)
+ public void testNestedMarkReleasedTwice() {
+ CharStream input = createStream("");
+ int m1 = input.mark();
+ int m2 = input.mark();
+ input.release(m2);
+ input.release(m2);
+ }
+
+ /**
+ * It is not valid to pass a mark to {@link IntStream#seek}, but
+ * {@link UnbufferedCharStream} creates marks in such a way that this
+ * invalid usage results in an {@link IllegalArgumentException}.
+ */
+ @Test(expected = IllegalArgumentException.class)
+ public void testMarkPassedToSeek() {
+ CharStream input = createStream("");
+ int m1 = input.mark();
+ input.seek(m1);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testSeekBeforeBufferStart() {
+ CharStream input = createStream("xyz");
+ input.consume();
+ int m1 = input.mark();
+ assertEquals(1, input.index());
+ input.consume();
+ input.seek(0);
+ }
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testGetTextBeforeBufferStart() {
+ CharStream input = createStream("xyz");
+ input.consume();
+ int m1 = input.mark();
+ assertEquals(1, input.index());
+ input.getText(new Interval(0, 1));
+ }
+
+ @Test
+ public void testGetTextInMarkedRange() {
+ CharStream input = createStream("xyz");
+ input.consume();
+ int m1 = input.mark();
+ assertEquals(1, input.index());
+ input.consume();
+ input.consume();
+ assertEquals("yz", input.getText(new Interval(1, 2)));
+ }
+
+ @Test
+ public void testLastChar() {
+ CharStream input = createStream("abcdef");
+
+ input.consume();
+ assertEquals('a', input.LA(-1));
+
+ int m1 = input.mark();
+ input.consume();
+ input.consume();
+ input.consume();
+ assertEquals('d', input.LA(-1));
+
+ input.seek(2);
+ assertEquals('b', input.LA(-1));
+
+ input.release(m1);
+ input.seek(3);
+ assertEquals('c', input.LA(-1));
+ // this special case is not required by the IntStream interface, but
+ // UnbufferedCharStream allows it so we have to make sure the resulting
+ // state is consistent
+ input.seek(2);
+ assertEquals('b', input.LA(-1));
}
@Test public void test1Char() throws Exception {
- UnbufferedCharStream input = new UnbufferedCharStream(
- new StringReader("x")
- );
+ TestingUnbufferedCharStream input = createStream("x");
assertEquals('x', input.LA(1));
input.consume();
- assertEquals(CharStream.EOF, input.LA(1));
+ assertEquals(IntStream.EOF, input.LA(1));
String r = input.getRemainingBuffer();
assertEquals("\uFFFF", r); // shouldn't include x
- assertEquals("x\uFFFF", input.getBuffer()); // whole buffer
+ assertEquals("\uFFFF", input.getBuffer()); // whole buffer
}
@Test public void test2Char() throws Exception {
- UnbufferedCharStream input = new UnbufferedCharStream(
- new StringReader("xy")
- );
+ TestingUnbufferedCharStream input = createStream("xy");
assertEquals('x', input.LA(1));
input.consume();
assertEquals('y', input.LA(1));
assertEquals("y", input.getRemainingBuffer()); // shouldn't include x
- assertEquals("xy", input.getBuffer());
+ assertEquals("y", input.getBuffer());
input.consume();
- assertEquals(CharStream.EOF, input.LA(1));
+ assertEquals(IntStream.EOF, input.LA(1));
+ assertEquals("\uFFFF", input.getBuffer());
}
@Test public void test2CharAhead() throws Exception {
- CharStream input = new UnbufferedCharStream(
- new StringReader("xy")
- );
+ CharStream input = createStream("xy");
assertEquals('x', input.LA(1));
assertEquals('y', input.LA(2));
- assertEquals(CharStream.EOF, input.LA(3));
+ assertEquals(IntStream.EOF, input.LA(3));
}
@Test public void testBufferExpand() throws Exception {
- UnbufferedCharStream input = new UnbufferedCharStream(
- new StringReader("01234"),
- 2 // buff size 2
- );
+ TestingUnbufferedCharStream input = createStream("01234", 2);
assertEquals('0', input.LA(1));
assertEquals('1', input.LA(2));
assertEquals('2', input.LA(3));
assertEquals('3', input.LA(4));
assertEquals('4', input.LA(5));
assertEquals("01234", input.getBuffer());
- assertEquals(CharStream.EOF, input.LA(6));
+ assertEquals(IntStream.EOF, input.LA(6));
}
@Test public void testBufferWrapSize1() throws Exception {
- CharStream input = new UnbufferedCharStream(
- new StringReader("01234"),
- 1 // buff size 1
- );
+ CharStream input = createStream("01234", 1);
assertEquals('0', input.LA(1));
input.consume();
assertEquals('1', input.LA(1));
@@ -114,14 +238,11 @@ public class TestUnbufferedCharStream extends BaseTest {
input.consume();
assertEquals('4', input.LA(1));
input.consume();
- assertEquals(CharStream.EOF, input.LA(1));
+ assertEquals(IntStream.EOF, input.LA(1));
}
@Test public void testBufferWrapSize2() throws Exception {
- CharStream input = new UnbufferedCharStream(
- new StringReader("01234"),
- 2 // buff size 2
- );
+ CharStream input = createStream("01234", 2);
assertEquals('0', input.LA(1));
input.consume();
assertEquals('1', input.LA(1));
@@ -132,54 +253,45 @@ public class TestUnbufferedCharStream extends BaseTest {
input.consume();
assertEquals('4', input.LA(1));
input.consume();
- assertEquals(CharStream.EOF, input.LA(1));
+ assertEquals(IntStream.EOF, input.LA(1));
}
@Test public void test1Mark() throws Exception {
- UnbufferedCharStream input = new UnbufferedCharStream(
- new StringReader("xyz")
- );
+ TestingUnbufferedCharStream input = createStream("xyz");
int m = input.mark();
assertEquals('x', input.LA(1));
assertEquals('y', input.LA(2));
assertEquals('z', input.LA(3));
input.release(m);
- assertEquals(CharStream.EOF, input.LA(4));
+ assertEquals(IntStream.EOF, input.LA(4));
assertEquals("xyz\uFFFF", input.getBuffer());
}
@Test public void test1MarkWithConsumesInSequence() throws Exception {
- UnbufferedCharStream input = new UnbufferedCharStream(
- new StringReader("xyz")
- );
+ TestingUnbufferedCharStream input = createStream("xyz");
int m = input.mark();
input.consume(); // x, moves to y
input.consume(); // y
input.consume(); // z, moves to EOF
- assertEquals(CharStream.EOF, input.LA(1));
+ assertEquals(IntStream.EOF, input.LA(1));
assertEquals("xyz\uFFFF", input.getBuffer());
input.release(m); // wipes buffer
assertEquals("\uFFFF", input.getBuffer());
}
@Test public void test2Mark() throws Exception {
- UnbufferedCharStream input = new UnbufferedCharStream(
- new StringReader("xyz"),
- 100
- );
+ TestingUnbufferedCharStream input = createStream("xyz", 100);
assertEquals('x', input.LA(1));
input.consume(); // reset buffer index (p) to 0
int m1 = input.mark();
- assertEquals(1, m1);
assertEquals('y', input.LA(1));
input.consume();
int m2 = input.mark();
- assertEquals(2, m2); // 2nd consume leaves p==2
- assertEquals("xyz", input.getBuffer());
+ assertEquals("yz", input.getBuffer());
input.release(m2); // drop to 1 marker
input.consume();
input.release(m1); // shifts remaining char to beginning
- assertEquals(CharStream.EOF, input.LA(1));
+ assertEquals(IntStream.EOF, input.LA(1));
assertEquals("\uFFFF", input.getBuffer());
}
@@ -195,9 +307,7 @@ public class TestUnbufferedCharStream extends BaseTest {
"WS : ' '+;\n");
// Tokens: 012345678901234567
// Input: x = 3 * 0 + 2 * 0;
- UnbufferedCharStream input = new UnbufferedCharStream(
- new StringReader("x = 302 * 91 + 20234234 * 0;")
- );
+ TestingUnbufferedCharStream input = createStream("x = 302 * 91 + 20234234 * 0;");
LexerInterpreter lexEngine = new LexerInterpreter(g);
// copy text into tokens from char stream
lexEngine.setTokenFactory(new CommonTokenFactory(true));
@@ -217,4 +327,40 @@ public class TestUnbufferedCharStream extends BaseTest {
" [@17,27:27=';',<3>,1:27], [@18,28:27='',<-1>,1:28]]";
assertEquals(expecting, tokens.getTokens().toString());
}
+
+ protected static TestingUnbufferedCharStream createStream(String text) {
+ return new TestingUnbufferedCharStream(new StringReader(text)); // no buffer size given: uses the single-argument constructor
+ }
+
+ protected static TestingUnbufferedCharStream createStream(String text, int bufferSize) {
+ return new TestingUnbufferedCharStream(new StringReader(text), bufferSize); // bufferSize is forwarded to the two-argument constructor
+ }
+
+ protected static class TestingUnbufferedCharStream extends UnbufferedCharStream {
+
+ public TestingUnbufferedCharStream(Reader input) {
+ super(input);
+ }
+
+ public TestingUnbufferedCharStream(Reader input, int bufferSize) {
+ super(input, bufferSize);
+ }
+
+ /** For testing. Returns the part of the moving window from the
+ * current index, LA(1) or data[p], to the end of the buffer (n).
+ */
+ public String getRemainingBuffer() {
+ if ( n==0 ) return "";
+ return new String(data,p,n-p);
+ }
+
+ /** For testing. Returns the whole moving window buffer, data[0..n-1].
+ * Elements 0..p-1 have been consumed.
+ */
+ public String getBuffer() {
+ if ( n==0 ) return "";
+ return new String(data,0,n);
+ }
+
+ }
}
diff --git a/tool/test/org/antlr/v4/test/TestUnbufferedTokenStream.java b/tool/test/org/antlr/v4/test/TestUnbufferedTokenStream.java
index c78c265bd..a384e472a 100644
--- a/tool/test/org/antlr/v4/test/TestUnbufferedTokenStream.java
+++ b/tool/test/org/antlr/v4/test/TestUnbufferedTokenStream.java
@@ -1,8 +1,39 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2012 Terence Parr
+ * Copyright (c) 2012 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
package org.antlr.v4.test;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.TokenSource;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.UnbufferedTokenStream;
import org.antlr.v4.tool.LexerGrammar;
@@ -10,6 +41,9 @@ import org.antlr.v4.tool.interp.LexerInterpreter;
import org.junit.Test;
import java.io.StringReader;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
public class TestUnbufferedTokenStream extends BaseTest {
@Test public void testLookahead() throws Exception {
@@ -56,7 +90,7 @@ public class TestUnbufferedTokenStream extends BaseTest {
);
LexerInterpreter lexEngine = new LexerInterpreter(g);
lexEngine.setInput(input);
- UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexEngine);
+ TestingUnbufferedTokenStream tokens = new TestingUnbufferedTokenStream(lexEngine);
assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString());
assertEquals("x", tokens.LT(1).getText());
@@ -94,7 +128,7 @@ public class TestUnbufferedTokenStream extends BaseTest {
);
LexerInterpreter lexEngine = new LexerInterpreter(g);
lexEngine.setInput(input);
- UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexEngine);
+ TestingUnbufferedTokenStream tokens = new TestingUnbufferedTokenStream(lexEngine);
int m = tokens.mark();
assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString());
@@ -130,7 +164,7 @@ public class TestUnbufferedTokenStream extends BaseTest {
);
LexerInterpreter lexEngine = new LexerInterpreter(g);
lexEngine.setInput(input);
- UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexEngine);
+ TestingUnbufferedTokenStream tokens = new TestingUnbufferedTokenStream(lexEngine);
int m = tokens.mark();
assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString());
@@ -158,4 +192,34 @@ public class TestUnbufferedTokenStream extends BaseTest {
tokens.getBuffer().toString());
tokens.release(m);
}
+
+ protected static class TestingUnbufferedTokenStream extends UnbufferedTokenStream {
+
+ public TestingUnbufferedTokenStream(TokenSource tokenSource) {
+ super(tokenSource);
+ }
+
+ /** For testing. Returns the part of the moving window from the
+ * current index, LT(1) or tokens[p], to the end of the buffer (n).
+ */
+ protected List<? extends Token> getRemainingBuffer() {
+ if ( n==0 ) {
+ return Collections.emptyList();
+ }
+
+ return Arrays.asList(tokens).subList(p, n);
+ }
+
+ /** For testing. Returns the whole moving window buffer, tokens[0..n-1].
+ * Elements 0..p-1 have been consumed.
+ */
+ protected List<? extends Token> getBuffer() {
+ if ( n==0 ) {
+ return Collections.emptyList();
+ }
+
+ return Arrays.asList(tokens).subList(0, n);
+ }
+
+ }
}