Merge branch 'unbuffered-streams' of git://github.com/sharwell/antlr4
This commit is contained in:
commit
0f29f50f2a
|
@ -159,13 +159,13 @@ public class ANTLRInputStream implements CharStream {
|
|||
if ( i<0 ) {
|
||||
i++; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
|
||||
if ( (p+i-1) < 0 ) {
|
||||
return CharStream.EOF; // invalid; no char before first char
|
||||
return IntStream.EOF; // invalid; no char before first char
|
||||
}
|
||||
}
|
||||
|
||||
if ( (p+i-1) >= n ) {
|
||||
//System.out.println("char LA("+i+")=EOF; p="+p);
|
||||
return CharStream.EOF;
|
||||
return IntStream.EOF;
|
||||
}
|
||||
//System.out.println("char LA("+i+")="+(char)data[p+i-1]+"; p="+p);
|
||||
//System.out.println("LA("+i+"); p="+p+" n="+n+" data.length="+data.length);
|
||||
|
|
|
@ -1,46 +1,73 @@
|
|||
/*
|
||||
[The "BSD license"]
|
||||
Copyright (c) 2011 Terence Parr
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* [The "BSD license"]
|
||||
* Copyright (c) 2012 Terence Parr
|
||||
* Copyright (c) 2012 Sam Harwell
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.antlr.v4.runtime;
|
||||
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
|
||||
/** A source of characters for an ANTLR lexer */
|
||||
/** A source of characters for an ANTLR lexer. */
|
||||
public interface CharStream extends IntStream {
|
||||
public static final int EOF = -1;
|
||||
public static final int MIN_CHAR = Character.MIN_VALUE;
|
||||
public static final int MAX_CHAR = Character.MAX_VALUE-1; // FFFE is max
|
||||
|
||||
/** This is primaril a useful interface for action code.
|
||||
* Just make sure actions don't use this on streams that don't support it.
|
||||
* For unbuffered streams, you can't use this except in case
|
||||
* where interval is in current buffer window. Lexer guarantees
|
||||
* text of current token at emit() time will be available.
|
||||
/**
|
||||
* The minimum allowed value for a character in a {@code CharStream}.
|
||||
*/
|
||||
public String getText(Interval interval);
|
||||
public static final int MIN_CHAR = Character.MIN_VALUE;
|
||||
|
||||
/**
|
||||
* The maximum allowed value for a character in a {@code CharStream}.
|
||||
* <p/>
|
||||
* This value is {@code Character.MAX_VALUE - 1}, which reserves the value
|
||||
* {@code Character.MAX_VALUE} for special use within an implementing class.
|
||||
* For some implementations, the data buffers required for supporting the
|
||||
* marked ranges of {@link IntStream} are stored as {@code char[]} instead
|
||||
* of {@code int[]}, with {@code Character.MAX_VALUE} being used instead of
|
||||
* {@code -1} to mark the end of the stream internally.
|
||||
*/
|
||||
public static final int MAX_CHAR = Character.MAX_VALUE-1;
|
||||
|
||||
/**
|
||||
* This method returns the text for a range of characters within this input
|
||||
* stream. This method is guaranteed to not throw an exception if the
|
||||
* specified {@code interval} lies entirely within a marked range. For more
|
||||
* information about marked ranges, see {@link IntStream#mark}.
|
||||
*
|
||||
* @param interval an interval within the stream
|
||||
* @return the text of the specified interval
|
||||
*
|
||||
* @throws NullPointerException if {@code interval} is {@code null}
|
||||
* @throws IllegalArgumentException if {@code interval.a < 0}, or if
|
||||
* {@code interval.b < interval.a - 1}, or if {@code interval.b} lies at or
|
||||
* past the end of the stream
|
||||
* @throws UnsupportedOperationException if the stream does not support
|
||||
* getting the text of the specified interval
|
||||
*/
|
||||
@NotNull
|
||||
public String getText(@NotNull Interval interval);
|
||||
}
|
||||
|
|
|
@ -1,100 +1,242 @@
|
|||
/*
|
||||
[The "BSD license"]
|
||||
Copyright (c) 2011 Terence Parr
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* [The "BSD license"]
|
||||
* Copyright (c) 2012 Terence Parr
|
||||
* Copyright (c) 2012 Sam Harwell
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.antlr.v4.runtime;
|
||||
|
||||
/** A simple stream of integers used when all I care about is the char
|
||||
* or token type sequence (such as interpretation).
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
|
||||
/**
|
||||
* A simple stream of symbols whose values are represented as integers. This
|
||||
* interface provides <em>marked ranges</em> with support for a minimum level
|
||||
* of buffering necessary to implement arbitrary lookahead during prediction.
|
||||
* For more information on marked ranges, see {@link #mark}.
|
||||
* <p/>
|
||||
* <strong>Initializing Methods:</strong> Some methods in this interface have
|
||||
* unspecified behavior if no call to an initializing method has occurred after
|
||||
* the stream was constructed. The following is a list of initializing methods:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link #LA}</li>
|
||||
* <li>{@link #consume}</li>
|
||||
* <li>{@link #size}</li>
|
||||
* </ul>
|
||||
*/
|
||||
public interface IntStream {
|
||||
/**
|
||||
* The value returned by {@link #LA LA()} when the end of the stream is
|
||||
* reached.
|
||||
*/
|
||||
public static final int EOF = -1;
|
||||
|
||||
/**
|
||||
* The value returned by {@link #getSourceName} when the actual name of the
|
||||
* underlying source is not known.
|
||||
*/
|
||||
public static final String UNKNOWN_SOURCE_NAME = "<unknown>";
|
||||
|
||||
/**
|
||||
* Consumes the current symbol in the stream. This method has the following
|
||||
* effects:
|
||||
*
|
||||
* <ul>
|
||||
* <li><strong>Forward movement:</strong> The value of {@link #index index()}
|
||||
* before calling this method is less than the value of {@code index()}
|
||||
* after calling this method.</li>
|
||||
* <li><strong>Ordered lookahead:</strong> The value of {@code LA(1)} before
|
||||
* calling this method becomes the value of {@code LA(-1)} after calling
|
||||
* this method.</li>
|
||||
* </ul>
|
||||
*
|
||||
* Note that calling this method does not guarantee that {@code index()} is
|
||||
* incremented by exactly 1, as that would preclude the ability to implement
|
||||
* filtering streams (e.g. {@link CommonTokenStream} which distinguishes
|
||||
* between "on-channel" and "off-channel" tokens).
|
||||
*
|
||||
* @throws IllegalStateException if an attempt is made to consume the
|
||||
* end of the stream (i.e. if {@code LA(1)==}{@link #EOF EOF} before calling
|
||||
* {@code consume}).
|
||||
*/
|
||||
void consume();
|
||||
|
||||
/** Get int at current input pointer + i ahead where i=1 is next int.
|
||||
* Negative indexes are allowed. LA(-1) is previous token (token
|
||||
* just matched). LA(-i) where i is before first token should
|
||||
* yield -1, invalid char / EOF.
|
||||
/**
|
||||
* Gets the value of the symbol at offset {@code i} from the current
|
||||
* position. When {@code i==1}, this method returns the value of the current
|
||||
* symbol in the stream (which is the next symbol to be consumed). When
|
||||
* {@code i==-1}, this method returns the value of the previously read
|
||||
* symbol in the stream. It is not valid to call this method with
|
||||
* {@code i==0}, but the specific behavior is unspecified because this
|
||||
* method is frequently called from performance-critical code.
|
||||
* <p/>
|
||||
* This method is guaranteed to succeed if any of the following are true:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code i>0}</li>
|
||||
* <li>{@code i==-1} and {@link #index index()} returns a value greater
|
||||
* than the value of {@code index()} after the stream was constructed
|
||||
* and {@code LA(1)} was called in that order. Specifying the current
|
||||
* {@code index()} relative to the index after the stream was created
|
||||
* allows for filtering implementations that do not return every symbol
|
||||
* from the underlying source. Specifying the call to {@code LA(1)}
|
||||
* allows for lazily initialized streams.</li>
|
||||
* <li>{@code LA(i)} refers to a symbol consumed within a marked region
|
||||
* that has not yet been released.</li>
|
||||
* </ul>
|
||||
*
|
||||
* If {@code i} represents a position at or beyond the end of the stream,
|
||||
* this method returns {@link #EOF}.
|
||||
* <p/>
|
||||
* The return value is unspecified if {@code i<0} and fewer than {@code -i}
|
||||
* calls to {@link #consume consume()} have occurred from the beginning of
|
||||
* the stream before calling this method.
|
||||
*
|
||||
* @throws UnsupportedOperationException if the stream does not support
|
||||
* retrieving the value of the specified symbol
|
||||
*/
|
||||
int LA(int i);
|
||||
|
||||
/** Tell the stream to start buffering if it hasn't already. Return
|
||||
* a marker, usually a function of current input position, index().
|
||||
* Calling release(mark()) should not affect the input cursor.
|
||||
* Can seek to any index between where we were when mark() was called
|
||||
* and current index() until we release this marker. No mark can appear
|
||||
* at an index before the first mark.
|
||||
/**
|
||||
* A mark provides a guarantee that {@link #seek seek()} operations will be
|
||||
* valid over a "marked range" extending from the index where {@code mark()}
|
||||
* was called to the current {@link #index index()}. This allows the use of
|
||||
* streaming input sources by specifying the minimum buffering requirements
|
||||
* to support arbitrary lookahead during prediction.
|
||||
* <p/>
|
||||
* The returned mark is an opaque handle (type {@code int}) which is passed
|
||||
* to {@link #release release()} when the guarantees provided by the marked
|
||||
* range are no longer necessary. When calls to
|
||||
* {@code mark()}/{@code release()} are nested, the marks must be released
|
||||
* in reverse order of which they were obtained. Since marked regions are
|
||||
* used during performance-critical sections of prediction, the specific
|
||||
* behavior of invalid usage is unspecified (i.e. a mark is not released, or
|
||||
* a mark is released twice, or marks are not released in reverse order from
|
||||
* which they were created).
|
||||
* <p/>
|
||||
* The behavior of this method is unspecified if no call to an
|
||||
* {@link IntStream initializing method} has occurred after this stream was
|
||||
* constructed.
|
||||
* <p/>
|
||||
* This method does not change the current position in the input stream.
|
||||
* <p/>
|
||||
* The following example shows the use of {@link #mark mark()},
|
||||
* {@link #release release(mark)}, {@link #index index()}, and
|
||||
* {@link #seek seek(index)} as part of an operation to safely work within a
|
||||
* marked region, then restore the stream position to its original value and
|
||||
* release the mark.
|
||||
* <pre>
|
||||
* IntStream stream = ...;
|
||||
* int index = -1;
|
||||
* int mark = stream.mark();
|
||||
* try {
|
||||
* index = stream.index();
|
||||
* // perform work here...
|
||||
* } finally {
|
||||
* if (index != -1) {
|
||||
* stream.seek(index);
|
||||
* }
|
||||
* stream.release(mark);
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* A resource leak may occur if the value returned from a call to
|
||||
* mark() is not passed to release() afterwards. When calls to mark()
|
||||
* are nested, release() must be called in reverse order of the calls
|
||||
* to mark(), otherwise the behavior is unspecified.
|
||||
*
|
||||
* @return An opaque marker which should be passed to release
|
||||
* when the range of symbols from where the marker was dropped
|
||||
* to the current input symbol is no longer required.
|
||||
*/
|
||||
* @return An opaque marker which should be passed to
|
||||
* {@link #release release()} when the marked range is no longer required.
|
||||
*/
|
||||
int mark();
|
||||
|
||||
/** Release requirement that stream holds tokens from marked location
|
||||
* to current index(). Must release in reverse order (like stack)
|
||||
* of mark() otherwise undefined behavior.
|
||||
/**
|
||||
* This method releases a marked range created by a call to
|
||||
* {@link #mark mark()}. Calls to {@code release()} must appear in the
|
||||
* reverse order of the corresponding calls to {@code mark()}. If a mark is
|
||||
* released twice, or if marks are not released in reverse order of the
|
||||
* corresponding calls to {@code mark()}, the behavior is unspecified.
|
||||
* <p/>
|
||||
* For more information and an example, see {@link #mark}.
|
||||
*
|
||||
* @param marker A marker returned by a call to {@code mark()}.
|
||||
* @see #mark
|
||||
*/
|
||||
void release(int marker);
|
||||
|
||||
/** Return the current input symbol index 0..n where n indicates the
|
||||
* last symbol has been read. The index is the symbol about to be
|
||||
* read not the most recently read symbol.
|
||||
*/
|
||||
/**
|
||||
* Return the index into the stream of the input symbol referred to by
|
||||
* {@code LA(1)}.
|
||||
* <p/>
|
||||
* The behavior of this method is unspecified if no call to an
|
||||
* {@link IntStream initializing method} has occurred after this stream was
|
||||
* constructed.
|
||||
*/
|
||||
int index();
|
||||
|
||||
/** Set the input cursor to the position indicated by index. This is
|
||||
* normally used to rewind the input stream but can move forward as well.
|
||||
* It's up to the stream implementation to make sure that symbols are
|
||||
* buffered as necessary to make seek land on a valid symbol.
|
||||
* Or, they should avoid moving the input cursor.
|
||||
/**
|
||||
* Set the input cursor to the position indicated by {@code index}. If the
|
||||
* specified index lies past the end of the stream, the operation behaves as
|
||||
* though {@code index} was the index of the EOF symbol. After this method
|
||||
* returns without throwing an exception, then at least one of the following
|
||||
* will be true.
|
||||
*
|
||||
* The index is 0..n-1. A seek to position i means that LA(1) will
|
||||
* return the ith symbol. So, seeking to 0 means LA(1) will return the
|
||||
* first element in the stream.
|
||||
*
|
||||
* For unbuffered streams, index i might not be in buffer. That throws
|
||||
* index exception.
|
||||
* <ul>
|
||||
* <li>{@link #index index()} will return the index of the first symbol
|
||||
* appearing at or after the specified {@code index}. Specifically,
|
||||
* implementations which filter their sources should automatically
|
||||
* adjust {@code index} forward the minimum amount required for the
|
||||
* operation to target a non-ignored symbol.</li>
|
||||
* <li>{@code LA(1)} returns {@link #EOF}</li>
|
||||
* </ul>
|
||||
*
|
||||
* This operation is guaranteed to not throw an exception if {@code index}
|
||||
* lies within a marked region. For more information on marked regions, see
|
||||
* {@link #mark}. The behavior of this method is unspecified if no call to
|
||||
* an {@link IntStream initializing method} has occurred after this stream
|
||||
* was constructed.
|
||||
*
|
||||
* @param index The absolute index to seek to.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code index} is less than 0
|
||||
* @throws UnsupportedOperationException if the stream does not support
|
||||
* seeking to the specified index
|
||||
*/
|
||||
void seek(int index);
|
||||
|
||||
/** Only makes sense for streams that buffer everything up probably, but
|
||||
* might be useful to display the entire stream or for testing. This
|
||||
* value includes a single EOF.
|
||||
/**
|
||||
* Returns the total number of symbols in the stream, including a single EOF
|
||||
* symbol.
|
||||
*
|
||||
* @throws UnsupportedOperationException if the size of the stream is
|
||||
* unknown.
|
||||
*/
|
||||
int size();
|
||||
|
||||
/** Where are you getting symbols from? Normally, implementations will
|
||||
* pass the buck all the way to the lexer who can ask its input stream
|
||||
* for the file name or whatever.
|
||||
/**
|
||||
* Gets the name of the underlying symbol source. This method returns a
|
||||
* non-null, non-empty string. If such a name is not known, this method
|
||||
* returns {@link #UNKNOWN_SOURCE_NAME}.
|
||||
*/
|
||||
@NotNull
|
||||
public String getSourceName();
|
||||
}
|
||||
|
|
|
@ -165,7 +165,7 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
|
|||
recover(e);
|
||||
ttype = SKIP;
|
||||
}
|
||||
if ( _input.LA(1)==CharStream.EOF ) {
|
||||
if ( _input.LA(1)==IntStream.EOF ) {
|
||||
_hitEOF = true;
|
||||
}
|
||||
if ( _type == Token.INVALID_TYPE ) _type = ttype;
|
||||
|
|
|
@ -43,7 +43,7 @@ public interface Token {
|
|||
|
||||
public static final int MIN_USER_TOKEN_TYPE = 1;
|
||||
|
||||
public static final int EOF = CharStream.EOF;
|
||||
public static final int EOF = IntStream.EOF;
|
||||
|
||||
/** All tokens go to the parser (unless skip() is called in that rule)
|
||||
* on a particular "channel". The parser tunes to a particular channel
|
||||
|
|
|
@ -1,80 +1,172 @@
|
|||
/*
|
||||
[The "BSD license"]
|
||||
Copyright (c) 2011 Terence Parr
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* [The "BSD license"]
|
||||
* Copyright (c) 2012 Terence Parr
|
||||
* Copyright (c) 2012 Sam Harwell
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.antlr.v4.runtime;
|
||||
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
|
||||
/** A stream of tokens accessing tokens from a TokenSource */
|
||||
/**
|
||||
* An {@link IntStream} whose symbols are {@link Token} instances.
|
||||
*/
|
||||
public interface TokenStream extends IntStream {
|
||||
/** Get Token at current input pointer + i ahead where i=1 is next Token.
|
||||
* i<0 indicates tokens in the past. So -1 is previous token and -2 is
|
||||
* two tokens ago. LT(0) is undefined. For i>=n, return eof token.
|
||||
* Return null for LT(0) and any index that results in an absolute address
|
||||
* that is negative.
|
||||
* TODO (Sam): Throw exception for invalid k?
|
||||
/**
|
||||
* Get the {@link Token} instance associated with the value returned by
|
||||
* {@link #LA LA(k)}. This method has the same pre- and post-conditions as
|
||||
* {@link IntStream#LA}. In addition, when the preconditions of this method
|
||||
* are met, the return value is non-null and the value of
|
||||
* {@code LT(k).getType()==LA(k)}.
|
||||
*
|
||||
* @see IntStream#LA
|
||||
*/
|
||||
public Token LT(int k);
|
||||
@NotNull
|
||||
public Token LT(int k);
|
||||
|
||||
/** Get a token at an absolute index i; 0..n-1. This is really only
|
||||
* needed for profiling and debugging and token stream rewriting.
|
||||
* If you don't want to buffer up tokens, then this method makes no
|
||||
* sense for you. Naturally you can't use the rewrite stream feature.
|
||||
* I believe DebugTokenStream can easily be altered to not use
|
||||
* this method, removing the dependency.
|
||||
/**
|
||||
* Gets the {@link Token} at the specified {@code index} in the stream. When
|
||||
* the preconditions of this method are met, the return value is non-null.
|
||||
* <p/>
|
||||
* The preconditions for this method are the same as the preconditions of
|
||||
* {@link IntStream#seek}. If the behavior of {@code seek(index)} is
|
||||
* unspecified for the current state and given {@code index}, then the
|
||||
* behavior of this method is also unspecified.
|
||||
* <p/>
|
||||
* The symbol referred to by {@code index} differs from {@code seek()} only
|
||||
* in the case of filtering streams where {@code index} lies before the end
|
||||
* of the stream. Unlike {@code seek()}, this method does not adjust
|
||||
* {@code index} to point to a non-ignored symbol.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code index} is less than 0
|
||||
* @throws UnsupportedOperationException if the stream does not support
|
||||
* retrieving the token at the specified index
|
||||
*/
|
||||
public Token get(int i);
|
||||
@NotNull
|
||||
public Token get(int index);
|
||||
|
||||
/** Where is this stream pulling tokens from? This is not the name, but
|
||||
* the object that provides Token objects.
|
||||
/**
|
||||
* Gets the underlying {@link TokenSource} which provides tokens for this
|
||||
* stream.
|
||||
*/
|
||||
@NotNull
|
||||
public TokenSource getTokenSource();
|
||||
|
||||
/** Return the text of all tokens from within the interval.
|
||||
* If the stream does not buffer all the tokens then it must
|
||||
* throw UnsupportedOperationException;
|
||||
* Users should not access $ruleLabel.text in an action of course in
|
||||
* that case.
|
||||
* @param interval
|
||||
/**
|
||||
* Return the text of all tokens within the specified {@code interval}. This
|
||||
* method behaves like the following code (including potential exceptions
|
||||
* for violating preconditions of {@link #get}), but may be optimized by the
|
||||
* specific implementation.
|
||||
*
|
||||
* <pre>
|
||||
* TokenStream stream = ...;
|
||||
* String text = "";
|
||||
* for (int i = interval.a; i <= interval.b; i++) {
|
||||
* text += stream.get(i).getText();
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* @param interval The interval of tokens within this stream to get text
|
||||
* for.
|
||||
* @return The text of all tokens within the specified interval in this
|
||||
* stream.
|
||||
*
|
||||
* @throws NullPointerException if {@code interval} is {@code null}
|
||||
*/
|
||||
public String getText(Interval interval);
|
||||
@NotNull
|
||||
public String getText(@NotNull Interval interval);
|
||||
|
||||
/**
|
||||
* Return the text of all tokens in the stream. This method behaves like the
|
||||
* following code, including potential exceptions from the calls to
|
||||
* {@link IntStream#size} and {@link #getText(Interval)}, but may be
|
||||
* optimized by the specific implementation.
|
||||
*
|
||||
* <pre>
|
||||
* TokenStream stream = ...;
|
||||
* String text = stream.getText(new Interval(0, stream.size()));
|
||||
* </pre>
|
||||
*
|
||||
* @return The text of all tokens in the stream.
|
||||
*/
|
||||
@NotNull
|
||||
public String getText();
|
||||
|
||||
public String getText(RuleContext ctx);
|
||||
|
||||
/** Because the user is not required to use a token with an index stored
|
||||
* in it, we must provide a means for two token objects themselves to
|
||||
* indicate the start/end location. Most often this will just delegate
|
||||
* to the other getText(Interval).
|
||||
* If the stream does not buffer all the tokens then it must
|
||||
* throw UnsupportedOperationException;
|
||||
/**
|
||||
* Return the text of all tokens in the source interval of the specified
|
||||
* context. This method behaves like the following code, including potential
|
||||
* exceptions from the call to {@link #getText(Interval)}, but may be
|
||||
* optimized by the specific implementation.
|
||||
* <p/>
|
||||
* If {@code ctx.getSourceInterval()} does not return a valid interval of
|
||||
* tokens provided by this stream, the behavior is unspecified.
|
||||
*
|
||||
* <pre>
|
||||
* TokenStream stream = ...;
|
||||
* String text = stream.getText(ctx.getSourceInterval());
|
||||
* </pre>
|
||||
*
|
||||
* @param ctx The context providing the source interval of tokens to get
|
||||
* text for.
|
||||
* @return The text of all tokens within the source interval of {@code ctx}.
|
||||
*/
|
||||
public String getText(Token start, Token stop);
|
||||
@NotNull
|
||||
public String getText(@NotNull RuleContext ctx);
|
||||
|
||||
/**
|
||||
* Return the text of all tokens in this stream between {@code start} and
|
||||
* {@code stop} (inclusive).
|
||||
* <p/>
|
||||
* If the specified {@code start} or {@code stop} token was not provided by
|
||||
* this stream, or if the {@code stop} occurred before the {@code start}
|
||||
* token, the behavior is unspecified.
|
||||
* <p/>
|
||||
* For streams which ensure that the {@link Token#getTokenIndex} method is
|
||||
* accurate for all of its provided tokens, this method behaves like the
|
||||
* following code. Other streams may implement this method in other ways
|
||||
* provided the behavior is consistent with this at a high level.
|
||||
*
|
||||
* <pre>
|
||||
* TokenStream stream = ...;
|
||||
* String text = "";
|
||||
* for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) {
|
||||
* text += stream.get(i).getText();
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* @param start The first token in the interval to get text for.
|
||||
* @param stop The last token in the interval to get text for (inclusive).
|
||||
* @return The text of all tokens lying between the specified {@code start}
|
||||
* and {@code stop} tokens.
|
||||
*
|
||||
* @throws UnsupportedOperationException if this stream does not support
|
||||
* this method for the specified tokens
|
||||
*/
|
||||
@NotNull
|
||||
public String getText(@NotNull Token start, @NotNull Token stop);
|
||||
}
|
||||
|
|
|
@ -42,38 +42,58 @@ import java.io.Reader;
|
|||
* that it doesn't buffer all data, not that it's on-demand loading of char.
|
||||
*/
|
||||
public class UnbufferedCharStream implements CharStream {
|
||||
/** A moving window buffer of the data being scanned. While there's a
|
||||
* marker, we keep adding to buffer. Otherwise, consume() resets
|
||||
* so we start filling at index 0 again.
|
||||
/**
|
||||
* A moving window buffer of the data being scanned. While there's a marker,
|
||||
* we keep adding to buffer. Otherwise, {@link #consume consume()} resets so
|
||||
* we start filling at index 0 again.
|
||||
*/
|
||||
protected char[] data;
|
||||
|
||||
/** How many characters are actually in the buffer; this is not
|
||||
the buffer size, that's data.length.
|
||||
*/
|
||||
/**
|
||||
* The number of characters currently in {@link #data data}.
|
||||
* <p/>
|
||||
* This is not the buffer capacity, that's {@code data.length}.
|
||||
*/
|
||||
protected int n;
|
||||
|
||||
/** 0..n-1 index into data of next char; data[p] is LA(1).
|
||||
* If p == n, we are out of buffered char.
|
||||
/**
|
||||
* 0..n-1 index into {@link #data data} of next character.
|
||||
* <p/>
|
||||
* The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are
|
||||
* out of buffered characters.
|
||||
*/
|
||||
protected int p=0;
|
||||
|
||||
/** Count up with mark() and down with release(). When we release()
|
||||
* and hit zero, reset buffer to beginning. Copy data[p]..data[n-1]
|
||||
* to data[0]..data[(n-1)-p].
|
||||
/**
|
||||
* Count up with {@link #mark mark()} and down with
|
||||
* {@link #release release()}. When we {@code release()} the last mark,
|
||||
* {@code numMarkers} reaches 0 and we reset the buffer. Copy
|
||||
* {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}.
|
||||
*/
|
||||
protected int numMarkers = 0;
|
||||
|
||||
/**
|
||||
* This is the {@code LA(-1)} character for the current position.
|
||||
*/
|
||||
protected int lastChar = -1;
|
||||
|
||||
/** Absolute char index. It's the index of the char about to be
|
||||
* read via LA(1). Goes from 0 to numchar-1 in entire stream.
|
||||
/**
|
||||
* When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the
|
||||
* first character in {@link #data data}. Otherwise, this is unspecified.
|
||||
*/
|
||||
protected int lastCharBufferStart;
|
||||
|
||||
/**
|
||||
* Absolute character index. It's the index of the character about to be
|
||||
* read via {@code LA(1)}. Goes from 0 to the number of characters in the
|
||||
* entire stream, although the stream size is unknown before the end is
|
||||
* reached.
|
||||
*/
|
||||
protected int currentCharIndex = 0;
|
||||
|
||||
protected Reader input;
|
||||
|
||||
/** What is name or source of this char stream? */
|
||||
/** The name or source of this char stream. */
|
||||
public String name;
|
||||
|
||||
/** Useful for subclasses that pull char from other than this.input. */
|
||||
|
@ -109,39 +129,64 @@ public class UnbufferedCharStream implements CharStream {
|
|||
|
||||
@Override
|
||||
public void consume() {
|
||||
if (LA(1) == CharStream.EOF) {
|
||||
throw new IllegalStateException("cannot consume EOF");
|
||||
}
|
||||
|
||||
// buf always has at least data[p==0] in this method due to ctor
|
||||
if ( p==0 ) lastChar = -1; // we're at first char; no LA(-1)
|
||||
else lastChar = data[p]; // track last char for LA(-1)
|
||||
lastChar = data[p]; // track last char for LA(-1)
|
||||
|
||||
if (p == n-1 && numMarkers==0) {
|
||||
n = 0;
|
||||
p = -1; // p++ will leave this at 0
|
||||
lastCharBufferStart = lastChar;
|
||||
}
|
||||
|
||||
p++;
|
||||
currentCharIndex++;
|
||||
// System.out.println("consume p="+p+", numMarkers="+numMarkers+
|
||||
// ", currentCharIndex="+currentCharIndex+", n="+n);
|
||||
sync(1);
|
||||
}
|
||||
|
||||
/** Make sure we have 'need' elements from current position p. Last valid
|
||||
* p index is data.size()-1. p+need-1 is the data index 'need' elements
|
||||
* ahead. If we need 1 element, (p+1-1)==p must be < data.size().
|
||||
/**
|
||||
* Make sure we have 'need' elements from current position {@link #p p}.
|
||||
* Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is
|
||||
* the char index 'need' elements ahead. If we need 1 element,
|
||||
* {@code (p+1-1)==p} must be less than {@code data.length}.
|
||||
*/
|
||||
protected void sync(int want) {
|
||||
int need = (p+want-1) - n + 1; // how many more elements we need?
|
||||
if ( need > 0 ) fill(need); // out of elements?
|
||||
}
|
||||
|
||||
/** add n elements to buffer */
|
||||
public void fill(int n) {
|
||||
for (int i=1; i<=n; i++) {
|
||||
try {
|
||||
int c = nextChar();
|
||||
add(c);
|
||||
}
|
||||
catch (IOException ioe) {
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
if ( need > 0 ) {
|
||||
fill(need);
|
||||
}
|
||||
}
|
||||
|
||||
/** Override to provide different source of characters than this.input */
|
||||
/**
|
||||
* Add {@code n} characters to the buffer. Returns the number of characters
|
||||
* actually added to the buffer. If the return value is less than {@code n},
|
||||
* then EOF was reached before {@code n} characters could be added.
|
||||
*/
|
||||
protected int fill(int n) {
|
||||
for (int i=0; i<n; i++) {
|
||||
if (this.n > 0 && data[this.n - 1] == CharStream.EOF) {
|
||||
return i;
|
||||
}
|
||||
|
||||
try {
|
||||
int c = nextChar();
|
||||
add(c);
|
||||
}
|
||||
catch (IOException ioe) {
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Override to provide different source of characters than
|
||||
* {@link #input input}.
|
||||
*/
|
||||
protected int nextChar() throws IOException {
|
||||
return input.read();
|
||||
}
|
||||
|
@ -161,22 +206,28 @@ public class UnbufferedCharStream implements CharStream {
|
|||
sync(i);
|
||||
int index = p + i - 1;
|
||||
if ( index < 0 ) throw new IndexOutOfBoundsException();
|
||||
if ( index > n ) return CharStream.EOF;
|
||||
if ( index > n ) return IntStream.EOF;
|
||||
int c = data[index];
|
||||
if ( c==(char)CharStream.EOF ) return CharStream.EOF;
|
||||
if ( c==(char)IntStream.EOF ) return IntStream.EOF;
|
||||
return c;
|
||||
}
|
||||
|
||||
/** Return a marker that we can release later. Marker happens to be
|
||||
* index into buffer (not index()).
|
||||
*/
|
||||
/**
|
||||
* Return a marker that we can release later.
|
||||
* <p/>
|
||||
* The specific marker value used for this class allows for some level of
|
||||
* protection against misuse where {@code seek()} is called on a mark or
|
||||
* {@code release()} is called in the wrong order.
|
||||
*/
|
||||
@Override
|
||||
public int mark() {
|
||||
int m = p;
|
||||
if (numMarkers == 0) {
|
||||
lastCharBufferStart = lastChar;
|
||||
}
|
||||
|
||||
int mark = -numMarkers - 1;
|
||||
numMarkers++;
|
||||
// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
|
||||
// System.out.println(stackTrace[2].getMethodName()+": mark " + m);
|
||||
return m;
|
||||
return mark;
|
||||
}
|
||||
|
||||
/** Decrement number of markers, resetting buffer if we hit 0.
|
||||
|
@ -184,19 +235,19 @@ public class UnbufferedCharStream implements CharStream {
|
|||
*/
|
||||
@Override
|
||||
public void release(int marker) {
|
||||
if ( numMarkers==0 ) {
|
||||
throw new IllegalStateException("release() called w/o prior matching mark()");
|
||||
int expectedMark = -numMarkers;
|
||||
if ( marker!=expectedMark ) {
|
||||
throw new IllegalStateException("release() called with an invalid marker.");
|
||||
}
|
||||
// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
|
||||
// System.out.println(stackTrace[2].getMethodName()+": release " + marker);
|
||||
|
||||
numMarkers--;
|
||||
if ( numMarkers==0 ) { // can we release buffer?
|
||||
// System.out.println("release: shift "+p+".."+(n-1)+" to 0: '"+ new String(data,p,n)+"'");
|
||||
if ( numMarkers==0 && p > 0 ) { // release buffer when we can, but don't do unnecessary work
|
||||
// Copy data[p]..data[n-1] to data[0]..data[(n-1)-p], reset ptrs
|
||||
// p is last valid char; move nothing if p==n as we have no valid char
|
||||
System.arraycopy(data, p, data, 0, n - p); // shift n-p char from p to 0
|
||||
n = n - p;
|
||||
p = 0;
|
||||
lastCharBufferStart = lastChar;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -206,19 +257,37 @@ public class UnbufferedCharStream implements CharStream {
|
|||
}
|
||||
|
||||
/** Seek to absolute character index, which might not be in the current
|
||||
* sliding window. Move p to index-bufferStartIndex.
|
||||
* sliding window. Move {@code p} to {@code index-bufferStartIndex}.
|
||||
*/
|
||||
@Override
|
||||
public void seek(int index) {
|
||||
// System.out.println("seek "+index);
|
||||
if (index == currentCharIndex) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (index > currentCharIndex) {
|
||||
sync(index - currentCharIndex);
|
||||
index = Math.min(index, getBufferStartIndex() + n - 1);
|
||||
}
|
||||
|
||||
// index == to bufferStartIndex should set p to 0
|
||||
int i = index - getBufferStartIndex();
|
||||
if ( i < 0 || i >= n ) {
|
||||
if ( i < 0 ) {
|
||||
throw new IllegalArgumentException("cannot seek to negative index " + index);
|
||||
}
|
||||
else if (i >= n) {
|
||||
throw new UnsupportedOperationException("seek to index outside buffer: "+
|
||||
index+" not in "+getBufferStartIndex()+".."+(getBufferStartIndex()+n));
|
||||
}
|
||||
p = i;
|
||||
|
||||
p = i;
|
||||
currentCharIndex = index;
|
||||
if (p == 0) {
|
||||
lastChar = lastCharBufferStart;
|
||||
}
|
||||
else {
|
||||
lastChar = data[p-1];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -233,9 +302,19 @@ public class UnbufferedCharStream implements CharStream {
|
|||
|
||||
@Override
|
||||
public String getText(Interval interval) {
|
||||
if (interval.a < 0 || interval.b < interval.a - 1) {
|
||||
throw new IllegalArgumentException("invalid interval");
|
||||
}
|
||||
|
||||
int bufferStartIndex = getBufferStartIndex();
|
||||
if (n > 0 && data[n - 1] == Character.MAX_VALUE) {
|
||||
if (interval.a + interval.length() > bufferStartIndex + n) {
|
||||
throw new IllegalArgumentException("the interval extends past the end of the stream");
|
||||
}
|
||||
}
|
||||
|
||||
if (interval.a < bufferStartIndex || interval.b >= bufferStartIndex + n) {
|
||||
throw new IndexOutOfBoundsException("interval "+interval+" outside buffer: "+
|
||||
throw new UnsupportedOperationException("interval "+interval+" outside buffer: "+
|
||||
bufferStartIndex+".."+(bufferStartIndex+n));
|
||||
}
|
||||
// convert from absolute to local index
|
||||
|
@ -243,23 +322,7 @@ public class UnbufferedCharStream implements CharStream {
|
|||
return new String(data, i, interval.length());
|
||||
}
|
||||
|
||||
/** For testing. What's in moving window into data stream from
|
||||
* current index, LA(1) or data[p], to end of buffer?
|
||||
*/
|
||||
public String getRemainingBuffer() {
|
||||
if ( n==0 ) return null;
|
||||
return new String(data,p,n-p);
|
||||
}
|
||||
|
||||
/** For testing. What's in moving window buffer into data stream.
|
||||
* From 0..p-1 have been consume.
|
||||
*/
|
||||
public String getBuffer() {
|
||||
if ( n==0 ) return null;
|
||||
return new String(data,0,n);
|
||||
}
|
||||
|
||||
public int getBufferStartIndex() {
|
||||
protected final int getBufferStartIndex() {
|
||||
return currentCharIndex - p;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,45 +1,91 @@
|
|||
/*
|
||||
* [The "BSD license"]
|
||||
* Copyright (c) 2012 Terence Parr
|
||||
* Copyright (c) 2012 Sam Harwell
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.antlr.v4.runtime;
|
||||
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
|
||||
public class UnbufferedTokenStream<T extends Token> implements TokenStream {
|
||||
protected TokenSource tokenSource;
|
||||
|
||||
/** A moving window buffer of the data being scanned. While there's a
|
||||
* marker, we keep adding to buffer. Otherwise, consume() resets
|
||||
* so we start filling at index 0 again.
|
||||
/**
|
||||
* A moving window buffer of the data being scanned. While there's a marker,
|
||||
* we keep adding to buffer. Otherwise, {@link #consume consume()} resets so
|
||||
* we start filling at index 0 again.
|
||||
*/
|
||||
protected Token[] tokens;
|
||||
|
||||
/** How many tokens are actually in the buffer; this is not
|
||||
* the buffer size, that's tokens.length.
|
||||
/**
|
||||
* The number of tokens currently in {@link #tokens tokens}.
|
||||
* <p/>
|
||||
* This is not the buffer capacity, that's {@code tokens.length}.
|
||||
*/
|
||||
protected int n;
|
||||
|
||||
/** 0..n-1 index into tokens of next token; tokens[p] is LT(1).
|
||||
* If p == n, we are out of buffered tokens.
|
||||
/**
|
||||
* 0..n-1 index into {@link #tokens tokens} of next token.
|
||||
* <p/>
|
||||
* The {@code LT(1)} token is {@code tokens[p]}. If {@code p == n}, we are
|
||||
* out of buffered tokens.
|
||||
*/
|
||||
protected int p=0;
|
||||
|
||||
/** Count up with mark() and down with release(). When we release()
|
||||
* and hit zero, reset buffer to beginning. Copy data[p]..data[n-1]
|
||||
* to data[0]..data[(n-1)-p].
|
||||
/**
|
||||
* Count up with {@link #mark mark()} and down with
|
||||
* {@link #release release()}. When we {@code release()} the last mark,
|
||||
* {@code numMarkers} reaches 0 and we reset the buffer. Copy
|
||||
* {@code tokens[p]..tokens[n-1]} to {@code tokens[0]..tokens[(n-1)-p]}.
|
||||
*/
|
||||
protected int numMarkers = 0;
|
||||
|
||||
/**
|
||||
* This is the {@code LT(-1)} token for the current position.
|
||||
*/
|
||||
protected Token lastToken;
|
||||
|
||||
/** Absolute token index. It's the index of the token about to be
|
||||
* read via LA(1). Goes from 0 to numtokens-1 in entire stream.
|
||||
/**
|
||||
* When {@code numMarkers > 0}, this is the {@code LT(-1)} token for the
|
||||
* first token in {@link #tokens}. Otherwise, this is {@code null}.
|
||||
*/
|
||||
protected int currentTokenIndex = 0; // simple counter to set token index in tokens
|
||||
protected Token lastTokenBufferStart;
|
||||
|
||||
/** Skip tokens on any channel but this one; this is how we skip whitespace... */
|
||||
// TODO: skip off-channel tokens!!!
|
||||
protected int channel = Token.DEFAULT_CHANNEL;
|
||||
/**
|
||||
* Absolute token index. It's the index of the token about to be read via
|
||||
* {@code LT(1)}. Goes from 0 to the number of tokens in the entire stream,
|
||||
* although the stream size is unknown before the end is reached.
|
||||
* <p/>
|
||||
* This value is used to set the token indexes if the stream provides tokens
|
||||
* that implement {@link WritableToken}.
|
||||
*/
|
||||
protected int currentTokenIndex = 0;
|
||||
|
||||
public UnbufferedTokenStream(TokenSource tokenSource) {
|
||||
this(tokenSource, 256);
|
||||
|
@ -64,29 +110,32 @@ public class UnbufferedTokenStream<T extends Token> implements TokenStream {
|
|||
|
||||
@Override
|
||||
public Token LT(int i) {
|
||||
if ( i==-1 ) return lastToken; // special case
|
||||
sync(i);
|
||||
int index = p + i - 1;
|
||||
if ( index < 0 ) throw new IndexOutOfBoundsException("LT("+i+") gives negative index");
|
||||
if ( index > n ) {
|
||||
TokenFactory<?> factory = tokenSource.getTokenFactory();
|
||||
int cpos = tokenSource.getCharPositionInLine();
|
||||
// The character position for EOF is one beyond the position of
|
||||
// the previous token's last character
|
||||
Token eof = factory.create(tokenSource, Token.EOF, null, Token.DEFAULT_CHANNEL,
|
||||
index(), index()-1,
|
||||
tokenSource.getLine(), cpos);
|
||||
return eof;
|
||||
if ( i==-1 ) {
|
||||
return lastToken;
|
||||
}
|
||||
return tokens[index];
|
||||
|
||||
sync(i);
|
||||
int index = p + i - 1;
|
||||
if ( index < 0 ) {
|
||||
throw new IndexOutOfBoundsException("LT("+i+") gives negative index");
|
||||
}
|
||||
|
||||
if ( index >= n ) {
|
||||
assert n > 0 && tokens[n-1].getType() == Token.EOF;
|
||||
return tokens[n-1];
|
||||
}
|
||||
|
||||
return tokens[index];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int LA(int i) { return LT(i).getType(); }
|
||||
public int LA(int i) {
|
||||
return LT(i).getType();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenSource getTokenSource() {
|
||||
return null;
|
||||
return tokenSource;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -106,80 +155,104 @@ public class UnbufferedTokenStream<T extends Token> implements TokenStream {
|
|||
|
||||
@Override
|
||||
public void consume() {
|
||||
// buf always has at least data[p==0] in this method due to ctor
|
||||
if ( p==0 ) lastToken = null; // we're at first token; no LA(-1)
|
||||
else lastToken = tokens[p]; // track last char for LT(-1)
|
||||
if (LA(1) == Token.EOF) {
|
||||
throw new IllegalStateException("cannot consume EOF");
|
||||
}
|
||||
|
||||
// buf always has at least tokens[p==0] in this method due to ctor
|
||||
lastToken = tokens[p]; // track last token for LT(-1)
|
||||
|
||||
// if we're at last token and no markers, opportunity to flush buffer
|
||||
if ( p == n-1 && numMarkers==0 ) { // can we release buffer?
|
||||
// System.out.println("consume: reset");
|
||||
if ( p == n-1 && numMarkers==0 ) {
|
||||
n = 0;
|
||||
p = -1; // p++ will leave this at 0
|
||||
lastTokenBufferStart = lastToken;
|
||||
}
|
||||
|
||||
p++;
|
||||
currentTokenIndex++;
|
||||
// System.out.println("consume p="+p+", numMarkers="+numMarkers+
|
||||
// ", currentCharIndex="+currentCharIndex+", n="+n);
|
||||
sync(1);
|
||||
}
|
||||
|
||||
/** Make sure we have 'need' elements from current position p. Last valid
|
||||
* p index is tokens.size()-1. p+need-1 is the tokens index 'need' elements
|
||||
* ahead. If we need 1 element, (p+1-1)==p must be < tokens.size().
|
||||
/** Make sure we have 'need' elements from current position {@link #p p}. Last valid
|
||||
* {@code p} index is {@code tokens.length-1}. {@code p+need-1} is the tokens index 'need' elements
|
||||
* ahead. If we need 1 element, {@code (p+1-1)==p} must be less than {@code tokens.length}.
|
||||
*/
|
||||
protected void sync(int want) {
|
||||
int need = (p+want-1) - n + 1; // how many more elements we need?
|
||||
if ( need > 0 ) fill(need); // out of elements?
|
||||
}
|
||||
|
||||
/** add n elements to buffer */
|
||||
public void fill(int n) {
|
||||
for (int i=1; i<=n; i++) {
|
||||
Token t = tokenSource.nextToken();
|
||||
if ( t instanceof WritableToken ) {
|
||||
((WritableToken)t).setTokenIndex(currentTokenIndex);
|
||||
}
|
||||
add(t);
|
||||
if ( need > 0 ) {
|
||||
fill(need);
|
||||
}
|
||||
}
|
||||
|
||||
protected void add(Token t) {
|
||||
/**
|
||||
* Add {@code n} elements to the buffer. Returns the number of tokens
|
||||
* actually added to the buffer. If the return value is less than {@code n},
|
||||
* then EOF was reached before {@code n} tokens could be added.
|
||||
*/
|
||||
protected int fill(int n) {
|
||||
for (int i=0; i<n; i++) {
|
||||
if (this.n > 0 && tokens[this.n-1].getType() == Token.EOF) {
|
||||
return i;
|
||||
}
|
||||
|
||||
Token t = tokenSource.nextToken();
|
||||
add(t);
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
protected void add(@NotNull Token t) {
|
||||
if ( n>=tokens.length ) {
|
||||
Token[] newtokens = new Token[tokens.length*2]; // resize
|
||||
System.arraycopy(tokens, 0, newtokens, 0, tokens.length);
|
||||
tokens = newtokens;
|
||||
}
|
||||
|
||||
if (t instanceof WritableToken) {
|
||||
((WritableToken)t).setTokenIndex(getBufferStartIndex() + n);
|
||||
}
|
||||
|
||||
tokens[n++] = t;
|
||||
}
|
||||
|
||||
|
||||
/** Return a marker that we can release later. Marker happens to be
|
||||
* index into buffer (not index()).
|
||||
/**
|
||||
* Return a marker that we can release later.
|
||||
* <p/>
|
||||
* The specific marker value used for this class allows for some level of
|
||||
* protection against misuse where {@code seek()} is called on a mark or
|
||||
* {@code release()} is called in the wrong order.
|
||||
*/
|
||||
@Override
|
||||
public int mark() {
|
||||
int m = p;
|
||||
if (numMarkers == 0) {
|
||||
lastTokenBufferStart = lastToken;
|
||||
}
|
||||
|
||||
int mark = -numMarkers - 1;
|
||||
numMarkers++;
|
||||
return m;
|
||||
return mark;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void release(int marker) {
|
||||
if ( numMarkers==0 ) {
|
||||
throw new IllegalStateException("release() called w/o prior matching mark()");
|
||||
int expectedMark = -numMarkers;
|
||||
if ( marker!=expectedMark ) {
|
||||
throw new IllegalStateException("release() called with an invalid marker.");
|
||||
}
|
||||
// StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
|
||||
// System.out.println(stackTrace[2].getMethodName()+": release " + marker);
|
||||
|
||||
numMarkers--;
|
||||
if ( numMarkers==0 ) { // can we release buffer?
|
||||
System.out.println("release: shift "+p+".."+(n-1)+" to 0: '"+
|
||||
Arrays.toString(Arrays.copyOfRange(tokens,p,n))+"'");
|
||||
// Copy data[p]..data[n-1] to data[0]..data[(n-1)-p], reset ptrs
|
||||
// p is last valid token; move nothing if p==n as we have no valid char
|
||||
System.arraycopy(tokens, p, tokens, 0, n - p); // shift n-p char from p to 0
|
||||
n = n - p;
|
||||
p = 0;
|
||||
if (p > 0) {
|
||||
// Copy tokens[p]..tokens[n-1] to tokens[0]..tokens[(n-1)-p], reset ptrs
|
||||
// p is last valid token; move nothing if p==n as we have no valid token
|
||||
System.arraycopy(tokens, p, tokens, 0, n - p); // shift n-p tokens from p to 0
|
||||
n = n - p;
|
||||
p = 0;
|
||||
}
|
||||
|
||||
lastTokenBufferStart = lastToken;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -190,13 +263,33 @@ public class UnbufferedTokenStream<T extends Token> implements TokenStream {
|
|||
|
||||
@Override
|
||||
public void seek(int index) { // seek to absolute index
|
||||
if (index == currentTokenIndex) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (index > currentTokenIndex) {
|
||||
sync(index - currentTokenIndex);
|
||||
index = Math.min(index, getBufferStartIndex() + n - 1);
|
||||
}
|
||||
|
||||
int bufferStartIndex = getBufferStartIndex();
|
||||
int i = index - bufferStartIndex;
|
||||
if ( i < 0 || i >= n ) {
|
||||
if ( i < 0 ) {
|
||||
throw new IllegalArgumentException("cannot seek to negative index " + index);
|
||||
}
|
||||
else if (i >= n) {
|
||||
throw new UnsupportedOperationException("seek to index outside buffer: "+
|
||||
index+" not in "+ bufferStartIndex +".."+(bufferStartIndex +n));
|
||||
}
|
||||
|
||||
p = i;
|
||||
currentTokenIndex = index;
|
||||
if (p == 0) {
|
||||
lastToken = lastTokenBufferStart;
|
||||
}
|
||||
else {
|
||||
lastToken = tokens[p-1];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -233,23 +326,7 @@ public class UnbufferedTokenStream<T extends Token> implements TokenStream {
|
|||
return buf.toString();
|
||||
}
|
||||
|
||||
/** For testing. What's in moving window into token stream from
|
||||
* current index, LT(1) or tokens[p], to end of buffer?
|
||||
*/
|
||||
public List<T> getRemainingBuffer() {
|
||||
if ( n==0 ) return null;
|
||||
return (List<T>)Arrays.asList(Arrays.copyOfRange(tokens, p, n));
|
||||
}
|
||||
|
||||
/** For testing. What's in moving window buffer into data stream.
|
||||
* From 0..p-1 have been consume.
|
||||
*/
|
||||
public List<T> getBuffer() {
|
||||
if ( n==0 ) return null;
|
||||
return (List<T>)Arrays.asList(Arrays.copyOfRange(tokens, 0, n));
|
||||
}
|
||||
|
||||
public int getBufferStartIndex() {
|
||||
protected final int getBufferStartIndex() {
|
||||
return currentTokenIndex - p;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -229,7 +229,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
}
|
||||
|
||||
// if no edge, pop over to ATN interpreter, update DFA and return
|
||||
if ( s.edges == null || t >= s.edges.length || t <= CharStream.EOF ||
|
||||
if ( s.edges == null || t >= s.edges.length || t <= IntStream.EOF ||
|
||||
s.edges[t] == null )
|
||||
{
|
||||
ATN_failover++;
|
||||
|
@ -248,7 +248,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
captureSimState(prevAccept, input, s);
|
||||
// keep going unless we're at EOF; check if something else could match
|
||||
// EOF never in DFA
|
||||
if ( t==CharStream.EOF ) break;
|
||||
if ( t==IntStream.EOF ) break;
|
||||
}
|
||||
|
||||
consume(input);
|
||||
|
@ -297,7 +297,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
DFAState target = null;
|
||||
ATNConfigSet reach = null;
|
||||
if (s != null) {
|
||||
if ( s.edges != null && t < s.edges.length && t > CharStream.EOF ) {
|
||||
if ( s.edges != null && t < s.edges.length && t > IntStream.EOF ) {
|
||||
closure = s.configs;
|
||||
target = s.edges[t];
|
||||
if (target == ERROR) {
|
||||
|
@ -374,7 +374,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
}
|
||||
else {
|
||||
// if no accept and EOF is first char, return EOF
|
||||
if ( t==CharStream.EOF && input.index()==startIndex ) {
|
||||
if ( t==IntStream.EOF && input.index()==startIndex ) {
|
||||
return Token.EOF;
|
||||
}
|
||||
|
||||
|
@ -503,7 +503,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
|
||||
case Transition.NOT_SET:
|
||||
NotSetTransition nst = (NotSetTransition)trans;
|
||||
if (!nst.set.contains(t) && t!=CharStream.EOF) // ~set doesn't not match EOF
|
||||
if (!nst.set.contains(t) && t!=IntStream.EOF) // ~set doesn't not match EOF
|
||||
{
|
||||
if ( debug ) {
|
||||
System.out.format("match ~set %s\n", nst.set.toString(true));
|
||||
|
@ -515,7 +515,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
return null;
|
||||
|
||||
case Transition.WILDCARD:
|
||||
if (t != CharStream.EOF) {
|
||||
if (t != IntStream.EOF) {
|
||||
return trans.target;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.antlr.runtime.CommonToken;
|
|||
import org.antlr.v4.codegen.CodeGenerator;
|
||||
import org.antlr.v4.misc.CharSupport;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.runtime.CharStream;
|
||||
import org.antlr.v4.runtime.IntStream;
|
||||
import org.antlr.v4.runtime.atn.ATN;
|
||||
import org.antlr.v4.runtime.atn.ATNState;
|
||||
import org.antlr.v4.runtime.atn.ActionTransition;
|
||||
|
@ -271,7 +271,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
if ( node.getText().equals("EOF") ) {
|
||||
ATNState left = newState(node);
|
||||
ATNState right = newState(node);
|
||||
left.addTransition(new AtomTransition(right, CharStream.EOF));
|
||||
left.addTransition(new AtomTransition(right, IntStream.EOF));
|
||||
return new Handle(left, right);
|
||||
}
|
||||
return _ruleRef(node);
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.antlr.v4.runtime.ANTLRInputStream;
|
|||
import org.antlr.v4.runtime.CharStream;
|
||||
import org.antlr.v4.runtime.CommonToken;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.IntStream;
|
||||
import org.antlr.v4.runtime.Lexer;
|
||||
import org.antlr.v4.runtime.RuleContext;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
|
@ -232,7 +233,7 @@ public abstract class BaseTest {
|
|||
tokenTypes.add(lg.typeToTokenList.get(ttype));
|
||||
}
|
||||
|
||||
if ( t==CharStream.EOF ) {
|
||||
if ( t==IntStream.EOF ) {
|
||||
hitEOF = true;
|
||||
}
|
||||
} while ( ttype!=Token.EOF );
|
||||
|
|
|
@ -1,30 +1,31 @@
|
|||
/*
|
||||
[The "BSD license"]
|
||||
Copyright (c) 2011 Terence Parr
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* [The "BSD license"]
|
||||
* Copyright (c) 2012 Terence Parr
|
||||
* Copyright (c) 2012 Sam Harwell
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.antlr.v4.test;
|
||||
|
@ -32,78 +33,201 @@ package org.antlr.v4.test;
|
|||
import org.antlr.v4.runtime.CharStream;
|
||||
import org.antlr.v4.runtime.CommonTokenFactory;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.IntStream;
|
||||
import org.antlr.v4.runtime.UnbufferedCharStream;
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
import org.antlr.v4.tool.interp.LexerInterpreter;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
public class TestUnbufferedCharStream extends BaseTest {
|
||||
@Test public void testNoChar() throws Exception {
|
||||
CharStream input = new UnbufferedCharStream(
|
||||
new StringReader("")
|
||||
);
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
CharStream input = createStream("");
|
||||
assertEquals(IntStream.EOF, input.LA(1));
|
||||
assertEquals(IntStream.EOF, input.LA(2));
|
||||
}
|
||||
|
||||
/**
|
||||
* The {@link IntStream} interface does not specify the behavior when the
|
||||
* EOF symbol is consumed, but {@link UnbufferedCharStream} handles this
|
||||
* particular case by throwing an {@link IllegalStateException}.
|
||||
*/
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void testConsumeEOF() throws Exception {
|
||||
CharStream input = createStream("");
|
||||
assertEquals(IntStream.EOF, input.LA(1));
|
||||
input.consume();
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
input.consume();
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testNegativeSeek() {
|
||||
CharStream input = createStream("");
|
||||
input.seek(-1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSeekPastEOF() {
|
||||
CharStream input = createStream("");
|
||||
assertEquals(0, input.index());
|
||||
input.seek(1);
|
||||
assertEquals(0, input.index());
|
||||
}
|
||||
|
||||
/**
|
||||
* The {@link IntStream} interface does not specify the behavior when marks
|
||||
* are not released in the reversed order they were created, but
|
||||
* {@link UnbufferedCharStream} handles this case by throwing an
|
||||
* {@link IllegalStateException}.
|
||||
*/
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void testMarkReleaseOutOfOrder() {
|
||||
CharStream input = createStream("");
|
||||
int m1 = input.mark();
|
||||
int m2 = input.mark();
|
||||
input.release(m1);
|
||||
}
|
||||
|
||||
/**
|
||||
* The {@link IntStream} interface does not specify the behavior when a mark
|
||||
* is released twice, but {@link UnbufferedCharStream} handles this case by
|
||||
* throwing an {@link IllegalStateException}.
|
||||
*/
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void testMarkReleasedTwice() {
|
||||
CharStream input = createStream("");
|
||||
int m1 = input.mark();
|
||||
input.release(m1);
|
||||
input.release(m1);
|
||||
}
|
||||
|
||||
/**
|
||||
* The {@link IntStream} interface does not specify the behavior when a nested mark
|
||||
* is released twice, but {@link UnbufferedCharStream} handles this case by
|
||||
* throwing an {@link IllegalStateException}.
|
||||
*/
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void testNestedMarkReleasedTwice() {
|
||||
CharStream input = createStream("");
|
||||
int m1 = input.mark();
|
||||
int m2 = input.mark();
|
||||
input.release(m2);
|
||||
input.release(m2);
|
||||
}
|
||||
|
||||
/**
|
||||
* It is not valid to pass a mark to {@link IntStream#seek}, but
|
||||
* {@link UnbufferedCharStream} creates marks in such a way that this
|
||||
* invalid usage results in an {@link IllegalArgumentException}.
|
||||
*/
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testMarkPassedToSeek() {
|
||||
CharStream input = createStream("");
|
||||
int m1 = input.mark();
|
||||
input.seek(m1);
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testSeekBeforeBufferStart() {
|
||||
CharStream input = createStream("xyz");
|
||||
input.consume();
|
||||
int m1 = input.mark();
|
||||
assertEquals(1, input.index());
|
||||
input.consume();
|
||||
input.seek(0);
|
||||
}
|
||||
|
||||
@Test(expected = UnsupportedOperationException.class)
|
||||
public void testGetTextBeforeBufferStart() {
|
||||
CharStream input = createStream("xyz");
|
||||
input.consume();
|
||||
int m1 = input.mark();
|
||||
assertEquals(1, input.index());
|
||||
input.getText(new Interval(0, 1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTextInMarkedRange() {
|
||||
CharStream input = createStream("xyz");
|
||||
input.consume();
|
||||
int m1 = input.mark();
|
||||
assertEquals(1, input.index());
|
||||
input.consume();
|
||||
input.consume();
|
||||
assertEquals("yz", input.getText(new Interval(1, 2)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLastChar() {
|
||||
CharStream input = createStream("abcdef");
|
||||
|
||||
input.consume();
|
||||
assertEquals('a', input.LA(-1));
|
||||
|
||||
int m1 = input.mark();
|
||||
input.consume();
|
||||
input.consume();
|
||||
input.consume();
|
||||
assertEquals('d', input.LA(-1));
|
||||
|
||||
input.seek(2);
|
||||
assertEquals('b', input.LA(-1));
|
||||
|
||||
input.release(m1);
|
||||
input.seek(3);
|
||||
assertEquals('c', input.LA(-1));
|
||||
// this special case is not required by the IntStream interface, but
|
||||
// UnbufferedCharStream allows it so we have to make sure the resulting
|
||||
// state is consistent
|
||||
input.seek(2);
|
||||
assertEquals('b', input.LA(-1));
|
||||
}
|
||||
|
||||
@Test public void test1Char() throws Exception {
|
||||
UnbufferedCharStream input = new UnbufferedCharStream(
|
||||
new StringReader("x")
|
||||
);
|
||||
TestingUnbufferedCharStream input = createStream("x");
|
||||
assertEquals('x', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
assertEquals(IntStream.EOF, input.LA(1));
|
||||
String r = input.getRemainingBuffer();
|
||||
assertEquals("\uFFFF", r); // shouldn't include x
|
||||
assertEquals("x\uFFFF", input.getBuffer()); // whole buffer
|
||||
assertEquals("\uFFFF", input.getBuffer()); // whole buffer
|
||||
}
|
||||
|
||||
@Test public void test2Char() throws Exception {
|
||||
UnbufferedCharStream input = new UnbufferedCharStream(
|
||||
new StringReader("xy")
|
||||
);
|
||||
TestingUnbufferedCharStream input = createStream("xy");
|
||||
assertEquals('x', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('y', input.LA(1));
|
||||
assertEquals("y", input.getRemainingBuffer()); // shouldn't include x
|
||||
assertEquals("xy", input.getBuffer());
|
||||
assertEquals("y", input.getBuffer());
|
||||
input.consume();
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
assertEquals(IntStream.EOF, input.LA(1));
|
||||
assertEquals("\uFFFF", input.getBuffer());
|
||||
}
|
||||
|
||||
@Test public void test2CharAhead() throws Exception {
|
||||
CharStream input = new UnbufferedCharStream(
|
||||
new StringReader("xy")
|
||||
);
|
||||
CharStream input = createStream("xy");
|
||||
assertEquals('x', input.LA(1));
|
||||
assertEquals('y', input.LA(2));
|
||||
assertEquals(CharStream.EOF, input.LA(3));
|
||||
assertEquals(IntStream.EOF, input.LA(3));
|
||||
}
|
||||
|
||||
@Test public void testBufferExpand() throws Exception {
|
||||
UnbufferedCharStream input = new UnbufferedCharStream(
|
||||
new StringReader("01234"),
|
||||
2 // buff size 2
|
||||
);
|
||||
TestingUnbufferedCharStream input = createStream("01234", 2);
|
||||
assertEquals('0', input.LA(1));
|
||||
assertEquals('1', input.LA(2));
|
||||
assertEquals('2', input.LA(3));
|
||||
assertEquals('3', input.LA(4));
|
||||
assertEquals('4', input.LA(5));
|
||||
assertEquals("01234", input.getBuffer());
|
||||
assertEquals(CharStream.EOF, input.LA(6));
|
||||
assertEquals(IntStream.EOF, input.LA(6));
|
||||
}
|
||||
|
||||
@Test public void testBufferWrapSize1() throws Exception {
|
||||
CharStream input = new UnbufferedCharStream(
|
||||
new StringReader("01234"),
|
||||
1 // buff size 1
|
||||
);
|
||||
CharStream input = createStream("01234", 1);
|
||||
assertEquals('0', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('1', input.LA(1));
|
||||
|
@ -114,14 +238,11 @@ public class TestUnbufferedCharStream extends BaseTest {
|
|||
input.consume();
|
||||
assertEquals('4', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
assertEquals(IntStream.EOF, input.LA(1));
|
||||
}
|
||||
|
||||
@Test public void testBufferWrapSize2() throws Exception {
|
||||
CharStream input = new UnbufferedCharStream(
|
||||
new StringReader("01234"),
|
||||
2 // buff size 2
|
||||
);
|
||||
CharStream input = createStream("01234", 2);
|
||||
assertEquals('0', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('1', input.LA(1));
|
||||
|
@ -132,54 +253,45 @@ public class TestUnbufferedCharStream extends BaseTest {
|
|||
input.consume();
|
||||
assertEquals('4', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
assertEquals(IntStream.EOF, input.LA(1));
|
||||
}
|
||||
|
||||
@Test public void test1Mark() throws Exception {
|
||||
UnbufferedCharStream input = new UnbufferedCharStream(
|
||||
new StringReader("xyz")
|
||||
);
|
||||
TestingUnbufferedCharStream input = createStream("xyz");
|
||||
int m = input.mark();
|
||||
assertEquals('x', input.LA(1));
|
||||
assertEquals('y', input.LA(2));
|
||||
assertEquals('z', input.LA(3));
|
||||
input.release(m);
|
||||
assertEquals(CharStream.EOF, input.LA(4));
|
||||
assertEquals(IntStream.EOF, input.LA(4));
|
||||
assertEquals("xyz\uFFFF", input.getBuffer());
|
||||
}
|
||||
|
||||
@Test public void test1MarkWithConsumesInSequence() throws Exception {
|
||||
UnbufferedCharStream input = new UnbufferedCharStream(
|
||||
new StringReader("xyz")
|
||||
);
|
||||
TestingUnbufferedCharStream input = createStream("xyz");
|
||||
int m = input.mark();
|
||||
input.consume(); // x, moves to y
|
||||
input.consume(); // y
|
||||
input.consume(); // z, moves to EOF
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
assertEquals(IntStream.EOF, input.LA(1));
|
||||
assertEquals("xyz\uFFFF", input.getBuffer());
|
||||
input.release(m); // wipes buffer
|
||||
assertEquals("\uFFFF", input.getBuffer());
|
||||
}
|
||||
|
||||
@Test public void test2Mark() throws Exception {
|
||||
UnbufferedCharStream input = new UnbufferedCharStream(
|
||||
new StringReader("xyz"),
|
||||
100
|
||||
);
|
||||
TestingUnbufferedCharStream input = createStream("xyz", 100);
|
||||
assertEquals('x', input.LA(1));
|
||||
input.consume(); // reset buffer index (p) to 0
|
||||
int m1 = input.mark();
|
||||
assertEquals(1, m1);
|
||||
assertEquals('y', input.LA(1));
|
||||
input.consume();
|
||||
int m2 = input.mark();
|
||||
assertEquals(2, m2); // 2nd consume leaves p==2
|
||||
assertEquals("xyz", input.getBuffer());
|
||||
assertEquals("yz", input.getBuffer());
|
||||
input.release(m2); // drop to 1 marker
|
||||
input.consume();
|
||||
input.release(m1); // shifts remaining char to beginning
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
assertEquals(IntStream.EOF, input.LA(1));
|
||||
assertEquals("\uFFFF", input.getBuffer());
|
||||
}
|
||||
|
||||
|
@ -195,9 +307,7 @@ public class TestUnbufferedCharStream extends BaseTest {
|
|||
"WS : ' '+;\n");
|
||||
// Tokens: 012345678901234567
|
||||
// Input: x = 3 * 0 + 2 * 0;
|
||||
UnbufferedCharStream input = new UnbufferedCharStream(
|
||||
new StringReader("x = 302 * 91 + 20234234 * 0;")
|
||||
);
|
||||
TestingUnbufferedCharStream input = createStream("x = 302 * 91 + 20234234 * 0;");
|
||||
LexerInterpreter lexEngine = new LexerInterpreter(g);
|
||||
// copy text into tokens from char stream
|
||||
lexEngine.setTokenFactory(new CommonTokenFactory(true));
|
||||
|
@ -217,4 +327,40 @@ public class TestUnbufferedCharStream extends BaseTest {
|
|||
" [@17,27:27=';',<3>,1:27], [@18,28:27='',<-1>,1:28]]";
|
||||
assertEquals(expecting, tokens.getTokens().toString());
|
||||
}
|
||||
|
||||
protected static TestingUnbufferedCharStream createStream(String text) {
|
||||
return new TestingUnbufferedCharStream(new StringReader(text));
|
||||
}
|
||||
|
||||
protected static TestingUnbufferedCharStream createStream(String text, int bufferSize) {
|
||||
return new TestingUnbufferedCharStream(new StringReader(text), bufferSize);
|
||||
}
|
||||
|
||||
protected static class TestingUnbufferedCharStream extends UnbufferedCharStream {
|
||||
|
||||
public TestingUnbufferedCharStream(Reader input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
public TestingUnbufferedCharStream(Reader input, int bufferSize) {
|
||||
super(input, bufferSize);
|
||||
}
|
||||
|
||||
/** For testing. What's in moving window into data stream from
|
||||
* current index, LA(1) or data[p], to end of buffer?
|
||||
*/
|
||||
public String getRemainingBuffer() {
|
||||
if ( n==0 ) return "";
|
||||
return new String(data,p,n-p);
|
||||
}
|
||||
|
||||
/** For testing. What's in moving window buffer into data stream.
|
||||
* Elements 0..p-1 have already been consumed.
|
||||
*/
|
||||
public String getBuffer() {
|
||||
if ( n==0 ) return "";
|
||||
return new String(data,0,n);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,39 @@
|
|||
/*
|
||||
* [The "BSD license"]
|
||||
* Copyright (c) 2012 Terence Parr
|
||||
* Copyright (c) 2012 Sam Harwell
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.antlr.v4.test;
|
||||
|
||||
import org.antlr.v4.runtime.ANTLRInputStream;
|
||||
import org.antlr.v4.runtime.CharStream;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.TokenSource;
|
||||
import org.antlr.v4.runtime.TokenStream;
|
||||
import org.antlr.v4.runtime.UnbufferedTokenStream;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
|
@ -10,6 +41,9 @@ import org.antlr.v4.tool.interp.LexerInterpreter;
|
|||
import org.junit.Test;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class TestUnbufferedTokenStream extends BaseTest {
|
||||
@Test public void testLookahead() throws Exception {
|
||||
|
@ -56,7 +90,7 @@ public class TestUnbufferedTokenStream extends BaseTest {
|
|||
);
|
||||
LexerInterpreter lexEngine = new LexerInterpreter(g);
|
||||
lexEngine.setInput(input);
|
||||
UnbufferedTokenStream<Token> tokens = new UnbufferedTokenStream<Token>(lexEngine);
|
||||
TestingUnbufferedTokenStream<Token> tokens = new TestingUnbufferedTokenStream<Token>(lexEngine);
|
||||
|
||||
assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString());
|
||||
assertEquals("x", tokens.LT(1).getText());
|
||||
|
@ -94,7 +128,7 @@ public class TestUnbufferedTokenStream extends BaseTest {
|
|||
);
|
||||
LexerInterpreter lexEngine = new LexerInterpreter(g);
|
||||
lexEngine.setInput(input);
|
||||
UnbufferedTokenStream<Token> tokens = new UnbufferedTokenStream<Token>(lexEngine);
|
||||
TestingUnbufferedTokenStream<Token> tokens = new TestingUnbufferedTokenStream<Token>(lexEngine);
|
||||
|
||||
int m = tokens.mark();
|
||||
assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString());
|
||||
|
@ -130,7 +164,7 @@ public class TestUnbufferedTokenStream extends BaseTest {
|
|||
);
|
||||
LexerInterpreter lexEngine = new LexerInterpreter(g);
|
||||
lexEngine.setInput(input);
|
||||
UnbufferedTokenStream<Token> tokens = new UnbufferedTokenStream<Token>(lexEngine);
|
||||
TestingUnbufferedTokenStream<Token> tokens = new TestingUnbufferedTokenStream<Token>(lexEngine);
|
||||
|
||||
int m = tokens.mark();
|
||||
assertEquals("[[@0,0:0='x',<1>,1:0]]", tokens.getBuffer().toString());
|
||||
|
@ -158,4 +192,34 @@ public class TestUnbufferedTokenStream extends BaseTest {
|
|||
tokens.getBuffer().toString());
|
||||
tokens.release(m);
|
||||
}
|
||||
|
||||
protected static class TestingUnbufferedTokenStream<T extends Token> extends UnbufferedTokenStream<T> {
|
||||
|
||||
public TestingUnbufferedTokenStream(TokenSource tokenSource) {
|
||||
super(tokenSource);
|
||||
}
|
||||
|
||||
/** For testing. What's in moving window into token stream from
|
||||
* current index, LT(1) or tokens[p], to end of buffer?
|
||||
*/
|
||||
protected List<? extends Token> getRemainingBuffer() {
|
||||
if ( n==0 ) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
return Arrays.asList(tokens).subList(p, n);
|
||||
}
|
||||
|
||||
/** For testing. What's in moving window buffer into data stream.
|
||||
* Elements 0..p-1 have already been consumed.
|
||||
*/
|
||||
protected List<? extends Token> getBuffer() {
|
||||
if ( n==0 ) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
return Arrays.asList(tokens).subList(0, n);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue