Rewrote the unbuffered token stream to use a type-adjusted version of the unbuffered character stream. This is simpler, and it lets me remove the FastQueue and LookaheadStream classes. These unbuffered streams always prime the pump with the first symbol.

Added a unit test for the unbuffered token stream.

Made sure that the unbuffered streams move forward always on a consume

Removed the reset method from the unbuffered streams because it's meaningless to reset to the beginning of the buffer.
This commit is contained in:
Terence Parr 2012-07-01 09:39:11 -07:00
parent 5c69d31e88
commit f80166b39c
5 changed files with 180 additions and 386 deletions

View File

@ -43,7 +43,7 @@ package org.antlr.v4.runtime;
* whitespace and comments etc. to the parser on a hidden channel (i.e.,
* you set $channel instead of calling skip() in lexer rules.)
*
* @see UnbufferedTokenStream
* @see OldUnbufferedTokenStream
* @see BufferedTokenStream
*/
public class CommonTokenStream extends BufferedTokenStream<Token> {

View File

@ -84,21 +84,15 @@ public class UnbufferedCharStream implements CharStream {
/** Create a stream that reads chars from {@code input} through an
 *  {@link InputStreamReader}, with an initial buffer of {@code bufferSize}
 *  chars, and immediately load the first char.
 *
 *  NOTE(review): no charset is passed to the InputStreamReader here;
 *  confirm that relying on the default is intended.
 */
public UnbufferedCharStream(InputStream input, int bufferSize) {
this.input = new InputStreamReader(input);
data = new char[bufferSize];
fill(1); // prime
}
/** Create a stream that reads chars from {@code input}, with an initial
 *  buffer of {@code bufferSize} chars, and immediately load the first char.
 */
public UnbufferedCharStream(Reader input, int bufferSize) {
this.input = input;
data = new char[bufferSize];
fill(1); // prime
}
/** Return all buffer bookkeeping to its initial state: empty buffer,
 *  cursor at slot 0, no marker, and both absolute indexes back at 0.
 *  The underlying reader is not touched.
 */
public void reset() {
    // the buffer is considered empty again
    n = 0;
    p = 0;
    // absolute positions restart from the beginning
    bufferStartIndex = 0;
    currentCharIndex = 0;
    // no outstanding marker
    earliestMarker = -1;
}
@Override
public void consume() {
p++;
@ -111,6 +105,7 @@ public class UnbufferedCharStream implements CharStream {
n = 0;
bufferStartIndex = currentCharIndex;
}
sync(1);
}
/** Make sure we have 'need' elements from current position p. Last valid

View File

@ -1,135 +1,212 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.LookaheadStream;
/** A token stream that pulls tokens from the source on-demand and
* without tracking a complete buffer of the tokens. This stream buffers
* the minimum number of tokens possible.
*
* You can't use this stream if you pass whitespace or other off-channel
* tokens to the parser. The stream can't ignore off-channel tokens.
*
* You can only look backwards 1 token: LT(-1).
*
* Use this when you need to read from a socket or other infinite stream.
*
* @see BufferedTokenStream
* @see CommonTokenStream
*/
public class UnbufferedTokenStream<T extends Token>
extends LookaheadStream<T>
implements TokenStream
{
import java.util.Arrays;
import java.util.List;
public class UnbufferedTokenStream<T extends Token> implements TokenStream {
protected TokenSource tokenSource;
protected int tokenIndex = 0; // simple counter to set token index in tokens
/** A moving window buffer of the data being scanned. While there's a
* marker, we keep adding to buffer. Otherwise, consume() resets
* so we start filling at index 0 again.
*/
protected Token[] tokens;
/** How many tokens are actually in the buffer; this is not
* the buffer size, that's tokens.length.
*/
protected int n;
/** 0..n-1 index into tokens of next token; tokens[p] is LA(1). */
protected int p=0;
protected int earliestMarker = -1;
/** Absolute token index. It's the index of the token about to be
* read via LA(1). Goes from 0 to numtokens-1 in entire stream.
*/
protected int currentTokenIndex = 0; // simple counter to set token index in tokens
/** Buf is window into stream. This is absolute token index into entire
* stream of tokens[0]
*/
protected int bufferStartTokenIndex = 0;
/** Skip tokens on any channel but this one; this is how we skip whitespace... */
// TODO: skip off-channel tokens!!!
protected int channel = Token.DEFAULT_CHANNEL;
/** Create a stream over {@code tokenSource} using an initial buffer
 *  capacity of 256 tokens.
 */
public UnbufferedTokenStream(TokenSource tokenSource) {
this(tokenSource, 256);
}
/** Create a stream over {@code tokenSource} with an initial buffer of
 *  {@code bufferSize} token slots, then fetch the first token so that
 *  LT(1) is available immediately.
 */
public UnbufferedTokenStream(TokenSource tokenSource, int bufferSize) {
this.tokenSource = tokenSource;
tokens = new Token[bufferSize];
fill(1); // prime the pump
}
@Override
public T nextElement() {
T t = (T)tokenSource.nextToken();
if ( t instanceof WritableToken ) {
((WritableToken)t).setTokenIndex(tokenIndex);
}
tokenIndex++;
return t;
/** Return the token at absolute token index {@code i}.
 *
 *  Only tokens currently held in the moving window can be returned;
 *  the window covers absolute indexes
 *  {@code bufferStartTokenIndex .. bufferStartTokenIndex+n-1}.
 *
 *  @param i absolute index of the token in the whole stream
 *  @return the buffered token at that index
 *  @throws UnsupportedOperationException if {@code i} is outside the window
 */
@Override
public Token get(int i) {
    int bufferStopIndex = bufferStartTokenIndex + n - 1;
    if ( i < bufferStartTokenIndex || i > bufferStopIndex ) {
        throw new UnsupportedOperationException("get("+i+") outside token buffer window: "+
                                                bufferStartTokenIndex+".."+bufferStopIndex);
    }
    return tokens[i - bufferStartTokenIndex];
}
/** Always answers {@code false}, whatever token is passed in.
 *
 *  NOTE(review): this looks like a placeholder rather than a real EOF
 *  test — confirm before relying on it.
 */
@Override
public boolean isEOF(Token o) {
return false;
}
/** Return the token {@code i} positions away from the current position.
 *
 *  Forward lookahead is 1-based: LT(1) is {@code tokens[p]}. Backward
 *  lookahead counts from the token just before the cursor: LT(-1) is
 *  {@code tokens[p-1]}. LT(0) keeps its previous meaning of
 *  {@code tokens[p-1]}.
 *
 *  @param i lookahead distance
 *  @return the token at that distance
 *  @throws IndexOutOfBoundsException if the requested slot is not in the
 *          filled part of the buffer ({@code 0..n-1})
 */
@Override
public Token LT(int i) {
    sync(i); // loads more tokens only when looking ahead past what is buffered
    // Negative distances have no "-1" adjustment; only forward lookahead is 1-based.
    int index = i < 0 ? p + i : p + i - 1;
    // Slot n is the first unfilled slot, so it must be rejected as well.
    if ( index < 0 || index >= n ) {
        throw new IndexOutOfBoundsException("LT("+i+") maps to buffer index "+index+
                                            " outside 0.."+(n-1));
    }
    return tokens[index];
}
/** Return the token source this stream was constructed with. */
@Override
public TokenSource getTokenSource() { return tokenSource; }
/** Return the token type of {@code LT(i)}. Any exception raised by
 *  {@code LT(i)} propagates unchanged.
 */
@Override
public int LA(int i) { return LT(i).getType(); }
/** Return the token source this stream pulls tokens from.
 *
 *  @return the source passed to the constructor (previously this stub
 *          returned {@code null} even though the source is stored)
 */
@Override
public TokenSource getTokenSource() {
    return tokenSource;
}
/** Currently returns {@code null} unconditionally.
 *
 *  NOTE(review): this is a stub; an unbuffered stream does not retain
 *  every token, so decide what "text of the whole stream" should mean
 *  here before callers depend on it.
 */
@Override
public String getText() {
return null;
}
/** Return the text of the tokens covered by {@code ctx}.
 *
 *  Delegates to {@link #getText(Interval)} with the context's source
 *  interval, so the same buffer-window restriction applies.
 *
 *  @param ctx rule context whose source interval selects the tokens
 *  @return concatenated token text for that interval
 */
@Override
public String getText(RuleContext ctx) {
    return getText(ctx.getSourceInterval());
}
/** Return the text of all tokens from {@code start} to {@code stop},
 *  inclusive, located by their token indexes.
 *
 *  Delegates to {@link #getText(Interval)}, so the same buffer-window
 *  restriction applies.
 *
 *  @param start first token of the range
 *  @param stop last token of the range
 *  @return concatenated token text, or {@code null} if either bound is null
 */
@Override
public String getText(Token start, Token stop) {
    if ( start==null || stop==null ) {
        return null;
    }
    return getText(Interval.of(start.getTokenIndex(), stop.getTokenIndex()));
}
/** Advance past the current token.
 *
 *  Both the buffer cursor and the absolute token index move forward by
 *  one. If that exhausts the buffered tokens while no marker is
 *  outstanding, the buffer is recycled: filling restarts at slot 0 and
 *  the window's absolute start becomes the new current index. Finally
 *  the next token is loaded if it is not already buffered.
 */
@Override
public void consume() {
    ++p;
    ++currentTokenIndex;

    boolean bufferExhausted = (p == n);
    boolean noMarkerHeld = (earliestMarker < 0);
    if ( bufferExhausted && noMarkerHeld ) {
        // nothing behind us has to be kept; reuse the array from the front
        bufferStartTokenIndex = currentTokenIndex;
        n = 0;
        p = 0;
    }

    sync(1);
}
/** Ensure that at least {@code want} tokens are buffered starting at the
 *  current position {@code p}. The slot {@code want} tokens ahead is
 *  {@code p+want-1}; it must be below the fill count {@code n}. Any
 *  shortfall is loaded through {@link #fill(int)}.
 */
protected void sync(int want) {
    int furthestSlot = p + want - 1;
    int shortfall = furthestSlot - n + 1; // number of tokens still to load
    if ( shortfall > 0 ) {
        fill(shortfall);
    }
}
/** Pull {@code n} more tokens from the token source and append them to
 *  the buffer.
 *
 *  Each {@link WritableToken} is stamped with its own absolute index in
 *  the stream: the absolute index of buffer slot 0 plus the slot the
 *  token is about to occupy. Stamping with {@code currentTokenIndex}
 *  would give every looked-ahead token the index of LT(1).
 *
 *  @param n number of tokens to fetch (shadows the field {@code n},
 *           which is therefore accessed as {@code this.n})
 */
public void fill(int n) {
    for (int i=1; i<=n; i++) {
        Token t = tokenSource.nextToken();
        if ( t instanceof WritableToken ) {
            // this.n is the slot add() will store into next
            ((WritableToken)t).setTokenIndex(bufferStartTokenIndex + this.n);
        }
        add(t);
    }
}
/** Append {@code t} at slot {@code n}, growing the array when it is full.
 *
 *  The capacity doubles on each growth, with a floor of one slot so that
 *  a stream created with a zero-sized buffer can still grow.
 *
 *  @param t token to store
 */
protected void add(Token t) {
    if ( n>=tokens.length ) {
        // doubling 0 yields 0, so guarantee at least one slot
        int newCapacity = Math.max(1, tokens.length*2);
        Token[] newtokens = new Token[newCapacity];
        System.arraycopy(tokens, 0, newtokens, 0, tokens.length);
        tokens = newtokens;
    }
    tokens[n++] = t;
}
/** Record the current buffer position as a marker and return it.
 *
 *  The marker is the buffer slot {@code p}, not the absolute
 *  {@link #index()}. Only the earliest marker is remembered; while it is
 *  set, consume() does not recycle the buffer.
 *
 *  @return the current buffer slot
 *  @throws IllegalArgumentException if the current position lies before
 *          the earliest existing marker
 */
@Override
public int mark() {
    if ( p < earliestMarker ) {
        // the caller must have seeked to before the earliest marker
        throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+ earliestMarker);
    }
    if ( earliestMarker < 0 ) {
        earliestMarker = p; // first marker to be set
    }
    return p;
}
/** Release a marker obtained from {@link #mark()}.
 *
 *  Only the earliest marker matters: releasing it clears the marker so
 *  the buffer may be recycled again. Releasing any later marker changes
 *  nothing.
 *
 *  @param marker buffer slot previously returned by mark()
 *  @throws IllegalArgumentException if {@code marker} is before the
 *          earliest marker or not below the fill count {@code n}
 */
@Override
public void release(int marker) {
    boolean valid = marker >= earliestMarker && marker < n;
    if ( !valid ) {
        throw new IllegalArgumentException("invalid marker: "+
                                           marker+" not in "+0+".."+n);
    }
    if ( marker == earliestMarker ) {
        earliestMarker = -1;
    }
}
/** Return the absolute index of the current token: the buffer cursor
 *  {@code p} offset by the absolute index of buffer slot 0.
 */
@Override
public int index() {
return p + bufferStartTokenIndex;
}
/** Move the current position to absolute token index {@code index}.
 *
 *  Only positions inside the buffered window are reachable. The absolute
 *  counter {@code currentTokenIndex} is updated with the cursor;
 *  otherwise the next buffer recycle in consume() would record a wrong
 *  window start.
 *
 *  @param index absolute token index to move to
 *  @throws UnsupportedOperationException if {@code index} is not inside
 *          the buffered window
 */
@Override
public void seek(int index) {
    // index == bufferStartTokenIndex maps to slot 0
    int i = index - bufferStartTokenIndex;
    if ( i < 0 || i >= n ) {
        throw new UnsupportedOperationException("seek to index outside buffer: "+
                                                index+" not in "+ bufferStartTokenIndex +".."+(bufferStartTokenIndex +n));
    }
    p = i;
    currentTokenIndex = index; // keep the absolute counter aligned with p
}
/** Not supported: always throws {@link UnsupportedOperationException}. */
@Override
public int size() {
throw new UnsupportedOperationException("Unbuffered stream cannot know its size");
}
/** Return the source name reported by the underlying token source. */
@Override
public String getSourceName() {
return tokenSource.getSourceName();
}
/** Return the concatenated text of the tokens whose absolute indexes lie
 *  in {@code interval} (both ends inclusive).
 *
 *  Only tokens currently in the buffer can be read. The readable window
 *  is {@code bufferStartTokenIndex .. bufferStartTokenIndex+n-1}; it is
 *  bounded by the fill count {@code n}, not by the array capacity, so
 *  unfilled slots are never dereferenced.
 *
 *  @param interval absolute token index range
 *  @return text of the tokens in that range
 *  @throws UnsupportedOperationException if the interval is not fully
 *          inside the buffered window
 */
@Override
public String getText(Interval interval) {
    int bufferStartIndex = bufferStartTokenIndex;
    int bufferStopIndex = bufferStartIndex + n - 1;

    int start = interval.a;
    int stop = interval.b;
    if (start < bufferStartIndex || stop > bufferStopIndex) {
        throw new UnsupportedOperationException("interval "+interval+" not in token buffer window: "+
                                                bufferStartIndex+".."+bufferStopIndex);
    }

    StringBuilder buf = new StringBuilder();
    for (int i = start; i <= stop; i++) {
        Token t = tokens[i - bufferStartIndex];
        buf.append(t.getText());
    }
    return buf.toString();
}
@Override
public String getText() {
return getText(Interval.of(0,index()));
/** For testing: return a snapshot of the filled part of the moving
 *  window (slots {@code 0..n-1}) as a list, or {@code null} when the
 *  buffer is empty.
 */
public List<T> getBuffer() {
    if ( n!=0 ) {
        Token[] window = Arrays.copyOfRange(tokens, 0, n);
        return (List<T>)Arrays.asList(window);
    }
    return null;
}
/** Return the text for {@code ctx} by passing its source interval to
 *  {@code getText(Interval)}.
 */
@Override
public String getText(RuleContext ctx) {
return getText(ctx.getSourceInterval());
}
/** Return the text between two tokens, inclusive, located by their token
 *  indexes; {@code null} when either token is missing.
 */
@Override
public String getText(Token start, Token stop) {
    if ( start==null || stop==null ) {
        return null;
    }
    Interval span = Interval.of(start.getTokenIndex(), stop.getTokenIndex());
    return getText(span);
}
/** Return the token type of {@code LT(i)}. */
@Override
public int LA(int i) { return LT(i).getType(); }
/** Return the element at absolute index {@code i}, provided it is still
 *  inside the buffered window; otherwise throw
 *  {@link UnsupportedOperationException}.
 */
@Override
public T get(int i) {
    int windowFirst = currentElementIndex - p;
    int windowLast = windowFirst + data.size() - 1;
    boolean inWindow = i >= windowFirst && i <= windowLast;
    if (!inWindow) {
        throw new UnsupportedOperationException();
    }
    return data.get(i - windowFirst);
}
/** Return the source name reported by the underlying token source. */
@Override
public String getSourceName() { return tokenSource.getSourceName(); }
}

View File

@ -1,98 +0,0 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.misc;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
/** A queue offering O(1) dequeue and O(1) get(i) that can grow without
 *  bound. A linked list dequeues quickly but indexes slowly; a plain
 *  array is the opposite. This structure is O(1) for both.
 *
 *  Elements accumulate in a list while a cursor walks forward on each
 *  removal. When the cursor reaches the end of the list, the list is
 *  emptied and filling restarts at 0, so balanced adds and removes keep
 *  the buffer small.
 *
 *  No iterator support; that is not how this class is used.
 */
public class FastQueue<T> {
    /** Dynamically sized backing store. */
    protected List<T> data = new ArrayList<T>();

    /** Position in {@link #data} of the current head of the queue. */
    protected int p = 0;

    /** Same as {@link #clear()}. */
    public void reset() {
        clear();
    }

    /** Drop every element and move the cursor back to 0. */
    public void clear() {
        p = 0;
        data.clear();
    }

    /** Remove and return the head of the queue. */
    public T remove() {
        final T first = elementAt(0);
        p++;
        if ( p == data.size() ) {
            // everything has been handed out: restart filling at slot 0
            // (size goes to 0 but the list keeps its memory)
            clear();
        }
        return first;
    }

    /** Append an element at the tail. */
    public void add(T o) {
        data.add(o);
    }

    /** Number of elements not yet removed. */
    public int size() {
        return data.size() - p;
    }

    /** Return, without removing, the head of the queue. */
    public T head() {
        return elementAt(0);
    }

    /** Return the element {@code i} positions after the head; {@code i==0}
     *  is the head itself. The offset is relative to the cursor, not an
     *  absolute index into the backing list.
     */
    public T elementAt(int i) {
        final int slot = p + i;
        if ( slot < 0 ) {
            throw new NoSuchElementException("queue index "+ slot +" < 0");
        }
        if ( slot >= data.size() ) {
            throw new NoSuchElementException("queue index "+ slot +" > last index "+(data.size()-1));
        }
        return data.get(slot);
    }

    /** Space-separated rendering of the remaining elements; does not
     *  modify the queue.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        final int count = size();
        for (int k = 0; k < count; k++) {
            if ( k > 0 ) text.append(" ");
            text.append(elementAt(k));
        }
        return text.toString();
    }
}

View File

@ -1,180 +0,0 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.misc;
import java.util.NoSuchElementException;
/** A lookahead queue that knows how to mark/release locations
 *  in the buffer for backtracking purposes. While any marker is
 *  outstanding the FastQueue superclass keeps every element; once the
 *  last marker is released the buffer can reset to avoid growing huge.
 */
public abstract class LookaheadStream<T> extends FastQueue<T> {
    public static final int UNINITIALIZED_EOF_ELEMENT_INDEX = Integer.MAX_VALUE;

    /** Absolute element index. It's the index of the symbol about to be
     *  read via LT(1). Goes from 0 to numtokens.
     */
    protected int currentElementIndex = 0;

    /** Last element consumed while no marker was outstanding; serves LT(-1)
     *  after the buffer has been recycled.
     */
    protected T prevElement;

    /** Track object returned by nextElement upon end of stream;
     *  Return it later when they ask for LT passed end of input.
     */
    public T eof = null;

    /** Number of mark() calls not yet matched by release(). */
    protected int markDepth = 0;

    /** Empty the buffer and restart element numbering at 0. */
    @Override
    public void reset() {
        super.reset();
        currentElementIndex = 0;
        p = 0;
        prevElement=null;
    }

    /** Implement nextElement to supply a stream of elements to this
     *  lookahead buffer. Return eof upon end of the stream we're pulling from.
     */
    public abstract T nextElement();

    /** Tell whether {@code o} is the end-of-stream element. */
    public abstract boolean isEOF(T o);

    /** Get and remove first element in queue; override FastQueue.remove();
     *  it's the same, except the buffer is only recycled when no marker
     *  is outstanding.
     */
    @Override
    public T remove() {
        T o = elementAt(0);
        p++;
        // have we hit end of buffer and not backtracking?
        if ( p == data.size() && markDepth==0 ) {
            // if so, it's an opportunity to start filling at index 0 again
            clear(); // size goes to 0, but retains memory
        }
        return o;
    }

    /** Make sure we have at least one element to remove, even if EOF,
     *  then advance past it.
     */
    public void consume() {
        syncAhead(1);
        T element = remove();
        if (markDepth == 0) {
            prevElement = element;
        }
        currentElementIndex++;
    }

    /** Make sure we have 'need' elements from current position p. Last valid
     *  p index is data.size()-1. p+need-1 is the data index 'need' elements
     *  ahead. If we need 1 element, (p+1-1)==p must be < data.size().
     */
    protected void syncAhead(int need) {
        int n = (p+need-1) - data.size() + 1; // how many more elements we need?
        if ( n > 0 ) fill(n);                 // out of elements?
    }

    /** Add n elements to buffer, remembering the EOF element if seen. */
    public void fill(int n) {
        for (int i=1; i<=n; i++) {
            T o = nextElement();
            if ( isEOF(o) ) eof = o;
            data.add(o);
        }
    }

    /** Size of entire stream is unknown; we only know buffer size from FastQueue */
    @Override
    public int size() { throw new UnsupportedOperationException("streams are of unknown size"); }

    /** Return the element k positions ahead (k&gt;0) or behind (k&lt;0) of
     *  the current position; LT(0) is undefined and yields null.
     */
    public T LT(int k) {
        if ( k==0 ) {
            return null;
        }
        if ( k<0 ) return LB(-k);
        syncAhead(k);
        if ( (p+k-1) > data.size() ) return eof;
        return elementAt(k-1);
    }

    /** Absolute index of the element about to be read via LT(1). */
    public int index() { return currentElementIndex; }

    /** Start a (possibly nested) marked region and return its depth.
     *  While the depth is non-zero, remove() keeps every element buffered.
     */
    public int mark() {
        markDepth++;
        return markDepth;
    }

    /** End the marked region opened by the matching mark().
     *
     *  The nesting depth is decremented so that, once every marker has
     *  been released, remove() may recycle the buffer again and consume()
     *  resumes tracking prevElement. Without the decrement a single
     *  mark() would pin the whole buffer for the life of the stream.
     *  Extra releases are ignored.
     */
    public void release(int marker) {
        if ( markDepth > 0 ) {
            markDepth--;
        }
    }

    /** Seek to an absolute element index inside the moving window.
     *  Seeking before the window start is not possible because this stream
     *  is unbuffered. Seeking forward past the current index consumes
     *  elements one by one; seeking backward just repositions the cursor.
     */
    public void seek(int index) {
        int bufferStartIndex = currentElementIndex - p;
        if (index < bufferStartIndex) {
            throw new UnsupportedOperationException("Cannot seek to the specified index.");
        }

        if (index > currentElementIndex) {
            int startElementIndex = currentElementIndex;
            for (int i = 0; i < index - startElementIndex; i++) {
                consume();
            }
        }
        else {
            currentElementIndex = index;
            p = index - bufferStartIndex;
        }
    }

    /** Look backwards k elements. Slot -1 (just before the buffer start)
     *  is served from prevElement; anything still in the buffer is read
     *  directly.
     */
    protected T LB(int k) {
        int bufferIndex = p - k;
        if (bufferIndex == -1) {
            return prevElement;
        }
        else if (bufferIndex >= 0 && bufferIndex < data.size()) {
            return data.get(bufferIndex);
        }

        throw new NoSuchElementException("can't look backwards more than one token in this stream");
    }

    @Override
    public String toString() {
        return "a LookaheadStream";
    }
}