Rewrote the unbuffered token stream to use a type-adjusted version of the unbuffered character stream. This is simpler, and it lets me remove the fast queue and lookahead stream classes. These unbuffered streams always prime the pump with the first symbol.

Added a unit test for the unbuffered token stream. Made sure that the unbuffered streams always move forward on a consume. Removed the reset method from the unbuffered streams because it's meaningless to reset to the beginning of the buffer.
2012-07-01 09:39:11 -07:00 · 2012-07-01 09:39:11 -07:00 · f80166b39c
parent 5c69d31e88
commit f80166b39c
5 changed files with 180 additions and 386 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenStream.java
@ -43,7 +43,7 @@ package org.antlr.v4.runtime;
 *  whitespace and comments etc. to the parser on a hidden channel (i.e.,
 *  you set $channel instead of calling skip() in lexer rules.)
 *
- *  @see UnbufferedTokenStream
+ *  @see OldUnbufferedTokenStream
 *  @see BufferedTokenStream
 */
 public class CommonTokenStream extends BufferedTokenStream<Token> {
--- a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedCharStream.java
@ -84,21 +84,15 @@ public class UnbufferedCharStream implements CharStream {
    /** Build a char stream on top of a byte stream. The bytes are decoded by
     *  an {@link InputStreamReader} created without an explicit charset, the
     *  buffer is allocated with {@code bufferSize} slots, and one element is
     *  requested right away so the stream starts out primed.
     */
    public UnbufferedCharStream(InputStream input, int bufferSize) {
        InputStreamReader decoder = new InputStreamReader(input);
        this.input = decoder;
        this.data = new char[bufferSize];
        this.fill(1); // prime
    }
   	/** Build a char stream that reads directly from {@code input}, using a
   	 *  buffer of {@code bufferSize} slots, and request one element right
   	 *  away so the stream starts out primed.
   	 */
   	public UnbufferedCharStream(Reader input, int bufferSize) {
   		this.data = new char[bufferSize];
   		this.input = input;
   		this.fill(1); // prime
   	}
 	/** Put every position-tracking field back to its starting value: the
 	 *  buffer is treated as empty, both index counters return to 0, and the
 	 *  earliest-marker slot is cleared (-1).
 	 */
 	public void reset() {
 		// buffer bookkeeping
 		n = 0;
 		p = 0;
 		// absolute positions
 		bufferStartIndex = 0;
 		currentCharIndex = 0;
 		// no outstanding marker
 		earliestMarker = -1;
 	}
 	@Override
 	public void consume() {
 		p++;
@ -111,6 +105,7 @@ public class UnbufferedCharStream implements CharStream {
 			n = 0;
            bufferStartIndex = currentCharIndex;
        }
 		sync(1);
    }
 	/** Make sure we have 'need' elements from current position p. Last valid
--- a/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/UnbufferedTokenStream.java
@ -1,135 +1,212 @@
 /*
 [The "BSD license"]
 Copyright (c) 2011 Terence Parr
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 package org.antlr.v4.runtime;
 import org.antlr.v4.runtime.misc.Interval;
 import org.antlr.v4.runtime.misc.LookaheadStream;
-/** A token stream that pulls tokens from the source on-demand and
+import java.util.Arrays;
- *  without tracking a complete buffer of the tokens. This stream buffers
+import java.util.List;
- *  the minimum number of tokens possible.
+
- *
+public class UnbufferedTokenStream<T extends Token> implements TokenStream {
 *  You can't use this stream if you pass whitespace or other off-channel
 *  tokens to the parser. The stream can't ignore off-channel tokens.
 *
 *  You can only look backwards 1 token: LT(-1).
 *
 *  Use this when you need to read from a socket or other infinite stream.
 *
 *  @see BufferedTokenStream
 *  @see CommonTokenStream
 */
 public class UnbufferedTokenStream<T extends Token>
        extends LookaheadStream<T>
        implements TokenStream
 {
 	protected TokenSource tokenSource;
-    protected int tokenIndex = 0; // simple counter to set token index in tokens
+
 	/** A moving window buffer of the data being scanned. While there's a
 	 *  marker, we keep adding to buffer.  Otherwise, consume() resets
 	 *  so we start filling at index 0 again.
 	 */
 	protected Token[] tokens;
 	/** How many tokens are actually in the buffer; this is not
 	 *  the buffer size, that's tokens.length.
 	 */
 	protected int n;
 	/** 0..n-1 index into tokens of next token; tokens[p] is LA(1). */
 	protected int p=0;
 	protected int earliestMarker = -1;
 	/** Absolute token index. It's the index of the token about to be
 	 *  read via LA(1). Goes from 0 to numtokens-1 in entire stream.
 	 */
 	protected int currentTokenIndex = 0; // simple counter to set token index in tokens
 	/** Buf is window into stream. This is absolute token index into entire
 	 *  stream of tokens[0]
 	 */
 	protected int bufferStartTokenIndex = 0;
    /** Skip tokens on any channel but this one; this is how we skip whitespace... */
 	//  TODO: skip off-channel tokens!!!
    protected int channel = Token.DEFAULT_CHANNEL;
 	/** Create a stream over {@code tokenSource} with an initial buffer
 	 *  capacity of 256 tokens; delegates to the two-argument constructor.
 	 */
 	public UnbufferedTokenStream(TokenSource tokenSource) {
 		this(tokenSource, 256);
 	}
 	/** Create a stream over {@code tokenSource} whose token window starts
 	 *  with room for {@code bufferSize} tokens, then fetch the first token
 	 *  immediately.
 	 */
 	public UnbufferedTokenStream(TokenSource tokenSource, int bufferSize) {
 		this.tokens = new Token[bufferSize];
 		this.tokenSource = tokenSource;
 		this.fill(1); // prime the pump
 	}
-    @Override
+	@Override
-	public T nextElement() {
+	public Token get(int i) {
-		T t = (T)tokenSource.nextToken();
+		return null;
        if ( t instanceof WritableToken ) {
            ((WritableToken)t).setTokenIndex(tokenIndex);
        }
        tokenIndex++;
 		return t;
 	}
-    @Override
+	@Override
-    public boolean isEOF(Token o) {
+	public Token LT(int i) {
-        return false;
+		sync(i);
-    }
+		int index = p + i - 1;
 		if ( index < 0 || index > n ) throw new IndexOutOfBoundsException();
 		return tokens[index];
 	}
-    @Override
+	@Override
-	public TokenSource getTokenSource() { return tokenSource; }
+	public int LA(int i) { return LT(i).getType(); }
 	/** Return the {@link TokenSource} this stream pulls its tokens from,
 	 *  i.e. the one handed to the constructor.
 	 */
 	@Override
 	public TokenSource getTokenSource() {
 		return tokenSource;
 	}
 	/** Text of the stream as a whole. Currently a stub that always returns
 	 *  {@code null}.
 	 *  NOTE(review): this stream only keeps a moving window of tokens, so
 	 *  the full text is presumably not recoverable; decide whether this
 	 *  should return the window's text, an empty string, or throw.
 	 */
 	@Override
 	public String getText() {
 		return null;
 	}
 	/** Return the text of the tokens spanned by {@code ctx}, using the
 	 *  context's source interval. The interval has to fall inside the
 	 *  current token window; the interval-based overload rejects it
 	 *  otherwise.
 	 */
 	@Override
 	public String getText(RuleContext ctx) {
 		return getText(ctx.getSourceInterval());
 	}
 	/** Return the text of the tokens from {@code start} through
 	 *  {@code stop}, located by their token indexes.
 	 *
 	 *  @return the concatenated text, or {@code null} when either
 	 *          endpoint is {@code null}
 	 */
 	@Override
 	public String getText(Token start, Token stop) {
 		if ( start==null || stop==null ) {
 			return null;
 		}
 		return getText(Interval.of(start.getTokenIndex(), stop.getTokenIndex()));
 	}
 	/** Advance past the current token. Both the buffer cursor and the
 	 *  absolute token counter move forward by one. If that lands the
 	 *  cursor on the end of the buffered tokens while no marker is
 	 *  outstanding, the window is restarted at slot 0. Finally one token
 	 *  of lookahead is guaranteed via sync(1).
 	 */
 	@Override
 	public void consume() {
 		++p;
 		++currentTokenIndex;
 		boolean windowExhausted = (p == n);
 		boolean noMarkerHeld = (earliestMarker < 0);
 		if ( windowExhausted && noMarkerHeld ) {
 			// nothing behind us needs to be kept: reuse the buffer from
 			// slot 0 and remember which absolute index that slot now holds
 			bufferStartTokenIndex = currentTokenIndex;
 			n = 0;
 			p = 0;
 		}
 		sync(1);
 	}
 	/** Guarantee that {@code want} tokens are buffered starting at the
 	 *  cursor p. Slot p+want-1 is the furthest one required; whatever part
 	 *  of that range lies at or beyond n is fetched with fill().
 	 */
 	protected void sync(int want) {
 		int lastSlotWanted = p + want - 1;
 		int missing = lastSlotWanted - n + 1; // slots n..lastSlotWanted
 		if ( missing > 0 ) {
 			fill(missing);
 		}
 	}
 	/** Pull {@code n} more tokens from the token source and append them to
 	 *  the buffer. A token that is a {@link WritableToken} is stamped with
 	 *  its absolute stream index before it is stored.
 	 *
 	 *  @param n how many tokens to fetch; note that this parameter shadows
 	 *           the field {@code n} (number of buffered tokens), which is
 	 *           therefore referred to as {@code this.n} below
 	 */
 	public void fill(int n) {
 		for (int i=1; i<=n; i++) {
 			Token t = tokenSource.nextToken();
 			if ( t instanceof WritableToken ) {
 				// The token goes into slot this.n, and slot 0 holds absolute
 				// index bufferStartTokenIndex. Using currentTokenIndex here
 				// gave every token fetched in one call the same index.
 				((WritableToken)t).setTokenIndex(bufferStartTokenIndex + this.n);
 			}
 			add(t);
 		}
 	}
 	/** Append {@code t} at slot n, growing the backing array first when it
 	 *  is full. Capacity doubles on each growth, with a floor of one slot
 	 *  so that a stream created with bufferSize 0 can still grow.
 	 */
 	protected void add(Token t) {
 		if ( n>=tokens.length ) {
 			// 0*2 would stay 0 and the store below would go out of bounds
 			int newCapacity = Math.max(1, tokens.length*2);
 			Token[] newtokens = new Token[newCapacity]; // resize
 			System.arraycopy(tokens, 0, newtokens, 0, tokens.length);
 			tokens = newtokens;
 		}
 		tokens[n++] = t;
 	}
 	/** Record the current buffer position as a marker and return it. The
 	 *  returned value is the buffer slot p, not the absolute index().
 	 *  The first marker taken is remembered as the earliest one.
 	 *
 	 *  @throws IllegalArgumentException if the cursor sits before the
 	 *          earliest marker already held
 	 */
 	@Override
 	public int mark() {
 		if ( earliestMarker > p ) {
 			// cursor was moved (via seek) to before the earliest marker
 			throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+ earliestMarker);
 		}
 		boolean noMarkerYet = earliestMarker < 0;
 		if ( noMarkerYet ) {
 			earliestMarker = p;
 		}
 		return p;
 	}
 	/** Give back a marker obtained from mark(). Only releasing the
 	 *  earliest marker has an effect: it clears the earliest-marker slot.
 	 *  Any other valid marker is accepted and ignored.
 	 *  NOTE(review): the error text reports the range as 0..n although the
 	 *  lower bound actually checked is earliestMarker.
 	 *
 	 *  @throws IllegalArgumentException if {@code marker} is below the
 	 *          earliest marker or not below n
 	 */
 	@Override
 	public void release(int marker) {
 		boolean acceptable = marker >= earliestMarker && marker < n;
 		if ( !acceptable ) {
 			throw new IllegalArgumentException("invalid marker: "+
 											   marker+" not in "+0+".."+n);
 		}
 		if ( earliestMarker == marker ) {
 			earliestMarker = -1;
 		}
 	}
 	/** Absolute index of the token at the cursor: the buffer slot p offset
 	 *  by the absolute index held in slot 0 of the buffer.
 	 */
 	@Override
 	public int index() {
 		return p + bufferStartTokenIndex;
 	}
 	/** Move the cursor to absolute token index {@code index}, which must be
 	 *  one of the tokens currently held in the buffer.
 	 *
 	 *  @throws UnsupportedOperationException if {@code index} falls outside
 	 *          the buffered window
 	 */
 	@Override
 	public void seek(int index) {
 		// index == to bufferStartIndex should set p to 0
 		int i = index - bufferStartTokenIndex;
 		if ( i < 0 || i >= n ) {
 			throw new UnsupportedOperationException("seek to index outside buffer: "+
 													index+" not in "+ bufferStartTokenIndex +".."+(bufferStartTokenIndex +n));
 		}
 		p = i;
 		// Keep the absolute counter in step with p. consume() copies it into
 		// bufferStartTokenIndex when the window restarts, so a stale value
 		// would shift every later index.
 		currentTokenIndex = index;
 	}
 	/** Not supported by this stream.
 	 *
 	 *  @throws UnsupportedOperationException always
 	 */
 	@Override
 	public int size() {
 		throw new UnsupportedOperationException("Unbuffered stream cannot know its size");
 	}
 	/** Name of the underlying input, as reported by the token source. */
 	@Override
 	public String getSourceName() {
 		return tokenSource.getSourceName();
 	}
 	@Override
 	public String getText(Interval interval) {
-		int bufferStartIndex = currentElementIndex - p;
+		int bufferStartIndex = currentTokenIndex - p;
-		int bufferStopIndex = bufferStartIndex + data.size() - 1;
+		int bufferStopIndex = bufferStartIndex + tokens.length - 1;
 		int start = interval.a;
 		int stop = interval.b;
 		if (start < bufferStartIndex || stop > bufferStopIndex) {
-			throw new UnsupportedOperationException();
+			throw new UnsupportedOperationException("interval "+interval+" not in token buffer window: "+
 													bufferStartIndex+".."+bufferStopIndex);
 		}
 		StringBuilder buf = new StringBuilder();
 		for (int i = start; i <= stop; i++) {
-			T t = data.get(i - bufferStartIndex);
+			Token t = tokens[i - bufferStartIndex];
 			buf.append(t.getText());
 		}
 		return buf.toString();
 	}
-	@Override
+	/** For testing.  What's in moving window into tokens stream? */
-	public String getText() {
+	public List<T> getBuffer() {
-		return getText(Interval.of(0,index()));
+		if ( n==0 ) return null;
 		return (List<T>)Arrays.asList(Arrays.copyOfRange(tokens, 0, n));
 	}
 	/** Text of the tokens covered by {@code ctx}; delegates to the
 	 *  interval-based overload with the context's source interval.
 	 */
 	@Override
 	public String getText(RuleContext ctx) {
 		return getText(ctx.getSourceInterval());
 	}
 	/** Text of the tokens from {@code start} through {@code stop}, located
 	 *  by their token indexes.
 	 *
 	 *  @return the text, or {@code null} when either endpoint is null
 	 */
 	@Override
 	public String getText(Token start, Token stop) {
 		if ( start==null || stop==null ) {
 			return null;
 		}
 		Interval span = Interval.of(start.getTokenIndex(), stop.getTokenIndex());
 		return getText(span);
 	}
 	/** Token type of the i-th lookahead token, i.e. the type of LT(i). */
 	@Override
    public int LA(int i) { return LT(i).getType(); }
    /** Fetch the token with absolute index {@code i} from the buffered
     *  window.
     *
     *  @throws UnsupportedOperationException if {@code i} is not inside
     *          the window
     */
    @Override
    public T get(int i) {
		int windowFirst = currentElementIndex - p;
		int windowLast = windowFirst + data.size() - 1;
		boolean inWindow = windowFirst <= i && i <= windowLast;
		if ( !inWindow ) {
			throw new UnsupportedOperationException();
		}
		int slot = i - windowFirst;
		return data.get(slot);
    }
    /** Name of the underlying input, as reported by the token source. */
    @Override
 	public String getSourceName() {	return tokenSource.getSourceName();	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/misc/FastQueue.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/misc/FastQueue.java
@ -1,98 +0,0 @@
 /*
 [The "BSD license"]
 Copyright (c) 2011 Terence Parr
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 package org.antlr.v4.runtime.misc;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NoSuchElementException;
 /** Array-backed queue offering constant-time dequeue and constant-time
  *  indexed access, with no fixed capacity.
  *
  *  Elements are appended to a list and a cursor marks the head. Dequeuing
  *  just advances the cursor; when the cursor reaches the end of the list
  *  the whole thing is cleared so filling starts over at slot 0. As long as
  *  adds and removes stay balanced the list stays small.
  *
  *  Deliberately has no iterator support.
  */
 public class FastQueue<T> {
     /** Backing list; grows as needed. */
     protected List<T> data = new ArrayList<T>();

     /** Slot of the current head element within {@link #data}. */
     protected int p = 0;

     /** Same as {@link #clear()}. */
     public void reset() {
         clear();
     }

     /** Drop every element and move the head back to slot 0. */
     public void clear() {
         p = 0;
         data.clear();
     }

     /** Dequeue: return the head element and step past it. */
     public T remove() {
         T first = elementAt(0);
         p++;
         boolean drained = (p == data.size());
         if ( drained ) {
             // everything has been handed out; restart at slot 0
             // (the list is emptied by clear())
             clear();
         }
         return first;
     }

     /** Enqueue {@code o} at the tail. */
     public void add(T o) {
         data.add(o);
     }

     /** Number of elements from the head to the tail. */
     public int size() {
         return data.size() - p;
     }

     /** Peek at the head element without removing it. */
     public T head() {
         return elementAt(0);
     }

     /** Look {@code i} elements past the head; 0 is the head itself. The
      *  argument is relative to the head, not a raw slot number in the
      *  backing list.
      *
      *  @throws NoSuchElementException if the resulting slot lies outside
      *          the backing list
      */
     public T elementAt(int i) {
         final int absIndex = p + i;
         final int lastIndex = data.size() - 1;
         if ( absIndex > lastIndex ) {
             throw new NoSuchElementException("queue index "+ absIndex +" > last index "+lastIndex);
         }
         if ( absIndex < 0 ) {
             throw new NoSuchElementException("queue index "+ absIndex +" < 0");
         }
         return data.get(absIndex);
     }

     /** Space-separated rendering of the queued elements, head first;
      *  leaves the queue untouched.
      */
     @Override
     public String toString() {
         StringBuilder out = new StringBuilder();
         int count = size();
         for (int i = 0; i < count; i++) {
             if ( i > 0 ) out.append(" ");
             out.append(elementAt(i));
         }
         return out.toString();
     }
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/misc/LookaheadStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/misc/LookaheadStream.java
@ -1,180 +0,0 @@
 /*
 [The "BSD license"]
 Copyright (c) 2011 Terence Parr
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 package org.antlr.v4.runtime.misc;
 import java.util.NoSuchElementException;
 /** A lookahead queue that knows how to mark/release locations
 *  in the buffer for backtracking purposes. Any markers force the FastQueue
 *  superclass to keep all tokens until no more markers; then can reset
 *  to avoid growing a huge buffer.
 *
 *  Subclasses supply the elements through {@link #nextElement()} and
 *  identify the end-of-stream element through {@link #isEOF(Object)}.
 */
 public abstract class LookaheadStream<T> extends FastQueue<T> {
    // Not referenced anywhere else in this class.
    public static final int UNINITIALIZED_EOF_ELEMENT_INDEX = Integer.MAX_VALUE;
    /** Absolute token index. It's the index of the symbol about to be
 	 *  read via LT(1). Goes from 0 to numtokens.
     */
    protected int currentElementIndex = 0;
    /** Element most recently consumed while markDepth was 0; this is what
     *  LB() hands back when asked for the slot just before the buffer.
     */
    protected T prevElement;
    /** Track object returned by nextElement upon end of stream;
     *  Return it later when they ask for LT passed end of input.
     */
    public T eof = null;
    /** tracks how deep mark() calls are nested */
    // NOTE(review): mark() increments this, but nothing in this class ever
    // decrements it (release() is a no-op). After the first mark() the
    // markDepth==0 branches in remove() and consume() would never run
    // again -- confirm whether that is intended.
    protected int markDepth = 0;
    /** Clear the buffer and return the index counter, buffer cursor and
     *  remembered previous element to their initial values.
     */
    @Override
    public void reset() {
        super.reset();
        currentElementIndex = 0;
        p = 0;
        prevElement=null;
    }
    /** Implement nextElement to supply a stream of elements to this
     *  lookahead buffer.  Return eof upon end of the stream we're pulling from.
     */
    public abstract T nextElement();
    /** Tell whether {@code o} is the end-of-stream element. */
    public abstract boolean isEOF(T o);
    /** Get and remove first element in queue; override FastQueue.remove();
     *  it's the same, just checks for backtracking.
     */
    @Override
    public T remove() {
        T o = elementAt(0);
        p++;
        // have we hit end of buffer and not backtracking?
        if ( p == data.size() && markDepth==0 ) {
            // if so, it's an opportunity to start filling at index 0 again
            clear(); // size goes to 0, but retains memory
        }
        return o;
    }
    /** Make sure we have at least one element to remove, even if EOF */
    public void consume() {
        syncAhead(1);
        T element = remove();
        // only remember the consumed element when no marker is active
        if (markDepth == 0) {
            prevElement = element;
        }
        currentElementIndex++;
    }
    /** Make sure we have 'need' elements from current position p. Last valid
     *  p index is data.size()-1.  p+need-1 is the data index 'need' elements
     *  ahead.  If we need 1 element, (p+1-1)==p must be < data.size().
     */
    protected void syncAhead(int need) {
        int n = (p+need-1) - data.size() + 1; // how many more elements we need?
        if ( n > 0 ) fill(n);                 // out of elements?
    }
    /** add n elements to buffer */
    public void fill(int n) {
        for (int i=1; i<=n; i++) {
            T o = nextElement();
            // remember the EOF element; it is still appended like any other
            if ( isEOF(o) ) eof = o;
            data.add(o);
        }
    }
    /** Size of entire stream is unknown; we only know buffer size from FastQueue */
    @Override
    public int size() { throw new UnsupportedOperationException("streams are of unknown size"); }
    /** Look ahead (k&gt;0) or behind (k&lt;0) relative to the current element.
     *  LT(1) is the element about to be consumed; LT(0) is null; negative
     *  k is answered by LB(-k).
     */
    public T LT(int k) {
 		if ( k==0 ) {
 			return null;
 		}
 		if ( k<0 ) return LB(-k);
 		//System.out.print("LT(p="+p+","+k+")=");
        syncAhead(k);
        // NOTE(review): syncAhead(k) has just filled the buffer up to slot
        // p+k-1, so this test looks unreachable; it also uses '>' where the
        // last valid slot is data.size()-1 -- confirm.
        if ( (p+k-1) > data.size() ) return eof;
        return elementAt(k-1);
 	}
    /** Absolute index of the element about to be read via LT(1). */
    public int index() { return currentElementIndex; }
 	/** Enter one more level of marking and return the new depth as the
 	 *  marker value.
 	 */
 	public int mark() {
        markDepth++;
        return markDepth;
 	}
 	/** Does nothing; in particular it does not lower markDepth. */
 	public void release(int marker) {
 		// no resources to release
 	}
    /** Seek to a 0-indexed position within data buffer.  Can't handle
     *  case where you seek beyond end of existing buffer.  Normally used
     *  to seek backwards in the buffer. Does not force loading of nodes.
     *  Doesn't seek to absolute position in input stream since this stream
     *  is unbuffered. Seeks only into our moving window of elements.
     */
    public void seek(int index) {
        int bufferStartIndex = currentElementIndex - p;
        if (index < bufferStartIndex) {
            throw new UnsupportedOperationException("Cannot seek to the specified index.");
        }
        if (index > currentElementIndex) {
            // forward seek: step there one consume() at a time
            int startElementIndex = currentElementIndex;
            for (int i = 0; i < index - startElementIndex; i++) {
                consume();
            }
        }
 		else {
            // backward (or same-position) seek inside the window
            currentElementIndex = index;
            p = index - bufferStartIndex;
        }
    }
    /** Look backwards k elements from the cursor. Slot -1 (just before the
     *  buffer) is answered with prevElement; slots inside the buffer are
     *  read directly; anything else is rejected.
     */
    protected T LB(int k) {
        int bufferIndex = p - k;
        if (bufferIndex == -1) {
            return prevElement;
        }
 		else if (bufferIndex >= 0 && bufferIndex < data.size()) {
            return data.get(bufferIndex);
        }
        throw new NoSuchElementException("can't look backwards more than one token in this stream");
    }
 	/** Fixed description; the buffer contents are not rendered. */
 	@Override
 	public String toString() {
 		return "a LookaheadStream";
 	}
 }