diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRUnbufferedInputStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRUnbufferedInputStream.java index 0080892ee..c15e7fe9e 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRUnbufferedInputStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRUnbufferedInputStream.java @@ -36,7 +36,7 @@ import java.io.Reader; public class ANTLRUnbufferedInputStream implements CharStream { /** A buffer of the data being scanned */ - protected int[] data = new int[256]; + protected char[] data; /** How many characters are actually in the buffer */ protected int n; @@ -44,42 +44,58 @@ public class ANTLRUnbufferedInputStream implements CharStream { /** 0..n-1 index into string of next char */ protected int p=0; - protected int minMarker = -1; + protected int earliestMarker = -1; /** Absolute char index. It's the index of the char about to be * read via LA(1). Goes from 0 to numchar-1. */ - protected int currentElementIndex = 0; + protected int currentCharIndex = 0; + + /** Buf is window into stream. This is absolute index of data[0] */ + protected int bufferStartIndex = 0; protected Reader input; /** What is name or source of this char stream? */ public String name; - public ANTLRUnbufferedInputStream(InputStream input) { - this.input = new InputStreamReader(input); - } + public ANTLRUnbufferedInputStream(InputStream input) { + this(input, 256); + } - public ANTLRUnbufferedInputStream(Reader input) { - this.input = input; - } + public ANTLRUnbufferedInputStream(Reader input) { + this(input, 256); + } + + public ANTLRUnbufferedInputStream(InputStream input, int bufferSize) { + this.input = new InputStreamReader(input); + data = new char[bufferSize]; + } + + public ANTLRUnbufferedInputStream(Reader input, int bufferSize) { + this.input = input; + data = new char[bufferSize]; + } public void reset() { p = 0; - minMarker = -1; - currentElementIndex = 0; + earliestMarker = -1; + currentCharIndex = 0; + bufferStartIndex = 0; n = 0; } @Override public void consume() { - sync(1); p++; + currentCharIndex++; // have we hit end of buffer when no markers? - if ( p==n && minMarker<0 ) { + if ( p==n && earliestMarker < 0 ) { // if so, it's an opportunity to start filling at index 0 again +// System.out.println("p=="+n+", no marker; reset buf start index="+currentCharIndex); p = 0; n = 0; + bufferStartIndex = currentCharIndex; } } @@ -106,12 +122,12 @@ public class ANTLRUnbufferedInputStream implements CharStream { } protected void add(int c) { - if ( p>=data.length ) { - int[] newdata = new int[data.length*2]; // resize + if ( n>=data.length ) { + char[] newdata = new char[data.length*2]; // resize System.arraycopy(data, 0, newdata, 0, data.length); data = newdata; } - data[n++] = c; + data[n++] = (char)c; } @Override @@ -120,32 +136,52 @@ public class ANTLRUnbufferedInputStream implements CharStream { int index = p + i - 1; if ( index < 0 ) throw new IndexOutOfBoundsException(); if ( index > n ) return CharStream.EOF; - return data[index]; + int c = data[index]; + if ( c==(char)CharStream.EOF ) return CharStream.EOF; + return c; } + /** Return a marker that we can release later. Marker happens to be + * index into buffer (not index()). + */ @Override public int mark() { int m = p; - if ( p < minMarker ) { - throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+minMarker); + if ( p < earliestMarker) { + // they must have done seek to before min marker + throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+ earliestMarker); } - if ( minMarker<0 ) minMarker = m; // set first marker + if ( earliestMarker < 0 ) earliestMarker = m; // set first marker return m; } @Override public void release(int marker) { - if ( marker == minMarker ) minMarker = -1; + // release is noop unless we remove earliest. then we don't need to + // keep anything in buffer. We only care about earliest. Releasing + // marker other than earliest does nothing as we can just keep in + // buffer. + if ( marker < earliestMarker || marker >= n ) { + throw new IllegalArgumentException("invalid marker: "+ + marker+" not in "+0+".."+n); + } + if ( marker == earliestMarker) earliestMarker = -1; } @Override public int index() { - return 0; + return p + bufferStartIndex; } @Override public void seek(int index) { - p = index; + // index == to bufferStartIndex should set p to 0 + int i = index - bufferStartIndex; + if ( i < 0 || i >= n ) { + throw new UnsupportedOperationException("seek to index outside buffer: "+ + index+" not in "+bufferStartIndex+".."+(bufferStartIndex+n)); + } + p = i; } @Override diff --git a/runtime/Java/src/org/antlr/v4/runtime/IntStream.java b/runtime/Java/src/org/antlr/v4/runtime/IntStream.java index a2e038977..62f251bc2 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/IntStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/IntStream.java @@ -53,7 +53,8 @@ public interface IntStream { int mark(); /** Release requirement that stream holds tokens from marked location - * to current index(). + * to current index(). Must release in reverse order (like stack) + * of mark() otherwise undefined behavior. */ void release(int marker); @@ -65,18 +66,16 @@ public interface IntStream { /** Set the input cursor to the position indicated by index. This is * normally used to rewind the input stream but can move forward as well. - * It's up to the stream implementation to make sure that tokens are - * buffered as necessary to make seek land on a valid token. + * It's up to the stream implementation to make sure that symbols are + * buffered as necessary to make seek land on a valid symbol. * Or, they should avoid moving the input cursor. * - * For char streams, seeking forward must update the stream state such - * as line number. For seeking backwards, you will be presumably - * backtracking using the mark/rewind mechanism that restores state and - * so this method does not need to update state when seeking backwards. - * * The index is 0..n-1. A seek to position i means that LA(1) will * return the ith symbol. So, seeking to 0 means LA(1) will return the * first element in the stream. + * + * For unbuffered streams, index i might not be in buffer. That throws + * index exception */ void seek(int index); diff --git a/tool/test/org/antlr/v4/test/TestUnbufferedInputStream.java b/tool/test/org/antlr/v4/test/TestUnbufferedInputStream.java index a38c375c4..406e77e55 100644 --- a/tool/test/org/antlr/v4/test/TestUnbufferedInputStream.java +++ b/tool/test/org/antlr/v4/test/TestUnbufferedInputStream.java @@ -67,26 +67,93 @@ public class TestUnbufferedInputStream extends BaseTest { assertEquals(CharStream.EOF, input.LA(1)); } - @Test public void test2CharAhead() throws Exception { - CharStream input = new ANTLRUnbufferedInputStream( - new StringReader("xy") - ); - assertEquals('x', input.LA(1)); - assertEquals('y', input.LA(2)); - assertEquals(CharStream.EOF, input.LA(3)); - } + @Test public void test2CharAhead() throws Exception { + CharStream input = new ANTLRUnbufferedInputStream( + new StringReader("xy") + ); + assertEquals('x', input.LA(1)); + assertEquals('y', input.LA(2)); + assertEquals(CharStream.EOF, input.LA(3)); + } - @Test public void test1Mark() throws Exception { - CharStream input = new ANTLRUnbufferedInputStream( - new StringReader("xyz") - ); - int m = input.mark(); - assertEquals('x', input.LA(1)); - assertEquals('y', input.LA(2)); - assertEquals('z', input.LA(3)); - input.release(m); - assertEquals(CharStream.EOF, input.LA(4)); - } + @Test public void testBufferExpand() throws Exception { + CharStream input = new ANTLRUnbufferedInputStream( + new StringReader("01234"), + 2 // buff size 2 + ); + assertEquals('0', input.LA(1)); + assertEquals('1', input.LA(2)); + assertEquals('2', input.LA(3)); + assertEquals('3', input.LA(4)); + assertEquals('4', input.LA(5)); + assertEquals(CharStream.EOF, input.LA(6)); + } + + @Test public void testBufferWrapSize1() throws Exception { + CharStream input = new ANTLRUnbufferedInputStream( + new StringReader("01234"), + 1 // buff size 1 + ); + assertEquals('0', input.LA(1)); + input.consume(); + assertEquals('1', input.LA(1)); + input.consume(); + assertEquals('2', input.LA(1)); + input.consume(); + assertEquals('3', input.LA(1)); + input.consume(); + assertEquals('4', input.LA(1)); + input.consume(); + assertEquals(CharStream.EOF, input.LA(1)); + } + + @Test public void testBufferWrapSize2() throws Exception { + CharStream input = new ANTLRUnbufferedInputStream( + new StringReader("01234"), + 2 // buff size 2 + ); + assertEquals('0', input.LA(1)); + input.consume(); + assertEquals('1', input.LA(1)); + input.consume(); + assertEquals('2', input.LA(1)); + input.consume(); + assertEquals('3', input.LA(1)); + input.consume(); + assertEquals('4', input.LA(1)); + input.consume(); + assertEquals(CharStream.EOF, input.LA(1)); + } + + @Test public void test1Mark() throws Exception { + CharStream input = new ANTLRUnbufferedInputStream( + new StringReader("xyz") + ); + int m = input.mark(); + assertEquals('x', input.LA(1)); + assertEquals('y', input.LA(2)); + assertEquals('z', input.LA(3)); + input.release(m); + assertEquals(CharStream.EOF, input.LA(4)); + } + + @Test public void test2Mark() throws Exception { + CharStream input = new ANTLRUnbufferedInputStream( + new StringReader("xyz"), + 2 + ); + assertEquals('x', input.LA(1)); + input.consume(); + int m1 = input.mark(); + assertEquals('y', input.LA(1)); + input.consume(); + int m2 = input.mark(); + assertEquals('z', input.LA(1)); + input.release(m2); // noop since not earliest in buf + input.consume(); + input.release(m1); + assertEquals(CharStream.EOF, input.LA(1)); + } // @Test public void testFirstToken() throws Exception { // LexerGrammar g = new LexerGrammar(