unbuff char stream working

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9425]
This commit is contained in:
parrt 2011-11-22 12:32:15 -08:00
parent 45dc683a91
commit 31c24292cd
3 changed files with 152 additions and 50 deletions

View File

@ -36,7 +36,7 @@ import java.io.Reader;
public class ANTLRUnbufferedInputStream implements CharStream {
/** A buffer of the data being scanned */
protected int[] data = new int[256];
protected char[] data;
/** How many characters are actually in the buffer */
protected int n;
@ -44,12 +44,15 @@ public class ANTLRUnbufferedInputStream implements CharStream {
/** 0..n-1 index into the buffer (data) of the next char */
protected int p=0;
// NOTE(review): minMarker/earliestMarker and currentElementIndex/currentCharIndex
// look like the before/after names of the same two fields in this diff view — confirm.
protected int minMarker = -1;
/** Buffer offset of the earliest outstanding marker, or -1 when there is
 * none. Set by mark() and cleared by release().
 */
protected int earliestMarker = -1;
/** Absolute char index. It's the index of the char about to be
 * read via LA(1). Goes from 0 to numchar-1.
 */
protected int currentElementIndex = 0;
protected int currentCharIndex = 0;
/** Buf is window into stream. This is absolute index of data[0] */
protected int bufferStartIndex = 0;
/** Source the constructors store; the code that reads from it is outside this view. */
protected Reader input;
@ -57,29 +60,42 @@ public class ANTLRUnbufferedInputStream implements CharStream {
public String name;
/** Create a stream over {@code input}.
 * NOTE(review): this diff view shows two body lines — wrapping the stream
 * in an InputStreamReader directly, and delegating to the two-argument
 * constructor with a buffer size of 256. Presumably the first is the
 * removed line and the second its replacement; confirm against the file.
 */
public ANTLRUnbufferedInputStream(InputStream input) {
this.input = new InputStreamReader(input);
this(input, 256);
}
/** Create a stream over {@code input} with an initial buffer of 256 chars
 * (delegates to the two-argument constructor).
 */
public ANTLRUnbufferedInputStream(Reader input) {
this(input, 256);
}
/** Create a stream that reads {@code input} through an InputStreamReader.
 *
 * @param input      byte stream to decode into chars
 * @param bufferSize initial length of the char buffer; add() doubles it
 *                   when it fills up
 */
public ANTLRUnbufferedInputStream(InputStream input, int bufferSize) {
// no charset argument: InputStreamReader falls back to the platform default
this.input = new InputStreamReader(input);
data = new char[bufferSize];
}
/** Create a stream that reads chars from {@code input}.
 *
 * @param input      source of chars
 * @param bufferSize initial length of the char buffer; add() doubles it
 *                   when it fills up
 */
public ANTLRUnbufferedInputStream(Reader input, int bufferSize) {
this.input = input;
data = new char[bufferSize];
}
/** Put the buffer bookkeeping back to its initial values: cursor at 0,
 * no marker, absolute index 0, and an empty buffer (n = 0). Only the
 * fields assigned below are changed; the reader itself is not referenced.
 * NOTE(review): minMarker/currentElementIndex appear to be the pre-rename
 * forms of earliestMarker/currentCharIndex shown by this diff view — confirm.
 */
public void reset() {
p = 0;
minMarker = -1;
currentElementIndex = 0;
earliestMarker = -1;
currentCharIndex = 0;
bufferStartIndex = 0;
n = 0;
}
/** Advance the cursor by one char. If that puts the cursor at the end of
 * the buffered data while no marker is outstanding, the buffer is emptied
 * and bufferStartIndex is moved up to the current absolute char index.
 */
@Override
public void consume() {
// sync() is defined outside this view; presumably it makes sure the char being consumed is buffered
sync(1);
p++;
currentCharIndex++;
// have we hit end of buffer when no markers?
// NOTE(review): the two `if` lines below look like the old and new form of
// the same test (minMarker renamed to earliestMarker) — confirm.
if ( p==n && minMarker<0 ) {
if ( p==n && earliestMarker < 0 ) {
// if so, it's an opportunity to start filling at index 0 again
// System.out.println("p=="+n+", no marker; reset buf start index="+currentCharIndex);
p = 0;
n = 0;
bufferStartIndex = currentCharIndex;
}
}
@ -106,12 +122,12 @@ public class ANTLRUnbufferedInputStream implements CharStream {
}
/** Append {@code c} at position n of the buffer, first doubling the
 * array when it is full.
 * NOTE(review): the int[]-based lines (capacity test on p, uncast store)
 * and the char[]-based lines (capacity test on n, cast store) look like
 * the before/after versions in this diff view — confirm.
 */
protected void add(int c) {
if ( p>=data.length ) {
int[] newdata = new int[data.length*2]; // resize
if ( n>=data.length ) {
char[] newdata = new char[data.length*2]; // resize
System.arraycopy(data, 0, newdata, 0, data.length);
data = newdata;
}
data[n++] = c;
// the cast keeps only the low 16 bits of c; LA() compares the stored
// value against (char)CharStream.EOF to recover EOF
data[n++] = (char)c;
}
@Override
@ -120,32 +136,52 @@ public class ANTLRUnbufferedInputStream implements CharStream {
int index = p + i - 1;
if ( index < 0 ) throw new IndexOutOfBoundsException();
if ( index > n ) return CharStream.EOF;
return data[index];
int c = data[index];
if ( c==(char)CharStream.EOF ) return CharStream.EOF;
return c;
}
/** Return a marker that we can release later. Marker happens to be
* index into buffer (not index()).
*/
@Override
public int mark() {
// the marker handed back is the current buffer offset p
int m = p;
// NOTE(review): the minMarker lines below look like the pre-rename
// versions of the earliestMarker lines — confirm.
if ( p < minMarker ) {
throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+minMarker);
if ( p < earliestMarker) {
// they must have done seek to before min marker
throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+ earliestMarker);
}
if ( minMarker<0 ) minMarker = m; // set first marker
// only the first outstanding marker is recorded; later ones are just returned
if ( earliestMarker < 0 ) earliestMarker = m; // set first marker
return m;
}
/** Release a marker obtained from mark().
 *
 * @param marker buffer offset previously returned by mark()
 * @throws IllegalArgumentException if marker is below earliestMarker or
 *         is not below n
 */
@Override
public void release(int marker) {
// NOTE(review): the minMarker line looks like the pre-change body — confirm.
if ( marker == minMarker ) minMarker = -1;
// release is noop unless we remove earliest. then we don't need to
// keep anything in buffer. We only care about earliest. Releasing
// marker other than earliest does nothing as we can just keep in
// buffer.
if ( marker < earliestMarker || marker >= n ) {
// NOTE(review): the message prints 0..n, but the lower bound actually
// checked is earliestMarker and n itself is rejected
throw new IllegalArgumentException("invalid marker: "+
marker+" not in "+0+".."+n);
}
if ( marker == earliestMarker) earliestMarker = -1;
}
/** Absolute index of the cursor: the buffer offset p plus the absolute
 * index of data[0].
 * NOTE(review): `return 0;` looks like the removed line of this diff
 * view and the second return its replacement — confirm.
 */
@Override
public int index() {
return 0;
return p + bufferStartIndex;
}
/** Move the cursor to absolute position {@code index}, which must lie
 * inside the currently buffered window.
 *
 * @param index absolute char index to move to
 * @throws UnsupportedOperationException if index is before
 *         bufferStartIndex or not before bufferStartIndex+n
 */
@Override
public void seek(int index) {
// NOTE(review): `p = index;` looks like the removed pre-change line — confirm.
p = index;
// an index equal to bufferStartIndex should set p to 0
int i = index - bufferStartIndex;
if ( i < 0 || i >= n ) {
throw new UnsupportedOperationException("seek to index outside buffer: "+
index+" not in "+bufferStartIndex+".."+(bufferStartIndex+n));
}
p = i;
}
@Override

View File

@ -53,7 +53,8 @@ public interface IntStream {
int mark();
/** Release requirement that stream holds tokens from marked location
* to current index().
* to current index(). Must release in reverse order (like stack)
* of mark() otherwise undefined behavior.
*/
void release(int marker);
@ -65,18 +66,16 @@ public interface IntStream {
/** Set the input cursor to the position indicated by index. This is
* normally used to rewind the input stream but can move forward as well.
* It's up to the stream implementation to make sure that tokens are
* buffered as necessary to make seek land on a valid token.
* It's up to the stream implementation to make sure that symbols are
* buffered as necessary to make seek land on a valid symbol.
* Or, they should avoid moving the input cursor.
*
* For char streams, seeking forward must update the stream state such
* as line number. For seeking backwards, you will be presumably
* backtracking using the mark/rewind mechanism that restores state and
* so this method does not need to update state when seeking backwards.
*
* The index is 0..n-1. A seek to position i means that LA(1) will
* return the ith symbol. So, seeking to 0 means LA(1) will return the
* first element in the stream.
*
* For unbuffered streams, index i might not be in the buffer; in that
* case seek throws an exception instead of moving the cursor.
*/
void seek(int index);

View File

@ -76,6 +76,55 @@ public class TestUnbufferedInputStream extends BaseTest {
assertEquals(CharStream.EOF, input.LA(3));
}
// Looks ahead over all five chars plus EOF without consuming, starting
// from a 2-char buffer; presumably this forces the buffer to grow.
@Test public void testBufferExpand() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("01234"),
2 // buff size 2
);
assertEquals('0', input.LA(1));
assertEquals('1', input.LA(2));
assertEquals('2', input.LA(3));
assertEquals('3', input.LA(4));
assertEquals('4', input.LA(5));
assertEquals(CharStream.EOF, input.LA(6));
}
// Alternates LA(1) and consume() with a 1-char buffer and no markers,
// checking each char in order and then EOF.
@Test public void testBufferWrapSize1() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("01234"),
1 // buff size 1
);
assertEquals('0', input.LA(1));
input.consume();
assertEquals('1', input.LA(1));
input.consume();
assertEquals('2', input.LA(1));
input.consume();
assertEquals('3', input.LA(1));
input.consume();
assertEquals('4', input.LA(1));
input.consume();
assertEquals(CharStream.EOF, input.LA(1));
}
// Same LA(1)/consume() walk as the size-1 test, but with a 2-char buffer.
@Test public void testBufferWrapSize2() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("01234"),
2 // buff size 2
);
assertEquals('0', input.LA(1));
input.consume();
assertEquals('1', input.LA(1));
input.consume();
assertEquals('2', input.LA(1));
input.consume();
assertEquals('3', input.LA(1));
input.consume();
assertEquals('4', input.LA(1));
input.consume();
assertEquals(CharStream.EOF, input.LA(1));
}
@Test public void test1Mark() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("xyz")
@ -88,6 +137,24 @@ public class TestUnbufferedInputStream extends BaseTest {
assertEquals(CharStream.EOF, input.LA(4));
}
// Takes two markers while walking "xyz" with a 2-char buffer, releases
// the later one first and the earlier one last, then expects EOF.
@Test public void test2Mark() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("xyz"),
2
);
assertEquals('x', input.LA(1));
input.consume();
int m1 = input.mark();
assertEquals('y', input.LA(1));
input.consume();
int m2 = input.mark();
assertEquals('z', input.LA(1));
input.release(m2); // noop since not earliest in buf
input.consume();
input.release(m1);
assertEquals(CharStream.EOF, input.LA(1));
}
// @Test public void testFirstToken() throws Exception {
// LexerGrammar g = new LexerGrammar(
// "lexer grammar t;\n"+