Unbuffered char stream now working

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9425]
This commit is contained in:
parrt 2011-11-22 12:32:15 -08:00
parent 45dc683a91
commit 31c24292cd
3 changed files with 152 additions and 50 deletions

View File

@ -36,7 +36,7 @@ import java.io.Reader;
public class ANTLRUnbufferedInputStream implements CharStream { public class ANTLRUnbufferedInputStream implements CharStream {
/** A buffer of the data being scanned */ /** A buffer of the data being scanned */
protected int[] data = new int[256]; protected char[] data;
/** How many characters are actually in the buffer */ /** How many characters are actually in the buffer */
protected int n; protected int n;
@ -44,12 +44,15 @@ public class ANTLRUnbufferedInputStream implements CharStream {
/** 0..n-1 index into string of next char */ /** 0..n-1 index into string of next char */
protected int p=0; protected int p=0;
protected int minMarker = -1; protected int earliestMarker = -1;
/** Absolute char index. It's the index of the char about to be /** Absolute char index. It's the index of the char about to be
* read via LA(1). Goes from 0 to numchar-1. * read via LA(1). Goes from 0 to numchar-1.
*/ */
protected int currentElementIndex = 0; protected int currentCharIndex = 0;
/** Buf is window into stream. This is absolute index of data[0] */
protected int bufferStartIndex = 0;
protected Reader input; protected Reader input;
@ -57,29 +60,42 @@ public class ANTLRUnbufferedInputStream implements CharStream {
public String name; public String name;
public ANTLRUnbufferedInputStream(InputStream input) { public ANTLRUnbufferedInputStream(InputStream input) {
this.input = new InputStreamReader(input); this(input, 256);
} }
public ANTLRUnbufferedInputStream(Reader input) { public ANTLRUnbufferedInputStream(Reader input) {
this(input, 256);
}
public ANTLRUnbufferedInputStream(InputStream input, int bufferSize) {
this.input = new InputStreamReader(input);
data = new char[bufferSize];
}
public ANTLRUnbufferedInputStream(Reader input, int bufferSize) {
this.input = input; this.input = input;
data = new char[bufferSize];
} }
public void reset() { public void reset() {
p = 0; p = 0;
minMarker = -1; earliestMarker = -1;
currentElementIndex = 0; currentCharIndex = 0;
bufferStartIndex = 0;
n = 0; n = 0;
} }
@Override @Override
public void consume() { public void consume() {
sync(1);
p++; p++;
currentCharIndex++;
// have we hit end of buffer when no markers? // have we hit end of buffer when no markers?
if ( p==n && minMarker<0 ) { if ( p==n && earliestMarker < 0 ) {
// if so, it's an opportunity to start filling at index 0 again // if so, it's an opportunity to start filling at index 0 again
// System.out.println("p=="+n+", no marker; reset buf start index="+currentCharIndex);
p = 0; p = 0;
n = 0; n = 0;
bufferStartIndex = currentCharIndex;
} }
} }
@ -106,12 +122,12 @@ public class ANTLRUnbufferedInputStream implements CharStream {
} }
protected void add(int c) { protected void add(int c) {
if ( p>=data.length ) { if ( n>=data.length ) {
int[] newdata = new int[data.length*2]; // resize char[] newdata = new char[data.length*2]; // resize
System.arraycopy(data, 0, newdata, 0, data.length); System.arraycopy(data, 0, newdata, 0, data.length);
data = newdata; data = newdata;
} }
data[n++] = c; data[n++] = (char)c;
} }
@Override @Override
@ -120,32 +136,52 @@ public class ANTLRUnbufferedInputStream implements CharStream {
int index = p + i - 1; int index = p + i - 1;
if ( index < 0 ) throw new IndexOutOfBoundsException(); if ( index < 0 ) throw new IndexOutOfBoundsException();
if ( index > n ) return CharStream.EOF; if ( index > n ) return CharStream.EOF;
return data[index]; int c = data[index];
if ( c==(char)CharStream.EOF ) return CharStream.EOF;
return c;
} }
/** Return a marker that we can release later. Marker happens to be
* index into buffer (not index()).
*/
@Override @Override
public int mark() { public int mark() {
int m = p; int m = p;
if ( p < minMarker ) { if ( p < earliestMarker) {
throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+minMarker); // they must have done seek to before min marker
throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+ earliestMarker);
} }
if ( minMarker<0 ) minMarker = m; // set first marker if ( earliestMarker < 0 ) earliestMarker = m; // set first marker
return m; return m;
} }
@Override @Override
public void release(int marker) { public void release(int marker) {
if ( marker == minMarker ) minMarker = -1; // release is noop unless we remove earliest. then we don't need to
// keep anything in buffer. We only care about earliest. Releasing
// marker other than earliest does nothing as we can just keep in
// buffer.
if ( marker < earliestMarker || marker >= n ) {
throw new IllegalArgumentException("invalid marker: "+
marker+" not in "+0+".."+n);
}
if ( marker == earliestMarker) earliestMarker = -1;
} }
@Override @Override
public int index() { public int index() {
return 0; return p + bufferStartIndex;
} }
@Override @Override
public void seek(int index) { public void seek(int index) {
p = index; // index == to bufferStartIndex should set p to 0
int i = index - bufferStartIndex;
if ( i < 0 || i >= n ) {
throw new UnsupportedOperationException("seek to index outside buffer: "+
index+" not in "+bufferStartIndex+".."+(bufferStartIndex+n));
}
p = i;
} }
@Override @Override

View File

@ -53,7 +53,8 @@ public interface IntStream {
int mark(); int mark();
/** Release requirement that stream holds tokens from marked location /** Release requirement that stream holds tokens from marked location
* to current index(). * to current index(). Markers must be released in the reverse
* order of their mark() calls (like a stack); otherwise behavior is undefined.
*/ */
void release(int marker); void release(int marker);
@ -65,18 +66,16 @@ public interface IntStream {
/** Set the input cursor to the position indicated by index. This is /** Set the input cursor to the position indicated by index. This is
* normally used to rewind the input stream but can move forward as well. * normally used to rewind the input stream but can move forward as well.
* It's up to the stream implementation to make sure that tokens are * It's up to the stream implementation to make sure that symbols are
* buffered as necessary to make seek land on a valid token. * buffered as necessary to make seek land on a valid symbol.
* Or, they should avoid moving the input cursor. * Or, they should avoid moving the input cursor.
* *
* For char streams, seeking forward must update the stream state such
* as line number. For seeking backwards, you will be presumably
* backtracking using the mark/rewind mechanism that restores state and
* so this method does not need to update state when seeking backwards.
*
* The index is 0..n-1. A seek to position i means that LA(1) will * The index is 0..n-1. A seek to position i means that LA(1) will
* return the ith symbol. So, seeking to 0 means LA(1) will return the * return the ith symbol. So, seeking to 0 means LA(1) will return the
* first element in the stream. * first element in the stream.
*
* For unbuffered streams, index i might not be in the buffer; in that
* case seek throws an exception instead of moving the input cursor.
*/ */
void seek(int index); void seek(int index);

View File

@ -76,6 +76,55 @@ public class TestUnbufferedInputStream extends BaseTest {
assertEquals(CharStream.EOF, input.LA(3)); assertEquals(CharStream.EOF, input.LA(3));
} }
@Test public void testBufferExpand() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("01234"),
2 // buff size 2
);
assertEquals('0', input.LA(1));
assertEquals('1', input.LA(2));
assertEquals('2', input.LA(3));
assertEquals('3', input.LA(4));
assertEquals('4', input.LA(5));
assertEquals(CharStream.EOF, input.LA(6));
}
@Test public void testBufferWrapSize1() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("01234"),
1 // buff size 1
);
assertEquals('0', input.LA(1));
input.consume();
assertEquals('1', input.LA(1));
input.consume();
assertEquals('2', input.LA(1));
input.consume();
assertEquals('3', input.LA(1));
input.consume();
assertEquals('4', input.LA(1));
input.consume();
assertEquals(CharStream.EOF, input.LA(1));
}
@Test public void testBufferWrapSize2() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("01234"),
2 // buff size 2
);
assertEquals('0', input.LA(1));
input.consume();
assertEquals('1', input.LA(1));
input.consume();
assertEquals('2', input.LA(1));
input.consume();
assertEquals('3', input.LA(1));
input.consume();
assertEquals('4', input.LA(1));
input.consume();
assertEquals(CharStream.EOF, input.LA(1));
}
@Test public void test1Mark() throws Exception { @Test public void test1Mark() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream( CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("xyz") new StringReader("xyz")
@ -88,6 +137,24 @@ public class TestUnbufferedInputStream extends BaseTest {
assertEquals(CharStream.EOF, input.LA(4)); assertEquals(CharStream.EOF, input.LA(4));
} }
@Test public void test2Mark() throws Exception {
CharStream input = new ANTLRUnbufferedInputStream(
new StringReader("xyz"),
2
);
assertEquals('x', input.LA(1));
input.consume();
int m1 = input.mark();
assertEquals('y', input.LA(1));
input.consume();
int m2 = input.mark();
assertEquals('z', input.LA(1));
input.release(m2); // noop since not earliest in buf
input.consume();
input.release(m1);
assertEquals(CharStream.EOF, input.LA(1));
}
// @Test public void testFirstToken() throws Exception { // @Test public void testFirstToken() throws Exception {
// LexerGrammar g = new LexerGrammar( // LexerGrammar g = new LexerGrammar(
// "lexer grammar t;\n"+ // "lexer grammar t;\n"+