forked from jasder/antlr
unbuff char stream working
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9425]
This commit is contained in:
parent
45dc683a91
commit
31c24292cd
|
@ -36,7 +36,7 @@ import java.io.Reader;
|
|||
|
||||
public class ANTLRUnbufferedInputStream implements CharStream {
|
||||
/** A buffer of the data being scanned */
|
||||
protected int[] data = new int[256];
|
||||
protected char[] data;
|
||||
|
||||
/** How many characters are actually in the buffer */
|
||||
protected int n;
|
||||
|
@ -44,12 +44,15 @@ public class ANTLRUnbufferedInputStream implements CharStream {
|
|||
/** 0..n-1 index into the buffer of the next char */
|
||||
protected int p=0;
|
||||
|
||||
protected int minMarker = -1;
|
||||
protected int earliestMarker = -1;
|
||||
|
||||
/** Absolute char index. It's the index of the char about to be
|
||||
* read via LA(1). Goes from 0 to numchar-1.
|
||||
*/
|
||||
protected int currentElementIndex = 0;
|
||||
protected int currentCharIndex = 0;
|
||||
|
||||
/** The buffer is a window into the stream. This is the absolute index of data[0]. */
|
||||
protected int bufferStartIndex = 0;
|
||||
|
||||
protected Reader input;
|
||||
|
||||
|
@ -57,29 +60,42 @@ public class ANTLRUnbufferedInputStream implements CharStream {
|
|||
public String name;
|
||||
|
||||
public ANTLRUnbufferedInputStream(InputStream input) {
|
||||
this.input = new InputStreamReader(input);
|
||||
this(input, 256);
|
||||
}
|
||||
|
||||
public ANTLRUnbufferedInputStream(Reader input) {
|
||||
this(input, 256);
|
||||
}
|
||||
|
||||
public ANTLRUnbufferedInputStream(InputStream input, int bufferSize) {
|
||||
this.input = new InputStreamReader(input);
|
||||
data = new char[bufferSize];
|
||||
}
|
||||
|
||||
public ANTLRUnbufferedInputStream(Reader input, int bufferSize) {
|
||||
this.input = input;
|
||||
data = new char[bufferSize];
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
p = 0;
|
||||
minMarker = -1;
|
||||
currentElementIndex = 0;
|
||||
earliestMarker = -1;
|
||||
currentCharIndex = 0;
|
||||
bufferStartIndex = 0;
|
||||
n = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void consume() {
|
||||
sync(1);
|
||||
p++;
|
||||
currentCharIndex++;
|
||||
// have we hit end of buffer when no markers?
|
||||
if ( p==n && minMarker<0 ) {
|
||||
if ( p==n && earliestMarker < 0 ) {
|
||||
// if so, it's an opportunity to start filling at index 0 again
|
||||
// System.out.println("p=="+n+", no marker; reset buf start index="+currentCharIndex);
|
||||
p = 0;
|
||||
n = 0;
|
||||
bufferStartIndex = currentCharIndex;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -106,12 +122,12 @@ public class ANTLRUnbufferedInputStream implements CharStream {
|
|||
}
|
||||
|
||||
protected void add(int c) {
|
||||
if ( p>=data.length ) {
|
||||
int[] newdata = new int[data.length*2]; // resize
|
||||
if ( n>=data.length ) {
|
||||
char[] newdata = new char[data.length*2]; // resize
|
||||
System.arraycopy(data, 0, newdata, 0, data.length);
|
||||
data = newdata;
|
||||
}
|
||||
data[n++] = c;
|
||||
data[n++] = (char)c;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -120,32 +136,52 @@ public class ANTLRUnbufferedInputStream implements CharStream {
|
|||
int index = p + i - 1;
|
||||
if ( index < 0 ) throw new IndexOutOfBoundsException();
|
||||
if ( index > n ) return CharStream.EOF;
|
||||
return data[index];
|
||||
int c = data[index];
|
||||
if ( c==(char)CharStream.EOF ) return CharStream.EOF;
|
||||
return c;
|
||||
}
|
||||
|
||||
/** Return a marker that we can release later. The marker happens to be
|
||||
* an index into the buffer (not the absolute stream index from index()).
|
||||
*/
|
||||
@Override
|
||||
public int mark() {
|
||||
int m = p;
|
||||
if ( p < minMarker ) {
|
||||
throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+minMarker);
|
||||
if ( p < earliestMarker) {
|
||||
// the caller must have done a seek to a position before the earliest marker
|
||||
throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+ earliestMarker);
|
||||
}
|
||||
if ( minMarker<0 ) minMarker = m; // set first marker
|
||||
if ( earliestMarker < 0 ) earliestMarker = m; // set first marker
|
||||
return m;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void release(int marker) {
|
||||
if ( marker == minMarker ) minMarker = -1;
|
||||
// release is a no-op unless we release the earliest marker; once that
|
||||
// one is gone we don't need to keep anything in the buffer. We only care
|
||||
// about the earliest marker. Releasing any other marker does nothing,
|
||||
// because we can simply keep its data in the buffer.
|
||||
if ( marker < earliestMarker || marker >= n ) {
|
||||
throw new IllegalArgumentException("invalid marker: "+
|
||||
marker+" not in "+0+".."+n);
|
||||
}
|
||||
if ( marker == earliestMarker) earliestMarker = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int index() {
|
||||
return 0;
|
||||
return p + bufferStartIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seek(int index) {
|
||||
p = index;
|
||||
// index == to bufferStartIndex should set p to 0
|
||||
int i = index - bufferStartIndex;
|
||||
if ( i < 0 || i >= n ) {
|
||||
throw new UnsupportedOperationException("seek to index outside buffer: "+
|
||||
index+" not in "+bufferStartIndex+".."+(bufferStartIndex+n));
|
||||
}
|
||||
p = i;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -53,7 +53,8 @@ public interface IntStream {
|
|||
int mark();
|
||||
|
||||
/** Release requirement that stream holds tokens from marked location
|
||||
* to current index().
|
||||
* to current index(). Markers must be released in the reverse order
|
||||
* of the mark() calls (like a stack); otherwise behavior is undefined.
|
||||
*/
|
||||
void release(int marker);
|
||||
|
||||
|
@ -65,18 +66,16 @@ public interface IntStream {
|
|||
|
||||
/** Set the input cursor to the position indicated by index. This is
|
||||
* normally used to rewind the input stream but can move forward as well.
|
||||
* It's up to the stream implementation to make sure that tokens are
|
||||
* buffered as necessary to make seek land on a valid token.
|
||||
* It's up to the stream implementation to make sure that symbols are
|
||||
* buffered as necessary to make seek land on a valid symbol.
|
||||
* Or, they should avoid moving the input cursor.
|
||||
*
|
||||
* For char streams, seeking forward must update the stream state such
|
||||
* as line number. For seeking backwards, you will be presumably
|
||||
* backtracking using the mark/rewind mechanism that restores state and
|
||||
* so this method does not need to update state when seeking backwards.
|
||||
*
|
||||
* The index is 0..n-1. A seek to position i means that LA(1) will
|
||||
* return the ith symbol. So, seeking to 0 means LA(1) will return the
|
||||
* first element in the stream.
|
||||
*
|
||||
* For unbuffered streams, index i might not be in the buffer; seeking there
|
||||
* throws an exception (UnsupportedOperationException in ANTLRUnbufferedInputStream).
|
||||
*/
|
||||
void seek(int index);
|
||||
|
||||
|
|
|
@ -76,6 +76,55 @@ public class TestUnbufferedInputStream extends BaseTest {
|
|||
assertEquals(CharStream.EOF, input.LA(3));
|
||||
}
|
||||
|
||||
@Test public void testBufferExpand() throws Exception {
|
||||
CharStream input = new ANTLRUnbufferedInputStream(
|
||||
new StringReader("01234"),
|
||||
2 // buff size 2
|
||||
);
|
||||
assertEquals('0', input.LA(1));
|
||||
assertEquals('1', input.LA(2));
|
||||
assertEquals('2', input.LA(3));
|
||||
assertEquals('3', input.LA(4));
|
||||
assertEquals('4', input.LA(5));
|
||||
assertEquals(CharStream.EOF, input.LA(6));
|
||||
}
|
||||
|
||||
@Test public void testBufferWrapSize1() throws Exception {
|
||||
CharStream input = new ANTLRUnbufferedInputStream(
|
||||
new StringReader("01234"),
|
||||
1 // buff size 1
|
||||
);
|
||||
assertEquals('0', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('1', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('2', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('3', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('4', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
}
|
||||
|
||||
@Test public void testBufferWrapSize2() throws Exception {
|
||||
CharStream input = new ANTLRUnbufferedInputStream(
|
||||
new StringReader("01234"),
|
||||
2 // buff size 2
|
||||
);
|
||||
assertEquals('0', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('1', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('2', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('3', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals('4', input.LA(1));
|
||||
input.consume();
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
}
|
||||
|
||||
@Test public void test1Mark() throws Exception {
|
||||
CharStream input = new ANTLRUnbufferedInputStream(
|
||||
new StringReader("xyz")
|
||||
|
@ -88,6 +137,24 @@ public class TestUnbufferedInputStream extends BaseTest {
|
|||
assertEquals(CharStream.EOF, input.LA(4));
|
||||
}
|
||||
|
||||
@Test public void test2Mark() throws Exception {
|
||||
CharStream input = new ANTLRUnbufferedInputStream(
|
||||
new StringReader("xyz"),
|
||||
2
|
||||
);
|
||||
assertEquals('x', input.LA(1));
|
||||
input.consume();
|
||||
int m1 = input.mark();
|
||||
assertEquals('y', input.LA(1));
|
||||
input.consume();
|
||||
int m2 = input.mark();
|
||||
assertEquals('z', input.LA(1));
|
||||
input.release(m2); // noop since not earliest in buf
|
||||
input.consume();
|
||||
input.release(m1);
|
||||
assertEquals(CharStream.EOF, input.LA(1));
|
||||
}
|
||||
|
||||
// @Test public void testFirstToken() throws Exception {
|
||||
// LexerGrammar g = new LexerGrammar(
|
||||
// "lexer grammar t;\n"+
|
||||
|
|
Loading…
Reference in New Issue