forked from jasder/antlr
unbuff char stream working
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9425]
This commit is contained in:
parent
45dc683a91
commit
31c24292cd
|
@ -36,7 +36,7 @@ import java.io.Reader;
|
||||||
|
|
||||||
public class ANTLRUnbufferedInputStream implements CharStream {
|
public class ANTLRUnbufferedInputStream implements CharStream {
|
||||||
/** A buffer of the data being scanned */
|
/** A buffer of the data being scanned */
|
||||||
protected int[] data = new int[256];
|
protected char[] data;
|
||||||
|
|
||||||
/** How many characters are actually in the buffer */
|
/** How many characters are actually in the buffer */
|
||||||
protected int n;
|
protected int n;
|
||||||
|
@ -44,12 +44,15 @@ public class ANTLRUnbufferedInputStream implements CharStream {
|
||||||
/** 0..n-1 index into string of next char */
|
/** 0..n-1 index into string of next char */
|
||||||
protected int p=0;
|
protected int p=0;
|
||||||
|
|
||||||
protected int minMarker = -1;
|
protected int earliestMarker = -1;
|
||||||
|
|
||||||
/** Absolute char index. It's the index of the char about to be
|
/** Absolute char index. It's the index of the char about to be
|
||||||
* read via LA(1). Goes from 0 to numchar-1.
|
* read via LA(1). Goes from 0 to numchar-1.
|
||||||
*/
|
*/
|
||||||
protected int currentElementIndex = 0;
|
protected int currentCharIndex = 0;
|
||||||
|
|
||||||
|
/** Buf is window into stream. This is absolute index of data[0] */
|
||||||
|
protected int bufferStartIndex = 0;
|
||||||
|
|
||||||
protected Reader input;
|
protected Reader input;
|
||||||
|
|
||||||
|
@ -57,29 +60,42 @@ public class ANTLRUnbufferedInputStream implements CharStream {
|
||||||
public String name;
|
public String name;
|
||||||
|
|
||||||
public ANTLRUnbufferedInputStream(InputStream input) {
|
public ANTLRUnbufferedInputStream(InputStream input) {
|
||||||
this.input = new InputStreamReader(input);
|
this(input, 256);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ANTLRUnbufferedInputStream(Reader input) {
|
public ANTLRUnbufferedInputStream(Reader input) {
|
||||||
|
this(input, 256);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ANTLRUnbufferedInputStream(InputStream input, int bufferSize) {
|
||||||
|
this.input = new InputStreamReader(input);
|
||||||
|
data = new char[bufferSize];
|
||||||
|
}
|
||||||
|
|
||||||
|
public ANTLRUnbufferedInputStream(Reader input, int bufferSize) {
|
||||||
this.input = input;
|
this.input = input;
|
||||||
|
data = new char[bufferSize];
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset() {
|
public void reset() {
|
||||||
p = 0;
|
p = 0;
|
||||||
minMarker = -1;
|
earliestMarker = -1;
|
||||||
currentElementIndex = 0;
|
currentCharIndex = 0;
|
||||||
|
bufferStartIndex = 0;
|
||||||
n = 0;
|
n = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void consume() {
|
public void consume() {
|
||||||
sync(1);
|
|
||||||
p++;
|
p++;
|
||||||
|
currentCharIndex++;
|
||||||
// have we hit end of buffer when no markers?
|
// have we hit end of buffer when no markers?
|
||||||
if ( p==n && minMarker<0 ) {
|
if ( p==n && earliestMarker < 0 ) {
|
||||||
// if so, it's an opportunity to start filling at index 0 again
|
// if so, it's an opportunity to start filling at index 0 again
|
||||||
|
// System.out.println("p=="+n+", no marker; reset buf start index="+currentCharIndex);
|
||||||
p = 0;
|
p = 0;
|
||||||
n = 0;
|
n = 0;
|
||||||
|
bufferStartIndex = currentCharIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,12 +122,12 @@ public class ANTLRUnbufferedInputStream implements CharStream {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void add(int c) {
|
protected void add(int c) {
|
||||||
if ( p>=data.length ) {
|
if ( n>=data.length ) {
|
||||||
int[] newdata = new int[data.length*2]; // resize
|
char[] newdata = new char[data.length*2]; // resize
|
||||||
System.arraycopy(data, 0, newdata, 0, data.length);
|
System.arraycopy(data, 0, newdata, 0, data.length);
|
||||||
data = newdata;
|
data = newdata;
|
||||||
}
|
}
|
||||||
data[n++] = c;
|
data[n++] = (char)c;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -120,32 +136,52 @@ public class ANTLRUnbufferedInputStream implements CharStream {
|
||||||
int index = p + i - 1;
|
int index = p + i - 1;
|
||||||
if ( index < 0 ) throw new IndexOutOfBoundsException();
|
if ( index < 0 ) throw new IndexOutOfBoundsException();
|
||||||
if ( index > n ) return CharStream.EOF;
|
if ( index > n ) return CharStream.EOF;
|
||||||
return data[index];
|
int c = data[index];
|
||||||
|
if ( c==(char)CharStream.EOF ) return CharStream.EOF;
|
||||||
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Return a marker that we can release later. Marker happens to be
|
||||||
|
* index into buffer (not index()).
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int mark() {
|
public int mark() {
|
||||||
int m = p;
|
int m = p;
|
||||||
if ( p < minMarker ) {
|
if ( p < earliestMarker) {
|
||||||
throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+minMarker);
|
// they must have done seek to before min marker
|
||||||
|
throw new IllegalArgumentException("can't set marker earlier than previous existing marker: "+p+"<"+ earliestMarker);
|
||||||
}
|
}
|
||||||
if ( minMarker<0 ) minMarker = m; // set first marker
|
if ( earliestMarker < 0 ) earliestMarker = m; // set first marker
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void release(int marker) {
|
public void release(int marker) {
|
||||||
if ( marker == minMarker ) minMarker = -1;
|
// release is noop unless we remove earliest. then we don't need to
|
||||||
|
// keep anything in buffer. We only care about earliest. Releasing
|
||||||
|
// marker other than earliest does nothing as we can just keep in
|
||||||
|
// buffer.
|
||||||
|
if ( marker < earliestMarker || marker >= n ) {
|
||||||
|
throw new IllegalArgumentException("invalid marker: "+
|
||||||
|
marker+" not in "+0+".."+n);
|
||||||
|
}
|
||||||
|
if ( marker == earliestMarker) earliestMarker = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int index() {
|
public int index() {
|
||||||
return 0;
|
return p + bufferStartIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void seek(int index) {
|
public void seek(int index) {
|
||||||
p = index;
|
// index == to bufferStartIndex should set p to 0
|
||||||
|
int i = index - bufferStartIndex;
|
||||||
|
if ( i < 0 || i >= n ) {
|
||||||
|
throw new UnsupportedOperationException("seek to index outside buffer: "+
|
||||||
|
index+" not in "+bufferStartIndex+".."+(bufferStartIndex+n));
|
||||||
|
}
|
||||||
|
p = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -53,7 +53,8 @@ public interface IntStream {
|
||||||
int mark();
|
int mark();
|
||||||
|
|
||||||
/** Release requirement that stream holds tokens from marked location
|
/** Release requirement that stream holds tokens from marked location
|
||||||
* to current index().
|
* to current index(). Must release in reverse order (like stack)
|
||||||
|
* of mark() otherwise undefined behavior.
|
||||||
*/
|
*/
|
||||||
void release(int marker);
|
void release(int marker);
|
||||||
|
|
||||||
|
@ -65,18 +66,16 @@ public interface IntStream {
|
||||||
|
|
||||||
/** Set the input cursor to the position indicated by index. This is
|
/** Set the input cursor to the position indicated by index. This is
|
||||||
* normally used to rewind the input stream but can move forward as well.
|
* normally used to rewind the input stream but can move forward as well.
|
||||||
* It's up to the stream implementation to make sure that tokens are
|
* It's up to the stream implementation to make sure that symbols are
|
||||||
* buffered as necessary to make seek land on a valid token.
|
* buffered as necessary to make seek land on a valid symbol.
|
||||||
* Or, they should avoid moving the input cursor.
|
* Or, they should avoid moving the input cursor.
|
||||||
*
|
*
|
||||||
* For char streams, seeking forward must update the stream state such
|
|
||||||
* as line number. For seeking backwards, you will be presumably
|
|
||||||
* backtracking using the mark/rewind mechanism that restores state and
|
|
||||||
* so this method does not need to update state when seeking backwards.
|
|
||||||
*
|
|
||||||
* The index is 0..n-1. A seek to position i means that LA(1) will
|
* The index is 0..n-1. A seek to position i means that LA(1) will
|
||||||
* return the ith symbol. So, seeking to 0 means LA(1) will return the
|
* return the ith symbol. So, seeking to 0 means LA(1) will return the
|
||||||
* first element in the stream.
|
* first element in the stream.
|
||||||
|
*
|
||||||
|
* For unbuffered streams, index i might not be in buffer. That throws
|
||||||
|
* index exception
|
||||||
*/
|
*/
|
||||||
void seek(int index);
|
void seek(int index);
|
||||||
|
|
||||||
|
|
|
@ -76,6 +76,55 @@ public class TestUnbufferedInputStream extends BaseTest {
|
||||||
assertEquals(CharStream.EOF, input.LA(3));
|
assertEquals(CharStream.EOF, input.LA(3));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void testBufferExpand() throws Exception {
|
||||||
|
CharStream input = new ANTLRUnbufferedInputStream(
|
||||||
|
new StringReader("01234"),
|
||||||
|
2 // buff size 2
|
||||||
|
);
|
||||||
|
assertEquals('0', input.LA(1));
|
||||||
|
assertEquals('1', input.LA(2));
|
||||||
|
assertEquals('2', input.LA(3));
|
||||||
|
assertEquals('3', input.LA(4));
|
||||||
|
assertEquals('4', input.LA(5));
|
||||||
|
assertEquals(CharStream.EOF, input.LA(6));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testBufferWrapSize1() throws Exception {
|
||||||
|
CharStream input = new ANTLRUnbufferedInputStream(
|
||||||
|
new StringReader("01234"),
|
||||||
|
1 // buff size 1
|
||||||
|
);
|
||||||
|
assertEquals('0', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals('1', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals('2', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals('3', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals('4', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals(CharStream.EOF, input.LA(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testBufferWrapSize2() throws Exception {
|
||||||
|
CharStream input = new ANTLRUnbufferedInputStream(
|
||||||
|
new StringReader("01234"),
|
||||||
|
2 // buff size 2
|
||||||
|
);
|
||||||
|
assertEquals('0', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals('1', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals('2', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals('3', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals('4', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
assertEquals(CharStream.EOF, input.LA(1));
|
||||||
|
}
|
||||||
|
|
||||||
@Test public void test1Mark() throws Exception {
|
@Test public void test1Mark() throws Exception {
|
||||||
CharStream input = new ANTLRUnbufferedInputStream(
|
CharStream input = new ANTLRUnbufferedInputStream(
|
||||||
new StringReader("xyz")
|
new StringReader("xyz")
|
||||||
|
@ -88,6 +137,24 @@ public class TestUnbufferedInputStream extends BaseTest {
|
||||||
assertEquals(CharStream.EOF, input.LA(4));
|
assertEquals(CharStream.EOF, input.LA(4));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void test2Mark() throws Exception {
|
||||||
|
CharStream input = new ANTLRUnbufferedInputStream(
|
||||||
|
new StringReader("xyz"),
|
||||||
|
2
|
||||||
|
);
|
||||||
|
assertEquals('x', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
int m1 = input.mark();
|
||||||
|
assertEquals('y', input.LA(1));
|
||||||
|
input.consume();
|
||||||
|
int m2 = input.mark();
|
||||||
|
assertEquals('z', input.LA(1));
|
||||||
|
input.release(m2); // noop since not earliest in buf
|
||||||
|
input.consume();
|
||||||
|
input.release(m1);
|
||||||
|
assertEquals(CharStream.EOF, input.LA(1));
|
||||||
|
}
|
||||||
|
|
||||||
// @Test public void testFirstToken() throws Exception {
|
// @Test public void testFirstToken() throws Exception {
|
||||||
// LexerGrammar g = new LexerGrammar(
|
// LexerGrammar g = new LexerGrammar(
|
||||||
// "lexer grammar t;\n"+
|
// "lexer grammar t;\n"+
|
||||||
|
|
Loading…
Reference in New Issue