diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java index 381b418f7..a50f51698 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java @@ -33,13 +33,12 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -/** This is a char buffer stream that is loaded from a file - * all at once when you construct the object. This looks very - * much like an ANTLReader or ANTLRInputStream, but it's a special case +/** This is an ANTLRInputStream that is loaded from a file + * all at once when you construct the object. This is a special case * since we know the exact size of the object to load. We can avoid lots * of data copying. */ -public class ANTLRFileStream extends ANTLRStringStream { +public class ANTLRFileStream extends ANTLRInputStream { protected String fileName; public ANTLRFileStream(String fileName) throws IOException { diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java index 2aea7d951..c7951448a 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java @@ -1,18 +1,19 @@ /* [The "BSD license"] - Copyright (c) 2005-2009 Terence Parr + Copyright (c) 2011 Terence Parr All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. + derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES @@ -32,99 +33,197 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; -/** Vacuum all input from a Reader/InputStream and then treat it like a StringStream. - * Manage the buffer manually to avoid unnecessary data copying. +/** Vacuum all input from a Reader/InputStream and then treat it like a char[] buffer. + * Can also pass in a string or char[] to use. * - * If you need encoding, use ANTLRInputStream. + * If you need encoding, pass in stream/reader with correct encoding. */ -public class ANTLRInputStream extends ANTLRStringStream { - public static final int READ_BUFFER_SIZE = 1024; - public static final int INITIAL_BUFFER_SIZE = 1024; +public class ANTLRInputStream implements CharStream { + public static final int READ_BUFFER_SIZE = 1024; + public static final int INITIAL_BUFFER_SIZE = 1024; - public ANTLRInputStream() { + /** The data being scanned */ + protected char[] data; + + /** How many characters are actually in the buffer */ + protected int n; + + /** 0..n-1 index into string of next char */ + protected int p=0; + + /** What is name or source of this char stream? */ + public String name; + + public ANTLRInputStream() { } + + /** Copy data in string to a local char array */ + public ANTLRInputStream(String input) { + this.data = input.toCharArray(); + this.n = input.length(); } - public ANTLRInputStream(Reader r) throws IOException { - this(r, INITIAL_BUFFER_SIZE, READ_BUFFER_SIZE); + /** This is the preferred constructor for strings as no data is copied */ + public ANTLRInputStream(char[] data, int numberOfActualCharsInArray) { + this.data = data; + this.n = numberOfActualCharsInArray; } - public ANTLRInputStream(Reader r, int size) throws IOException { - this(r, size, READ_BUFFER_SIZE); - } + public ANTLRInputStream(Reader r) throws IOException { + this(r, INITIAL_BUFFER_SIZE, READ_BUFFER_SIZE); + } - public ANTLRInputStream(Reader r, int size, int readChunkSize) throws IOException { - load(r, size, readChunkSize); - } + public ANTLRInputStream(Reader r, int size) throws IOException { + this(r, size, READ_BUFFER_SIZE); + } + + public ANTLRInputStream(Reader r, int size, int readChunkSize) throws IOException { + load(r, size, readChunkSize); + } public ANTLRInputStream(InputStream input) throws IOException { - this(input, null); + this(new InputStreamReader(input), INITIAL_BUFFER_SIZE); } public ANTLRInputStream(InputStream input, int size) throws IOException { - this(input, size, null); + this(new InputStreamReader(input), size); } - public ANTLRInputStream(InputStream input, String encoding) throws IOException { - this(input, INITIAL_BUFFER_SIZE, encoding); - } - - public ANTLRInputStream(InputStream input, int size, String encoding) throws IOException { - this(input, size, READ_BUFFER_SIZE, encoding); - } - - public ANTLRInputStream(InputStream input, - int size, - int readBufferSize, - String encoding) + public void load(Reader r, int size, int readChunkSize) throws IOException { - InputStreamReader isr; - if ( encoding!=null ) { - isr = new InputStreamReader(input, encoding); + if ( r==null ) { + return; } - else { - isr = new InputStreamReader(input); + if ( size<=0 ) { + size = INITIAL_BUFFER_SIZE; + } + if ( readChunkSize<=0 ) { + readChunkSize = READ_BUFFER_SIZE; + } + // System.out.println("load "+size+" in chunks of "+readChunkSize); + try { + // alloc initial buffer size. + data = new char[size]; + // read all the data in chunks of readChunkSize + int numRead=0; + int p = 0; + do { + if ( p+readChunkSize > data.length ) { // overflow? + // System.out.println("### overflow p="+p+", data.length="+data.length); + char[] newdata = new char[data.length*2]; // resize + System.arraycopy(data, 0, newdata, 0, data.length); + data = newdata; + } + numRead = r.read(data, p, readChunkSize); + // System.out.println("read "+numRead+" chars; p was "+p+" is now "+(p+numRead)); + p += numRead; + } while (numRead!=-1); // while not EOF + // set the actual size of the data available; + // EOF subtracted one above in p+=numRead; add one back + n = p+1; + //System.out.println("n="+n); + } + finally { + r.close(); } - load(isr, size, readBufferSize); } - public void load(Reader r, int size, int readChunkSize) - throws IOException - { - if ( r==null ) { + /** Reset the stream so that it's in the same state it was + * when the object was created *except* the data array is not + * touched. + */ + public void reset() { + p = 0; + } + + @Override + public void consume() { + //System.out.println("prev p="+p+", c="+(char)data[p]); + if ( p < n ) { + p++; + //System.out.println("p moves to "+p+" (c='"+(char)data[p]+"')"); + } + } + + @Override + public int LA(int i) { + if ( i==0 ) { + return 0; // undefined + } + if ( i<0 ) { + i++; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1] + if ( (p+i-1) < 0 ) { + return CharStream.EOF; // invalid; no char before first char + } + } + + if ( (p+i-1) >= n ) { + //System.out.println("char LA("+i+")=EOF; p="+p); + return CharStream.EOF; + } + //System.out.println("char LA("+i+")="+(char)data[p+i-1]+"; p="+p); + //System.out.println("LA("+i+"); p="+p+" n="+n+" data.length="+data.length); + return data[p+i-1]; + } + + public int LT(int i) { + return LA(i); + } + + /** Return the current input symbol index 0..n where n indicates the + * last symbol has been read. The index is the index of char to + * be returned from LA(1). + */ + @Override + public int index() { + return p; + } + + @Override + public int size() { + return n; + } + + /** mark/release do nothing; we have entire buffer */ + @Override + public int mark() { + return -1; + } + + @Override + public void release(int marker) { + } + + /** consume() ahead until p==index; can't just set p=index as we must + * update line and charPositionInLine. If we seek backwards, just set p + */ + @Override + public void seek(int index) { + if ( index<=p ) { + p = index; // just jump; don't update stream state (line, ...) return; } - if ( size<=0 ) { - size = INITIAL_BUFFER_SIZE; - } - if ( readChunkSize<=0 ) { - readChunkSize = READ_BUFFER_SIZE; - } - // System.out.println("load "+size+" in chunks of "+readChunkSize); - try { - // alloc initial buffer size. - data = new char[size]; - // read all the data in chunks of readChunkSize - int numRead=0; - int p = 0; - do { - if ( p+readChunkSize > data.length ) { // overflow? - // System.out.println("### overflow p="+p+", data.length="+data.length); - char[] newdata = new char[data.length*2]; // resize - System.arraycopy(data, 0, newdata, 0, data.length); - data = newdata; - } - numRead = r.read(data, p, readChunkSize); - // System.out.println("read "+numRead+" chars; p was "+p+" is now "+(p+numRead)); - p += numRead; - } while (numRead!=-1); // while not EOF - // set the actual size of the data available; - // EOF subtracted one above in p+=numRead; add one back - super.n = p+1; - //System.out.println("n="+n); - } - finally { - r.close(); + // seek forward, consume until p hits index + while ( p= n ) stop = n-1; + int count = stop - start + 1; + if ( start >= n ) return ""; +// System.err.println("data: "+Arrays.toString(data)+", n="+n+ +// ", start="+start+ +// ", stop="+stop); + return new String(data, start, count); + } + + @Override + public String getSourceName() { + return name; + } + + public String toString() { return new String(data); } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRStringStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRStringStream.java deleted file mode 100644 index c78a79f3d..000000000 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRStringStream.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - [The "BSD license"] - Copyright (c) 2011 Terence Parr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -package org.antlr.v4.runtime; - -/** A pretty quick CharStream that pulls all data from an array - * directly. Every method call counts in the lexer. Java's - * strings aren't very good so I'm avoiding. - */ -public class ANTLRStringStream implements CharStream { - /** The data being scanned */ - protected char[] data; - - /** How many characters are actually in the buffer */ - protected int n; - - /** 0..n-1 index into string of next char */ - protected int p=0; - - /** What is name or source of this char stream? */ - public String name; - - public ANTLRStringStream() { - } - - /** Copy data in string to a local char array */ - public ANTLRStringStream(String input) { - this(); - this.data = input.toCharArray(); - this.n = input.length(); - } - - /** This is the preferred constructor as no data is copied */ - public ANTLRStringStream(char[] data, int numberOfActualCharsInArray) { - this(); - this.data = data; - this.n = numberOfActualCharsInArray; - } - - /** Reset the stream so that it's in the same state it was - * when the object was created *except* the data array is not - * touched. - */ - public void reset() { - p = 0; - } - - @Override - public void consume() { - //System.out.println("prev p="+p+", c="+(char)data[p]); - if ( p < n ) { - p++; - //System.out.println("p moves to "+p+" (c='"+(char)data[p]+"')"); - } - } - - @Override - public int LA(int i) { - if ( i==0 ) { - return 0; // undefined - } - if ( i<0 ) { - i++; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1] - if ( (p+i-1) < 0 ) { - return CharStream.EOF; // invalid; no char before first char - } - } - - if ( (p+i-1) >= n ) { - //System.out.println("char LA("+i+")=EOF; p="+p); - return CharStream.EOF; - } - //System.out.println("char LA("+i+")="+(char)data[p+i-1]+"; p="+p); - //System.out.println("LA("+i+"); p="+p+" n="+n+" data.length="+data.length); - return data[p+i-1]; - } - - public int LT(int i) { - return LA(i); - } - - /** Return the current input symbol index 0..n where n indicates the - * last symbol has been read. The index is the index of char to - * be returned from LA(1). - */ - @Override - public int index() { - return p; - } - - @Override - public int size() { - return n; - } - - /** mark/release do nothing; we have entire buffer */ - @Override - public int mark() { - return -1; - } - - @Override - public void release(int marker) { - } - - /** consume() ahead until p==index; can't just set p=index as we must - * update line and charPositionInLine. If we seek backwards, just set p - */ - @Override - public void seek(int index) { - if ( index<=p ) { - p = index; // just jump; don't update stream state (line, ...) - return; - } - // seek forward, consume until p hits index - while ( p= n ) stop = n-1; - int count = stop - start + 1; - if ( start >= n ) return ""; -// System.err.println("data: "+Arrays.toString(data)+", n="+n+ -// ", start="+start+ -// ", stop="+stop); - return new String(data, start, count); - } - - @Override - public String getSourceName() { - return name; - } - - public String toString() { return new String(data); } -} diff --git a/runtime/Java/src/org/antlr/v4/runtime/CharStream.java b/runtime/Java/src/org/antlr/v4/runtime/CharStream.java index 5bff93459..8269f6d56 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/CharStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/CharStream.java @@ -31,9 +31,8 @@ package org.antlr.v4.runtime; /** A source of characters for an ANTLR lexer */ public interface CharStream extends IntStream { public static final int EOF = -1; - public static final int INVALID_CHAR = -2; - /** For infinite streams, you don't need this; primarily I'm providing + /** For unbuffered streams, you can't use this; primarily I'm providing * a useful interface for action code. Just make sure actions don't * use this on streams that don't support it. */ diff --git a/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java b/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java index 2f9965f25..6a613cdfd 100644 --- a/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java +++ b/tool/src/org/antlr/v4/tool/interp/LexerInterpreter.java @@ -51,7 +51,7 @@ public class LexerInterpreter implements TokenSource { } public void setInput(String inputString) { - input = new ANTLRStringStream(inputString); + input = new ANTLRInputStream(inputString); } public void setInput(CharStream input) { diff --git a/tool/test/org/antlr/v4/test/BaseTest.java b/tool/test/org/antlr/v4/test/BaseTest.java index b056203cd..f64bc77e6 100644 --- a/tool/test/org/antlr/v4/test/BaseTest.java +++ b/tool/test/org/antlr/v4/test/BaseTest.java @@ -33,15 +33,24 @@ import org.antlr.v4.automata.*; import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.misc.Utils; import org.antlr.v4.runtime.*; -import org.antlr.v4.runtime.atn.*; +import org.antlr.v4.runtime.atn.ATN; +import org.antlr.v4.runtime.atn.ATNState; +import org.antlr.v4.runtime.atn.DecisionState; +import org.antlr.v4.runtime.atn.LexerATNSimulator; import org.antlr.v4.runtime.dfa.DFA; import org.antlr.v4.semantics.SemanticPipeline; import org.antlr.v4.tool.*; -import org.antlr.v4.tool.Rule; -import org.junit.*; -import org.stringtemplate.v4.*; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.stringtemplate.v4.ST; +import org.stringtemplate.v4.STGroup; +import org.stringtemplate.v4.STGroupString; -import javax.tools.*; +import javax.tools.JavaCompiler; +import javax.tools.JavaFileObject; +import javax.tools.StandardJavaFileManager; +import javax.tools.ToolProvider; import java.io.*; import java.util.*; @@ -142,7 +151,7 @@ public abstract class BaseTest { } public List getTokenTypesViaATN(String input, LexerATNSimulator lexerATN) { - ANTLRStringStream in = new ANTLRStringStream(input); + ANTLRInputStream in = new ANTLRInputStream(input); List tokenTypes = new ArrayList(); int ttype = 0; do { diff --git a/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java b/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java index 91d36e7e5..7bc58c282 100644 --- a/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java +++ b/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java @@ -1,9 +1,13 @@ package org.antlr.v4.test; import org.antlr.v4.misc.Utils; -import org.antlr.v4.runtime.*; -import org.antlr.v4.runtime.atn.*; -import org.antlr.v4.tool.*; +import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.LexerRecognitionExeption; +import org.antlr.v4.runtime.atn.ATN; +import org.antlr.v4.runtime.atn.ATNState; +import org.antlr.v4.tool.DOTGenerator; +import org.antlr.v4.tool.LexerGrammar; import org.junit.Test; import java.util.List; @@ -242,7 +246,7 @@ public class TestATNLexerInterpreter extends BaseTest { protected LexerRecognitionExeption checkLexerMatches(LexerGrammar lg, String inputString, String expecting) { ATN atn = createATN(lg); - CharStream input = new ANTLRStringStream(inputString); + CharStream input = new ANTLRInputStream(inputString); ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE"); DOTGenerator dot = new DOTGenerator(lg); System.out.println(dot.getDOT(startState, true)); diff --git a/tool/test/org/antlr/v4/test/TestCommonTokenStream.java b/tool/test/org/antlr/v4/test/TestCommonTokenStream.java index b6d5a7724..dcb0d0ae9 100644 --- a/tool/test/org/antlr/v4/test/TestCommonTokenStream.java +++ b/tool/test/org/antlr/v4/test/TestCommonTokenStream.java @@ -47,7 +47,7 @@ public class TestCommonTokenStream extends BaseTest { "WS : ' '+;\n"); // Tokens: 012345678901234567 // Input: x = 3 * 0 + 2 * 0; - CharStream input = new ANTLRStringStream("x = 3 * 0 + 2 * 0;"); + CharStream input = new ANTLRInputStream("x = 3 * 0 + 2 * 0;"); LexerInterpreter lexEngine = new LexerInterpreter(g); lexEngine.setInput(input); BufferedTokenStream tokens = new BufferedTokenStream(lexEngine); @@ -69,7 +69,7 @@ public class TestCommonTokenStream extends BaseTest { "WS : ' '+;\n"); // Tokens: 012345678901234567 // Input: x = 3 * 0 + 2 * 0; - CharStream input = new ANTLRStringStream("x = 3 * 0 + 2 * 0;"); + CharStream input = new ANTLRInputStream("x = 3 * 0 + 2 * 0;"); LexerInterpreter lexEngine = new LexerInterpreter(g); lexEngine.setInput(input); BufferedTokenStream tokens = new BufferedTokenStream(lexEngine); @@ -91,7 +91,7 @@ public class TestCommonTokenStream extends BaseTest { "WS : ' '+;\n"); // Tokens: 012345678901234567 // Input: x = 3 * 0 + 2 * 0; - CharStream input = new ANTLRStringStream("x = 3 * 0 + 2 * 0;"); + CharStream input = new ANTLRInputStream("x = 3 * 0 + 2 * 0;"); LexerInterpreter lexEngine = new LexerInterpreter(g); lexEngine.setInput(input); BufferedTokenStream tokens = new BufferedTokenStream(lexEngine); @@ -122,7 +122,7 @@ public class TestCommonTokenStream extends BaseTest { "WS : ' '+;\n"); // Tokens: 012345678901234567 // Input: x = 3 * 0 + 2 * 0; - CharStream input = new ANTLRStringStream("x = 3 * 0 + 2 * 0;"); + CharStream input = new ANTLRInputStream("x = 3 * 0 + 2 * 0;"); LexerInterpreter lexEngine = new LexerInterpreter(g); lexEngine.setInput(input); BufferedTokenStream tokens = new BufferedTokenStream(lexEngine); @@ -154,7 +154,7 @@ public class TestCommonTokenStream extends BaseTest { "WS : ' '+;\n"); // Tokens: 012345678901234567 // Input: x = 3 * 0 + 2 * 0; - CharStream input = new ANTLRStringStream("x = 3 * 0 + 2 * 0;"); + CharStream input = new ANTLRInputStream("x = 3 * 0 + 2 * 0;"); LexerInterpreter lexEngine = new LexerInterpreter(g); lexEngine.setInput(input); BufferedTokenStream tokens = new BufferedTokenStream(lexEngine);