Improve memory usage of CodePointCharStream: Use 8-bit, 16-bit, or 32-bit buffer

2017-03-21 18:04:11 -07:00 · 2017-03-21 18:04:11 -07:00 · ab0655598e
parent de4d129921
commit ab0655598e
8 changed files with 759 additions and 631 deletions
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCodePointCharStream.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCodePointCharStream.java
@ -3,17 +3,14 @@
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */
-package org.antlr.v4.test.runtime.java;
+package org.antlr.v4.runtime;

 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;

 import java.io.IOException;
 import java.nio.IntBuffer;

-import org.antlr.v4.runtime.CharStreams;
-import org.antlr.v4.runtime.CodePointCharStream;
-import org.antlr.v4.runtime.IntStream;
-
 import org.antlr.v4.runtime.misc.Interval;

 import org.junit.Rule;
@ -291,4 +288,25 @@ public class TestCodePointCharStream {
 		s.seek(6);
 		assertEquals(0x1F522, s.LA(-1));
 	}
+
+	// Input whose code units all fit in 8 bits is expected to be backed by a byte[].
+	@Test
+	public void asciiContentsShouldUse8BitBuffer() {
+		CodePointCharStream s = CharStreams.fromString("hello");
+		assertTrue(s.getInternalStorage() instanceof byte[]);
+		// Five code units, each a single code point.
+		assertEquals(5, s.size());
+	}
+
+	// Input containing code units above 0xFF but no surrogates is expected to be backed by a char[].
+	@Test
+	public void bmpContentsShouldUse16BitBuffer() {
+		CodePointCharStream s = CharStreams.fromString("hello \u4E16\u754C");
+		assertTrue(s.getInternalStorage() instanceof char[]);
+		// "hello " is six code points, plus the two characters above 0xFF.
+		assertEquals(8, s.size());
+	}
+
+	// Input containing a surrogate pair is expected to be backed by an int[].
+	@Test
+	public void smpContentsShouldUse32BitBuffer() {
+		CodePointCharStream s = CharStreams.fromString("hello \uD83C\uDF0D");
+		assertTrue(s.getInternalStorage() instanceof int[]);
+		// "hello " is six code points; the surrogate pair counts as a single code point.
+		assertEquals(7, s.size());
+	}
 }
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCharStreams.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCharStreams.java
@ -137,9 +137,9 @@ public class TestCharStreams {
 		try (SeekableByteChannel c = Files.newByteChannel(p)) {
 			CharStream s = CharStreams.fromChannel(
 					c, 4096, CodingErrorAction.REPLACE, "foo");
-			assertEquals(3, s.size());
+			assertEquals(4, s.size());
 			assertEquals(0, s.index());
-			assertEquals("\uFFFD\uFFFD\uFFFD", s.toString());
+			assertEquals("\uFFFD\uFFFD\uFFFD\uFFFD", s.toString());
 		}
 	}

--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestUTF8CodePointDecoder.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestUTF8CodePointDecoder.java
@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
- */
-package org.antlr.v4.test.runtime.java;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.IntBuffer;
-
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.CodingErrorAction;
-import java.nio.charset.StandardCharsets;
-
-import org.antlr.v4.runtime.UTF8CodePointDecoder;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-
-public class TestUTF8CodePointDecoder {
-	@Rule
-	public ExpectedException thrown = ExpectedException.none();
-
-	@Test
-	public void decodeEmptyByteBufferWritesNothing() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPLACE);
-		ByteBuffer utf8BytesIn = ByteBuffer.allocate(0);
-		IntBuffer codePointsOut = IntBuffer.allocate(0);
-		IntBuffer result = decoder.decodeCodePointsFromBuffer(
-				utf8BytesIn,
-				codePointsOut,
-				true);
-		result.flip();
-		assertEquals(0, result.remaining());
-	}
-
-	@Test
-	public void decodeLatinByteBufferWritesCodePoint() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPLACE);
-		ByteBuffer utf8BytesIn = StandardCharsets.UTF_8.encode("X");
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		IntBuffer result = decoder.decodeCodePointsFromBuffer(
-				utf8BytesIn,
-				codePointsOut,
-				true);
-		result.flip();
-		assertEquals(1, result.remaining());
-		assertEquals('X', result.get(0));
-	}
-
-	@Test
-	public void decodeCyrillicByteBufferWritesCodePoint() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPLACE);
-		ByteBuffer utf8BytesIn = StandardCharsets.UTF_8.encode("\u042F");
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		IntBuffer result = decoder.decodeCodePointsFromBuffer(
-				utf8BytesIn,
-				codePointsOut,
-				true);
-		result.flip();
-		assertEquals(1, result.remaining());
-		assertEquals(0x042F, result.get(0));
-	}
-
-	@Test
-	public void decodeCJKByteBufferWritesCodePoint() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPLACE);
-		ByteBuffer utf8BytesIn = StandardCharsets.UTF_8.encode("\u611B");
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		IntBuffer result = decoder.decodeCodePointsFromBuffer(
-				utf8BytesIn,
-				codePointsOut,
-				true);
-		result.flip();
-		assertEquals(1, result.remaining());
-		assertEquals(0x611B, result.get(0));
-	}
-
-	@Test
-	public void decodeEmojiByteBufferWritesCodePoint() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPLACE);
-		ByteBuffer utf8BytesIn = StandardCharsets.UTF_8.encode(
-				new StringBuilder().appendCodePoint(0x1F4A9).toString()
-		);
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		IntBuffer result = decoder.decodeCodePointsFromBuffer(
-				utf8BytesIn,
-				codePointsOut,
-				true);
-		result.flip();
-		assertEquals(1, result.remaining());
-		assertEquals(0x1F4A9, result.get(0));
-	}
-
-	@Test
-	public void decodingInvalidLeadInReplaceModeWritesSubstitutionCharacter() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPLACE);
-		ByteBuffer utf8BytesIn = ByteBuffer.wrap(new byte[] { (byte)0xF8 });
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		IntBuffer result = decoder.decodeCodePointsFromBuffer(utf8BytesIn, codePointsOut, true);
-		result.flip();
-		assertEquals(1, result.remaining());
-		assertEquals(0xFFFD, result.get(0));
-	}
-
-	@Test
-	public void decodingInvalidLeadInReportModeThrows() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPORT);
-		ByteBuffer utf8BytesIn = ByteBuffer.wrap(new byte[] { (byte)0xF8 });
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		thrown.expect(CharacterCodingException.class);
-		thrown.expectMessage("Invalid UTF-8 leading byte 0xF8");
-		decoder.decodeCodePointsFromBuffer(utf8BytesIn, codePointsOut, true);
-	}
-
-	@Test
-	public void decodingInvalidTrailInReplaceModeWritesSubstitutionCharacter() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPLACE);
-		ByteBuffer utf8BytesIn = ByteBuffer.wrap(new byte[] { (byte)0xC0, (byte)0xC0 });
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		IntBuffer result = decoder.decodeCodePointsFromBuffer(utf8BytesIn, codePointsOut, true);
-		result.flip();
-		assertEquals(1, result.remaining());
-		assertEquals(0xFFFD, result.get(0));
-	}
-
-	@Test
-	public void decodingInvalidTrailInReportModeThrows() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPORT);
-		ByteBuffer utf8BytesIn = ByteBuffer.wrap(new byte[] { (byte)0xC0, (byte)0xC0 });
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		thrown.expect(CharacterCodingException.class);
-		thrown.expectMessage("Invalid UTF-8 trailing byte 0xC0");
-		decoder.decodeCodePointsFromBuffer(utf8BytesIn, codePointsOut, true);
-	}
-
-	@Test
-	public void decodingNonShortestFormInReplaceModeWritesSubstitutionCharacter() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPLACE);
-		// 0xC1 0x9C would decode to \ (U+005C) if we didn't have this check
-		ByteBuffer utf8BytesIn = ByteBuffer.wrap(new byte[] { (byte)0xC1, (byte)0x9C });
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		IntBuffer result = decoder.decodeCodePointsFromBuffer(utf8BytesIn, codePointsOut, true);
-		result.flip();
-		assertEquals(1, result.remaining());
-		assertEquals(0xFFFD, result.get(0));
-	}
-
-	@Test
-	public void decodingNonShortestFormInReportModeThrows() throws Exception {
-		UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(CodingErrorAction.REPORT);
-		// 0xC1 0x9C would decode to \ (U+005C) if we didn't have this check
-		ByteBuffer utf8BytesIn = ByteBuffer.wrap(new byte[] { (byte)0xC1, (byte)0x9C });
-		IntBuffer codePointsOut = IntBuffer.allocate(1);
-		thrown.expect(CharacterCodingException.class);
-		thrown.expectMessage("Code point 92 is out of expected range 128..2047");
-		decoder.decodeCodePointsFromBuffer(utf8BytesIn, codePointsOut, true);
-	}
-}
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/perf/TimeLexerSpeed.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/perf/TimeLexerSpeed.java
@ -13,9 +13,11 @@ import java.io.BufferedReader;
 import java.io.File;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.io.IOException;
 import java.lang.management.ManagementFactory;
 import java.lang.management.RuntimeMXBean;
 import java.net.URL;
+import java.net.URLConnection;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@ -331,10 +333,12 @@ public class TimeLexerSpeed { // don't call it Test else it'll run during "mvn t
 		for (int i = 0; i<n; i++) {
 			streams[i] = loader.getResourceAsStream(resourceName);
 		}
+		URLConnection uc = null;
+		long streamLength = getResourceSize(loader, resourceName);
 		long start = System.nanoTime(); // track only time to suck data out of stream
 		for (int i = 0; i<n; i++) {
 			try (InputStream is = streams[i]) {
-				input[i] = CharStreams.fromStream(is);
+				input[i] = CharStreams.fromStream(is, StandardCharsets.UTF_8, streamLength);
 			}
 		}
 		long stop = System.nanoTime();
@ -370,8 +374,10 @@ public class TimeLexerSpeed { // don't call it Test else it'll run during "mvn t
 	}

 	public void lex_new_java_utf8(int n, boolean clearLexerDFACache) throws Exception {
-		try (InputStream is = TimeLexerSpeed.class.getClassLoader().getResourceAsStream(Parser_java_file);) {
-			CharStream input = CharStreams.fromStream(is);
+		ClassLoader loader = TimeLexerSpeed.class.getClassLoader();
+		try (InputStream is = loader.getResourceAsStream(Parser_java_file);) {
+			long size = getResourceSize(loader, Parser_java_file);
+			CharStream input = CharStreams.fromStream(is, StandardCharsets.UTF_8, size);
 			JavaLexer lexer = new JavaLexer(input);
 			double avg = tokenize(lexer, n, clearLexerDFACache);
 			String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
@ -403,8 +409,11 @@ public class TimeLexerSpeed { // don't call it Test else it'll run during "mvn t
 	}

 	public void lex_new_grapheme_utf8(String fileName, int n, boolean clearLexerDFACache) throws Exception {
-		try (InputStream is = TimeLexerSpeed.class.getClassLoader().getResourceAsStream(PerfDir+"/"+fileName)) {
-			CharStream input = CharStreams.fromStream(is);
+		String resourceName = PerfDir+"/"+fileName;
+		ClassLoader loader = TimeLexerSpeed.class.getClassLoader();
+		try (InputStream is = loader.getResourceAsStream(resourceName)) {
+			long size = getResourceSize(loader, resourceName);
+			CharStream input = CharStreams.fromStream(is, StandardCharsets.UTF_8, size);
 			graphemesLexer lexer = new graphemesLexer(input);
 			double avg = tokenize(lexer, n, clearLexerDFACache);
 			String currentMethodName = new Exception().getStackTrace()[0].getMethodName();
@ -474,4 +483,18 @@ public class TimeLexerSpeed { // don't call it Test else it'll run during "mvn t
 	public static String dirname(Path path) {
 		return path.getName(0).toString();
 	}
+
+	/**
+	 * Returns the content length reported by a {@link URLConnection} opened on
+	 * the named resource of {@code loader}.
+	 *
+	 * @param loader class loader used to locate the resource
+	 * @param resourceName name of the resource to measure
+	 * @return the value of {@link URLConnection#getContentLengthLong()} for the resource
+	 * @throws IOException if the resource cannot be found, or if opening or
+	 *         closing the connection fails
+	 */
+	public static final long getResourceSize(ClassLoader loader, String resourceName) throws IOException {
+		URL resource = loader.getResource(resourceName);
+		if (resource == null) {
+			// getResource() reports a missing resource as null; fail with a
+			// descriptive error instead of a bare NullPointerException.
+			throw new IOException("Resource not found: " + resourceName);
+		}
+		URLConnection uc = resource.openConnection();
+		try {
+			return uc.getContentLengthLong();
+		} finally {
+			// Sadly, URLConnection is not AutoCloseable, but it leaks resources if
+			// we don't close its stream.
+			uc.getInputStream().close();
+		}
+	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java
@ -10,10 +10,12 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.ByteBuffer;
-import java.nio.IntBuffer;
+import java.nio.CharBuffer;
 import java.nio.channels.Channels;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
@ -55,17 +57,15 @@ public final class CharStreams {
 	 * For other sources, only supports Unicode code points up to U+FFFF.
 	 */
 	public static CharStream fromPath(Path path, Charset charset) throws IOException {
-		if (charset.equals(StandardCharsets.UTF_8)) {
-			try (ReadableByteChannel channel = Files.newByteChannel(path)) {
-				return fromChannel(
-					channel,
-					DEFAULT_BUFFER_SIZE,
-					CodingErrorAction.REPLACE,
-					path.toString());
-			}
-		}
-		else {
-			return new ANTLRFileStream(path.toString(), charset.toString());
+		long size = Files.size(path);
+		try (ReadableByteChannel channel = Files.newByteChannel(path)) {
+			return fromChannel(
+				channel,
+				charset,
+				DEFAULT_BUFFER_SIZE,
+				CodingErrorAction.REPLACE,
+				path.toString(),
+				size);
 		}
 	}

@ -120,19 +120,18 @@ public final class CharStreams {
 	 * For other sources, only supports Unicode code points up to U+FFFF.
 	 */
 	public static CharStream fromStream(InputStream is, Charset charset) throws IOException {
-		if (charset.equals(StandardCharsets.UTF_8)) {
-			try (ReadableByteChannel channel = Channels.newChannel(is)) {
-				return fromChannel(
-					channel,
-					DEFAULT_BUFFER_SIZE,
-					CodingErrorAction.REPLACE,
-					IntStream.UNKNOWN_SOURCE_NAME);
-			}
-		}
-		else {
-			try (InputStreamReader isr = new InputStreamReader(is, charset)) {
-				return new ANTLRInputStream(isr);
-			}
+		return fromStream(is, charset, -1);
+	}
+
+	/**
+	 * Creates a {@link CharStream} by decoding the bytes of {@code is} with
+	 * {@code charset}, using {@link CodingErrorAction#REPLACE} for decoding
+	 * errors and {@link IntStream#UNKNOWN_SOURCE_NAME} as the source name.
+	 * The channel wrapping {@code is} is closed before returning.
+	 *
+	 * @param is stream to read the encoded input from
+	 * @param charset character set used to decode the bytes
+	 * @param inputSize size of the input, passed through to {@code fromChannel};
+	 *        the two-argument overload passes {@code -1} when it is not known
+	 */
+	public static CharStream fromStream(InputStream is, Charset charset, long inputSize) throws IOException {
+		try (ReadableByteChannel channel = Channels.newChannel(is)) {
+			return fromChannel(
+				channel,
+				charset,
+				DEFAULT_BUFFER_SIZE,
+				CodingErrorAction.REPLACE,
+				IntStream.UNKNOWN_SOURCE_NAME,
+				inputSize);
 		}
 	}

@ -160,18 +159,11 @@ public final class CharStreams {
 	 * For other sources, only supports Unicode code points up to U+FFFF.
 	 */
 	public static CharStream fromChannel(ReadableByteChannel channel, Charset charset) throws IOException {
-		if (charset.equals(StandardCharsets.UTF_8)) {
-			return fromChannel(
-				channel,
-				DEFAULT_BUFFER_SIZE,
-				CodingErrorAction.REPLACE,
-				IntStream.UNKNOWN_SOURCE_NAME);
-		}
-		else {
-			try (InputStreamReader isr = new InputStreamReader(Channels.newInputStream(channel), charset)) {
-				return new ANTLRInputStream(isr);
-			}
-		}
+		// Forward the caller's charset: the four-argument overload always
+		// decodes as UTF-8 and would silently ignore it. An input size of -1
+		// means the size is not known up front.
+		return fromChannel(
+			channel,
+			charset,
+			DEFAULT_BUFFER_SIZE,
+			CodingErrorAction.REPLACE,
+			IntStream.UNKNOWN_SOURCE_NAME,
+			-1);
 	}

 	/**
@ -187,50 +179,15 @@ public final class CharStreams {
 	 * source name. Closes the reader before returning.
 	 */
 	public static CodePointCharStream fromReader(Reader r, String sourceName) throws IOException {
-		IntBuffer codePointBuffer = IntBuffer.allocate(DEFAULT_BUFFER_SIZE);
-		int highSurrogate = -1;
-		int curCodeUnit;
 		try {
-			while ((curCodeUnit = r.read()) != -1) {
-				if (!codePointBuffer.hasRemaining()) {
-					// Grow the code point buffer size by 2.
-					IntBuffer newBuffer = IntBuffer.allocate(codePointBuffer.capacity() * 2);
-					codePointBuffer.flip();
-					newBuffer.put(codePointBuffer);
-					codePointBuffer = newBuffer;
-				}
-				if (Character.isHighSurrogate((char) curCodeUnit)) {
-					if (highSurrogate != -1) {
-						// Dangling high surrogate followed by another high surrogate.
-						codePointBuffer.put(highSurrogate);
-					}
-					highSurrogate = curCodeUnit;
-				}
-				else if (Character.isLowSurrogate((char) curCodeUnit)) {
-					if (highSurrogate == -1) {
-						// Low surrogate not preceded by high surrogate.
-						codePointBuffer.put(curCodeUnit);
-					}
-					else {
-						codePointBuffer.put(Character.toCodePoint((char) highSurrogate, (char) curCodeUnit));
-						highSurrogate = -1;
-					}
-				}
-				else {
-					if (highSurrogate != -1) {
-						// Dangling high surrogate followed by a non-surrogate.
-						codePointBuffer.put(highSurrogate);
-						highSurrogate = -1;
-					}
-					codePointBuffer.put(curCodeUnit);
-				}
+			CodePointBuffer.Builder codePointBufferBuilder = CodePointBuffer.builder(DEFAULT_BUFFER_SIZE);
+			CharBuffer charBuffer = CharBuffer.allocate(DEFAULT_BUFFER_SIZE);
+			while ((r.read(charBuffer)) != -1) {
+				charBuffer.flip();
+				codePointBufferBuilder.append(charBuffer);
+				charBuffer.compact();
 			}
-			if (highSurrogate != -1) {
-				// Dangling high surrogate at end of file.
-				codePointBuffer.put(highSurrogate);
-			}
-			codePointBuffer.flip();
-			return new CodePointCharStream(codePointBuffer, sourceName);
+			return CodePointCharStream.fromBuffer(codePointBufferBuilder.build(), sourceName);
 		}
 		finally {
 			r.close();
@ -251,22 +208,14 @@ public final class CharStreams {
 	public static CodePointCharStream fromString(String s, String sourceName) {
 		// Initial guess assumes no code points > U+FFFF: one code
 		// point for each code unit in the string
-		IntBuffer codePointBuffer = IntBuffer.allocate(s.length());
-		int stringIdx = 0;
-		while (stringIdx < s.length()) {
-			if (!codePointBuffer.hasRemaining()) {
-				// Grow the code point buffer size by 2.
-				IntBuffer newBuffer = IntBuffer.allocate(codePointBuffer.capacity() * 2);
-				codePointBuffer.flip();
-				newBuffer.put(codePointBuffer);
-				codePointBuffer = newBuffer;
-			}
-			int codePoint = Character.codePointAt(s, stringIdx);
-			codePointBuffer.put(codePoint);
-			stringIdx += Character.charCount(codePoint);
-		}
-		codePointBuffer.flip();
-		return new CodePointCharStream(codePointBuffer, sourceName);
+		CodePointBuffer.Builder codePointBufferBuilder = CodePointBuffer.builder(s.length());
+		// TODO: CharBuffer.wrap(String) rightfully returns a read-only buffer
+		// which doesn't expose its array, so we make a copy.
+		CharBuffer cb = CharBuffer.allocate(s.length());
+		cb.put(s);
+		cb.flip();
+		codePointBufferBuilder.append(cb);
+		return CodePointCharStream.fromBuffer(codePointBufferBuilder.build(), sourceName);
 	}

 	/**
@ -282,25 +231,62 @@ public final class CharStreams {
 		CodingErrorAction decodingErrorAction,
 		String sourceName)
 		throws IOException
+	{
+		return fromChannel(channel, StandardCharsets.UTF_8, bufferSize, decodingErrorAction, sourceName, -1);
+	}
+
+	public static CodePointCharStream fromChannel(
+		ReadableByteChannel channel,
+		Charset charset,
+		int bufferSize,
+		CodingErrorAction decodingErrorAction,
+		String sourceName,
+		long inputSize)
+		throws IOException
 	{
 		try {
-			ByteBuffer utf8BytesIn = ByteBuffer.allocateDirect(bufferSize);
-			IntBuffer codePointsOut = IntBuffer.allocate(bufferSize);
+			ByteBuffer utf8BytesIn = ByteBuffer.allocate(bufferSize);
+			CharBuffer utf16CodeUnitsOut = CharBuffer.allocate(bufferSize);
+			if (inputSize == -1) {
+				inputSize = bufferSize;
+			} else if (inputSize > Integer.MAX_VALUE) {
+				// ByteBuffer et al don't support long sizes
+				throw new IOException(String.format("inputSize %d larger than max %d", inputSize, Integer.MAX_VALUE));
+			}
+			CodePointBuffer.Builder codePointBufferBuilder = CodePointBuffer.builder((int) inputSize);
+			CharsetDecoder decoder = charset
+					.newDecoder()
+					.onMalformedInput(decodingErrorAction)
+					.onUnmappableCharacter(decodingErrorAction);
+
 			boolean endOfInput = false;
-			UTF8CodePointDecoder decoder = new UTF8CodePointDecoder(decodingErrorAction);
 			while (!endOfInput) {
 				int bytesRead = channel.read(utf8BytesIn);
 				endOfInput = (bytesRead == -1);
 				utf8BytesIn.flip();
-				codePointsOut = decoder.decodeCodePointsFromBuffer(
+				CoderResult result = decoder.decode(
 					utf8BytesIn,
-					codePointsOut,
+					utf16CodeUnitsOut,
 					endOfInput);
+				if (result.isError() && decodingErrorAction.equals(CodingErrorAction.REPORT)) {
+					result.throwException();
+				}
+				utf16CodeUnitsOut.flip();
+				codePointBufferBuilder.append(utf16CodeUnitsOut);
 				utf8BytesIn.compact();
+				utf16CodeUnitsOut.compact();
 			}
-			codePointsOut.limit(codePointsOut.position());
-			codePointsOut.flip();
-			return new CodePointCharStream(codePointsOut, sourceName);
+			// Handle any bytes at the end of the file which need to
+			// be represented as errors or substitution characters.
+			CoderResult flushResult = decoder.flush(utf16CodeUnitsOut);
+			if (flushResult.isError() && decodingErrorAction.equals(CodingErrorAction.REPORT)) {
+				flushResult.throwException();
+			}
+			utf16CodeUnitsOut.flip();
+			codePointBufferBuilder.append(utf16CodeUnitsOut);
+
+			CodePointBuffer codePointBuffer = codePointBufferBuilder.build();
+			return CodePointCharStream.fromBuffer(codePointBuffer, sourceName);
 		}
 		finally {
 			channel.close();
--- a/runtime/Java/src/org/antlr/v4/runtime/CodePointBuffer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CodePointBuffer.java
@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+package org.antlr.v4.runtime;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+
+/**
+ * Wrapper for {@link ByteBuffer} / {@link CharBuffer} / {@link IntBuffer}.
+ *
+ * Because Java lacks generics on primitive types, these three types
+ * do not share an interface, so we have to write one manually.
+ */
+public class CodePointBuffer {
+	public enum Type {
+			BYTE,
+			CHAR,
+			INT
+	}
+	private final Type type;
+	private final ByteBuffer byteBuffer;
+	private final CharBuffer charBuffer;
+	private final IntBuffer intBuffer;
+
+	private CodePointBuffer(Type type, ByteBuffer byteBuffer, CharBuffer charBuffer, IntBuffer intBuffer) {
+		this.type = type;
+		this.byteBuffer = byteBuffer;
+		this.charBuffer = charBuffer;
+		this.intBuffer = intBuffer;
+	}
+
+	public static CodePointBuffer withBytes(ByteBuffer byteBuffer) {
+		return new CodePointBuffer(Type.BYTE, byteBuffer, null, null);
+	}
+
+	public static CodePointBuffer withChars(CharBuffer charBuffer) {
+		return new CodePointBuffer(Type.CHAR, null, charBuffer, null);
+	}
+
+	public static CodePointBuffer withInts(IntBuffer intBuffer) {
+		return new CodePointBuffer(Type.INT, null, null, intBuffer);
+	}
+
+	public int position() {
+		switch (type) {
+			case BYTE:
+				return byteBuffer.position();
+			case CHAR:
+				return charBuffer.position();
+			case INT:
+				return intBuffer.position();
+		}
+		throw new UnsupportedOperationException("Not reached");
+	}
+
+	public void position(int newPosition) {
+		switch (type) {
+			case BYTE:
+				byteBuffer.position(newPosition);
+				break;
+			case CHAR:
+				charBuffer.position(newPosition);
+				break;
+			case INT:
+				intBuffer.position(newPosition);
+				break;
+		}
+	}
+
+	public int remaining() {
+		switch (type) {
+			case BYTE:
+				return byteBuffer.remaining();
+			case CHAR:
+				return charBuffer.remaining();
+			case INT:
+				return intBuffer.remaining();
+		}
+		throw new UnsupportedOperationException("Not reached");
+	}
+
+	/**
+	 * Returns the code point stored at absolute index {@code offset} of the
+	 * underlying buffer, as a non-negative {@code int}.
+	 *
+	 * @param offset index into the underlying buffer
+	 * @return the code point at {@code offset}
+	 */
+	public int get(int offset) {
+		switch (type) {
+			case BYTE:
+				// Java bytes are signed; mask so that values 0x80..0xFF are
+				// not sign-extended into negative ints.
+				return byteBuffer.get(offset) & 0xFF;
+			case CHAR:
+				return charBuffer.get(offset);
+			case INT:
+				return intBuffer.get(offset);
+		}
+		throw new UnsupportedOperationException("Not reached");
+	}
+
+	Type getType() {
+		return type;
+	}
+
+	int arrayOffset() {
+		switch (type) {
+			case BYTE:
+				return byteBuffer.arrayOffset();
+			case CHAR:
+				return charBuffer.arrayOffset();
+			case INT:
+				return intBuffer.arrayOffset();
+		}
+		throw new UnsupportedOperationException("Not reached");
+	}
+
+	byte[] byteArray() {
+		assert type == Type.BYTE;
+		return byteBuffer.array();
+	}
+
+	char[] charArray() {
+		assert type == Type.CHAR;
+		return charBuffer.array();
+	}
+
+	int[] intArray() {
+		assert type == Type.INT;
+		return intBuffer.array();
+	}
+
+	public static Builder builder(int initialBufferSize) {
+		return new Builder(initialBufferSize);
+	}
+
+	public static class Builder {
+		private Type type;
+		private ByteBuffer byteBuffer;
+		private CharBuffer charBuffer;
+		private IntBuffer intBuffer;
+		private int prevHighSurrogate;
+
+		private Builder(int initialBufferSize) {
+			type = Type.BYTE;
+			byteBuffer = ByteBuffer.allocate(initialBufferSize);
+			charBuffer = null;
+			intBuffer = null;
+			prevHighSurrogate = -1;
+		}
+
+		Type getType() {
+			return type;
+		}
+
+		ByteBuffer getByteBuffer() {
+			return byteBuffer;
+		}
+
+		CharBuffer getCharBuffer() {
+			return charBuffer;
+		}
+
+		IntBuffer getIntBuffer() {
+			return intBuffer;
+		}
+
+		/**
+		 * Flips the buffer currently in use, so that it is ready to be read
+		 * from its start up to the last element written, and wraps it in a
+		 * {@link CodePointBuffer} of the same type.
+		 */
+		public CodePointBuffer build() {
+			switch (type) {
+				case BYTE:
+					byteBuffer.flip();
+					break;
+				case CHAR:
+					charBuffer.flip();
+					break;
+				case INT:
+					intBuffer.flip();
+					break;
+			}
+			return new CodePointBuffer(type, byteBuffer, charBuffer, intBuffer);
+		}
+
+		// Returns the smallest power of two that is >= i, saturating at
+		// Integer.MAX_VALUE when that power of two does not fit in an int.
+		private static int roundUpToNextPowerOfTwo(int i) {
+			int exponent = Integer.SIZE - Integer.numberOfLeadingZeros(i - 1);
+			if (exponent >= Integer.SIZE - 1) {
+				// 2^31 and 2^32 are not representable as an int.
+				return Integer.MAX_VALUE;
+			}
+			return 1 << exponent;
+		}
+
+		/**
+		 * Grows the buffer currently in use, if necessary, so that at least
+		 * {@code remainingNeeded} more elements can be written to it.
+		 *
+		 * When the buffer is too small, a new one is allocated whose capacity
+		 * is the current capacity plus {@code remainingNeeded}, rounded up to
+		 * a power of two, and the elements written so far are copied into it.
+		 */
+		public void ensureRemaining(int remainingNeeded) {
+			switch (type) {
+				case BYTE:
+					if (byteBuffer.remaining() < remainingNeeded) {
+						int newCapacity = roundUpToNextPowerOfTwo(byteBuffer.capacity() + remainingNeeded);
+						ByteBuffer newBuffer = ByteBuffer.allocate(newCapacity);
+						// flip() so that put() copies exactly the elements written so far.
+						byteBuffer.flip();
+						newBuffer.put(byteBuffer);
+						byteBuffer = newBuffer;
+					}
+					break;
+				case CHAR:
+					if (charBuffer.remaining() < remainingNeeded) {
+						int newCapacity = roundUpToNextPowerOfTwo(charBuffer.capacity() + remainingNeeded);
+						CharBuffer newBuffer = CharBuffer.allocate(newCapacity);
+						charBuffer.flip();
+						newBuffer.put(charBuffer);
+						charBuffer = newBuffer;
+					}
+					break;
+				case INT:
+					if (intBuffer.remaining() < remainingNeeded) {
+						int newCapacity = roundUpToNextPowerOfTwo(intBuffer.capacity() + remainingNeeded);
+						IntBuffer newBuffer = IntBuffer.allocate(newCapacity);
+						intBuffer.flip();
+						newBuffer.put(intBuffer);
+						intBuffer = newBuffer;
+					}
+					break;
+			}
+		}
+
+		/**
+		 * Appends the remaining UTF-16 code units of {@code utf16In} to this
+		 * builder, first making room for one output element per code unit.
+		 *
+		 * Only array-backed buffers are supported at present.
+		 *
+		 * @throws UnsupportedOperationException if {@code utf16In} has no
+		 *         accessible backing array
+		 */
+		public void append(CharBuffer utf16In) {
+			ensureRemaining(utf16In.remaining());
+			if (utf16In.hasArray()) {
+				appendArray(utf16In);
+			} else {
+				// TODO
+				throw new UnsupportedOperationException("TODO");
+			}
+		}
+
+		private void appendArray(CharBuffer utf16In) {
+			assert utf16In.hasArray();
+
+			switch (type) {
+				case BYTE:
+					appendArrayByte(utf16In);
+					break;
+				case CHAR:
+					appendArrayChar(utf16In);
+					break;
+				case INT:
+					appendArrayInt(utf16In);
+					break;
+			}
+		}
+
+		// Appends code units to the byte buffer for as long as each one fits in
+		// 8 bits. On the first code unit that does not fit, the storage is
+		// widened (to chars, or to ints for a high surrogate) and the rest of
+		// the input is handed to the matching wider append routine.
+		private void appendArrayByte(CharBuffer utf16In) {
+			assert prevHighSurrogate == -1;
+
+			// Work directly on the backing arrays; the buffer positions are
+			// written back before returning or switching storage.
+			char[] in = utf16In.array();
+			int inOffset = utf16In.arrayOffset() + utf16In.position();
+			int inLimit = utf16In.arrayOffset() + utf16In.limit();
+
+			byte[] outByte = byteBuffer.array();
+			int outOffset = byteBuffer.arrayOffset() + byteBuffer.position();
+
+			while (inOffset < inLimit) {
+				char c = in[inOffset];
+				if (c <= 0xFF) {
+					outByte[outOffset] = (byte)(c & 0xFF);
+				} else {
+					// Record how far we got so the conversion below copies
+					// only what was written and the wider routine resumes at c.
+					utf16In.position(inOffset - utf16In.arrayOffset());
+					byteBuffer.position(outOffset - byteBuffer.arrayOffset());
+					if (!Character.isHighSurrogate(c)) {
+						byteToCharBuffer(utf16In.remaining());
+						appendArrayChar(utf16In);
+						return;
+					} else {
+						byteToIntBuffer(utf16In.remaining());
+						appendArrayInt(utf16In);
+						return;
+					}
+				}
+				inOffset++;
+				outOffset++;
+			}
+
+			utf16In.position(inOffset - utf16In.arrayOffset());
+			byteBuffer.position(outOffset - byteBuffer.arrayOffset());
+		}
+
+		// Appends code units to the char buffer until a high surrogate is
+		// seen. At that point the storage is widened to ints and the rest of
+		// the input is handed to appendArrayInt.
+		private void appendArrayChar(CharBuffer utf16In) {
+			assert prevHighSurrogate == -1;
+
+			// Work directly on the backing arrays; the buffer positions are
+			// written back before returning or switching storage.
+			char[] in = utf16In.array();
+			int inOffset = utf16In.arrayOffset() + utf16In.position();
+			int inLimit = utf16In.arrayOffset() + utf16In.limit();
+
+			char[] outChar = charBuffer.array();
+			int outOffset = charBuffer.arrayOffset() + charBuffer.position();
+
+			while (inOffset < inLimit) {
+				char c = in[inOffset];
+				if (!Character.isHighSurrogate(c)) {
+					outChar[outOffset] = c;
+				} else {
+					// Record how far we got so the conversion below copies
+					// only what was written and appendArrayInt resumes at c.
+					utf16In.position(inOffset - utf16In.arrayOffset());
+					charBuffer.position(outOffset - charBuffer.arrayOffset());
+					charToIntBuffer(utf16In.remaining());
+					appendArrayInt(utf16In);
+					return;
+				}
+				inOffset++;
+				outOffset++;
+			}
+
+			utf16In.position(inOffset - utf16In.arrayOffset());
+			charBuffer.position(outOffset - charBuffer.arrayOffset());
+		}
+
+		// Appends code units to the int buffer, combining each high surrogate
+		// with a following low surrogate into one code point. Unpaired high
+		// surrogates are stored as-is.
+		//
+		// A high surrogate that ends the input is written provisionally and
+		// remembered in prevHighSurrogate, because its low surrogate may
+		// arrive with the next call. The next call retracts that provisional
+		// element before continuing, so a pair split across two calls yields
+		// exactly one code point and a dangling surrogate is stored only once.
+		private void appendArrayInt(CharBuffer utf16In) {
+			char[] in = utf16In.array();
+			int inOffset = utf16In.arrayOffset() + utf16In.position();
+			int inLimit = utf16In.arrayOffset() + utf16In.limit();
+
+			int[] outInt = intBuffer.array();
+			int outOffset = intBuffer.arrayOffset() + intBuffer.position();
+
+			if (prevHighSurrogate != -1) {
+				// The previous call ended on a high surrogate and stored it
+				// provisionally as the last element; take it back so it is
+				// re-emitted below either as part of a pair or as dangling.
+				outOffset--;
+			}
+
+			while (inOffset < inLimit) {
+				char c = in[inOffset];
+				inOffset++;
+				if (prevHighSurrogate != -1) {
+					if (Character.isLowSurrogate(c)) {
+						outInt[outOffset] = Character.toCodePoint((char) prevHighSurrogate, c);
+						outOffset++;
+						prevHighSurrogate = -1;
+					} else {
+						// Dangling high surrogate
+						outInt[outOffset] = prevHighSurrogate;
+						outOffset++;
+						if (Character.isHighSurrogate(c)) {
+							prevHighSurrogate = c & 0xFFFF;
+						} else {
+							outInt[outOffset] = c & 0xFFFF;
+							outOffset++;
+							prevHighSurrogate = -1;
+						}
+					}
+				} else if (Character.isHighSurrogate(c)) {
+					prevHighSurrogate = c & 0xFFFF;
+				} else {
+					outInt[outOffset] = c & 0xFFFF;
+					outOffset++;
+				}
+			}
+
+			if (prevHighSurrogate != -1) {
+				// Possibly dangling high surrogate: store it provisionally so
+				// it is present if no more input arrives. prevHighSurrogate
+				// stays set so the next call can retract it.
+				outInt[outOffset] = prevHighSurrogate & 0xFFFF;
+				outOffset++;
+			}
+
+			utf16In.position(inOffset - utf16In.arrayOffset());
+			intBuffer.position(outOffset - intBuffer.arrayOffset());
+		}
+
+		// Switches storage from bytes to chars: copies every byte written so
+		// far into a new CharBuffer with room for toAppend more elements.
+		private void byteToCharBuffer(int toAppend) {
+			// flip() exposes the bytes written so far for reading.
+			byteBuffer.flip();
+			// CharBuffers hold twice as much per unit as ByteBuffers, so start with half the capacity.
+			CharBuffer newBuffer = CharBuffer.allocate(Math.max(byteBuffer.remaining() + toAppend, byteBuffer.capacity() / 2));
+			while (byteBuffer.hasRemaining()) {
+				// Mask so each byte is widened as an unsigned value.
+				newBuffer.put((char) (byteBuffer.get() & 0xFF));
+			}
+			type = Type.CHAR;
+			byteBuffer = null;
+			charBuffer = newBuffer;
+		}
+
+		// Switches storage from bytes to ints: copies every byte written so
+		// far into a new IntBuffer with room for toAppend more elements.
+		private void byteToIntBuffer(int toAppend) {
+			// flip() exposes the bytes written so far for reading.
+			byteBuffer.flip();
+			// IntBuffers hold four times as much per unit as ByteBuffers, so start with one quarter the capacity.
+			IntBuffer newBuffer = IntBuffer.allocate(Math.max(byteBuffer.remaining() + toAppend, byteBuffer.capacity() / 4));
+			while (byteBuffer.hasRemaining()) {
+				// Mask so each byte is widened as an unsigned value.
+				newBuffer.put(byteBuffer.get() & 0xFF);
+			}
+			type = Type.INT;
+			byteBuffer = null;
+			intBuffer = newBuffer;
+		}
+
+		// Switches storage from chars to ints: copies every char written so
+		// far into a new IntBuffer with room for toAppend more elements.
+		private void charToIntBuffer(int toAppend) {
+			// flip() exposes the chars written so far for reading.
+			charBuffer.flip();
+			// IntBuffers hold two times as much per unit as CharBuffers, so start with one half the capacity.
+			IntBuffer newBuffer = IntBuffer.allocate(Math.max(charBuffer.remaining() + toAppend, charBuffer.capacity() / 2));
+			while (charBuffer.hasRemaining()) {
+				newBuffer.put(charBuffer.get() & 0xFFFF);
+			}
+			type = Type.INT;
+			charBuffer = null;
+			intBuffer = newBuffer;
+		}
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/CodePointCharStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CodePointCharStream.java
@ -7,7 +7,7 @@ package org.antlr.v4.runtime;

 import org.antlr.v4.runtime.misc.Interval;

-import java.nio.IntBuffer;
+import java.nio.charset.StandardCharsets;

 /**
 * Alternative to {@link ANTLRInputStream} which treats the input
@ -17,115 +17,113 @@ import java.nio.IntBuffer;
 * Use this if you need to parse input which potentially contains
 * Unicode values > U+FFFF.
 */
-public final class CodePointCharStream implements CharStream {
-	private final IntBuffer codePointBuffer;
-	private final int initialPosition;
-	private final int size;
-	private final String name;
+public abstract class CodePointCharStream implements CharStream {
+	protected final int size;
+	protected final String name;
+
+	// To avoid lots of virtual method calls, we directly access
+	// the state of the underlying code points in the
+	// CodePointBuffer.
+	protected int position;
+
+	// Instances are created through the static factory
+	// {@link #fromBuffer(CodePointBuffer)}; this constructor is private.
+	//
+	// {@code remaining} becomes the stream size. A non-zero starting
+	// {@code position} is not supported yet (TODO): it is only checked
+	// by the assertion, and the stream always starts at index 0.
+	private CodePointCharStream(int position, int remaining, String name) {
+		assert position == 0;
+		this.name = name;
+		this.size = remaining;
+		this.position = 0;
+	}
+
+	// Visible for testing.
+	abstract Object getInternalStorage();

 	/**
 	 * Constructs a {@link CodePointCharStream} which provides access
 	 * to the Unicode code points stored in {@code codePointBuffer}.
-	 *
-	 * {@code codePointBuffer}'s {@link IntBuffer#position position}
-	 * reflects the first code point of the stream, and its
-	 * {@link IntBuffer#limit limit} is just after the last code point
-	 * of the stream.
 	 */
-	public CodePointCharStream(IntBuffer codePointBuffer) {
-		this(codePointBuffer, UNKNOWN_SOURCE_NAME);
+	public static CodePointCharStream fromBuffer(CodePointBuffer codePointBuffer) {
+		return fromBuffer(codePointBuffer, UNKNOWN_SOURCE_NAME);
 	}

 	/**
 	 * Constructs a named {@link CodePointCharStream} which provides access
 	 * to the Unicode code points stored in {@code codePointBuffer}.
-	 *
-	 * {@code codePointBuffer}'s {@link IntBuffer#position position}
-	 * reflects the first code point of the stream, and its
-	 * {@link IntBuffer#limit limit} is just after the last code point
-	 * of the stream.
 	 */
-	public CodePointCharStream(IntBuffer codePointBuffer, String name) {
-		this.codePointBuffer = codePointBuffer;
-		this.initialPosition = codePointBuffer.position();
-		this.size = codePointBuffer.remaining();
-		this.name = name;
-	}
-
-	private int relativeBufferPosition(int i) {
-		return initialPosition + codePointBuffer.position() + i;
+	public static CodePointCharStream fromBuffer(CodePointBuffer codePointBuffer, String name) {
+		// Java lacks generics on primitive types.
+		//
+		// To avoid lots of calls to virtual methods in the
+		// very hot codepath of LA() below, we construct one
+		// of three concrete subclasses.
+		//
+		// The concrete subclasses directly access the code
+		// points stored in the underlying array (byte[],
+		// char[], or int[]), so we can avoid lots of virtual
+		// method calls to ByteBuffer.get(offset).
+		//
+		// Each subclass receives the buffer's position, its
+		// remaining element count (which becomes the stream
+		// size), the source name, the backing array, and the
+		// array offset.
+		switch (codePointBuffer.getType()) {
+			case BYTE:
+				// 8-bit storage backed by a byte[].
+				return new CodePoint8BitCharStream(
+						codePointBuffer.position(),
+						codePointBuffer.remaining(),
+						name,
+						codePointBuffer.byteArray(),
+						codePointBuffer.arrayOffset());
+			case CHAR:
+				// 16-bit storage backed by a char[].
+				return new CodePoint16BitCharStream(
+						codePointBuffer.position(),
+						codePointBuffer.remaining(),
+						name,
+						codePointBuffer.charArray(),
+						codePointBuffer.arrayOffset());
+			case INT:
+				// 32-bit storage backed by an int[].
+				return new CodePoint32BitCharStream(
+						codePointBuffer.position(),
+						codePointBuffer.remaining(),
+						name,
+						codePointBuffer.intArray(),
+						codePointBuffer.arrayOffset());
+		}
+		// Only reached if getType() yields a value that none of the
+		// cases above handles.
+		throw new UnsupportedOperationException("Not reached");
 	}

 	@Override
-	public void consume() {
-		if (!codePointBuffer.hasRemaining()) {
+	public final void consume() {
+		if (size - position == 0) {
 			assert LA(1) == IntStream.EOF;
 			throw new IllegalStateException("cannot consume EOF");
 		}
-		codePointBuffer.position(codePointBuffer.position() + 1);
+		position = position + 1;
 	}

 	@Override
-	public int LA(int i) {
-		if (i == 0) {
-			// Undefined
-			return 0;
-		}
-		else if (i < 0) {
-			if (codePointBuffer.position() + i < initialPosition) {
-				return IntStream.EOF;
-			}
-			return codePointBuffer.get(relativeBufferPosition(i));
-		}
-		else if (i > codePointBuffer.remaining()) {
-			return IntStream.EOF;
-		}
-		else {
-			return codePointBuffer.get(relativeBufferPosition(i - 1));
-		}
+	public final int index() {
+		return position;
 	}

 	@Override
-	public int index() {
-		return codePointBuffer.position() - initialPosition;
-	}
-
-	@Override
-	public int size() {
+	public final int size() {
 		return size;
 	}

 	/** mark/release do nothing; we have entire buffer */
 	@Override
-	public int mark() {
+	public final int mark() {
 		return -1;
 	}

 	@Override
-	public void release(int marker) {
+	public final void release(int marker) {
 	}

 	@Override
-	public void seek(int index) {
-		codePointBuffer.position(initialPosition + index);
-	}
-
-	/** Return the UTF-16 encoded string for the given interval */
-	@Override
-	public String getText(Interval interval) {
-		final int startIdx = initialPosition + Math.min(interval.a, size - 1);
-		final int stopIdx = initialPosition + Math.min(interval.b, size - 1);
-		// interval.length() will be too small if we contain any code points > U+FFFF,
-		// but it's just a hint for initial capacity; StringBuilder will grow anyway.
-		StringBuilder sb = new StringBuilder(interval.length());
-		for (int codePointIdx = startIdx; codePointIdx <= stopIdx; codePointIdx++) {
-			sb.appendCodePoint(codePointBuffer.get(codePointIdx));
-		}
-		return sb.toString();
+	public final void seek(int index) {
+		position = index;
 	}

 	@Override
-	public String getSourceName() {
+	public final String getSourceName() {
 		if (name == null || name.isEmpty()) {
 			return UNKNOWN_SOURCE_NAME;
 		}
@ -134,7 +132,165 @@ public final class CodePointCharStream implements CharStream {
 	}

 	@Override
-	public String toString() {
+	public final String toString() {
 		return getText(Interval.of(0, size - 1));
 	}
+
+	// 8-bit storage for code points <= U+00FF.
+	private static final class CodePoint8BitCharStream extends CodePointCharStream {
+		private final byte[] byteArray;
+
+		private CodePoint8BitCharStream(int position, int remaining, String name, byte[] byteArray, int arrayOffset) {
+			super(position, remaining, name);
+			// TODO: a non-zero array offset is not supported yet.
+			assert arrayOffset == 0;
+			this.byteArray = byteArray;
+		}
+
+		/**
+		 * Return the UTF-16 encoded string for the given interval.
+		 *
+		 * {@code interval.a} and {@code interval.b} are inclusive code point
+		 * indexes. The interval is clipped to the end of the stream, so an
+		 * interval that starts at or after the end, or that is empty, yields
+		 * the empty string.
+		 */
+		@Override
+		public String getText(Interval interval) {
+			// Clamp the start to size (not size - 1) so that an empty stream
+			// yields offset 0 rather than -1.
+			int startIdx = Math.min(interval.a, size);
+			// Clamp the inclusive stop index to the last valid element, then
+			// derive the length from it so we never read past the logical end
+			// of the stream.
+			int stopIdx = Math.min(interval.b, size - 1);
+			int len = Math.max(0, stopIdx - startIdx + 1);
+
+			// We know the maximum code point in byteArray is U+00FF,
+			// so we can treat this as if it were ISO-8859-1, aka Latin-1,
+			// which shares the same code points up to 0xFF.
+			return new String(byteArray, startIdx, len, StandardCharsets.ISO_8859_1);
+		}
+
+		/**
+		 * Returns the code point at offset {@code i} relative to the current
+		 * position: {@code LA(1)} is the next code point, {@code LA(-1)} the
+		 * previous one. Returns {@link IntStream#EOF} when the requested
+		 * offset lies before the start or at/after the end of the stream,
+		 * and 0 for the undefined {@code LA(0)}.
+		 */
+		@Override
+		public int LA(int i) {
+			int offset;
+			switch (Integer.signum(i)) {
+				case -1:
+					offset = position + i;
+					if (offset < 0) {
+						return IntStream.EOF;
+					}
+					// Mask to avoid sign-extending bytes >= 0x80.
+					return byteArray[offset] & 0xFF;
+				case 0:
+					// Undefined
+					return 0;
+				case 1:
+					offset = position + i - 1;
+					if (offset >= size) {
+						return IntStream.EOF;
+					}
+					return byteArray[offset] & 0xFF;
+			}
+			throw new UnsupportedOperationException("Not reached");
+		}
+
+		@Override
+		Object getInternalStorage() {
+			return byteArray;
+		}
+	}
+
+	// 16-bit internal storage for code points between U+0100 and U+FFFF.
+	private static final class CodePoint16BitCharStream extends CodePointCharStream {
+		private final char[] charArray;
+
+		private CodePoint16BitCharStream(int position, int remaining, String name, char[] charArray, int arrayOffset) {
+			super(position, remaining, name);
+			this.charArray = charArray;
+			// TODO: a non-zero array offset is not supported yet.
+			assert arrayOffset == 0;
+		}
+
+		/**
+		 * Return the UTF-16 encoded string for the given interval.
+		 *
+		 * {@code interval.a} and {@code interval.b} are inclusive code point
+		 * indexes. The interval is clipped to the end of the stream, so an
+		 * interval that starts at or after the end, or that is empty, yields
+		 * the empty string.
+		 */
+		@Override
+		public String getText(Interval interval) {
+			// Clamp the start to size (not size - 1) so that an empty stream
+			// yields offset 0 rather than -1.
+			int startIdx = Math.min(interval.a, size);
+			// Clamp the inclusive stop index to the last valid element, then
+			// derive the length from it so we never read past the logical end
+			// of the stream.
+			int stopIdx = Math.min(interval.b, size - 1);
+			int len = Math.max(0, stopIdx - startIdx + 1);
+
+			// We know there are no surrogates in this
+			// array, since otherwise we would be given a
+			// 32-bit int[] array.
+			//
+			// So, it's safe to treat this as if it were
+			// UTF-16.
+			return new String(charArray, startIdx, len);
+		}
+
+		/**
+		 * Returns the code point at offset {@code i} relative to the current
+		 * position: {@code LA(1)} is the next code point, {@code LA(-1)} the
+		 * previous one. Returns {@link IntStream#EOF} when the requested
+		 * offset lies before the start or at/after the end of the stream,
+		 * and 0 for the undefined {@code LA(0)}.
+		 */
+		@Override
+		public int LA(int i) {
+			int offset;
+			switch (Integer.signum(i)) {
+				case -1:
+					offset = position + i;
+					if (offset < 0) {
+						return IntStream.EOF;
+					}
+					return charArray[offset] & 0xFFFF;
+				case 0:
+					// Undefined
+					return 0;
+				case 1:
+					offset = position + i - 1;
+					if (offset >= size) {
+						return IntStream.EOF;
+					}
+					return charArray[offset] & 0xFFFF;
+			}
+			throw new UnsupportedOperationException("Not reached");
+		}
+
+		@Override
+		Object getInternalStorage() {
+			return charArray;
+		}
+	}
+
+	// 32-bit internal storage for code points between U+10000 and U+10FFFF.
+	private static final class CodePoint32BitCharStream extends CodePointCharStream {
+		private final int[] intArray;
+
+		private CodePoint32BitCharStream(int position, int remaining, String name, int[] intArray, int arrayOffset) {
+			super(position, remaining, name);
+			this.intArray = intArray;
+			// TODO: a non-zero array offset is not supported yet.
+			assert arrayOffset == 0;
+		}
+
+		/**
+		 * Return the UTF-16 encoded string for the given interval.
+		 *
+		 * {@code interval.a} and {@code interval.b} are inclusive code point
+		 * indexes. The interval is clipped to the end of the stream, so an
+		 * interval that starts at or after the end, or that is empty, yields
+		 * the empty string.
+		 */
+		@Override
+		public String getText(Interval interval) {
+			// Clamp the start to size (not size - 1) so that an empty stream
+			// yields offset 0 rather than -1.
+			int startIdx = Math.min(interval.a, size);
+			// Clamp the inclusive stop index to the last valid element, then
+			// derive the length from it so we never read past the logical end
+			// of the stream.
+			int stopIdx = Math.min(interval.b, size - 1);
+			int len = Math.max(0, stopIdx - startIdx + 1);
+
+			// Note that we pass the int[] code points to the String constructor --
+			// this is supported, and the constructor will convert to UTF-16 internally.
+			return new String(intArray, startIdx, len);
+		}
+
+		/**
+		 * Returns the code point at offset {@code i} relative to the current
+		 * position: {@code LA(1)} is the next code point, {@code LA(-1)} the
+		 * previous one. Returns {@link IntStream#EOF} when the requested
+		 * offset lies before the start or at/after the end of the stream,
+		 * and 0 for the undefined {@code LA(0)}.
+		 */
+		@Override
+		public int LA(int i) {
+			int offset;
+			switch (Integer.signum(i)) {
+				case -1:
+					offset = position + i;
+					if (offset < 0) {
+						return IntStream.EOF;
+					}
+					return intArray[offset];
+				case 0:
+					// Undefined
+					return 0;
+				case 1:
+					offset = position + i - 1;
+					if (offset >= size) {
+						return IntStream.EOF;
+					}
+					return intArray[offset];
+			}
+			throw new UnsupportedOperationException("Not reached");
+		}
+
+		@Override
+		Object getInternalStorage() {
+			return intArray;
+		}
+	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/UTF8CodePointDecoder.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/UTF8CodePointDecoder.java
@ -1,281 +0,0 @@
-/*
- * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
- */
-package org.antlr.v4.runtime;
-
-import org.antlr.v4.runtime.misc.Interval;
-
-import java.nio.ByteBuffer;
-import java.nio.IntBuffer;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.CodingErrorAction;
-
-/**
- * Decodes UTF-8 bytes directly to Unicode code points, stored in an
- * {@link IntBuffer}.
- *
- * Unlike {@link CharsetDecoder}, this does not use UTF-16 as an
- * intermediate representation, so this optimizes the common case of
- * decoding a UTF-8 file for parsing as Unicode code points.
- */
-public class UTF8CodePointDecoder {
-	private static final int SUBSTITUTION_CHARACTER = 0xFFFD;
-	private static final byte NVAL = (byte) 0xFF;
-
-	// Table mapping UTF-8 leading byte to the length of the trailing
-	// sequence.
-	protected static final byte[] UTF8_LEADING_BYTE_LENGTHS = new byte[] {
-		// [0x00, 0x7F] -> 0 trailing bytes
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
-		// [0x80, 0xBF] -> invalid leading byte
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL,
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL,
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL,
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL,
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL,
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL,
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL,
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL,
-
-		// [0xC0, 0xDF] -> one trailing byte
-		0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-		0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-		0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-		0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-
-		// [0xE0, 0xEF] -> two trailing bytes
-		0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-		0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-
-		// [0xF0, 0xF7] -> three trailing bytes
-		0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-
-		// [0xF8, 0xFF] -> invalid leading sequence
-		NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL, NVAL
-	};
-
-	// Table mapping UTF-8 sequence length to valid Unicode code point
-	// ranges for that sequence length.
-	protected static final Interval[] UTF8_VALID_INTERVALS = new Interval[] {
-		Interval.of(0x00, 0x7F),
-		Interval.of(0x80, 0x7FF),
-		Interval.of(0x800, 0xFFFF),
-		Interval.of(0x10000, 0x10FFFF)
-	};
-
-	protected final CodingErrorAction decodingErrorAction;
-	protected int decodingTrailBytesNeeded;
-	protected int decodingCurrentCodePoint;
-	protected Interval validDecodedCodePointRange;
-
-	/**
-	 * Constructs a new {@link UTF8CodePointDecoder} with a specified
-	 * {@link CodingErrorAction} to handle invalid UTF-8 sequences.
-	 */
-	public UTF8CodePointDecoder(CodingErrorAction decodingErrorAction) {
-		this.decodingErrorAction = decodingErrorAction;
-		reset();
-	}
-
-	/**
-	 * Resets the state in this {@link UTF8CodePointDecoder}, preparing it
-	 * for use with a new input buffer.
-	 */
-	public void reset() {
-		this.decodingTrailBytesNeeded = -1;
-		this.decodingCurrentCodePoint = -1;
-		this.validDecodedCodePointRange = Interval.INVALID;
-	}
-
-	/**
-	 * Decodes as many UTF-8 bytes as possible from {@code utf8BytesIn},
-	 * writing the result to {@code codePointsOut}.
-	 *
-	 * If you have more bytes to decode, set {@code endOfInput} to
-	 * {@code false} and call this method again once more bytes
-	 * are available.
-	 *
-	 * If there are no more bytes available, make sure to call this
-	 * setting {@code endOfInput} to {@code true} so that any invalid
-	 * UTF-8 sequence at the end of the input is handled.
-	 *
-	 * If {@code codePointsOut} is not large enough to store the result,
-	 * a new buffer is allocated and returned. Otherwise, returns
-	 * {@code codePointsOut}.
-	 *
-	 * After returning, the {@link ByteBuffer#position position} of
-	 * {@code utf8BytesIn} is moved forward to reflect the bytes consumed,
-	 * and the {@link IntBuffer#position position} of the result
-	 * is moved forward to reflect the code points written.
-	 *
-	 * The {@link IntBuffer#limit limit} of the result is not changed,
-	 * so if this is the end of the input, you will want to set the
-	 * limit to the {@link IntBuffer#position position}, then
-	 * {@link IntBuffer#flip flip} the result to prepare for reading.
-	 */
-	public IntBuffer decodeCodePointsFromBuffer(
-			ByteBuffer utf8BytesIn,
-			IntBuffer codePointsOut,
-			boolean endOfInput)
-		throws CharacterCodingException
-	{
-		while (utf8BytesIn.hasRemaining()) {
-			if (decodingTrailBytesNeeded == -1) {
-				// Start a new UTF-8 sequence by checking the leading byte.
-				byte leadingByte = utf8BytesIn.get();
-				if (!decodeLeadingByte(leadingByte)) {
-					codePointsOut = handleDecodeError(
-						String.format("Invalid UTF-8 leading byte 0x%02X", leadingByte),
-						codePointsOut);
-					reset();
-					continue;
-				}
-			}
-			assert decodingTrailBytesNeeded != -1;
-			if (utf8BytesIn.remaining() < decodingTrailBytesNeeded) {
-				// The caller will have to call us back with more bytes.
-				break;
-			}
-			// Now we know the input buffer has enough bytes to decode
-			// the entire sequence.
-			while (decodingTrailBytesNeeded > 0) {
-				// Continue a multi-byte UTF-8 sequence by checking the next trailing byte.
-				byte trailingByte = utf8BytesIn.get();
-				decodingTrailBytesNeeded--;
-				if (!decodeTrailingByte(trailingByte)) {
-					codePointsOut = handleDecodeError(
-							String.format("Invalid UTF-8 trailing byte 0x%02X", trailingByte),
-							codePointsOut);
-					// Skip past any remaining trailing bytes in the sequence.
-					utf8BytesIn.position(utf8BytesIn.position() + decodingTrailBytesNeeded);
-					reset();
-					continue;
-				}
-			}
-			if (decodingTrailBytesNeeded == 0) {
-				codePointsOut = appendCodePointFromInterval(
-						decodingCurrentCodePoint,
-						validDecodedCodePointRange,
-						codePointsOut);
-				reset();
-				continue;
-			}
-		}
-		if (endOfInput) {
-			if (decodingTrailBytesNeeded != -1) {
-				codePointsOut = handleDecodeError(
-						"Unterminated UTF-8 sequence at end of bytes",
-						codePointsOut);
-			}
-		}
-		return codePointsOut;
-	}
-
-	private boolean decodeLeadingByte(byte leadingByte) {
-		// Be careful about Java silently widening (unsigned)
-		// byte to (signed) int and sign-extending here.
-		//
-		// We use binary AND liberally below to prevent widening.
-		int leadingByteIdx = leadingByte & 0xFF;
-		decodingTrailBytesNeeded = UTF8_LEADING_BYTE_LENGTHS[leadingByteIdx];
-		switch (decodingTrailBytesNeeded) {
-			case 0:
-				decodingCurrentCodePoint = leadingByte;
-				break;
-			case 1:
-			case 2:
-			case 3:
-				int mask = (0b00111111 >> decodingTrailBytesNeeded);
-				decodingCurrentCodePoint = leadingByte & mask;
-				break;
-			default:
-				return false;
-		}
-		validDecodedCodePointRange = UTF8_VALID_INTERVALS[decodingTrailBytesNeeded];
-		return true;
-	}
-
-	private boolean decodeTrailingByte(byte trailingByte) {
-		int trailingValue = (trailingByte & 0xFF) - 0x80;
-		if (trailingValue < 0x00 || trailingValue > 0x3F) {
-			return false;
-		}
-		else {
-			decodingCurrentCodePoint = (decodingCurrentCodePoint << 6) | trailingValue;
-			return true;
-		}
-	}
-
-	private IntBuffer appendCodePointFromInterval(
-			int codePoint,
-			Interval validCodePointRange,
-			IntBuffer codePointsOut)
-		throws CharacterCodingException
-	{
-		assert validCodePointRange != Interval.INVALID;
-
-		// Security check: UTF-8 must represent code points using their
-		// shortest encoded form.
-		if (codePoint < validCodePointRange.a ||
-			codePoint > validCodePointRange.b) {
-			return handleDecodeError(
-					String.format(
-							"Code point %d is out of expected range %s",
-							codePoint,
-							validCodePointRange),
-					codePointsOut);
-		}
-		else {
-			return appendCodePoint(codePoint, codePointsOut);
-		}
-	}
-
-	private IntBuffer appendCodePoint(int codePoint, IntBuffer codePointsOut) {
-		if (!codePointsOut.hasRemaining()) {
-			// Grow the code point buffer size by 2.
-			IntBuffer newBuffer = IntBuffer.allocate(codePointsOut.capacity() * 2);
-			codePointsOut.flip();
-			newBuffer.put(codePointsOut);
-			codePointsOut = newBuffer;
-		}
-		codePointsOut.put(codePoint);
-		return codePointsOut;
-	}
-
-	private IntBuffer handleDecodeError(
-			final String error,
-			IntBuffer codePointsOut)
-		throws CharacterCodingException
-	{
-		if (decodingErrorAction == CodingErrorAction.REPLACE) {
-			codePointsOut = appendCodePoint(SUBSTITUTION_CHARACTER, codePointsOut);
-		}
-		else if (decodingErrorAction == CodingErrorAction.REPORT) {
-			throw new CharacterCodingException() {
-				@Override
-				public String getMessage() {
-					return error;
-				}
-			};
-		}
-		return codePointsOut;
-	}
-}