Merge pull request #1650 from bhamiltoncx/js-fromcodepoint-polyfill

Fix JavaScript InputStream.getText() when input contains Unicode values > U+FFFF
This commit is contained in:
Terence Parr 2017-02-13 15:08:59 -08:00 committed by GitHub
commit 565f6299c0
4 changed files with 77 additions and 3 deletions

View File

@ -28,6 +28,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
===== =====
MIT License for codepointat.js from https://git.io/codepointat MIT License for codepointat.js from https://git.io/codepointat
MIT License for fromcodepoint.js from https://git.io/vDW1m
Copyright Mathias Bynens <https://mathiasbynens.be/> Copyright Mathias Bynens <https://mathiasbynens.be/>

View File

@ -7,13 +7,14 @@
var Token = require('./Token').Token; var Token = require('./Token').Token;
require('./polyfills/codepointat'); require('./polyfills/codepointat');
require('./polyfills/fromcodepoint');
// Vacuum all input from a string and then treat it like a buffer. // Vacuum all input from a string and then treat it like a buffer.
function _loadString(stream, decodeToUnicodeCodePoints) { function _loadString(stream, decodeToUnicodeCodePoints) {
stream._index = 0; stream._index = 0;
stream.data = []; stream.data = [];
if (decodeToUnicodeCodePoints) { if (stream.decodeToUnicodeCodePoints) {
for (var i = 0; i < stream.strdata.length; ) { for (var i = 0; i < stream.strdata.length; ) {
var codePoint = stream.strdata.codePointAt(i); var codePoint = stream.strdata.codePointAt(i);
stream.data.push(codePoint); stream.data.push(codePoint);
@ -36,7 +37,8 @@ function _loadString(stream, decodeToUnicodeCodePoints) {
function InputStream(data, decodeToUnicodeCodePoints) { function InputStream(data, decodeToUnicodeCodePoints) {
this.name = "<empty>"; this.name = "<empty>";
this.strdata = data; this.strdata = data;
_loadString(this, decodeToUnicodeCodePoints || false); this.decodeToUnicodeCodePoints = decodeToUnicodeCodePoints || false;
_loadString(this);
return this; return this;
} }
@ -114,7 +116,15 @@ InputStream.prototype.getText = function(start, stop) {
if (start >= this._size) { if (start >= this._size) {
return ""; return "";
} else { } else {
return this.strdata.slice(start, stop + 1); if (this.decodeToUnicodeCodePoints) {
var result = "";
for (var i = start; i <= stop; i++) {
result += String.fromCodePoint(this.data[i]);
}
return result;
} else {
return this.strdata.slice(start, stop + 1);
}
} }
}; };

View File

@ -5,6 +5,7 @@
exports.atn = require('./atn/index'); exports.atn = require('./atn/index');
exports.codepointat = require('./polyfills/codepointat'); exports.codepointat = require('./polyfills/codepointat');
exports.dfa = require('./dfa/index'); exports.dfa = require('./dfa/index');
exports.fromcodepoint = require('./polyfills/fromcodepoint');
exports.tree = require('./tree/index'); exports.tree = require('./tree/index');
exports.error = require('./error/index'); exports.error = require('./error/index');
exports.Token = require('./Token').Token; exports.Token = require('./Token').Token;

View File

@ -0,0 +1,62 @@
/*! https://mths.be/fromcodepoint v0.2.1 by @mathias */
if (!String.fromCodePoint) {
  // Only install the fallback when the runtime has no `String.fromCodePoint`.
  (function() {
    var LAST_BMP_CODE_POINT = 0xFFFF;
    var LAST_CODE_POINT = 0x10FFFF;
    // Pending code units are converted in batches once they exceed this
    // count, so a single `apply` call never receives an unbounded list.
    var FLUSH_THRESHOLD = 0x4000;

    var charsFromUnits = String.fromCharCode;
    var roundDown = Math.floor;

    // Resolve a usable `Object.defineProperty`, or a falsy value when it is
    // missing or throws on plain objects.
    // IE 8 only supports `Object.defineProperty` on DOM elements.
    var installProperty = (function() {
      var probe = {};
      try {
        var nativeDefine = Object.defineProperty;
        if (nativeDefine(probe, probe, probe)) {
          return nativeDefine;
        }
      } catch (ignored) {}
      return undefined;
    }());

    /**
     * Builds a string from a sequence of Unicode code points.
     *
     * Each argument is coerced with `Number()`. Values up to U+FFFF are
     * emitted as a single UTF-16 code unit; larger values are split into a
     * high/low surrogate pair.
     *
     * The single declared parameter is unused; it only gives the function a
     * `length` of 1. All values are read from `arguments`.
     *
     * @returns {string} The concatenated characters ('' when called with no
     *     arguments).
     * @throws {RangeError} If any argument is not an integer in the range
     *     0..0x10FFFF after numeric coercion.
     */
    var fromCodePoint = function(_) {
      var total = arguments.length;
      var pending = [];
      var text = '';

      for (var position = 0; position < total; position++) {
        var value = Number(arguments[position]);
        var isValid =
          isFinite(value) &&             // rejects `NaN` and both infinities
          value >= 0 &&
          value <= LAST_CODE_POINT &&
          roundDown(value) === value;    // must be an integer
        if (!isValid) {
          throw RangeError('Invalid code point: ' + value);
        }

        if (value > LAST_BMP_CODE_POINT) {
          // Astral code point; split in surrogate halves
          // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          var offset = value - 0x10000;
          pending.push((offset >> 10) + 0xD800, (offset % 0x400) + 0xDC00);
        } else {
          pending.push(value);
        }

        if (pending.length > FLUSH_THRESHOLD) {
          text += charsFromUnits.apply(null, pending);
          pending.length = 0;
        }
      }

      // Convert whatever is still buffered (an empty buffer yields '').
      return text + charsFromUnits.apply(null, pending);
    };

    if (installProperty) {
      // Writable and configurable, but not enumerable.
      installProperty(String, 'fromCodePoint', {
        'value': fromCodePoint,
        'configurable': true,
        'writable': true
      });
    } else {
      String.fromCodePoint = fromCodePoint;
    }
  }());
}