Merge pull request #1650 from bhamiltoncx/js-fromcodepoint-polyfill
Fix JavaScript InputStream.getText() when input contains Unicode values > U+FFFF
This commit is contained in:
commit
565f6299c0
|
@ -28,6 +28,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
=====
|
=====
|
||||||
|
|
||||||
MIT License for codepointat.js from https://git.io/codepointat
|
MIT License for codepointat.js from https://git.io/codepointat
|
||||||
|
MIT License for fromcodepoint.js from https://git.io/vDW1m
|
||||||
|
|
||||||
Copyright Mathias Bynens <https://mathiasbynens.be/>
|
Copyright Mathias Bynens <https://mathiasbynens.be/>
|
||||||
|
|
||||||
|
|
|
@ -7,13 +7,14 @@
|
||||||
|
|
||||||
var Token = require('./Token').Token;
|
var Token = require('./Token').Token;
|
||||||
require('./polyfills/codepointat');
|
require('./polyfills/codepointat');
|
||||||
|
require('./polyfills/fromcodepoint');
|
||||||
|
|
||||||
// Vacuum all input from a string and then treat it like a buffer.
|
// Vacuum all input from a string and then treat it like a buffer.
|
||||||
|
|
||||||
function _loadString(stream, decodeToUnicodeCodePoints) {
|
function _loadString(stream, decodeToUnicodeCodePoints) {
|
||||||
stream._index = 0;
|
stream._index = 0;
|
||||||
stream.data = [];
|
stream.data = [];
|
||||||
if (decodeToUnicodeCodePoints) {
|
if (stream.decodeToUnicodeCodePoints) {
|
||||||
for (var i = 0; i < stream.strdata.length; ) {
|
for (var i = 0; i < stream.strdata.length; ) {
|
||||||
var codePoint = stream.strdata.codePointAt(i);
|
var codePoint = stream.strdata.codePointAt(i);
|
||||||
stream.data.push(codePoint);
|
stream.data.push(codePoint);
|
||||||
|
@ -36,7 +37,8 @@ function _loadString(stream, decodeToUnicodeCodePoints) {
|
||||||
function InputStream(data, decodeToUnicodeCodePoints) {
|
function InputStream(data, decodeToUnicodeCodePoints) {
|
||||||
this.name = "<empty>";
|
this.name = "<empty>";
|
||||||
this.strdata = data;
|
this.strdata = data;
|
||||||
_loadString(this, decodeToUnicodeCodePoints || false);
|
this.decodeToUnicodeCodePoints = decodeToUnicodeCodePoints || false;
|
||||||
|
_loadString(this);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,7 +116,15 @@ InputStream.prototype.getText = function(start, stop) {
|
||||||
if (start >= this._size) {
|
if (start >= this._size) {
|
||||||
return "";
|
return "";
|
||||||
} else {
|
} else {
|
||||||
return this.strdata.slice(start, stop + 1);
|
if (this.decodeToUnicodeCodePoints) {
|
||||||
|
var result = "";
|
||||||
|
for (var i = start; i <= stop; i++) {
|
||||||
|
result += String.fromCodePoint(this.data[i]);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
return this.strdata.slice(start, stop + 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
exports.atn = require('./atn/index');
|
exports.atn = require('./atn/index');
|
||||||
exports.codepointat = require('./polyfills/codepointat');
|
exports.codepointat = require('./polyfills/codepointat');
|
||||||
exports.dfa = require('./dfa/index');
|
exports.dfa = require('./dfa/index');
|
||||||
|
exports.fromcodepoint = require('./polyfills/fromcodepoint');
|
||||||
exports.tree = require('./tree/index');
|
exports.tree = require('./tree/index');
|
||||||
exports.error = require('./error/index');
|
exports.error = require('./error/index');
|
||||||
exports.Token = require('./Token').Token;
|
exports.Token = require('./Token').Token;
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
/*! https://mths.be/fromcodepoint v0.2.1 by @mathias */

/**
 * ES3-compatible implementation of `String.fromCodePoint`.
 *
 * Every argument is coerced with `Number()`. Code points up to U+FFFF are
 * emitted as a single UTF-16 code unit; code points above U+FFFF are split
 * into a high/low surrogate pair.
 *
 * Deliberately written with `var` and `arguments` (no rest parameters):
 * this file is a polyfill and has to parse on engines that predate ES2015.
 *
 * @param {...*} _ Code points to convert. The single declared parameter
 *     keeps the function's `length` at 1.
 * @returns {string} The string built from the given code points, or `''`
 *     when called without arguments.
 * @throws {RangeError} If an argument, after `Number()` coercion, is not an
 *     integer in the range 0..0x10FFFF.
 */
function fromCodePoint(_) {
	// Flush threshold: keeps the argument list handed to each
	// `String.fromCharCode.apply` call bounded for very long inputs.
	var MAX_SIZE = 0x4000;
	var codeUnits = [];
	var length = arguments.length;
	var result = '';
	var index;
	var codePoint;

	for (index = 0; index < length; index++) {
		codePoint = Number(arguments[index]);
		if (
			!isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
			codePoint < 0 || // not a valid Unicode code point
			codePoint > 0x10FFFF || // not a valid Unicode code point
			Math.floor(codePoint) !== codePoint // not an integer
		) {
			throw new RangeError('Invalid code point: ' + codePoint);
		}
		if (codePoint <= 0xFFFF) {
			// BMP code point: exactly one code unit.
			codeUnits.push(codePoint);
		} else {
			// Astral code point: split into surrogate halves.
			// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
			codePoint -= 0x10000;
			codeUnits.push(
				(codePoint >> 10) + 0xD800, // high surrogate
				(codePoint % 0x400) + 0xDC00 // low surrogate
			);
		}
		// Flush on the last argument, or once the buffer exceeds MAX_SIZE.
		if (index + 1 === length || codeUnits.length > MAX_SIZE) {
			result += String.fromCharCode.apply(null, codeUnits);
			codeUnits.length = 0;
		}
	}
	return result;
}

// Install the polyfill only when the engine has no `String.fromCodePoint`.
if (!String.fromCodePoint) {
	(function() {
		// Resolves to `Object.defineProperty` when it works on plain objects,
		// otherwise to a falsy value.
		var defineProperty = (function() {
			// IE 8 only supports `Object.defineProperty` on DOM elements
			try {
				var object = {};
				var $defineProperty = Object.defineProperty;
				return $defineProperty(object, object, object) && $defineProperty;
			} catch (error) {
				// Feature probe: a throw simply means "not usable here".
				return undefined;
			}
		}());

		if (defineProperty) {
			defineProperty(String, 'fromCodePoint', {
				'value': fromCodePoint,
				'configurable': true,
				'writable': true
			});
		} else {
			String.fromCodePoint = fromCodePoint;
		}
	}());
}
|
Loading…
Reference in New Issue