forked from jasder/antlr
Merge pull request #1628 from bhamiltoncx/js-string-codepointat-polyfill
JavaScript runtime: Optionally support decoding UTF-16 sequences to Unicode code points
This commit is contained in:
commit
182f3c4647
25
LICENSE.txt
25
LICENSE.txt
|
@ -24,3 +24,28 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=====
|
||||
|
||||
MIT License for codepointat.js from https://git.io/codepointat
|
||||
|
||||
Copyright Mathias Bynens <https://mathiasbynens.be/>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
|
|
@ -13,9 +13,9 @@ var InputStream = require('./InputStream').InputStream;
|
|||
var isNodeJs = typeof window === 'undefined' && typeof importScripts === 'undefined';
|
||||
var fs = isNodeJs ? require("fs") : null;
|
||||
|
||||
/**
 * Input stream whose contents are read from a file on disk.
 *
 * The file is read synchronously and decoded as UTF-8, then handed to the
 * InputStream constructor. `fs` is only loaded when running under Node.js
 * (it is `null` otherwise), so this constructor can only be used there.
 *
 * @param {string} fileName - Path of the file to read; also stored on the
 *     instance as `fileName`.
 * @param {boolean} [decodeToUnicodeCodePoints] - Forwarded unchanged to
 *     InputStream; when truthy the text is decoded into Unicode code points
 *     instead of UTF-16 code units.
 * @returns {FileStream} The initialised instance.
 */
function FileStream(fileName, decodeToUnicodeCodePoints) {
    var data = fs.readFileSync(fileName, "utf8");
    InputStream.call(this, data, decodeToUnicodeCodePoints);
    this.fileName = fileName;
    return this;
}
|
||||
|
|
|
@ -6,22 +6,37 @@
|
|||
//
|
||||
|
||||
var Token = require('./Token').Token;
|
||||
require('./polyfills/codepointat');
|
||||
|
||||
// Vacuum all input from a string and then treat it like a buffer.
|
||||
|
||||
/**
 * Converts `stream.strdata` into an array of numbers stored in `stream.data`,
 * resets `stream._index` to 0 and records the element count in `stream._size`.
 *
 * @param {Object} stream - Object carrying the source text in `strdata`;
 *     its `_index`, `data` and `_size` properties are overwritten.
 * @param {boolean} decodeToUnicodeCodePoints - When truthy, each element of
 *     `data` is a full Unicode code point (a surrogate pair becomes one
 *     element). Otherwise each element is a single 16-bit UTF-16 code unit.
 */
function _loadString(stream, decodeToUnicodeCodePoints) {
    var text = stream.strdata;
    var i;
    stream._index = 0;
    stream.data = [];
    if (decodeToUnicodeCodePoints) {
        for (i = 0; i < text.length; ) {
            var codePoint = text.codePointAt(i);
            stream.data.push(codePoint);
            // Code points above 0xFFFF are stored as two UTF-16 code units,
            // so skip both halves of the surrogate pair.
            i += codePoint <= 0xFFFF ? 1 : 2;
        }
    } else {
        for (i = 0; i < text.length; i++) {
            var codeUnit = text.charCodeAt(i);
            stream.data.push(codeUnit);
        }
    }
    stream._size = stream.data.length;
}
|
||||
|
||||
/**
 * Character stream backed by an in-memory string.
 *
 * If decodeToUnicodeCodePoints is true, the input is treated
 * as a series of Unicode code points.
 *
 * Otherwise, the input is treated as a series of 16-bit UTF-16 code
 * units.
 *
 * @param {string} data - The text to expose; kept on the instance as
 *     `strdata`.
 * @param {boolean} [decodeToUnicodeCodePoints] - Selects code-point
 *     decoding; any falsy value (including omission) is passed on as `false`.
 * @returns {InputStream} The initialised instance.
 */
function InputStream(data, decodeToUnicodeCodePoints) {
    this.name = "<empty>";
    this.strdata = data;
    _loadString(this, decodeToUnicodeCodePoints || false);
    return this;
}
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
* can be found in the LICENSE.txt file in the project root.
|
||||
*/
|
||||
exports.atn = require('./atn/index');
|
||||
exports.codepointat = require('./polyfills/codepointat');
|
||||
exports.dfa = require('./dfa/index');
|
||||
exports.tree = require('./tree/index');
|
||||
exports.error = require('./error/index');
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
/*! https://mths.be/codepointat v0.2.0 by @mathias */
|
||||
// Installs `String.prototype.codePointAt` only when the runtime does not
// already provide it.
// Local changes from upstream v0.2.0: the position is truncated toward zero
// (`ToInteger`), `result` is declared outside the `try`, and the TypeError
// carries a message.
if (!String.prototype.codePointAt) {
	(function() {
		'use strict'; // needed to support `apply`/`call` with `undefined`/`null`
		var defineProperty = (function() {
			// IE 8 only supports `Object.defineProperty` on DOM elements
			var result;
			try {
				var object = {};
				var $defineProperty = Object.defineProperty;
				result = $defineProperty(object, object, object) && $defineProperty;
			} catch(error) {
				// Deliberately ignored: `result` stays undefined and the
				// plain-assignment fallback below is used instead.
			}
			return result;
		}());
		/**
		 * Returns the Unicode code point that starts at the given UTF-16
		 * index of the receiver string.
		 *
		 * @param {number} position - Index of the code unit to start from.
		 * @returns {number|undefined} The code point, the lone code unit when
		 *     it is not the start of a valid surrogate pair, or `undefined`
		 *     when the index is out of bounds.
		 * @throws {TypeError} When called with `null` or `undefined` as `this`.
		 */
		var codePointAt = function(position) {
			if (this == null) {
				throw new TypeError('String.prototype.codePointAt called on null or undefined');
			}
			var string = String(this);
			var size = string.length;
			// `ToInteger`
			var index = position ? Number(position) : 0;
			if (index != index) { // better `isNaN`
				index = 0;
			}
			// Truncate toward zero so that fractional positions resolve to
			// the same index as the native method (e.g. -0.5 addresses 0).
			index = index < 0 ? Math.ceil(index) : Math.floor(index);
			// Account for out-of-bounds indices:
			if (index < 0 || index >= size) {
				return undefined;
			}
			// Get the first code unit
			var first = string.charCodeAt(index);
			var second;
			if ( // check if it's the start of a surrogate pair
				first >= 0xD800 && first <= 0xDBFF && // high surrogate
				size > index + 1 // there is a next code unit
			) {
				second = string.charCodeAt(index + 1);
				if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate
					// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
					return (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
				}
			}
			return first;
		};
		if (defineProperty) {
			defineProperty(String.prototype, 'codePointAt', {
				'value': codePointAt,
				'configurable': true,
				'writable': true
			});
		} else {
			String.prototype.codePointAt = codePointAt;
		}
	}());
}
|
Loading…
Reference in New Issue