forked from jasder/antlr

refactored Recognizer.js to use ES6 classes
refactored Lexer.js to use ES6 classes
fix: don't wrap class in object for exporting
use const/let for better scoping
use JSDoc

This commit is contained in:
parent a3c380d7d4
commit 24ae4274a0
@@ -4,7 +4,7 @@
 */

 const {Token} = require('./Token');
-const {Lexer} = require('./Lexer');
+const Lexer = require('./Lexer');
 const {Interval} = require('./IntervalSet');

 // this is just to keep meaningful parameter types to Parser
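The hunk above is the consumer side of the new export style: Lexer.js now assigns the class itself to module.exports instead of attaching it to a property of the exports object, so requiring modules drop the destructuring. A minimal sketch of the two styles (standalone CommonJS; Widget is an illustrative stand-in):

// Widget.js, old style — class wrapped in an object, consumers must unwrap:
//   exports.Widget = Widget;
//   const {Widget} = require('./Widget');
//
// Widget.js, new style — class is the module value, consumers require it directly:
//   module.exports = Widget;
//   const Widget = require('./Widget');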
@@ -2,73 +2,364 @@
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */
-///
-
-// A lexer is recognizer that draws input symbols from a character stream.
-// lexer grammars result in a subclass of this object. A Lexer object
-// uses simplified match() and error recovery mechanisms in the interest of speed.
-
-var Token = require('./Token').Token;
-var Recognizer = require('./Recognizer').Recognizer;
-var CommonTokenFactory = require('./CommonTokenFactory');
-var RecognitionException = require('./error/Errors').RecognitionException;
-var LexerNoViableAltException = require('./error/Errors').LexerNoViableAltException;
-
-function TokenSource() {
-    return this;
-}
+
+const {Token} = require('./Token');
+const Recognizer = require('./Recognizer');
+const CommonTokenFactory = require('./CommonTokenFactory');
+const {RecognitionException} = require('./error/Errors');
+const {LexerNoViableAltException} = require('./error/Errors');
+
+class TokenSource {}
+
+/**
+ * A lexer is a recognizer that draws input symbols from a character stream.
+ * Lexer grammars result in a subclass of this object. A Lexer object
+ * uses simplified match() and error recovery mechanisms in the interest of speed.
+ */
+class Lexer extends Recognizer {
+    constructor(input) {
+        super();
+        this._input = input;
+        this._factory = CommonTokenFactory.DEFAULT;
+        this._tokenFactorySourcePair = [ this, input ];
+
+        this._interp = null; // child classes must populate this
+
+        /**
+         * The goal of all lexer rules/methods is to create a token object.
+         * This is an instance variable as multiple rules may collaborate to
+         * create a single token. nextToken will return this object after
+         * matching lexer rule(s). If you subclass to allow multiple token
+         * emissions, then set this to the last token to be matched or
+         * something non-null so that the auto token emit mechanism will not
+         * emit another token.
+         */
+        this._token = null;
+
+        /**
+         * What character index in the stream did the current token start at?
+         * Needed, for example, to get the text for the current token. Set at
+         * the start of nextToken.
+         */
+        this._tokenStartCharIndex = -1;
+
+        // The line on which the first character of the token resides
+        this._tokenStartLine = -1;
+
+        // The character position of the first character within the line
+        this._tokenStartColumn = -1;
+
+        // Once we see EOF on char stream, next token will be EOF.
+        // If you have DONE : EOF ; then you see DONE EOF.
+        this._hitEOF = false;
+
+        // The channel number for the current token
+        this._channel = Token.DEFAULT_CHANNEL;
+
+        // The token type for the current token
+        this._type = Token.INVALID_TYPE;
+
+        this._modeStack = [];
+        this._mode = Lexer.DEFAULT_MODE;
+
+        /**
+         * You can set the text for the current token to override what is in
+         * the input char buffer. Use setText() or set this instance variable directly.
+         */
+        this._text = null;
+
+        return this;
+    }
+
+    reset() {
+        // wack Lexer state variables
+        if (this._input !== null) {
+            this._input.seek(0); // rewind the input
+        }
+        this._token = null;
+        this._type = Token.INVALID_TYPE;
+        this._channel = Token.DEFAULT_CHANNEL;
+        this._tokenStartCharIndex = -1;
+        this._tokenStartColumn = -1;
+        this._tokenStartLine = -1;
+        this._text = null;
+
+        this._hitEOF = false;
+        this._mode = Lexer.DEFAULT_MODE;
+        this._modeStack = [];
+
+        this._interp.reset();
+    }
+
+    // Return a token from this source; i.e., match a token on the char stream.
+    nextToken() {
+        if (this._input === null) {
+            throw "nextToken requires a non-null input stream.";
+        }
+
+        /**
+         * Mark start location in char stream so unbuffered streams are
+         * guaranteed to at least have the text of the current token
+         */
+        const tokenStartMarker = this._input.mark();
+        try {
+            while (true) {
+                if (this._hitEOF) {
+                    this.emitEOF();
+                    return this._token;
+                }
+                this._token = null;
+                this._channel = Token.DEFAULT_CHANNEL;
+                this._tokenStartCharIndex = this._input.index;
+                this._tokenStartColumn = this._interp.column;
+                this._tokenStartLine = this._interp.line;
+                this._text = null;
+                let continueOuter = false;
+                while (true) {
+                    this._type = Token.INVALID_TYPE;
+                    let ttype = Lexer.SKIP;
+                    try {
+                        ttype = this._interp.match(this._input, this._mode);
+                    } catch (e) {
+                        if(e instanceof RecognitionException) {
+                            this.notifyListeners(e); // report error
+                            this.recover(e);
+                        } else {
+                            console.log(e.stack);
+                            throw e;
+                        }
+                    }
+                    if (this._input.LA(1) === Token.EOF) {
+                        this._hitEOF = true;
+                    }
+                    if (this._type === Token.INVALID_TYPE) {
+                        this._type = ttype;
+                    }
+                    if (this._type === Lexer.SKIP) {
+                        continueOuter = true;
+                        break;
+                    }
+                    if (this._type !== Lexer.MORE) {
+                        break;
+                    }
+                }
+                if (continueOuter) {
+                    continue;
+                }
+                if (this._token === null) {
+                    this.emit();
+                }
+                return this._token;
+            }
+        } finally {
+            // make sure we release marker after match or
+            // unbuffered char stream will keep buffering
+            this._input.release(tokenStartMarker);
+        }
+    }
+
+    /**
+     * Instruct the lexer to skip creating a token for the current lexer rule
+     * and look for another token. nextToken() knows to keep looking when
+     * a lexer rule finishes with token set to SKIP_TOKEN. Recall that
+     * if token==null at end of any token rule, it creates one for you
+     * and emits it.
+     */
+    skip() {
+        this._type = Lexer.SKIP;
+    }
+
+    more() {
+        this._type = Lexer.MORE;
+    }
+
+    mode(m) {
+        this._mode = m;
+    }
+
+    pushMode(m) {
+        if (this._interp.debug) {
+            console.log("pushMode " + m);
+        }
+        this._modeStack.push(this._mode);
+        this.mode(m);
+    }
+
+    popMode() {
+        if (this._modeStack.length === 0) {
+            throw "Empty Stack";
+        }
+        if (this._interp.debug) {
+            console.log("popMode back to " + this._modeStack.slice(0, -1));
+        }
+        this.mode(this._modeStack.pop());
+        return this._mode;
+    }
+
+    /**
+     * By default does not support multiple emits per nextToken invocation
+     * for efficiency reasons. Subclass and override this method, nextToken,
+     * and getToken (to push tokens into a list and pull from that list
+     * rather than a single variable as this implementation does).
+     */
+    emitToken(token) {
+        this._token = token;
+    }
+
+    /**
+     * The standard method called to automatically emit a token at the
+     * outermost lexical rule. The token object should point into the
+     * char buffer start..stop. If there is a text override in 'text',
+     * use that to set the token's text. Override this method to emit
+     * custom Token objects or provide a new factory.
+     */
+    emit() {
+        const t = this._factory.create(this._tokenFactorySourcePair, this._type,
+            this._text, this._channel, this._tokenStartCharIndex,
+            this.getCharIndex() - 1, this._tokenStartLine, this._tokenStartColumn);
+        this.emitToken(t);
+        return t;
+    }
+
+    emitEOF() {
+        const cpos = this.column;
+        const lpos = this.line;
+        const eof = this._factory.create(this._tokenFactorySourcePair, Token.EOF,
+            null, Token.DEFAULT_CHANNEL, this._input.index,
+            this._input.index - 1, lpos, cpos);
+        this.emitToken(eof);
+        return eof;
+    }
+
+    // What is the index of the current character of lookahead?
+    getCharIndex() {
+        return this._input.index;
+    }
+
+    /**
+     * Return a list of all Token objects in input char stream.
+     * Forces load of all tokens. Does not include EOF token.
+     */
+    getAllTokens() {
+        const tokens = [];
+        let t = this.nextToken();
+        while (t.type !== Token.EOF) {
+            tokens.push(t);
+            t = this.nextToken();
+        }
+        return tokens;
+    }
+
+    notifyListeners(e) {
+        const start = this._tokenStartCharIndex;
+        const stop = this._input.index;
+        const text = this._input.getText(start, stop);
+        const msg = "token recognition error at: '" + this.getErrorDisplay(text) + "'";
+        const listener = this.getErrorListenerDispatch();
+        listener.syntaxError(this, null, this._tokenStartLine,
+            this._tokenStartColumn, msg, e);
+    }
+
+    getErrorDisplay(s) {
+        const d = [];
+        for (let i = 0; i < s.length; i++) {
+            d.push(s[i]);
+        }
+        return d.join('');
+    }
+
+    getErrorDisplayForChar(c) {
+        if (c.charCodeAt(0) === Token.EOF) {
+            return "<EOF>";
+        } else if (c === '\n') {
+            return "\\n";
+        } else if (c === '\t') {
+            return "\\t";
+        } else if (c === '\r') {
+            return "\\r";
+        } else {
+            return c;
+        }
+    }
+
+    getCharErrorDisplay(c) {
+        return "'" + this.getErrorDisplayForChar(c) + "'";
+    }
+
+    /**
+     * Lexers can normally match any char in its vocabulary after matching
+     * a token, so do the easy thing and just kill a character and hope
+     * it all works out. You can instead use the rule invocation stack
+     * to do sophisticated error recovery if you are in a fragment rule.
+     */
+    recover(re) {
+        if (this._input.LA(1) !== Token.EOF) {
+            if (re instanceof LexerNoViableAltException) {
+                // skip a char and try again
+                this._interp.consume(this._input);
+            } else {
+                // TODO: Do we lose character or line position information?
+                this._input.consume();
+            }
+        }
+    }
+
+    get inputStream(){
+        return this._input;
+    }
+
+    // Set the char stream and reset the lexer
+    set inputStream(input) {
+        this._input = null;
+        this._tokenFactorySourcePair = [ this, this._input ];
+        this.reset();
+        this._input = input;
+        this._tokenFactorySourcePair = [ this, this._input ];
+    }
+
+    get sourceName(){
+        return this._input.sourceName;
+    }
+
+    get type(){
+        return this._type;
+    }
+
+    set type(type) {
+        this._type = type;
+    }
+
+    get line(){
+        return this._interp.line;
+    }
+
+    set line(line) {
+        this._interp.line = line;
+    }
+
+    get column(){
+        return this._interp.column;
+    }
+
+    set column(column) {
+        this._interp.column = column;
+    }
+
+    get text(){
+        if (this._text !== null) {
+            return this._text;
+        } else {
+            return this._interp.getText(this._input);
+        }
+    }
+
+    set text(text) {
+        this._text = text;
+    }
+}
-
-function Lexer(input) {
-    Recognizer.call(this);
-    this._input = input;
-    this._factory = CommonTokenFactory.DEFAULT;
-    this._tokenFactorySourcePair = [ this, input ];
-
-    this._interp = null; // child classes must populate this
-
-    // The goal of all lexer rules/methods is to create a token object.
-    // this is an instance variable as multiple rules may collaborate to
-    // create a single token. nextToken will return this object after
-    // matching lexer rule(s). If you subclass to allow multiple token
-    // emissions, then set this to the last token to be matched or
-    // something nonnull so that the auto token emit mechanism will not
-    // emit another token.
-    this._token = null;
-
-    // What character index in the stream did the current token start at?
-    // Needed, for example, to get the text for current token. Set at
-    // the start of nextToken.
-    this._tokenStartCharIndex = -1;
-
-    // The line on which the first character of the token resides///
-    this._tokenStartLine = -1;
-
-    // The character position of first character within the line///
-    this._tokenStartColumn = -1;
-
-    // Once we see EOF on char stream, next token will be EOF.
-    // If you have DONE : EOF ; then you see DONE EOF.
-    this._hitEOF = false;
-
-    // The channel number for the current token///
-    this._channel = Token.DEFAULT_CHANNEL;
-
-    // The token type for the current token///
-    this._type = Token.INVALID_TYPE;
-
-    this._modeStack = [];
-    this._mode = Lexer.DEFAULT_MODE;
-
-    // You can set the text for the current token to override what is in
-    // the input char buffer. Use setText() or can set this instance var.
-    // /
-    this._text = null;
-
-    return this;
-}
-
-Lexer.prototype = Object.create(Recognizer.prototype);
-Lexer.prototype.constructor = Lexer;
-
 Lexer.DEFAULT_MODE = 0;
 Lexer.MORE = -2;
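Between the hunks: pushMode()/popMode() above implement a plain stack of lexer modes, the mechanism island grammars use to switch token vocabularies (e.g. text versus tag context). A standalone sketch of the stack discipline, independent of any grammar (mode constants are illustrative; generated lexers define their own):

// pushMode saves the current mode and switches; popMode restores the saved one.
const DEFAULT_MODE = 0;
const INSIDE = 1; // illustrative mode constant

let mode = DEFAULT_MODE;
const modeStack = [];

function pushMode(m) { modeStack.push(mode); mode = m; }
function popMode() {
    if (modeStack.length === 0) throw "Empty Stack"; // mirrors Lexer.popMode
    mode = modeStack.pop();
    return mode;
}

pushMode(INSIDE); // e.g. a '<' token action enters tag context
popMode();        // a '>' token action returns to DEFAULT_MODE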
@@ -79,293 +370,7 @@ Lexer.HIDDEN = Token.HIDDEN_CHANNEL;
 Lexer.MIN_CHAR_VALUE = 0x0000;
 Lexer.MAX_CHAR_VALUE = 0x10FFFF;

-Lexer.prototype.reset = function() {
-    // wack Lexer state variables
-    if (this._input !== null) {
-        this._input.seek(0); // rewind the input
-    }
-    this._token = null;
-    this._type = Token.INVALID_TYPE;
-    this._channel = Token.DEFAULT_CHANNEL;
-    this._tokenStartCharIndex = -1;
-    this._tokenStartColumn = -1;
-    this._tokenStartLine = -1;
-    this._text = null;
-
-    this._hitEOF = false;
-    this._mode = Lexer.DEFAULT_MODE;
-    this._modeStack = [];
-
-    this._interp.reset();
-};
-
-// Return a token from this source; i.e., match a token on the char stream.
-Lexer.prototype.nextToken = function() {
-    if (this._input === null) {
-        throw "nextToken requires a non-null input stream.";
-    }
-
-    // Mark start location in char stream so unbuffered streams are
-    // guaranteed at least have text of current token
-    var tokenStartMarker = this._input.mark();
-    try {
-        while (true) {
-            if (this._hitEOF) {
-                this.emitEOF();
-                return this._token;
-            }
-            this._token = null;
-            this._channel = Token.DEFAULT_CHANNEL;
-            this._tokenStartCharIndex = this._input.index;
-            this._tokenStartColumn = this._interp.column;
-            this._tokenStartLine = this._interp.line;
-            this._text = null;
-            var continueOuter = false;
-            while (true) {
-                this._type = Token.INVALID_TYPE;
-                var ttype = Lexer.SKIP;
-                try {
-                    ttype = this._interp.match(this._input, this._mode);
-                } catch (e) {
-                    if(e instanceof RecognitionException) {
-                        this.notifyListeners(e); // report error
-                        this.recover(e);
-                    } else {
-                        console.log(e.stack);
-                        throw e;
-                    }
-                }
-                if (this._input.LA(1) === Token.EOF) {
-                    this._hitEOF = true;
-                }
-                if (this._type === Token.INVALID_TYPE) {
-                    this._type = ttype;
-                }
-                if (this._type === Lexer.SKIP) {
-                    continueOuter = true;
-                    break;
-                }
-                if (this._type !== Lexer.MORE) {
-                    break;
-                }
-            }
-            if (continueOuter) {
-                continue;
-            }
-            if (this._token === null) {
-                this.emit();
-            }
-            return this._token;
-        }
-    } finally {
-        // make sure we release marker after match or
-        // unbuffered char stream will keep buffering
-        this._input.release(tokenStartMarker);
-    }
-};
-
-// Instruct the lexer to skip creating a token for current lexer rule
-// and look for another token. nextToken() knows to keep looking when
-// a lexer rule finishes with token set to SKIP_TOKEN. Recall that
-// if token==null at end of any token rule, it creates one for you
-// and emits it.
-// /
-Lexer.prototype.skip = function() {
-    this._type = Lexer.SKIP;
-};
-
-Lexer.prototype.more = function() {
-    this._type = Lexer.MORE;
-};
-
-Lexer.prototype.mode = function(m) {
-    this._mode = m;
-};
-
-Lexer.prototype.pushMode = function(m) {
-    if (this._interp.debug) {
-        console.log("pushMode " + m);
-    }
-    this._modeStack.push(this._mode);
-    this.mode(m);
-};
-
-Lexer.prototype.popMode = function() {
-    if (this._modeStack.length === 0) {
-        throw "Empty Stack";
-    }
-    if (this._interp.debug) {
-        console.log("popMode back to " + this._modeStack.slice(0, -1));
-    }
-    this.mode(this._modeStack.pop());
-    return this._mode;
-};
-
-// Set the char stream and reset the lexer
-Object.defineProperty(Lexer.prototype, "inputStream", {
-    get : function() {
-        return this._input;
-    },
-    set : function(input) {
-        this._input = null;
-        this._tokenFactorySourcePair = [ this, this._input ];
-        this.reset();
-        this._input = input;
-        this._tokenFactorySourcePair = [ this, this._input ];
-    }
-});
-
-Object.defineProperty(Lexer.prototype, "sourceName", {
-    get : function sourceName() {
-        return this._input.sourceName;
-    }
-});
-
-// By default does not support multiple emits per nextToken invocation
-// for efficiency reasons. Subclass and override this method, nextToken,
-// and getToken (to push tokens into a list and pull from that list
-// rather than a single variable as this implementation does).
-// /
-Lexer.prototype.emitToken = function(token) {
-    this._token = token;
-};
-
-// The standard method called to automatically emit a token at the
-// outermost lexical rule. The token object should point into the
-// char buffer start..stop. If there is a text override in 'text',
-// use that to set the token's text. Override this method to emit
-// custom Token objects or provide a new factory.
-// /
-Lexer.prototype.emit = function() {
-    var t = this._factory.create(this._tokenFactorySourcePair, this._type,
-        this._text, this._channel, this._tokenStartCharIndex,
-        this.getCharIndex() - 1, this._tokenStartLine, this._tokenStartColumn);
-    this.emitToken(t);
-    return t;
-};
-
-Lexer.prototype.emitEOF = function() {
-    var cpos = this.column;
-    var lpos = this.line;
-    var eof = this._factory.create(this._tokenFactorySourcePair, Token.EOF,
-        null, Token.DEFAULT_CHANNEL, this._input.index,
-        this._input.index - 1, lpos, cpos);
-    this.emitToken(eof);
-    return eof;
-};
-
-Object.defineProperty(Lexer.prototype, "type", {
-    get : function() {
-        return this.type;
-    },
-    set : function(type) {
-        this._type = type;
-    }
-});
-
-Object.defineProperty(Lexer.prototype, "line", {
-    get : function() {
-        return this._interp.line;
-    },
-    set : function(line) {
-        this._interp.line = line;
-    }
-});
-
-Object.defineProperty(Lexer.prototype, "column", {
-    get : function() {
-        return this._interp.column;
-    },
-    set : function(column) {
-        this._interp.column = column;
-    }
-});
-
-// What is the index of the current character of lookahead?///
-Lexer.prototype.getCharIndex = function() {
-    return this._input.index;
-};
-
-// Return the text matched so far for the current token or any text override.
-// Set the complete text of this token; it wipes any previous changes to the text.
-Object.defineProperty(Lexer.prototype, "text", {
-    get : function() {
-        if (this._text !== null) {
-            return this._text;
-        } else {
-            return this._interp.getText(this._input);
-        }
-    },
-    set : function(text) {
-        this._text = text;
-    }
-});
-
-// Return a list of all Token objects in input char stream.
-// Forces load of all tokens. Does not include EOF token.
-// /
-Lexer.prototype.getAllTokens = function() {
-    var tokens = [];
-    var t = this.nextToken();
-    while (t.type !== Token.EOF) {
-        tokens.push(t);
-        t = this.nextToken();
-    }
-    return tokens;
-};
-
-Lexer.prototype.notifyListeners = function(e) {
-    var start = this._tokenStartCharIndex;
-    var stop = this._input.index;
-    var text = this._input.getText(start, stop);
-    var msg = "token recognition error at: '" + this.getErrorDisplay(text) + "'";
-    var listener = this.getErrorListenerDispatch();
-    listener.syntaxError(this, null, this._tokenStartLine,
-        this._tokenStartColumn, msg, e);
-};
-
-Lexer.prototype.getErrorDisplay = function(s) {
-    var d = [];
-    for (var i = 0; i < s.length; i++) {
-        d.push(s[i]);
-    }
-    return d.join('');
-};
-
-Lexer.prototype.getErrorDisplayForChar = function(c) {
-    if (c.charCodeAt(0) === Token.EOF) {
-        return "<EOF>";
-    } else if (c === '\n') {
-        return "\\n";
-    } else if (c === '\t') {
-        return "\\t";
-    } else if (c === '\r') {
-        return "\\r";
-    } else {
-        return c;
-    }
-};
-
-Lexer.prototype.getCharErrorDisplay = function(c) {
-    return "'" + this.getErrorDisplayForChar(c) + "'";
-};
-
-// Lexers can normally match any char in it's vocabulary after matching
-// a token, so do the easy thing and just kill a character and hope
-// it all works out. You can instead use the rule invocation stack
-// to do sophisticated error recovery if you are in a fragment rule.
-// /
-Lexer.prototype.recover = function(re) {
-    if (this._input.LA(1) !== Token.EOF) {
-        if (re instanceof LexerNoViableAltException) {
-            // skip a char and try again
-            this._interp.consume(this._input);
-        } else {
-            // TODO: Do we lose character or line position information?
-            this._input.consume();
-        }
-    }
-};
-
-exports.Lexer = Lexer;
+
+module.exports = Lexer;
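End of the Lexer.js hunks. For orientation, a sketch of how the class is typically driven; MyLexer stands in for a tool-generated subclass (hypothetical name), which supplies the ATN interpreter that the constructor leaves null in this._interp:

const InputStream = require('./InputStream');
const MyLexer = require('./parser/MyLexer'); // hypothetical generated lexer

const lexer = new MyLexer(new InputStream("input text"));

// getAllTokens() loops over nextToken() until EOF (the EOF token is excluded)
for (const t of lexer.getAllTokens()) {
    console.log(t.type, JSON.stringify(t.text));
}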
@@ -5,7 +5,7 @@
 var Token = require('./Token').Token;
 var ParseTreeListener = require('./tree/Tree').ParseTreeListener;
-var Recognizer = require('./Recognizer').Recognizer;
+var Recognizer = require('./Recognizer');
 var DefaultErrorStrategy = require('./error/ErrorStrategy').DefaultErrorStrategy;
 var ATNDeserializer = require('./atn/ATNDeserializer');
 var ATNDeserializationOptions = require('./atn/ATNDeserializationOptions');
@@ -294,7 +294,7 @@ Parser.prototype.getATNWithBypassAlts = function() {
 // String id = m.get("ID");
 // </pre>

-var Lexer = require('./Lexer').Lexer;
+var Lexer = require('./Lexer');

 Parser.prototype.compileParseTreePattern = function(pattern, patternRuleIndex, lexer) {
     lexer = lexer || null;
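These Parser.js hunks only track the new export shape. Because module.exports is now the constructor itself, the require() value is the class, so prototype wiring and instanceof checks keep working on it directly; a quick sketch:

const Recognizer = require('./Recognizer');
const Lexer = require('./Lexer');

console.log(typeof Lexer);                           // "function" — the class itself
console.log(Lexer.prototype instanceof Recognizer);  // true — Lexer extends Recognizer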
@@ -1,147 +1,136 @@
-//
 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */
-//
-
-var Token = require('./Token').Token;
-var ConsoleErrorListener = require('./error/ErrorListener').ConsoleErrorListener;
-var ProxyErrorListener = require('./error/ErrorListener').ProxyErrorListener;
-
-function Recognizer() {
-    this._listeners = [ ConsoleErrorListener.INSTANCE ];
-    this._interp = null;
-    this._stateNumber = -1;
-    return this;
-}
+
+const {Token} = require('./Token');
+const {ConsoleErrorListener} = require('./error/ErrorListener');
+const {ProxyErrorListener} = require('./error/ErrorListener');
+
+class Recognizer {
+    constructor() {
+        this._listeners = [ ConsoleErrorListener.INSTANCE ];
+        this._interp = null;
+        this._stateNumber = -1;
+    }
+
+    checkVersion(toolVersion) {
+        const runtimeVersion = "4.8";
+        if (runtimeVersion!==toolVersion) {
+            console.log("ANTLR runtime and generated code versions disagree: "+runtimeVersion+"!="+toolVersion);
+        }
+    }
+
+    addErrorListener(listener) {
+        this._listeners.push(listener);
+    }
+
+    removeErrorListeners() {
+        this._listeners = [];
+    }
+
+    getTokenTypeMap() {
+        const tokenNames = this.getTokenNames();
+        if (tokenNames===null) {
+            throw("The current recognizer does not provide a list of token names.");
+        }
+        let result = this.tokenTypeMapCache[tokenNames];
+        if(result===undefined) {
+            result = tokenNames.reduce(function(o, k, i) { o[k] = i; return o; }, {});
+            result.EOF = Token.EOF;
+            this.tokenTypeMapCache[tokenNames] = result;
+        }
+        return result;
+    }
+
+    /**
+     * Get a map from rule names to rule indexes.
+     * <p>Used for XPath and tree pattern compilation.</p>
+     */
+    getRuleIndexMap() {
+        const ruleNames = this.ruleNames;
+        if (ruleNames===null) {
+            throw("The current recognizer does not provide a list of rule names.");
+        }
+        let result = this.ruleIndexMapCache[ruleNames];
+        if(result===undefined) {
+            result = ruleNames.reduce(function(o, k, i) { o[k] = i; return o; }, {});
+            this.ruleIndexMapCache[ruleNames] = result;
+        }
+        return result;
+    }
+
+    getTokenType(tokenName) {
+        const ttype = this.getTokenTypeMap()[tokenName];
+        if (ttype !==undefined) {
+            return ttype;
+        } else {
+            return Token.INVALID_TYPE;
+        }
+    }
+
+    // What is the error header, normally line/character position information?
+    getErrorHeader(e) {
+        const line = e.getOffendingToken().line;
+        const column = e.getOffendingToken().column;
+        return "line " + line + ":" + column;
+    }
+
+    /**
+     * How should a token be displayed in an error message? The default
+     * is to display just the text, but during development you might
+     * want to have a lot of information spit out. Override in that case
+     * to use t.toString() (which, for CommonToken, dumps everything about
+     * the token). This is better than forcing you to override a method in
+     * your token objects because you don't have to go modify your lexer
+     * so that it creates a new Java type.
+     *
+     * @deprecated This method is not called by the ANTLR 4 Runtime. Specific
+     * implementations of {@link ANTLRErrorStrategy} may provide a similar
+     * feature when necessary. For example, see
+     * {@link DefaultErrorStrategy//getTokenErrorDisplay}.
+     */
+    getTokenErrorDisplay(t) {
+        if (t===null) {
+            return "<no token>";
+        }
+        let s = t.text;
+        if (s===null) {
+            if (t.type===Token.EOF) {
+                s = "<EOF>";
+            } else {
+                s = "<" + t.type + ">";
+            }
+        }
+        s = s.replace("\n","\\n").replace("\r","\\r").replace("\t","\\t");
+        return "'" + s + "'";
+    }
+
+    getErrorListenerDispatch() {
+        return new ProxyErrorListener(this._listeners);
+    }
+
+    /**
+     * subclass needs to override these if there are sempreds or actions
+     * that the ATN interp needs to execute
+     */
+    sempred(localctx, ruleIndex, actionIndex) {
+        return true;
+    }
+
+    precpred(localctx , precedence) {
+        return true;
+    }
+
+    get state(){
+        return this._stateNumber;
+    }
+
+    set state(state) {
+        this._stateNumber = state;
+    }
+}

 Recognizer.tokenTypeMapCache = {};
 Recognizer.ruleIndexMapCache = {};

-Recognizer.prototype.checkVersion = function(toolVersion) {
-    var runtimeVersion = "4.8";
-    if (runtimeVersion!==toolVersion) {
-        console.log("ANTLR runtime and generated code versions disagree: "+runtimeVersion+"!="+toolVersion);
-    }
-};
-
-Recognizer.prototype.addErrorListener = function(listener) {
-    this._listeners.push(listener);
-};
-
-Recognizer.prototype.removeErrorListeners = function() {
-    this._listeners = [];
-};
-
-Recognizer.prototype.getTokenTypeMap = function() {
-    var tokenNames = this.getTokenNames();
-    if (tokenNames===null) {
-        throw("The current recognizer does not provide a list of token names.");
-    }
-    var result = this.tokenTypeMapCache[tokenNames];
-    if(result===undefined) {
-        result = tokenNames.reduce(function(o, k, i) { o[k] = i; });
-        result.EOF = Token.EOF;
-        this.tokenTypeMapCache[tokenNames] = result;
-    }
-    return result;
-};
-
-// Get a map from rule names to rule indexes.
-//
-// <p>Used for XPath and tree pattern compilation.</p>
-//
-Recognizer.prototype.getRuleIndexMap = function() {
-    var ruleNames = this.ruleNames;
-    if (ruleNames===null) {
-        throw("The current recognizer does not provide a list of rule names.");
-    }
-    var result = this.ruleIndexMapCache[ruleNames];
-    if(result===undefined) {
-        result = ruleNames.reduce(function(o, k, i) { o[k] = i; });
-        this.ruleIndexMapCache[ruleNames] = result;
-    }
-    return result;
-};
-
-Recognizer.prototype.getTokenType = function(tokenName) {
-    var ttype = this.getTokenTypeMap()[tokenName];
-    if (ttype !==undefined) {
-        return ttype;
-    } else {
-        return Token.INVALID_TYPE;
-    }
-};
-
-// What is the error header, normally line/character position information?//
-Recognizer.prototype.getErrorHeader = function(e) {
-    var line = e.getOffendingToken().line;
-    var column = e.getOffendingToken().column;
-    return "line " + line + ":" + column;
-};
-
-// How should a token be displayed in an error message? The default
-// is to display just the text, but during development you might
-// want to have a lot of information spit out. Override in that case
-// to use t.toString() (which, for CommonToken, dumps everything about
-// the token). This is better than forcing you to override a method in
-// your token objects because you don't have to go modify your lexer
-// so that it creates a new Java type.
-//
-// @deprecated This method is not called by the ANTLR 4 Runtime. Specific
-// implementations of {@link ANTLRErrorStrategy} may provide a similar
-// feature when necessary. For example, see
-// {@link DefaultErrorStrategy//getTokenErrorDisplay}.
-//
-Recognizer.prototype.getTokenErrorDisplay = function(t) {
-    if (t===null) {
-        return "<no token>";
-    }
-    var s = t.text;
-    if (s===null) {
-        if (t.type===Token.EOF) {
-            s = "<EOF>";
-        } else {
-            s = "<" + t.type + ">";
-        }
-    }
-    s = s.replace("\n","\\n").replace("\r","\\r").replace("\t","\\t");
-    return "'" + s + "'";
-};
-
-Recognizer.prototype.getErrorListenerDispatch = function() {
-    return new ProxyErrorListener(this._listeners);
-};
-
-// subclass needs to override these if there are sempreds or actions
-// that the ATN interp needs to execute
-Recognizer.prototype.sempred = function(localctx, ruleIndex, actionIndex) {
-    return true;
-};
-
-Recognizer.prototype.precpred = function(localctx , precedence) {
-    return true;
-};
-
-// Indicate that the recognizer has changed internal state that is
-// consistent with the ATN state passed in. This way we always know
-// where we are in the ATN as the parser goes along. The rule
-// context objects form a stack that lets us see the stack of
-// invoking rules. Combine this and we have complete ATN
-// configuration information.
-Object.defineProperty(Recognizer.prototype, "state", {
-    get : function() {
-        return this._stateNumber;
-    },
-    set : function(state) {
-        this._stateNumber = state;
-    }
-});
-
-exports.Recognizer = Recognizer;
+
+module.exports = Recognizer;
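End of the Recognizer.js hunk. The listener plumbing above (addErrorListener, removeErrorListeners, getErrorListenerDispatch) is how callers capture errors instead of printing them; a sketch, assuming a duck-typed listener object is acceptable to ProxyErrorListener as it is here:

const Recognizer = require('./Recognizer');

// collects messages instead of writing to the console
class CollectingErrorListener {
    constructor() { this.messages = []; }
    syntaxError(recognizer, offendingSymbol, line, column, msg, e) {
        this.messages.push(line + ":" + column + " " + msg);
    }
}

const r = new Recognizer(); // normally a Lexer/Parser subclass
r.removeErrorListeners();   // drop ConsoleErrorListener.INSTANCE
const collector = new CollectingErrorListener();
r.addErrorListener(collector);

// getErrorListenerDispatch() returns a ProxyErrorListener that fans
// syntaxError out to every registered listener, i.e. to `collector`.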
@@ -4,7 +4,7 @@
 */

 const {Token} = require('./../Token');
-const {Lexer} = require('./../Lexer');
+const Lexer = require('./../Lexer');
 const ATN = require('./ATN');
 const ATNSimulator = require('./ATNSimulator');
 const {DFAState} = require('./../dfa/DFAState');
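The simulator's import follows the same unwrapping. One practical consequence: class statics such as Lexer.MIN_CHAR_VALUE and Lexer.MAX_CHAR_VALUE (defined between the Lexer.js hunks above) now sit directly on the required value:

const Lexer = require('./../Lexer');

// class statics travel with the class, not with a wrapper object
console.log(Lexer.MIN_CHAR_VALUE);           // 0
console.log(Lexer.MAX_CHAR_VALUE);           // 1114111 (0x10FFFF)
console.log(Lexer.DEFAULT_MODE, Lexer.MORE); // 0 -2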
@@ -14,7 +14,7 @@ exports.CommonToken = require('./Token').CommonToken;
 exports.InputStream = require('./InputStream');
 exports.FileStream = require('./FileStream');
 exports.CommonTokenStream = require('./CommonTokenStream');
-exports.Lexer = require('./Lexer').Lexer;
+exports.Lexer = require('./Lexer');
 exports.Parser = require('./Parser').Parser;
 var pc = require('./PredictionContext');
 exports.PredictionContextCache = pc.PredictionContextCache;
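At the package index, consumers of the bundled API are unaffected: index.js still re-exports the class under the same name, it just no longer has to unwrap it first. A usage sketch (require path illustrative):

const antlr4 = require('./index');

// exports.Lexer is now the class itself rather than require('./Lexer').Lexer
console.log(typeof antlr4.Lexer);        // "function"
console.log(antlr4.Lexer.name);          // "Lexer"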