forked from jasder/antlr
Merge pull request #1661 from bhamiltoncx/lexer-max-unicode
Change MAX_CHAR_VALUE to 0x10FFFF
This commit is contained in:
commit
1df5cd6206
|
@ -380,7 +380,7 @@ namespace Antlr4.Runtime.Atn
|
|||
|
||||
protected ATNState GetReachableTarget(Transition trans, int t)
|
||||
{
|
||||
if (trans.Matches(t, char.MinValue, char.MaxValue))
|
||||
if (trans.Matches(t, Lexer.MinCharValue, Lexer.MaxCharValue))
|
||||
{
|
||||
return trans.target;
|
||||
}
|
||||
|
@ -572,7 +572,7 @@ namespace Antlr4.Runtime.Atn
|
|||
case TransitionType.SET:
|
||||
if (treatEofAsEpsilon)
|
||||
{
|
||||
if (t.Matches(IntStreamConstants.EOF, char.MinValue, char.MaxValue))
|
||||
if (t.Matches(IntStreamConstants.EOF, Lexer.MinCharValue, Lexer.MaxCharValue))
|
||||
{
|
||||
c = new LexerATNConfig(config, t.target);
|
||||
break;
|
||||
|
|
|
@ -28,9 +28,9 @@ namespace Antlr4.Runtime
|
|||
|
||||
public const int Hidden = TokenConstants.HiddenChannel;
|
||||
|
||||
public const int MinCharValue = '\u0000';
|
||||
public const int MinCharValue = 0x0000;
|
||||
|
||||
public const int MaxCharValue = '\uFFFE';
|
||||
public const int MaxCharValue = 0x10FFFF;
|
||||
|
||||
private ICharStream _input;
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "atn/ActionTransition.h"
|
||||
#include "atn/ATN.h"
|
||||
#include "atn/RuleStopState.h"
|
||||
#include "Lexer.h"
|
||||
#include "Token.h"
|
||||
#include "Vocabulary.h"
|
||||
#include "InputMismatchException.h"
|
||||
|
@ -184,7 +185,7 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
|
|||
case atn::Transition::RANGE:
|
||||
case atn::Transition::SET:
|
||||
case atn::Transition::NOT_SET:
|
||||
if (!transition->matches((int)_input->LA(1), Token::MIN_USER_TOKEN_TYPE, 65535)) {
|
||||
if (!transition->matches((int)_input->LA(1), Token::MIN_USER_TOKEN_TYPE, Lexer::MAX_CHAR_VALUE)) {
|
||||
recoverInline();
|
||||
}
|
||||
matchWildcard();
|
||||
|
|
|
@ -114,8 +114,8 @@ const (
|
|||
const (
|
||||
LexerDefaultTokenChannel = TokenDefaultChannel
|
||||
LexerHidden = TokenHiddenChannel
|
||||
LexerMinCharValue = '\u0000'
|
||||
LexerMaxCharValue = '\uFFFE'
|
||||
LexerMinCharValue = 0x0000
|
||||
LexerMaxCharValue = 0x10FFFF
|
||||
)
|
||||
|
||||
func (b *BaseLexer) reset() {
|
||||
|
|
|
@ -311,7 +311,7 @@ func (l *LexerATNSimulator) accept(input CharStream, lexerActionExecutor *LexerA
|
|||
}
|
||||
|
||||
func (l *LexerATNSimulator) getReachableTarget(trans Transition, t int) ATNState {
|
||||
if trans.Matches(t, 0, 0xFFFE) {
|
||||
if trans.Matches(t, 0, LexerMaxCharValue) {
|
||||
return trans.getTarget()
|
||||
}
|
||||
|
||||
|
@ -461,7 +461,7 @@ func (l *LexerATNSimulator) getEpsilonTarget(input CharStream, config *LexerATNC
|
|||
trans.getSerializationType() == TransitionRANGE ||
|
||||
trans.getSerializationType() == TransitionSET {
|
||||
if treatEOFAsEpsilon {
|
||||
if trans.Matches(TokenEOF, 0, 0xFFFF) {
|
||||
if trans.Matches(TokenEOF, 0, LexerMaxCharValue) {
|
||||
cfg = NewLexerATNConfig4(config, trans.getTarget())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,8 +28,8 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
|
|||
|
||||
public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
|
||||
public static final int HIDDEN = Token.HIDDEN_CHANNEL;
|
||||
public static final int MIN_CHAR_VALUE = '\u0000';
|
||||
public static final int MAX_CHAR_VALUE = '\uFFFE';
|
||||
public static final int MIN_CHAR_VALUE = 0x0000;
|
||||
public static final int MAX_CHAR_VALUE = 0x10FFFF;
|
||||
|
||||
public CharStream _input;
|
||||
protected Pair<TokenSource, CharStream> _tokenFactorySourcePair;
|
||||
|
|
|
@ -372,7 +372,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
|
||||
|
||||
protected ATNState getReachableTarget(Transition trans, int t) {
|
||||
if (trans.matches(t, Character.MIN_VALUE, Character.MAX_VALUE)) {
|
||||
if (trans.matches(t, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) {
|
||||
return trans.target;
|
||||
}
|
||||
|
||||
|
@ -544,7 +544,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
case Transition.RANGE:
|
||||
case Transition.SET:
|
||||
if (treatEofAsEpsilon) {
|
||||
if (t.matches(CharStream.EOF, Character.MIN_VALUE, Character.MAX_VALUE)) {
|
||||
if (t.matches(CharStream.EOF, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) {
|
||||
c = new LexerATNConfig(config, t.target);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -76,8 +76,8 @@ Lexer.SKIP = -3;
|
|||
|
||||
Lexer.DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
|
||||
Lexer.HIDDEN = Token.HIDDEN_CHANNEL;
|
||||
Lexer.MIN_CHAR_VALUE = '\u0000';
|
||||
Lexer.MAX_CHAR_VALUE = '\uFFFE';
|
||||
Lexer.MIN_CHAR_VALUE = 0x0000;
|
||||
Lexer.MAX_CHAR_VALUE = 0x10FFFF;
|
||||
|
||||
Lexer.prototype.reset = function() {
|
||||
// wack Lexer state variables
|
||||
|
|
|
@ -326,7 +326,7 @@ LexerATNSimulator.prototype.accept = function(input, lexerActionExecutor,
|
|||
};
|
||||
|
||||
LexerATNSimulator.prototype.getReachableTarget = function(trans, t) {
|
||||
if (trans.matches(t, 0, 0xFFFE)) {
|
||||
if (trans.matches(t, 0, Lexer.MAX_CHAR_VALUE)) {
|
||||
return trans.target;
|
||||
} else {
|
||||
return null;
|
||||
|
@ -468,7 +468,7 @@ LexerATNSimulator.prototype.getEpsilonTarget = function(input, config, trans,
|
|||
trans.serializationType === Transition.RANGE ||
|
||||
trans.serializationType === Transition.SET) {
|
||||
if (treatEofAsEpsilon) {
|
||||
if (trans.matches(Token.EOF, 0, 0xFFFF)) {
|
||||
if (trans.matches(Token.EOF, 0, Lexer.MAX_CHAR_VALUE)) {
|
||||
cfg = new LexerATNConfig( { state:trans.target }, config);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,8 +29,8 @@ class Lexer(Recognizer, TokenSource):
|
|||
|
||||
DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL
|
||||
HIDDEN = Token.HIDDEN_CHANNEL
|
||||
MIN_CHAR_VALUE = '\u0000'
|
||||
MAX_CHAR_VALUE = '\uFFFE'
|
||||
MIN_CHAR_VALUE = 0x0000
|
||||
MAX_CHAR_VALUE = 0x10FFFF
|
||||
|
||||
def __init__(self, input, output=sys.stdout):
|
||||
super(Lexer, self).__init__()
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#
|
||||
from antlr4 import PredictionContextCache
|
||||
from antlr4.dfa.DFA import DFA
|
||||
from antlr4.Lexer import Lexer
|
||||
from antlr4.Parser import Parser
|
||||
from antlr4.ParserRuleContext import InterpreterRuleContext
|
||||
from antlr4.Token import Token
|
||||
|
@ -111,7 +112,7 @@ class ParserInterpreter(Parser):
|
|||
|
||||
elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]:
|
||||
|
||||
if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, 0xFFFF):
|
||||
if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, Lexer.MAX_CHAR_VALUE):
|
||||
self._errHandler.recoverInline(self)
|
||||
self.matchWildcard()
|
||||
|
||||
|
|
|
@ -285,7 +285,7 @@ class LexerATNSimulator(ATNSimulator):
|
|||
lexerActionExecutor.execute(self.recog, input, startIndex)
|
||||
|
||||
def getReachableTarget(self, trans, t):
|
||||
if trans.matches(t, 0, 0xFFFE):
|
||||
if trans.matches(t, 0, Lexer.MAX_CHAR_VALUE):
|
||||
return trans.target
|
||||
else:
|
||||
return None
|
||||
|
@ -412,7 +412,7 @@ class LexerATNSimulator(ATNSimulator):
|
|||
|
||||
elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]:
|
||||
if treatEofAsEpsilon:
|
||||
if t.matches(Token.EOF, 0, 0xFFFF):
|
||||
if t.matches(Token.EOF, 0, Lexer.MAX_CHAR_VALUE):
|
||||
c = LexerATNConfig(state=t.target, config=config)
|
||||
|
||||
return c
|
||||
|
|
|
@ -31,8 +31,8 @@ class Lexer(Recognizer, TokenSource):
|
|||
|
||||
DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL
|
||||
HIDDEN = Token.HIDDEN_CHANNEL
|
||||
MIN_CHAR_VALUE = '\u0000'
|
||||
MAX_CHAR_VALUE = '\uFFFE'
|
||||
MIN_CHAR_VALUE = 0x0000
|
||||
MAX_CHAR_VALUE = 0x10FFFF
|
||||
|
||||
def __init__(self, input:InputStream, output:TextIO = sys.stdout):
|
||||
super().__init__()
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#
|
||||
from antlr4.dfa.DFA import DFA
|
||||
from antlr4.BufferedTokenStream import TokenStream
|
||||
from antlr4.Lexer import Lexer
|
||||
from antlr4.Parser import Parser
|
||||
from antlr4.ParserRuleContext import InterpreterRuleContext, ParserRuleContext
|
||||
from antlr4.Token import Token
|
||||
|
@ -113,7 +114,7 @@ class ParserInterpreter(Parser):
|
|||
|
||||
elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]:
|
||||
|
||||
if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, 0xFFFF):
|
||||
if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, Lexer.MAX_CHAR_VALUE):
|
||||
self._errHandler.recoverInline(self)
|
||||
self.matchWildcard()
|
||||
|
||||
|
|
|
@ -291,7 +291,8 @@ class LexerATNSimulator(ATNSimulator):
|
|||
lexerActionExecutor.execute(self.recog, input, startIndex)
|
||||
|
||||
def getReachableTarget(self, trans:Transition, t:int):
|
||||
if trans.matches(t, 0, 0xFFFE):
|
||||
from antlr4.Lexer import Lexer
|
||||
if trans.matches(t, 0, Lexer.MAX_CHAR_VALUE):
|
||||
return trans.target
|
||||
else:
|
||||
return None
|
||||
|
@ -419,7 +420,8 @@ class LexerATNSimulator(ATNSimulator):
|
|||
|
||||
elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]:
|
||||
if treatEofAsEpsilon:
|
||||
if t.matches(Token.EOF, 0, 0xFFFF):
|
||||
from antlr4.Lexer import Lexer
|
||||
if t.matches(Token.EOF, 0, Lexer.MAX_CHAR_VALUE):
|
||||
c = LexerATNConfig(state=t.target, config=config)
|
||||
|
||||
return c
|
||||
|
|
|
@ -21,8 +21,8 @@ open class Lexer: Recognizer<LexerATNSimulator>
|
|||
|
||||
public static let DEFAULT_TOKEN_CHANNEL: Int = CommonToken.DEFAULT_CHANNEL
|
||||
public static let HIDDEN: Int = CommonToken.HIDDEN_CHANNEL
|
||||
public static let MIN_CHAR_VALUE: Int = Character("\u{0000}").unicodeValue
|
||||
public static let MAX_CHAR_VALUE: Int = Character("\u{FFFE}").unicodeValue
|
||||
public static let MIN_CHAR_VALUE: Int = Character.MIN_VALUE;
|
||||
public static let MAX_CHAR_VALUE: Int = Character.MAX_VALUE;
|
||||
|
||||
public var _input: CharStream?
|
||||
internal var _tokenFactorySourcePair: (TokenSource?, CharStream?)
|
||||
|
|
|
@ -40,7 +40,7 @@ extension Character {
|
|||
}
|
||||
|
||||
public static var MAX_VALUE: Int {
|
||||
let c: Character = "\u{FFFF}"
|
||||
let c: Character = "\u{10FFFF}"
|
||||
return c.unicodeValue
|
||||
}
|
||||
public static var MIN_VALUE: Int {
|
||||
|
|
Loading…
Reference in New Issue