Merge pull request #1661 from bhamiltoncx/lexer-max-unicode

Change MAX_CHAR_VALUE to 0x10FFFF
This commit is contained in:
Terence Parr 2017-02-16 10:16:18 -08:00 committed by GitHub
commit 1df5cd6206
17 changed files with 35 additions and 30 deletions

View File

@ -380,7 +380,7 @@ namespace Antlr4.Runtime.Atn
protected ATNState GetReachableTarget(Transition trans, int t)
{
-if (trans.Matches(t, char.MinValue, char.MaxValue))
+if (trans.Matches(t, Lexer.MinCharValue, Lexer.MaxCharValue))
{
return trans.target;
}
@ -572,7 +572,7 @@ namespace Antlr4.Runtime.Atn
case TransitionType.SET:
if (treatEofAsEpsilon)
{
-if (t.Matches(IntStreamConstants.EOF, char.MinValue, char.MaxValue))
+if (t.Matches(IntStreamConstants.EOF, Lexer.MinCharValue, Lexer.MaxCharValue))
{
c = new LexerATNConfig(config, t.target);
break;

View File

@ -28,9 +28,9 @@ namespace Antlr4.Runtime
public const int Hidden = TokenConstants.HiddenChannel;
-public const int MinCharValue = '\u0000';
+public const int MinCharValue = 0x0000;
-public const int MaxCharValue = '\uFFFE';
+public const int MaxCharValue = 0x10FFFF;
private ICharStream _input;

View File

@ -18,6 +18,7 @@
#include "atn/ActionTransition.h"
#include "atn/ATN.h"
#include "atn/RuleStopState.h"
+#include "Lexer.h"
#include "Token.h"
#include "Vocabulary.h"
#include "InputMismatchException.h"
@ -184,7 +185,7 @@ void ParserInterpreter::visitState(atn::ATNState *p) {
case atn::Transition::RANGE:
case atn::Transition::SET:
case atn::Transition::NOT_SET:
-if (!transition->matches((int)_input->LA(1), Token::MIN_USER_TOKEN_TYPE, 65535)) {
+if (!transition->matches((int)_input->LA(1), Token::MIN_USER_TOKEN_TYPE, Lexer::MAX_CHAR_VALUE)) {
recoverInline();
}
matchWildcard();

View File

@ -114,8 +114,8 @@ const (
const (
LexerDefaultTokenChannel = TokenDefaultChannel
LexerHidden = TokenHiddenChannel
-LexerMinCharValue = '\u0000'
-LexerMaxCharValue = '\uFFFE'
+LexerMinCharValue = 0x0000
+LexerMaxCharValue = 0x10FFFF
)
func (b *BaseLexer) reset() {

View File

@ -311,7 +311,7 @@ func (l *LexerATNSimulator) accept(input CharStream, lexerActionExecutor *LexerA
}
func (l *LexerATNSimulator) getReachableTarget(trans Transition, t int) ATNState {
-if trans.Matches(t, 0, 0xFFFE) {
+if trans.Matches(t, 0, LexerMaxCharValue) {
return trans.getTarget()
}
@ -461,7 +461,7 @@ func (l *LexerATNSimulator) getEpsilonTarget(input CharStream, config *LexerATNC
trans.getSerializationType() == TransitionRANGE ||
trans.getSerializationType() == TransitionSET {
if treatEOFAsEpsilon {
-if trans.Matches(TokenEOF, 0, 0xFFFF) {
+if trans.Matches(TokenEOF, 0, LexerMaxCharValue) {
cfg = NewLexerATNConfig4(config, trans.getTarget())
}
}

View File

@ -28,8 +28,8 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
public static final int HIDDEN = Token.HIDDEN_CHANNEL;
-public static final int MIN_CHAR_VALUE = '\u0000';
-public static final int MAX_CHAR_VALUE = '\uFFFE';
+public static final int MIN_CHAR_VALUE = 0x0000;
+public static final int MAX_CHAR_VALUE = 0x10FFFF;
public CharStream _input;
protected Pair<TokenSource, CharStream> _tokenFactorySourcePair;

View File

@ -372,7 +372,7 @@ public class LexerATNSimulator extends ATNSimulator {
protected ATNState getReachableTarget(Transition trans, int t) {
-if (trans.matches(t, Character.MIN_VALUE, Character.MAX_VALUE)) {
+if (trans.matches(t, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) {
return trans.target;
}
@ -544,7 +544,7 @@ public class LexerATNSimulator extends ATNSimulator {
case Transition.RANGE:
case Transition.SET:
if (treatEofAsEpsilon) {
-if (t.matches(CharStream.EOF, Character.MIN_VALUE, Character.MAX_VALUE)) {
+if (t.matches(CharStream.EOF, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) {
c = new LexerATNConfig(config, t.target);
break;
}

View File

@ -76,8 +76,8 @@ Lexer.SKIP = -3;
Lexer.DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
Lexer.HIDDEN = Token.HIDDEN_CHANNEL;
-Lexer.MIN_CHAR_VALUE = '\u0000';
-Lexer.MAX_CHAR_VALUE = '\uFFFE';
+Lexer.MIN_CHAR_VALUE = 0x0000;
+Lexer.MAX_CHAR_VALUE = 0x10FFFF;
Lexer.prototype.reset = function() {
// wack Lexer state variables

View File

@ -326,7 +326,7 @@ LexerATNSimulator.prototype.accept = function(input, lexerActionExecutor,
};
LexerATNSimulator.prototype.getReachableTarget = function(trans, t) {
-if (trans.matches(t, 0, 0xFFFE)) {
+if (trans.matches(t, 0, Lexer.MAX_CHAR_VALUE)) {
return trans.target;
} else {
return null;
@ -468,7 +468,7 @@ LexerATNSimulator.prototype.getEpsilonTarget = function(input, config, trans,
trans.serializationType === Transition.RANGE ||
trans.serializationType === Transition.SET) {
if (treatEofAsEpsilon) {
-if (trans.matches(Token.EOF, 0, 0xFFFF)) {
+if (trans.matches(Token.EOF, 0, Lexer.MAX_CHAR_VALUE)) {
cfg = new LexerATNConfig( { state:trans.target }, config);
}
}

View File

@ -29,8 +29,8 @@ class Lexer(Recognizer, TokenSource):
DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL
HIDDEN = Token.HIDDEN_CHANNEL
-MIN_CHAR_VALUE = '\u0000'
-MAX_CHAR_VALUE = '\uFFFE'
+MIN_CHAR_VALUE = 0x0000
+MAX_CHAR_VALUE = 0x10FFFF
def __init__(self, input, output=sys.stdout):
super(Lexer, self).__init__()

View File

@ -19,6 +19,7 @@
#
from antlr4 import PredictionContextCache
from antlr4.dfa.DFA import DFA
+from antlr4.Lexer import Lexer
from antlr4.Parser import Parser
from antlr4.ParserRuleContext import InterpreterRuleContext
from antlr4.Token import Token
@ -111,7 +112,7 @@ class ParserInterpreter(Parser):
elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]:
-if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, 0xFFFF):
+if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, Lexer.MAX_CHAR_VALUE):
self._errHandler.recoverInline(self)
self.matchWildcard()

View File

@ -285,7 +285,7 @@ class LexerATNSimulator(ATNSimulator):
lexerActionExecutor.execute(self.recog, input, startIndex)
def getReachableTarget(self, trans, t):
-if trans.matches(t, 0, 0xFFFE):
+if trans.matches(t, 0, Lexer.MAX_CHAR_VALUE):
return trans.target
else:
return None
@ -412,7 +412,7 @@ class LexerATNSimulator(ATNSimulator):
elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]:
if treatEofAsEpsilon:
-if t.matches(Token.EOF, 0, 0xFFFF):
+if t.matches(Token.EOF, 0, Lexer.MAX_CHAR_VALUE):
c = LexerATNConfig(state=t.target, config=config)
return c

View File

@ -31,8 +31,8 @@ class Lexer(Recognizer, TokenSource):
DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL
HIDDEN = Token.HIDDEN_CHANNEL
-MIN_CHAR_VALUE = '\u0000'
-MAX_CHAR_VALUE = '\uFFFE'
+MIN_CHAR_VALUE = 0x0000
+MAX_CHAR_VALUE = 0x10FFFF
def __init__(self, input:InputStream, output:TextIO = sys.stdout):
super().__init__()

View File

@ -19,6 +19,7 @@
#
from antlr4.dfa.DFA import DFA
from antlr4.BufferedTokenStream import TokenStream
+from antlr4.Lexer import Lexer
from antlr4.Parser import Parser
from antlr4.ParserRuleContext import InterpreterRuleContext, ParserRuleContext
from antlr4.Token import Token
@ -113,7 +114,7 @@ class ParserInterpreter(Parser):
elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]:
-if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, 0xFFFF):
+if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, Lexer.MAX_CHAR_VALUE):
self._errHandler.recoverInline(self)
self.matchWildcard()

View File

@ -291,7 +291,8 @@ class LexerATNSimulator(ATNSimulator):
lexerActionExecutor.execute(self.recog, input, startIndex)
def getReachableTarget(self, trans:Transition, t:int):
-if trans.matches(t, 0, 0xFFFE):
+from antlr4.Lexer import Lexer
+if trans.matches(t, 0, Lexer.MAX_CHAR_VALUE):
return trans.target
else:
return None
@ -419,7 +420,8 @@ class LexerATNSimulator(ATNSimulator):
elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]:
if treatEofAsEpsilon:
-if t.matches(Token.EOF, 0, 0xFFFF):
+from antlr4.Lexer import Lexer
+if t.matches(Token.EOF, 0, Lexer.MAX_CHAR_VALUE):
c = LexerATNConfig(state=t.target, config=config)
return c

View File

@ -21,8 +21,8 @@ open class Lexer: Recognizer<LexerATNSimulator>
public static let DEFAULT_TOKEN_CHANNEL: Int = CommonToken.DEFAULT_CHANNEL
public static let HIDDEN: Int = CommonToken.HIDDEN_CHANNEL
-public static let MIN_CHAR_VALUE: Int = Character("\u{0000}").unicodeValue
-public static let MAX_CHAR_VALUE: Int = Character("\u{FFFE}").unicodeValue
+public static let MIN_CHAR_VALUE: Int = Character.MIN_VALUE;
+public static let MAX_CHAR_VALUE: Int = Character.MAX_VALUE;
public var _input: CharStream?
internal var _tokenFactorySourcePair: (TokenSource?, CharStream?)

View File

@ -40,7 +40,7 @@ extension Character {
}
public static var MAX_VALUE: Int {
-let c: Character = "\u{FFFF}"
+let c: Character = "\u{10FFFF}"
return c.unicodeValue
}
public static var MIN_VALUE: Int {