Merge pull request #1748 from parrt/rm-xpath-grammar

Fixes #1620. Replace the grammar-generated XPath lexer with a hand-built one to avoid a cyclic dependency between the tool and the plugin.
This commit is contained in:
Terence Parr 2017-03-10 15:18:05 -08:00 committed by GitHub
commit 9bf3946515
2 changed files with 179 additions and 63 deletions

View File

@ -1,63 +0,0 @@
// Lexer-only grammar producing the tokens of an XPath-style path expression:
// separators ('/' and '//'), the wildcard '*', the negation marker '!',
// identifiers, and single-quoted strings.
lexer grammar XPathLexer;
// These two token types have no rule of their own; the action attached to the
// ID rule below retypes each identifier to one of them.
tokens { TOKEN_REF, RULE_REF }
/*
Informal sketch of the path syntax the tokens below are meant to support.
It is kept only as a comment; no parser rules are defined in this grammar.
path : separator? word (separator word)* EOF ;
separator
: '/' '!'
| '//' '!'
| '/'
| '//'
;
word: TOKEN_REF
| RULE_REF
| STRING
| '*'
;
*/
// Separator and operator tokens.
ANYWHERE : '//' ;
ROOT : '/' ;
WILDCARD : '*' ;
BANG : '!' ;
// An identifier is a NameStartChar followed by any number of NameChar.
// The action retypes the token: TOKEN_REF when the first character is
// upper case, RULE_REF otherwise.
ID : NameStartChar NameChar*
{
String text = getText();
if ( Character.isUpperCase(text.charAt(0)) ) setType(TOKEN_REF);
else setType(RULE_REF);
}
;
// Characters allowed after the first character of an identifier:
// everything in NameStartChar plus digits, underscore and the listed ranges.
fragment
NameChar : NameStartChar
| '0'..'9'
| '_'
| '\u00B7'
| '\u0300'..'\u036F'
| '\u203F'..'\u2040'
;
// Characters allowed as the first character of an identifier:
// ASCII letters plus the listed non-ASCII ranges.
fragment
NameStartChar
: 'A'..'Z' | 'a'..'z'
| '\u00C0'..'\u00D6'
| '\u00D8'..'\u00F6'
| '\u00F8'..'\u02FF'
| '\u0370'..'\u037D'
| '\u037F'..'\u1FFF'
| '\u200C'..'\u200D'
| '\u2070'..'\u218F'
| '\u2C00'..'\u2FEF'
| '\u3001'..'\uD7FF'
| '\uF900'..'\uFDCF'
| '\uFDF0'..'\uFFFD'
; // does not cover the range '\u10000'..'\uEFFFF'
// A single-quoted string: non-greedy match of anything up to the next quote.
STRING : '\'' .*? '\'' ;
// The whitespace-skipping rule is disabled, so no rule here matches whitespace.
//WS : [ \t\r\n]+ -> skip ;

View File

@ -0,0 +1,179 @@
/*
* Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonToken;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.LexerNoViableAltException;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.VocabularyImpl;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.misc.Interval;
/**
 * Hand-written lexer that mimics the lexer formerly generated from
 * {@code XPathLexer.g4}.
 *
 * <p>It recognises {@code //} ({@link #ANYWHERE}), {@code /} ({@link #ROOT}),
 * {@code *} ({@link #WILDCARD}), {@code !} ({@link #BANG}), single-quoted
 * strings ({@link #STRING}) and identifiers. Identifiers are reported as
 * {@link #TOKEN_REF} when their first character is upper case and as
 * {@link #RULE_REF} otherwise; the {@link #ID} type itself is never produced
 * by {@link #nextToken()}.
 *
 * <p>No ATN is involved: {@link #getATN()} returns {@code null} and the
 * line/column bookkeeping is done by this class in {@link #consume()}.
 */
public class XPathLexer extends Lexer {
    public static final int
        TOKEN_REF=1, RULE_REF=2, ANYWHERE=3, ROOT=4, WILDCARD=5, BANG=6, ID=7,
        STRING=8;

    public static String[] modeNames = {
        "DEFAULT_MODE"
    };

    public static final String[] ruleNames = {
        "ANYWHERE", "ROOT", "WILDCARD", "BANG", "ID", "NameChar", "NameStartChar",
        "STRING"
    };

    /** Literal spelling per token type; {@code null} where a type has no fixed literal. */
    private static final String[] _LITERAL_NAMES = {
        null, null, null, "'//'", "'/'", "'*'", "'!'"
    };

    /** Symbolic name per token type; index 0 is unused. */
    private static final String[] _SYMBOLIC_NAMES = {
        null, "TOKEN_REF", "RULE_REF", "ANYWHERE", "ROOT", "WILDCARD", "BANG",
        "ID", "STRING"
    };

    public static final Vocabulary VOCABULARY = new VocabularyImpl(_LITERAL_NAMES, _SYMBOLIC_NAMES);

    /**
     * @deprecated Use {@link #VOCABULARY} instead.
     */
    @Deprecated
    public static final String[] tokenNames;

    // Build the legacy display-name table: prefer the literal name, fall back
    // to the symbolic name, and use "<INVALID>" when neither exists.
    static {
        tokenNames = new String[_SYMBOLIC_NAMES.length];
        for (int i = 0; i < tokenNames.length; i++) {
            tokenNames[i] = VOCABULARY.getLiteralName(i);
            if (tokenNames[i] == null) {
                tokenNames[i] = VOCABULARY.getSymbolicName(i);
            }
            if (tokenNames[i] == null) {
                tokenNames[i] = "<INVALID>";
            }
        }
    }

    @Override
    public String getGrammarFileName() { return "XPathLexer.g4"; }

    @Override
    public String[] getRuleNames() { return ruleNames; }

    @Override
    public String[] getModeNames() { return modeNames; }

    @Override
    @Deprecated
    public String[] getTokenNames() {
        return tokenNames;
    }

    @Override
    public Vocabulary getVocabulary() {
        return VOCABULARY;
    }

    /** This lexer is hand-written and has no ATN, so this always returns {@code null}. */
    @Override
    public ATN getATN() {
        return null;
    }

    /** 1-based line of the next unconsumed character; advanced by {@link #consume()}. */
    protected int line = 1;

    /** 0-based column of the next unconsumed character; advanced by {@link #consume()}. */
    protected int charPositionInLine = 0;

    /**
     * Creates a lexer reading from {@code input}.
     *
     * @param input the character stream holding the path expression
     */
    public XPathLexer(CharStream input) {
        super(input);
    }

    /**
     * Scans and returns the next token from the input.
     *
     * <p>Every non-EOF token carries its start and stop character index and
     * the line and column at which it <em>starts</em>. The EOF token is
     * returned with only its type and the text {@code "<EOF>"}.
     *
     * @return the next token, or an EOF token when the input is exhausted
     * @throws LexerNoViableAltException if the next character cannot start any
     *         token, or if a quoted string is not terminated
     */
    @Override
    public Token nextToken() {
        _tokenStartCharIndex = _input.index();
        // Capture the position before consuming anything: consume() keeps
        // advancing line/charPositionInLine while the token is being matched.
        int startLine = line;
        int startCharPositionInLine = charPositionInLine;

        CommonToken t;
        switch ( _input.LA(1) ) {
            case '/':
                consume();
                if ( _input.LA(1)=='/' ) {
                    consume();
                    t = new CommonToken(ANYWHERE, "//");
                }
                else {
                    t = new CommonToken(ROOT, "/");
                }
                break;
            case '*':
                consume();
                t = new CommonToken(WILDCARD, "*");
                break;
            case '!':
                consume();
                t = new CommonToken(BANG, "!");
                break;
            case '\'':
                String s = matchString();
                t = new CommonToken(STRING, s);
                break;
            case CharStream.EOF :
                return new CommonToken(EOF, "<EOF>");
            default:
                if ( isNameStartChar(_input.LA(1)) ) {
                    String id = matchID();
                    if ( Character.isUpperCase(id.charAt(0)) ) t = new CommonToken(TOKEN_REF, id);
                    else t = new CommonToken(RULE_REF, id);
                }
                else {
                    throw new LexerNoViableAltException(this, _input, _tokenStartCharIndex, null);
                }
                break;
        }

        t.setStartIndex(_tokenStartCharIndex);
        // The token ends on the last character consumed for it.
        t.setStopIndex(_input.index()-1);
        // Column within the line, not the absolute index into the stream.
        t.setCharPositionInLine(startCharPositionInLine);
        t.setLine(startLine);
        return t;
    }

    /**
     * Consumes one character from the input while keeping {@link #line} and
     * {@link #charPositionInLine} up to date: a newline starts a new line at
     * column 0, any other character advances the column by one.
     */
    public void consume() {
        int curChar = _input.LA(1);
        if ( curChar=='\n' ) {
            line++;
            charPositionInLine=0;
        }
        else {
            charPositionInLine++;
        }
        _input.consume();
    }

    /**
     * Returns the line counter maintained by {@link #consume()}.
     *
     * <p>NOTE(review): overridden alongside {@link #getCharPositionInLine()}
     * on the assumption that the inherited implementation relies on an ATN
     * interpreter, which this lexer never creates -- confirm against the
     * runtime's {@code Lexer}.
     */
    @Override
    public int getLine() {
        return line;
    }

    /** Returns the column counter maintained by {@link #consume()}. */
    @Override
    public int getCharPositionInLine() {
        return charPositionInLine;
    }

    /**
     * Matches an identifier starting at the current input position. The caller
     * is expected to have checked that the current character can start one.
     *
     * @return the text of the identifier
     */
    public String matchID() {
        int start = _input.index();
        consume(); // drop start char
        while ( isNameChar(_input.LA(1)) ) {
            consume();
        }
        return _input.getText(Interval.of(start,_input.index()-1));
    }

    /**
     * Matches a single-quoted string starting at the current input position,
     * which is expected to be the opening quote.
     *
     * @return the text of the string, including both quotes
     * @throws LexerNoViableAltException if the input ends before the closing
     *         quote is found
     */
    public String matchString() {
        int start = _input.index();
        consume(); // drop first quote
        while ( _input.LA(1)!='\'' ) {
            // Report an unterminated string as a lexical error instead of
            // trying to consume past the end of the input.
            if ( _input.LA(1)==CharStream.EOF ) {
                throw new LexerNoViableAltException(this, _input, start, null);
            }
            consume();
        }
        consume(); // drop last quote
        return _input.getText(Interval.of(start,_input.index()-1));
    }

    /** Returns whether {@code c} may appear after the first character of an identifier. */
    public boolean isNameChar(int c) { return Character.isUnicodeIdentifierPart(c); }

    /** Returns whether {@code c} may be the first character of an identifier. */
    public boolean isNameStartChar(int c) { return Character.isUnicodeIdentifierStart(c); }
}