forked from jasder/antlr
Implemented token vocab option
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6873]
This commit is contained in:
parent
118d225066
commit
e1ccf08680
|
@ -29,6 +29,7 @@ package org.antlr.v4.runtime;
|
|||
|
||||
import org.antlr.runtime.IntStream;
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.runtime.TokenStream;
|
||||
import org.antlr.v4.runtime.misc.LABitSet;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -58,11 +59,13 @@ public abstract class BaseRecognizer {
|
|||
*/
|
||||
protected RecognizerSharedState state;
|
||||
|
||||
public BaseRecognizer() {
|
||||
public BaseRecognizer(IntStream input) {
|
||||
this.input = input;
|
||||
state = new RecognizerSharedState();
|
||||
}
|
||||
|
||||
public BaseRecognizer(RecognizerSharedState state) {
|
||||
public BaseRecognizer(IntStream input, RecognizerSharedState state) {
|
||||
this.input = input;
|
||||
if ( state==null ) {
|
||||
state = new RecognizerSharedState();
|
||||
}
|
||||
|
@ -102,8 +105,8 @@ public abstract class BaseRecognizer {
|
|||
public Object match(int ttype, LABitSet follow)
|
||||
throws RecognitionException
|
||||
{
|
||||
//System.out.println("match "+((TokenStream)input).LT(1));
|
||||
Object matchedSymbol = getCurrentInputSymbol(input);
|
||||
System.out.println("match "+((TokenStream)input).LT(1)+" vs expected "+ttype);
|
||||
Object matchedSymbol = getCurrentInputSymbol();
|
||||
if ( input.LA(1)==ttype ) {
|
||||
input.consume();
|
||||
state.errorRecovery = false;
|
||||
|
@ -601,7 +604,7 @@ public abstract class BaseRecognizer {
|
|||
endResync();
|
||||
reportError(e); // report after consuming so AW sees the token in the exception
|
||||
// we want to return the token we're actually matching
|
||||
Object matchedSymbol = getCurrentInputSymbol(input);
|
||||
Object matchedSymbol = getCurrentInputSymbol();
|
||||
input.consume(); // move past ttype token as if all were ok
|
||||
return matchedSymbol;
|
||||
}
|
||||
|
@ -638,10 +641,8 @@ public abstract class BaseRecognizer {
|
|||
* for input stream type or change the IntStream interface, I use
|
||||
* a simple method to ask the recognizer to tell me what the current
|
||||
* input symbol is.
|
||||
*
|
||||
* This is ignored for lexers.
|
||||
*/
|
||||
protected Object getCurrentInputSymbol(IntStream input) { return null; }
|
||||
protected Object getCurrentInputSymbol() { return null; }
|
||||
|
||||
/** Conjure up a missing token during error recovery.
|
||||
*
|
||||
|
|
|
@ -40,23 +40,16 @@ import org.antlr.v4.runtime.pda.PDA;
|
|||
public abstract class Lexer extends BaseRecognizer implements TokenSource {
|
||||
public static final int DEFAULT_MODE = 0;
|
||||
|
||||
/** Where is the lexer drawing characters from? */
|
||||
public CharStream input;
|
||||
|
||||
public int _mode = DEFAULT_MODE;
|
||||
|
||||
public static PDA[] modeToPDA;
|
||||
|
||||
public Lexer() {
|
||||
}
|
||||
|
||||
public Lexer(CharStream input) {
|
||||
this.input = input;
|
||||
super(input);
|
||||
}
|
||||
|
||||
public Lexer(CharStream input, RecognizerSharedState state) {
|
||||
super(state);
|
||||
this.input = input;
|
||||
super(input, state);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
|
@ -85,8 +78,8 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
|
|||
state.token = null;
|
||||
state.channel = Token.DEFAULT_CHANNEL;
|
||||
state.tokenStartCharIndex = input.index();
|
||||
state.tokenStartCharPositionInLine = input.getCharPositionInLine();
|
||||
state.tokenStartLine = input.getLine();
|
||||
state.tokenStartCharPositionInLine = ((CharStream)input).getCharPositionInLine();
|
||||
state.tokenStartLine = ((CharStream)input).getLine();
|
||||
state.text = null;
|
||||
if ( input.LA(1)==CharStream.EOF ) {
|
||||
Token eof = new CommonToken((CharStream)input,Token.EOF,
|
||||
|
@ -135,7 +128,7 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
|
|||
}
|
||||
|
||||
public CharStream getCharStream() {
|
||||
return this.input;
|
||||
return ((CharStream)input);
|
||||
}
|
||||
|
||||
public String getSourceName() {
|
||||
|
@ -161,7 +154,9 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
|
|||
* Parser or TreeParser.getMissingSymbol().
|
||||
*/
|
||||
public Token emit() {
|
||||
Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1);
|
||||
Token t = new CommonToken(((CharStream)input), state.type,
|
||||
state.channel, state.tokenStartCharIndex,
|
||||
getCharIndex()-1);
|
||||
t.setLine(state.tokenStartLine);
|
||||
t.setText(state.text);
|
||||
t.setCharPositionInLine(state.tokenStartCharPositionInLine);
|
||||
|
@ -170,11 +165,11 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
|
|||
}
|
||||
|
||||
public int getLine() {
|
||||
return input.getLine();
|
||||
return ((CharStream)input).getLine();
|
||||
}
|
||||
|
||||
public int getCharPositionInLine() {
|
||||
return input.getCharPositionInLine();
|
||||
return ((CharStream)input).getCharPositionInLine();
|
||||
}
|
||||
|
||||
/** What is the index of the current character of lookahead? */
|
||||
|
@ -189,7 +184,7 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
|
|||
if ( state.text!=null ) {
|
||||
return state.text;
|
||||
}
|
||||
return input.substring(state.tokenStartCharIndex,getCharIndex()-1);
|
||||
return ((CharStream)input).substring(state.tokenStartCharIndex,getCharIndex()-1);
|
||||
}
|
||||
|
||||
/** Set the complete text of this token; it wipes any previous
|
||||
|
@ -282,12 +277,12 @@ public abstract class Lexer extends BaseRecognizer implements TokenSource {
|
|||
}
|
||||
|
||||
public void traceIn(String ruleName, int ruleIndex) {
|
||||
String inputSymbol = ((char)input.LT(1))+" line="+getLine()+":"+getCharPositionInLine();
|
||||
String inputSymbol = ((char)((CharStream)input).LT(1))+" line="+getLine()+":"+getCharPositionInLine();
|
||||
super.traceIn(ruleName, ruleIndex, inputSymbol);
|
||||
}
|
||||
|
||||
public void traceOut(String ruleName, int ruleIndex) {
|
||||
String inputSymbol = ((char)input.LT(1))+" line="+getLine()+":"+getCharPositionInLine();
|
||||
String inputSymbol = ((char)((CharStream)input).LT(1))+" line="+getLine()+":"+getCharPositionInLine();
|
||||
super.traceOut(ruleName, ruleIndex, inputSymbol);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,16 +37,12 @@ import org.antlr.v4.runtime.misc.LABitSet;
|
|||
* of this.
|
||||
*/
|
||||
public class Parser extends BaseRecognizer {
|
||||
public TokenStream input;
|
||||
|
||||
public Parser(TokenStream input) {
|
||||
super(); // highlight that we go to super to set state object
|
||||
setTokenStream(input);
|
||||
super(input);
|
||||
}
|
||||
|
||||
public Parser(TokenStream input, RecognizerSharedState state) {
|
||||
super(state); // share the state object with another parser
|
||||
this.input = input;
|
||||
super(input, state); // share the state object with another parser
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
|
@ -56,7 +52,7 @@ public class Parser extends BaseRecognizer {
|
|||
}
|
||||
}
|
||||
|
||||
protected Object getCurrentInputSymbol(IntStream input) {
|
||||
protected Object getCurrentInputSymbol() {
|
||||
return ((TokenStream)input).LT(1);
|
||||
}
|
||||
|
||||
|
@ -87,7 +83,7 @@ public class Parser extends BaseRecognizer {
|
|||
}
|
||||
|
||||
public TokenStream getTokenStream() {
|
||||
return input;
|
||||
return (TokenStream)input;
|
||||
}
|
||||
|
||||
public String getSourceName() {
|
||||
|
@ -95,10 +91,10 @@ public class Parser extends BaseRecognizer {
|
|||
}
|
||||
|
||||
public void traceIn(String ruleName, int ruleIndex) {
|
||||
super.traceIn(ruleName, ruleIndex, input.LT(1));
|
||||
super.traceIn(ruleName, ruleIndex, ((TokenStream)input).LT(1));
|
||||
}
|
||||
|
||||
public void traceOut(String ruleName, int ruleIndex) {
|
||||
super.traceOut(ruleName, ruleIndex, input.LT(1));
|
||||
super.traceOut(ruleName, ruleIndex, ((TokenStream)input).LT(1));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,7 +66,7 @@ import org.antlr.runtime.tree.TreeNodeStream;
|
|||
* Better to just say the recognizer had a problem and then let the parser
|
||||
* figure out a fancy report.
|
||||
*/
|
||||
public class RecognitionException extends Throwable {
|
||||
public class RecognitionException extends RuntimeException {
|
||||
/** What input stream did the error occur in? */
|
||||
public transient IntStream input;
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.antlr.v4.runtime.pda;
|
||||
|
||||
import org.antlr.runtime.CharStream;
|
||||
import org.antlr.runtime.IntStream;
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.runtime.CommonToken;
|
||||
|
||||
|
@ -47,7 +48,7 @@ public class PDA {
|
|||
labelValues = new CommonToken[nLabels];
|
||||
}
|
||||
|
||||
public int execThompson(CharStream input) {
|
||||
public int execThompson(IntStream input) {
|
||||
int m = input.mark();
|
||||
Arrays.fill(labelValues, null);
|
||||
int ttype = execThompson(input, 0, false);
|
||||
|
@ -67,7 +68,7 @@ public class PDA {
|
|||
return ttype;
|
||||
}
|
||||
|
||||
public int execThompson(CharStream input, int ip, boolean doActions) {
|
||||
public int execThompson(IntStream input, int ip, boolean doActions) {
|
||||
int c = input.LA(1);
|
||||
if ( c==Token.EOF ) return Token.EOF;
|
||||
|
||||
|
@ -118,10 +119,10 @@ processOneChar:
|
|||
addToClosure(reach, ip, alt, context);
|
||||
}
|
||||
break;
|
||||
case Bytecode.LABEL :
|
||||
case Bytecode.LABEL : // lexers only
|
||||
int labelIndex = getShort(code, ip);
|
||||
labelValues[labelIndex] =
|
||||
new CommonToken(input, 0, 0, input.index(), -1);
|
||||
new CommonToken(((CharStream)input), 0, 0, input.index(), -1);
|
||||
break;
|
||||
case Bytecode.SAVE :
|
||||
labelIndex = getShort(code, ip);
|
||||
|
|
|
@ -229,11 +229,11 @@ import org.antlr.runtime.*;
|
|||
<lexer>
|
||||
>>
|
||||
|
||||
Lexer(lexerName,modes,pdas, actions, sempreds) ::= <<
|
||||
Lexer(lexerName, modes, pdas, tokens, actions, sempreds) ::= <<
|
||||
public class <lexerName> extends Lexer {
|
||||
<tokens.keys:{k | public static final int <k>=<tokens.(k)>;}; separator="\n">
|
||||
<modes:{m| public static final int <m> = <i0>;}; separator="\n">
|
||||
|
||||
public <lexerName>() {;}
|
||||
public <lexerName>(CharStream input) {
|
||||
this(input, new RecognizerSharedState());
|
||||
}
|
||||
|
|
|
@ -625,6 +625,27 @@ public class Tool {
|
|||
return new BufferedWriter(fw);
|
||||
}
|
||||
|
||||
/**
 * Returns the directory in which ANTLR searches for ancillary files,
 * such as {@code .tokens} vocab files and imported grammar files.
 *
 * @return the current value of the {@code libDirectory} field, unmodified
 */
|
||||
public String getLibraryDirectory() {
|
||||
return libDirectory;
|
||||
}
|
||||
|
||||
/**
 * Returns the base output directory, where ANTLR generates all the
 * output files for the current language target as well as any ancillary
 * files such as {@code .tokens} vocab files.
 *
 * @return the current value of the {@code outputDirectory} field, unmodified
 */
|
||||
public String getOutputDirectory() {
|
||||
return outputDirectory;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the location where ANTLR will generate output files for a given file. This is a
|
||||
* base directory and output files will be relative to here in some cases
|
||||
|
|
|
@ -1,12 +1,16 @@
|
|||
package org.antlr.v4.codegen;
|
||||
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.codegen.src.OutputModelObject;
|
||||
import org.antlr.v4.misc.Utils;
|
||||
import org.antlr.v4.tool.ErrorType;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
import org.stringtemplate.v4.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/** General controller for code gen. Can instantiate sub generator(s).
|
||||
*/
|
||||
|
@ -14,8 +18,8 @@ public class CodeGenerator {
|
|||
public static final String TEMPLATE_ROOT = "org/antlr/v4/tool/templates/codegen";
|
||||
public static final String VOCAB_FILE_EXTENSION = ".tokens";
|
||||
public final static String vocabFilePattern =
|
||||
"<tokens:{<attr.name>=<attr.type>\n}>" +
|
||||
"<literals:{<attr.name>=<attr.type>\n}>";
|
||||
"<tokens.keys:{t | <t>=<tokens.(t)>\n}>" +
|
||||
"<literals.keys:{t | <t>=<literals.(t)>\n}>";
|
||||
|
||||
public Grammar g;
|
||||
public Target target;
|
||||
|
@ -81,7 +85,39 @@ public class CodeGenerator {
|
|||
}
|
||||
|
||||
return outputFileST;
|
||||
}
|
||||
}
|
||||
|
||||
/** Generate a token vocab file with all the token names/types. For example:
|
||||
* ID=7
|
||||
* FOR=8
|
||||
* 'for'=8
|
||||
*
|
||||
* This is independent of the target language; used by antlr internally
|
||||
*/
|
||||
ST getTokenVocabOutput() {
|
||||
ST vocabFileST = new ST(vocabFilePattern);
|
||||
Map<String,Integer> tokens = new HashMap<String,Integer>();
|
||||
// make constants for the token names
|
||||
for (String t : g.tokenNameToTypeMap.keySet()) {
|
||||
int tokenType = g.tokenNameToTypeMap.get(t);
|
||||
if ( tokenType>=Token.MIN_TOKEN_TYPE ) {
|
||||
tokens.put(t, Utils.integer(tokenType));
|
||||
}
|
||||
}
|
||||
vocabFileST.add("tokens", tokens);
|
||||
|
||||
// now dump the strings
|
||||
Map<String,Integer> literals = new HashMap<String,Integer>();
|
||||
for (String literal : g.stringLiteralToTypeMap.keySet()) {
|
||||
int tokenType = g.stringLiteralToTypeMap.get(literal);
|
||||
if ( tokenType>=Token.MIN_TOKEN_TYPE ) {
|
||||
literals.put(literal, Utils.integer(tokenType));
|
||||
}
|
||||
}
|
||||
vocabFileST.add("literals", literals);
|
||||
|
||||
return vocabFileST;
|
||||
}
|
||||
|
||||
public void write(ST outputFileST) {
|
||||
// WRITE FILES
|
||||
|
@ -92,14 +128,13 @@ public class CodeGenerator {
|
|||
ST headerFileST = null;
|
||||
target.genRecognizerHeaderFile(this,g,headerFileST,extST.render());
|
||||
}
|
||||
// // write out the vocab interchange file; used by antlr,
|
||||
// // does not change per target
|
||||
// ST tokenVocabSerialization = genTokenVocabOutput();
|
||||
// String vocabFileName = getVocabFileName();
|
||||
// if ( vocabFileName!=null ) {
|
||||
// write(tokenVocabSerialization, vocabFileName);
|
||||
// }
|
||||
//System.out.println(outputFileST.getDOTForDependencyGraph(false));
|
||||
// write out the vocab interchange file; used by antlr,
|
||||
// does not change per target
|
||||
ST tokenVocabSerialization = getTokenVocabOutput();
|
||||
String vocabFileName = getVocabFileName();
|
||||
if ( vocabFileName!=null ) {
|
||||
write(tokenVocabSerialization, vocabFileName);
|
||||
}
|
||||
}
|
||||
catch (IOException ioe) {
|
||||
g.tool.errMgr.toolError(ErrorType.CANNOT_WRITE_FILE,
|
||||
|
|
|
@ -5,6 +5,7 @@ import org.antlr.v4.tool.LexerGrammar;
|
|||
import org.antlr.v4.tool.Rule;
|
||||
import org.stringtemplate.v4.ST;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Set;
|
||||
|
||||
/** */
|
||||
|
@ -48,6 +49,12 @@ public class LexerFactory {
|
|||
pdaST.add("name", modeName);
|
||||
pdaST.add("model", pda);
|
||||
lexerST.add("pdas", pdaST);
|
||||
LinkedHashMap<String,Integer> tokens = new LinkedHashMap<String,Integer>();
|
||||
for (String t : gen.g.tokenNameToTypeMap.keySet()) {
|
||||
Integer ttype = gen.g.tokenNameToTypeMap.get(t);
|
||||
if ( ttype>0 ) tokens.put(t, ttype);
|
||||
}
|
||||
lexerST.add("tokens", tokens);
|
||||
}
|
||||
return fileST;
|
||||
}
|
||||
|
|
|
@ -101,8 +101,8 @@ public abstract class OutputModelFactory {
|
|||
public BitSetDecl createFollowBitSet(GrammarAST ast, IntervalSet set) {
|
||||
String inRuleName = ast.nfaState.rule.name;
|
||||
String elementName = ast.getText(); // assume rule ref
|
||||
if ( ast.getType() == ANTLRParser.TOKEN_REF ) {
|
||||
elementName = gen.target.getTokenTypeAsTargetLabel(g, g.tokenNameToTypeMap.get(elementName));
|
||||
if ( ast.getType() == ANTLRParser.STRING_LITERAL ) {
|
||||
elementName = gen.target.getTokenTypeAsTargetLabel(g, g.stringLiteralToTypeMap.get(elementName));
|
||||
}
|
||||
String name = "FOLLOW_"+elementName+"_in_"+inRuleName+"_"+ast.token.getTokenIndex();
|
||||
BitSetDecl b = new BitSetDecl(this, name, set);
|
||||
|
|
|
@ -15,7 +15,8 @@ public class MatchToken extends SrcOp implements LabeledOp {
|
|||
|
||||
public MatchToken(OutputModelFactory factory, TerminalAST ast, GrammarAST labelAST) {
|
||||
this.factory = factory;
|
||||
name = ast.getText();
|
||||
int ttype = factory.g.getTokenType(ast.getText());
|
||||
name = factory.gen.target.getTokenTypeAsTargetLabel(factory.g, ttype);
|
||||
if ( labelAST!=null ) {
|
||||
label = labelAST.getText();
|
||||
TokenDecl d = new TokenDecl(factory, label);
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
package org.antlr.v4.parse;
|
||||
|
||||
import org.antlr.codegen.CodeGenerator;
|
||||
import org.antlr.misc.Utils;
|
||||
import org.antlr.tool.ErrorManager;
|
||||
import org.antlr.v4.Tool;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/** */
|
||||
public class TokenVocabParser {
|
||||
Tool tool;
|
||||
String vocabName;
|
||||
|
||||
public TokenVocabParser(Tool tool, String vocabName) {
|
||||
this.tool = tool;
|
||||
this.vocabName = vocabName;
|
||||
}
|
||||
|
||||
/** Load a vocab file <vocabName>.tokens and return mapping. */
|
||||
public Map<String,Integer> load() {
|
||||
Map<String,Integer> tokens = new LinkedHashMap<String,Integer>();
|
||||
int maxTokenType = -1;
|
||||
File fullFile = getImportedVocabFile();
|
||||
try {
|
||||
FileReader fr = new FileReader(fullFile);
|
||||
BufferedReader br = new BufferedReader(fr);
|
||||
StreamTokenizer tokenizer = new StreamTokenizer(br);
|
||||
tokenizer.parseNumbers();
|
||||
tokenizer.wordChars('_', '_');
|
||||
tokenizer.eolIsSignificant(true);
|
||||
tokenizer.slashSlashComments(true);
|
||||
tokenizer.slashStarComments(true);
|
||||
tokenizer.ordinaryChar('=');
|
||||
tokenizer.quoteChar('\'');
|
||||
tokenizer.whitespaceChars(' ',' ');
|
||||
tokenizer.whitespaceChars('\t','\t');
|
||||
int lineNum = 1;
|
||||
int token = tokenizer.nextToken();
|
||||
while (token != StreamTokenizer.TT_EOF) {
|
||||
String tokenID;
|
||||
if ( token == StreamTokenizer.TT_WORD ) {
|
||||
tokenID = tokenizer.sval;
|
||||
}
|
||||
else if ( token == '\'' ) {
|
||||
tokenID = "'"+tokenizer.sval+"'";
|
||||
}
|
||||
else {
|
||||
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
|
||||
vocabName+ CodeGenerator.VOCAB_FILE_EXTENSION,
|
||||
Utils.integer(lineNum));
|
||||
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
|
||||
token = tokenizer.nextToken();
|
||||
continue;
|
||||
}
|
||||
token = tokenizer.nextToken();
|
||||
if ( token != '=' ) {
|
||||
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
|
||||
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
|
||||
Utils.integer(lineNum));
|
||||
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
|
||||
token = tokenizer.nextToken();
|
||||
continue;
|
||||
}
|
||||
token = tokenizer.nextToken(); // skip '='
|
||||
if ( token != StreamTokenizer.TT_NUMBER ) {
|
||||
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
|
||||
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
|
||||
Utils.integer(lineNum));
|
||||
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
|
||||
token = tokenizer.nextToken();
|
||||
continue;
|
||||
}
|
||||
int tokenType = (int)tokenizer.nval;
|
||||
token = tokenizer.nextToken();
|
||||
System.out.println("import "+tokenID+"="+tokenType);
|
||||
tokens.put(tokenID, tokenType);
|
||||
maxTokenType = Math.max(maxTokenType,tokenType);
|
||||
lineNum++;
|
||||
if ( token != StreamTokenizer.TT_EOL ) {
|
||||
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
|
||||
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
|
||||
Utils.integer(lineNum));
|
||||
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
|
||||
token = tokenizer.nextToken();
|
||||
continue;
|
||||
}
|
||||
token = tokenizer.nextToken(); // skip newline
|
||||
}
|
||||
br.close();
|
||||
}
|
||||
catch (FileNotFoundException fnfe) {
|
||||
ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE,
|
||||
fullFile);
|
||||
}
|
||||
catch (IOException ioe) {
|
||||
ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
|
||||
fullFile,
|
||||
ioe);
|
||||
}
|
||||
catch (Exception e) {
|
||||
ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
|
||||
fullFile,
|
||||
e);
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/** Return a File descriptor for vocab file. Look in library or
|
||||
* in -o output path. antlr -o foo T.g U.g where U needs T.tokens
|
||||
* won't work unless we look in foo too. If we do not find the
|
||||
* file in the lib directory then must assume that the .tokens file
|
||||
* is going to be generated as part of this build and we have defined
|
||||
* .tokens files so that they ALWAYS are generated in the base output
|
||||
* directory, which means the current directory for the command line tool if there
|
||||
* was no output directory specified.
|
||||
*/
|
||||
public File getImportedVocabFile() {
|
||||
|
||||
File f = new File(tool.getLibraryDirectory(),
|
||||
File.separator +
|
||||
vocabName +
|
||||
CodeGenerator.VOCAB_FILE_EXTENSION);
|
||||
if (f.exists()) {
|
||||
return f;
|
||||
}
|
||||
|
||||
// We did not find the vocab file in the lib directory, so we need
|
||||
// to look for it in the output directory which is where .tokens
|
||||
// files are generated (in the base, not relative to the input
|
||||
// location.)
|
||||
//
|
||||
if (tool.haveOutputDir) {
|
||||
f = new File(tool.getOutputDirectory(), vocabName + CodeGenerator.VOCAB_FILE_EXTENSION);
|
||||
}
|
||||
else {
|
||||
f = new File(vocabName + CodeGenerator.VOCAB_FILE_EXTENSION);
|
||||
}
|
||||
return f;
|
||||
}
|
||||
}
|
|
@ -5,6 +5,7 @@ import org.antlr.runtime.tree.BufferedTreeNodeStream;
|
|||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.parse.ASTVerifier;
|
||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||
import org.antlr.v4.parse.TokenVocabParser;
|
||||
import org.antlr.v4.tool.*;
|
||||
|
||||
import java.util.Map;
|
||||
|
@ -85,6 +86,16 @@ public class SemanticPipeline {
|
|||
AttributeChecks.checkAllAttributeExpressions(g);
|
||||
|
||||
// ASSIGN TOKEN TYPES
|
||||
String vocab = g.getOption("tokenVocab");
|
||||
if ( vocab!=null ) {
|
||||
TokenVocabParser vparser = new TokenVocabParser(g.tool, vocab);
|
||||
Map<String,Integer> tokens = vparser.load();
|
||||
System.out.println("tokens="+tokens);
|
||||
for (String t : tokens.keySet()) {
|
||||
if ( t.charAt(0)=='\'' ) g.defineStringLiteral(t, tokens.get(t));
|
||||
else g.defineTokenName(t, tokens.get(t));
|
||||
}
|
||||
}
|
||||
if ( g.isLexer() ) assignLexerTokenTypes(g, collector);
|
||||
else assignTokenTypes(g, collector, symcheck);
|
||||
|
||||
|
|
|
@ -97,9 +97,6 @@ public class Grammar implements AttributeResolver {
|
|||
*/
|
||||
public Map<String,ActionAST> namedActions = new HashMap<String,ActionAST>();
|
||||
|
||||
/** A list of options specified at the grammar level such as language=Java. */
|
||||
public Map<String, String> options;
|
||||
|
||||
public Map<String, AttributeDict> scopes = new LinkedHashMap<String, AttributeDict>();
|
||||
public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
|
||||
|
||||
|
@ -159,8 +156,10 @@ public class Grammar implements AttributeResolver {
|
|||
// typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR);
|
||||
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOF, "EOF");
|
||||
typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOR_TOKEN_TYPE-1, "EOR");
|
||||
typeToTokenList.set(Label.NUM_FAUX_LABELS+Token.DOWN-1, "DOWN");
|
||||
typeToTokenList.set(Label.NUM_FAUX_LABELS+Token.UP-1, "UP");
|
||||
if ( isTreeGrammar() ) {
|
||||
typeToTokenList.set(Label.NUM_FAUX_LABELS+Token.DOWN-1, "DOWN");
|
||||
typeToTokenList.set(Label.NUM_FAUX_LABELS+Token.UP-1, "UP");
|
||||
}
|
||||
tokenNameToTypeMap.put("<INVALID>", Label.INVALID);
|
||||
// tokenNameToTypeMap.put("<ACTION>", Label.ACTION);
|
||||
// tokenNameToTypeMap.put("<EPSILON>", Label.EPSILON);
|
||||
|
@ -169,8 +168,10 @@ public class Grammar implements AttributeResolver {
|
|||
tokenNameToTypeMap.put("<EOT>", Label.EOT);
|
||||
tokenNameToTypeMap.put("EOF", Label.EOF);
|
||||
tokenNameToTypeMap.put("EOR", Label.EOR_TOKEN_TYPE);
|
||||
tokenNameToTypeMap.put("DOWN", Token.DOWN);
|
||||
tokenNameToTypeMap.put("UP", Token.UP);
|
||||
if ( isTreeGrammar() ) {
|
||||
tokenNameToTypeMap.put("DOWN", Token.DOWN);
|
||||
tokenNameToTypeMap.put("UP", Token.UP);
|
||||
}
|
||||
}
|
||||
|
||||
public void loadImportedGrammars() {
|
||||
|
@ -432,17 +433,23 @@ public class Grammar implements AttributeResolver {
|
|||
}
|
||||
|
||||
public int defineTokenName(String name) {
|
||||
return defineTokenName(name, getNewTokenType());
|
||||
}
|
||||
|
||||
public int defineTokenName(String name, int ttype) {
|
||||
Integer prev = tokenNameToTypeMap.get(name);
|
||||
if ( prev!=null ) return prev;
|
||||
int ttype = getNewTokenType();
|
||||
tokenNameToTypeMap.put(name, ttype);
|
||||
setTokenForType(ttype, name);
|
||||
return ttype;
|
||||
}
|
||||
|
||||
public int defineStringLiteral(String lit) {
|
||||
return defineStringLiteral(lit, getNewTokenType());
|
||||
}
|
||||
|
||||
public int defineStringLiteral(String lit, int ttype) {
|
||||
if ( !stringLiteralToTypeMap.containsKey(lit) ) {
|
||||
int ttype = getNewTokenType();
|
||||
stringLiteralToTypeMap.put(lit, ttype);
|
||||
// track in reverse index too
|
||||
if ( ttype>=typeToStringLiteralList.size() ) {
|
||||
|
@ -537,13 +544,13 @@ public class Grammar implements AttributeResolver {
|
|||
}
|
||||
|
||||
public String getOption(String key) {
|
||||
if ( options==null ) return null;
|
||||
return options.get(key);
|
||||
if ( ast.options==null ) return null;
|
||||
return ast.options.get(key);
|
||||
}
|
||||
|
||||
public String getOption(String key, String defaultValue) {
|
||||
if ( options==null ) return defaultValue;
|
||||
String v = options.get(key);
|
||||
if ( ast.options==null ) return defaultValue;
|
||||
String v = ast.options.get(key);
|
||||
if ( v!=null ) return v;
|
||||
return defaultValue;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue