From ea434982fb9f4aa7d7cd68554b172079375055d8 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Fri, 16 Mar 2012 08:07:46 -0500 Subject: [PATCH 01/34] Pull error reporting outside of try/finally to ensure errors are not hidden --- tool/test/org/antlr/v4/test/BaseTest.java | 40 ++++++++++++----------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/tool/test/org/antlr/v4/test/BaseTest.java b/tool/test/org/antlr/v4/test/BaseTest.java index 2261b40ea..f811e4568 100644 --- a/tool/test/org/antlr/v4/test/BaseTest.java +++ b/tool/test/org/antlr/v4/test/BaseTest.java @@ -358,37 +358,39 @@ public abstract class BaseTest { System.out.println("dir "+tmpdir); mkdir(tmpdir); writeFile(tmpdir, fileName, grammarStr); + ErrorQueue equeue = new ErrorQueue(); + final List options = new ArrayList(); + Collections.addAll(options, extraOptions); + options.add("-o"); + options.add(tmpdir); + options.add("-lib"); + options.add(tmpdir); + options.add(new File(tmpdir,grammarFileName).toString()); try { - final List options = new ArrayList(); - Collections.addAll(options, extraOptions); - options.add("-o"); - options.add(tmpdir); - options.add("-lib"); - options.add(tmpdir); - options.add(new File(tmpdir,grammarFileName).toString()); final String[] optionsA = new String[options.size()]; options.toArray(optionsA); - ErrorQueue equeue = new ErrorQueue(); Tool antlr = newTool(optionsA); antlr.addListener(equeue); antlr.processGrammarsOnCommandLine(); - if ( equeue.errors.size()>0 ) { - allIsWell = false; - System.err.println("antlr reports errors from "+options); - for (int i = 0; i < equeue.errors.size(); i++) { - ANTLRMessage msg = equeue.errors.get(i); - System.err.println(msg); - } - System.out.println("!!!\ngrammar:"); - System.out.println(grammarStr); - System.out.println("###"); - } } catch (Exception e) { allIsWell = false; System.err.println("problems building grammar: "+e); e.printStackTrace(System.err); } + + if ( equeue.errors.size()>0 ) { + allIsWell = false; + System.err.println("antlr reports errors from "+options); + for (int i = 0; i < equeue.errors.size(); i++) { + ANTLRMessage msg = equeue.errors.get(i); + System.err.println(msg); + } + System.out.println("!!!\ngrammar:"); + System.out.println(grammarStr); + System.out.println("###"); + } + return allIsWell; } From cc20a52cdd6f04d17f53050c7457aafd1ba51b60 Mon Sep 17 00:00:00 2001 From: Terence Parr Date: Sat, 31 Mar 2012 15:54:00 -0700 Subject: [PATCH 02/34] allow special "tokens" start rule name so we can test pure lexer grammars. --- .../org/antlr/v4/runtime/misc/TestRig.java | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/TestRig.java b/runtime/Java/src/org/antlr/v4/runtime/misc/TestRig.java index f78d090f5..dd600321f 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/TestRig.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/TestRig.java @@ -57,6 +57,9 @@ import java.lang.reflect.Method; * [input-filename] */ public class TestRig { + + public static final String LEXER_START_RULE_NAME = "tokens"; + public static void main(String[] args) throws Exception { String grammarName; String startRuleName; @@ -69,10 +72,12 @@ public class TestRig { boolean diagnostics = false; String encoding = null; if ( args.length < 2 ) { - System.err.println("java org.antlr.v4.runtime.misc.TestRig GrammarName startRuleName" + - " [-tokens] [-print] [-gui] [-ps file.ps] [-encoding encodingname]" + - " [-trace] [-diagnostics]"+ - " [input-filename]"); + System.err.println("java org.antlr.v4.runtime.misc.TestRig GrammarName startRuleName\n" + + " [-tokens] [-print] [-gui] [-ps file.ps] [-encoding encodingname]\n" + + " [-trace] [-diagnostics]\n"+ + " [input-filename]"); + System.err.println("Use startRuleName='tokens' if GrammarName is a lexer grammar."); + System.err.println("Omitting input-filename makes rig read from stdin."); return; } int i=0; @@ -121,16 +126,11 @@ public class TestRig { } // System.out.println("exec "+grammarName+"."+startRuleName); String lexerName = grammarName+"Lexer"; - String parserName = grammarName+"Parser"; ClassLoader cl = Thread.currentThread().getContextClassLoader(); Class lexerClass = cl.loadClass(lexerName); if ( lexerClass==null ) { System.err.println("Can't load "+lexerName); } - Class parserClass = cl.loadClass(parserName); - if ( parserClass==null ) { - System.err.println("Can't load "+parserName); - } InputStream is = System.in; if ( inputFile!=null ) { @@ -158,6 +158,13 @@ public class TestRig { } } + if ( startRuleName.equals(LEXER_START_RULE_NAME) ) return; + + String parserName = grammarName+"Parser"; + Class parserClass = cl.loadClass(parserName); + if ( parserClass==null ) { + System.err.println("Can't load "+parserName); + } Constructor parserCtor = parserClass.getConstructor(TokenStream.class); Parser parser = parserCtor.newInstance(tokens); From f238d7579ef4de47535e16c9d5f98573b252e014 Mon Sep 17 00:00:00 2001 From: Terence Parr Date: Sat, 31 Mar 2012 17:27:11 -0700 Subject: [PATCH 03/34] added getText() to ParseTree. getText(tokens) is still really what you want but convenient for ctx.type().getText() when it's just one token. --- .../antlr/v4/runtime/BufferedTokenStream.java | 17 +++++++------ .../antlr/v4/runtime/ParserRuleContext.java | 25 +++++++++++++++++++ .../src/org/antlr/v4/runtime/RuleContext.java | 6 +++++ .../org/antlr/v4/runtime/tree/ParseTree.java | 9 +++++++ 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java index e038357c8..fc7a2fc41 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java @@ -29,7 +29,11 @@ package org.antlr.v4.runtime; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Set; /** Buffer all input tokens but do on-demand fetching of new tokens from * lexer. Useful when the parser or lexer has to set context/mode info before @@ -246,13 +250,10 @@ public class BufferedTokenStream implements TokenStream { if ( start<0 || stop<0 ) return ""; if ( p == -1 ) setup(); if ( stop>=tokens.size() ) stop = tokens.size()-1; - StringBuilder buf = new StringBuilder(); - for (int i = start; i <= stop; i++) { - T t = tokens.get(i); - if ( t.getType()==Token.EOF ) break; - buf.append(t.getText()); - } - return buf.toString(); + + int a = tokens.get(start).getStartIndex(); + int b = tokens.get(stop).getStopIndex(); + return tokenSource.getInputStream().substring(a, b); } @Override diff --git a/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java b/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java index 906c96b5a..48d748ab1 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java @@ -297,6 +297,31 @@ public class ParserRuleContext extends RuleContext { return Interval.of(start.getTokenIndex(), stop.getTokenIndex()); } + /** Return the combined text of all leaf nodes. Does not get any + * off-channel tokens (if any) so won't return whitespace and + * comments if they are sent to parser on hidden channel. + * + * This just recursively collects all leaf nodes and combines text. + */ + @Override + public String getText() { + StringBuilder buf = new StringBuilder(); + getText_(this, buf); + return buf.toString(); + } + + protected void getText_(ParseTree p, StringBuilder buf) { + if ( p instanceof TerminalNode ) { + buf.append(((TerminalNode)p).getSymbol().getText()); + return; + } + int n = p.getChildCount(); + for (int i=0; i