/* * [The "BSD license"] * Copyright (c) 2013 Terence Parr * Copyright (c) 2013 Sam Harwell * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ using System; using System.Collections.Generic; using System.Text; using Antlr4.Runtime; using Antlr4.Runtime.Misc; using Sharpen; namespace Antlr4.Runtime { /// /// Useful for rewriting out a buffered input token stream after doing some /// augmentation or other manipulations on it. /// /// /// Useful for rewriting out a buffered input token stream after doing some /// augmentation or other manipulations on it. /// You can insert stuff, replace, and delete chunks. 
    /// Note that the operations are done lazily--only if you convert the
    /// buffer to a String with getText(). This is very efficient because you
    /// are not moving data around all the time. As the buffer of tokens is
    /// converted to strings, the getText() method(s) scan the input token
    /// stream and check to see if there is an operation at the current index.
    /// If so, the operation is done and then normal String rendering
    /// continues on the buffer. This is like having multiple Turing machine
    /// instruction streams (programs) operating on a single input tape. :)
    ///
    /// This rewriter makes no modifications to the token stream. It does not
    /// ask the stream to fill itself up nor does it advance the input cursor.
    /// The token stream index() will return the same value before and after
    /// any getText() call.
    ///
    /// The rewriter only works on tokens that you have in the buffer and
    /// ignores the current input cursor. If you are buffering tokens
    /// on-demand, calling getText() halfway through the input will only do
    /// rewrites for those tokens in the first half of the file.
    ///
    /// Since the operations are done lazily at getText-time, operations do
    /// not screw up the token index values. That is, an insert operation at
    /// token index i does not change the index values for tokens i+1..n-1.
    ///
    /// Because operations never actually alter the buffer, you may always get
    /// the original token stream back without undoing anything. Since the
    /// instructions are queued up, you can easily simulate transactions and
    /// roll back any changes if there is an error just by removing
    /// instructions. For example,
    ///
    ///     CharStream input = new ANTLRFileStream("input");
    ///     TLexer lex = new TLexer(input);
    ///     CommonTokenStream tokens = new CommonTokenStream(lex);
    ///     T parser = new T(tokens);
    ///     TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
    ///     parser.startRule();
    ///
    /// Then in the rules, you can execute (assuming rewriter is visible):
    ///
    ///     Token t,u;
    ///     ...
///     rewriter.insertAfter(t, "text to put after t");
    ///     rewriter.insertAfter(u, "text after u");
    ///     System.out.println(rewriter.getText());
    ///
    /// You can also have multiple "instruction streams" and get multiple
    /// rewrites from a single pass over the input. Just name the instruction
    /// streams and use that name again when printing the buffer. This could be
    /// useful for generating a C file and also its header file--all from the
    /// same buffer:
    ///
    ///     rewriter.insertAfter("pass1", t, "text to put after t");
    ///     rewriter.insertAfter("pass2", u, "text after u");
    ///     System.out.println(rewriter.getText("pass1", interval));
    ///     System.out.println(rewriter.getText("pass2", interval));
    ///
    /// If you don't use named rewrite streams, a "default" stream is used as
    /// the first example shows.
    ///
    /// (The examples use the Java spelling of the API; the corresponding
    /// members of this class are InsertAfter and GetText.)
    public class TokenStreamRewriter
    {
        /// <summary>Program name used by every overload that does not take one.</summary>
        public static readonly string DefaultProgramName = "default";

        /// <summary>Initial capacity of the instruction list created for a program.</summary>
        public const int ProgramInitSize = 100;

        /// <summary>Lower bound used when an instruction list is truncated by Rollback.</summary>
        public const int MinTokenIndex = 0;

        // Define the rewrite operation hierarchy

        /// <summary>
        /// Base class of all queued rewrite instructions. An instruction is
        /// anchored at a token index and optionally carries replacement text.
        /// </summary>
        public class RewriteOperation
        {
            protected internal readonly ITokenStream tokens;

            /// <summary>What index into rewrites List are we?</summary>
            protected internal int instructionIndex;

            /// <summary>Token buffer index.</summary>
            protected internal int index;

            protected internal object text;

            protected internal RewriteOperation(ITokenStream tokens, int index)
            {
                this.tokens = tokens;
                this.index = index;
            }

            protected internal RewriteOperation(ITokenStream tokens, int index, object text)
            {
                this.tokens = tokens;
                this.index = index;
                this.text = text;
            }

            /// <summary>
            /// Execute the rewrite operation by possibly adding to the buffer.
            /// Return the index of the next token to operate on.
            /// </summary>
            /// <remarks>The base implementation appends nothing and returns <c>index</c>.</remarks>
            public virtual int Execute(StringBuilder buf)
            {
                return index;
            }

            /// <summary>
            /// Renders the operation as <c>&lt;OpName@token:"text"&gt;</c> for diagnostics.
            /// </summary>
            public override string ToString()
            {
                // Type.Name is the simple name of the (nested) operation class.
                // The previous code looked for Java's '$' nested-class separator,
                // which never occurs in a .NET FullName, so the whole
                // namespace-qualified name ended up in the output.
                string opName = GetType().Name;
                return "<" + opName + "@" + tokens.Get(index) + ":\"" + text + "\">";
            }
        }

        /// <summary>Inserts <c>text</c> in front of the token at <c>index</c>.</summary>
        internal class InsertBeforeOp : TokenStreamRewriter.RewriteOperation
        {
            protected internal InsertBeforeOp(ITokenStream tokens, int index, object text)
                : base(tokens, index, text)
            {
            }

            /// <summary>
            /// Appends the inserted text, then the anchor token's own text unless
            /// the anchor is EOF, and returns the index following the anchor.
            /// </summary>
            public override int Execute(StringBuilder buf)
            {
                buf.Append(text);
                IToken anchor = tokens.Get(index);
                if (anchor.Type != IToken.Eof)
                {
                    buf.Append(anchor.Text);
                }
                return index + 1;
            }
        }

        /// <summary>
        /// I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
        /// instructions.
        /// </summary>
        /// <remarks>A null <c>text</c> makes the operation a pure deletion.</remarks>
        internal class ReplaceOp : TokenStreamRewriter.RewriteOperation
        {
            /// <summary>Inclusive index of the last token covered by the replacement.</summary>
            protected internal int lastIndex;

            public ReplaceOp(ITokenStream tokens, int from, int to, object text)
                : base(tokens, from, text)
            {
                lastIndex = to;
            }

            /// <summary>
            /// Appends the replacement text (nothing for a deletion) and returns
            /// the index just past the replaced range, skipping the original tokens.
            /// </summary>
            public override int Execute(StringBuilder buf)
            {
                if (text != null)
                {
                    buf.Append(text);
                }
                return lastIndex + 1;
            }

            /// <summary>
            /// Renders the operation with its token range. This text is embedded in
            /// the exception messages raised for overlapping operations, so it must
            /// identify the operation rather than be empty.
            /// </summary>
            public override string ToString()
            {
                if (text == null)
                {
                    return "<DeleteOp@" + tokens.Get(index) + ".." + tokens.Get(lastIndex) + ">";
                }
                return "<ReplaceOp@" + tokens.Get(index) + ".." + tokens.Get(lastIndex) + ":\"" + text + "\">";
            }
        }

        /// <summary>Our source stream</summary>
        protected internal readonly ITokenStream tokens;

        /// You may have multiple, named streams of rewrite operations.
        /// I'm calling these things "programs."
/// Maps program name (string) to that program's list of rewrite operations.
        protected internal readonly IDictionary<string, IList<TokenStreamRewriter.RewriteOperation>> programs;

        /// <summary>Maps program name (string) to an integer token index.</summary>
        protected internal readonly IDictionary<string, int> lastRewriteTokenIndexes;

        /// <summary>
        /// Creates a rewriter over <paramref name="tokens"/> and registers an
        /// empty instruction list under <see cref="DefaultProgramName"/>.
        /// </summary>
        public TokenStreamRewriter(ITokenStream tokens)
        {
            this.tokens = tokens;
            programs = new Dictionary<string, IList<TokenStreamRewriter.RewriteOperation>>();
            programs.Put(DefaultProgramName, new List<TokenStreamRewriter.RewriteOperation>(ProgramInitSize));
            lastRewriteTokenIndexes = new Dictionary<string, int>();
        }

        /// <summary>Returns the token stream this rewriter was constructed with.</summary>
        public ITokenStream GetTokenStream()
        {
            return tokens;
        }

        /// <summary>Rolls back the default program; see the named overload.</summary>
        public virtual void Rollback(int instructionIndex)
        {
            Rollback(DefaultProgramName, instructionIndex);
        }

        /// <summary>
        /// Rollback the instruction stream for a program so that
        /// the indicated instruction (via instructionIndex) is no
        /// longer in the stream. UNTESTED!
        /// </summary>
        /// <remarks>Does nothing when no program with that name exists.</remarks>
        public virtual void Rollback(string programName, int instructionIndex)
        {
            IList<TokenStreamRewriter.RewriteOperation> program = programs.Get(programName);
            if (program != null)
            {
                // Keep instructions [MinTokenIndex, instructionIndex) only.
                programs.Put(programName, program.SubList(MinTokenIndex, instructionIndex));
            }
        }

        /// <summary>Resets the default program so that no instructions exist.</summary>
        public virtual void DeleteProgram()
        {
            DeleteProgram(DefaultProgramName);
        }

        /// <summary>Reset the program so that no instructions exist</summary>
        public virtual void DeleteProgram(string programName)
        {
            Rollback(programName, MinTokenIndex);
        }

        /// <summary>Queues an insertion after token <paramref name="t"/> in the default program.</summary>
        public virtual void InsertAfter(IToken t, object text)
        {
            InsertAfter(DefaultProgramName, t, text);
        }

        /// <summary>Queues an insertion after token index <paramref name="index"/> in the default program.</summary>
        public virtual void InsertAfter(int index, object text)
        {
            InsertAfter(DefaultProgramName, index, text);
        }

        /// <summary>Queues an insertion after token <paramref name="t"/> in the named program.</summary>
        public virtual void InsertAfter(string programName, IToken t, object text)
        {
            InsertAfter(programName, t.TokenIndex, text);
        }

        /// <summary>Queues an insertion after token index <paramref name="index"/> in the named program.</summary>
        public virtual void InsertAfter(string programName, int index, object text)
        {
            // to insert after, just insert before next index (even if past end)
            InsertBefore(programName, index + 1, text);
        }

        /// <summary>Queues an insertion before token <paramref name="t"/> in the default program.</summary>
        public virtual void InsertBefore(IToken t, object text)
        {
            InsertBefore(DefaultProgramName, t, text);
        }

        /// <summary>Queues an insertion before token index <paramref name="index"/> in the default program.</summary>
        public virtual void InsertBefore(int index, object text)
        {
            InsertBefore(DefaultProgramName, index, text);
        }

        /// <summary>Queues an insertion before token <paramref name="t"/> in the named program.</summary>
        public virtual void InsertBefore(string programName, IToken t, object text)
        {
            InsertBefore(programName, t.TokenIndex, text);
        }

        /// <summary>
        /// Appends an insert-before instruction to the named program, creating
        /// the program if it does not exist yet.
        /// </summary>
        public virtual void InsertBefore(string programName, int index, object text)
        {
            TokenStreamRewriter.RewriteOperation op = new TokenStreamRewriter.InsertBeforeOp(tokens, index, text);
            IList<TokenStreamRewriter.RewriteOperation> rewrites = GetProgram(programName);
            // The instruction remembers its own slot so it can be nulled out later.
            op.instructionIndex = rewrites.Count;
            rewrites.AddItem(op);
        }

        /// <summary>Replaces the single token at <paramref name="index"/> in the default program.</summary>
        public virtual void Replace(int index, object text)
        {
            Replace(DefaultProgramName, index, index, text);
        }

        /// <summary>Replaces tokens <paramref name="from"/>..<paramref name="to"/> (inclusive) in the default program.</summary>
        public virtual void Replace(int from, int to, object text)
        {
            Replace(DefaultProgramName, from, to, text);
        }

        /// <summary>Replaces the single token <paramref name="indexT"/> in the default program.</summary>
        public virtual void Replace(IToken indexT, object text)
        {
            Replace(DefaultProgramName, indexT, indexT, text);
        }

        /// <summary>Replaces the tokens from <paramref name="from"/> through <paramref name="to"/> in the default program.</summary>
        public virtual void Replace(IToken from, IToken to, object text)
        {
            Replace(DefaultProgramName, from, to, text);
        }

        /// <summary>
        /// Appends a replace instruction for token indexes
        /// <paramref name="from"/>..<paramref name="to"/> (inclusive) to the named program.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// <paramref name="from"/> is greater than <paramref name="to"/>, either bound is
        /// negative, or <paramref name="to"/> is not less than <c>tokens.Size</c>.
        /// </exception>
        public virtual void Replace(string programName, int from, int to, object text)
        {
            if (from > to || from < 0 || to < 0 || to >= tokens.Size)
            {
                throw new ArgumentException("replace: range invalid: " + from + ".." + to + "(size=" + tokens.Size + ")");
            }
            TokenStreamRewriter.RewriteOperation op = new TokenStreamRewriter.ReplaceOp(tokens, from, to, text);
            IList<TokenStreamRewriter.RewriteOperation> rewrites = GetProgram(programName);
            op.instructionIndex = rewrites.Count;
            rewrites.AddItem(op);
        }

        /// <summary>Token-based form of the named replace; uses each token's <c>TokenIndex</c>.</summary>
        public virtual void Replace(string programName, IToken from, IToken to, object text)
        {
            Replace(programName, from.TokenIndex, to.TokenIndex, text);
        }

        /// <summary>Deletes the single token at <paramref name="index"/> in the default program.</summary>
        public virtual void Delete(int index)
        {
            Delete(DefaultProgramName, index, index);
        }

        /// <summary>Deletes tokens <paramref name="from"/>..<paramref name="to"/> (inclusive) in the default program.</summary>
        public virtual void Delete(int from, int to)
        {
            Delete(DefaultProgramName, from, to);
        }

        /// <summary>Deletes the single token <paramref name="indexT"/> in the default program.</summary>
        public virtual void Delete(IToken indexT)
        {
            Delete(DefaultProgramName, indexT, indexT);
        }

        /// <summary>Deletes the tokens from <paramref name="from"/> through <paramref name="to"/> in the default program.</summary>
        public virtual void Delete(IToken from, IToken to)
        {
            Delete(DefaultProgramName, from, to);
        }

        /// <summary>A delete is a replace whose text is null.</summary>
        public virtual void Delete(string programName, int from, int to)
        {
            Replace(programName, from, to, null);
        }

        /// <summary>A delete is a replace whose text is null.</summary>
        public virtual void Delete(string programName, IToken from, IToken to)
        {
            Replace(programName, from, to, null);
        }

        /// <summary>Returns the last-rewrite token index recorded for the default program, or -1.</summary>
        public virtual int GetLastRewriteTokenIndex()
        {
            return GetLastRewriteTokenIndex(DefaultProgramName);
        }

        /// <summary>
        /// Returns the index recorded through <see cref="SetLastRewriteTokenIndex"/>
        /// for <paramref name="programName"/>, or -1 when nothing has been recorded.
        /// </summary>
        protected internal virtual int GetLastRewriteTokenIndex(string programName)
        {
            // An int can never compare equal to null, so the "not recorded" case
            // has to be detected through the lookup result itself.
            int lastIndex;
            if (!lastRewriteTokenIndexes.TryGetValue(programName, out lastIndex))
            {
                return -1;
            }
            return lastIndex;
        }

        /// <summary>Records <paramref name="i"/> as the last-rewrite token index of the named program.</summary>
        protected internal virtual void SetLastRewriteTokenIndex(string programName, int i)
        {
            lastRewriteTokenIndexes.Put(programName, i);
        }

        /// <summary>
        /// Returns the instruction list of the named program, creating and
        /// registering an empty one when the name is not known yet.
        /// </summary>
        protected internal virtual IList<TokenStreamRewriter.RewriteOperation> GetProgram(string name)
        {
            IList<TokenStreamRewriter.RewriteOperation> program = programs.Get(name);
            if (program == null)
            {
                program = InitializeProgram(name);
            }
            return program;
        }

        /// <summary>Registers and returns a new, empty instruction list for <paramref name="name"/>.</summary>
        private IList<TokenStreamRewriter.RewriteOperation> InitializeProgram(string name)
        {
            IList<TokenStreamRewriter.RewriteOperation> program = new List<TokenStreamRewriter.RewriteOperation>(ProgramInitSize);
            programs.Put(name, program);
            return program;
        }

        /// Return the text from the original tokens altered per the
        /// instructions given to this rewriter.
/// Uses the default program and spans token indexes 0 through tokens.Size - 1.
        public virtual string GetText()
        {
            return GetText(DefaultProgramName, Interval.Of(0, tokens.Size - 1));
        }

        /// <summary>
        /// Return the text associated with the tokens in the interval from the
        /// original token stream but with the alterations given to this rewriter.
        /// </summary>
        /// <remarks>
        /// The interval refers to the indexes in the original token stream.
        /// We do not alter the token stream in any way, so the indexes
        /// and intervals are still consistent. Includes any operations done
        /// to the first and last token in the interval. So, if you did an
        /// insertBefore on the first token, you would get that insertion.
        /// The same is true if you do an insertAfter the stop token.
        /// </remarks>
        public virtual string GetText(Interval interval)
        {
            return GetText(DefaultProgramName, interval);
        }

        /// <summary>
        /// Renders the tokens of <paramref name="interval"/> with the instructions
        /// of the named program applied.
        /// </summary>
        /// <remarks>
        /// When the program is unknown or empty the result is whatever
        /// <c>tokens.GetText(interval)</c> returns for the interval as passed in.
        /// Otherwise the interval is clamped to the buffer before the walk.
        /// </remarks>
        public virtual string GetText(string programName, Interval interval)
        {
            IList<TokenStreamRewriter.RewriteOperation> rewrites = programs.Get(programName);
            int start = interval.a;
            int stop = interval.b;
            // ensure start/end are in range
            if (stop > tokens.Size - 1)
            {
                stop = tokens.Size - 1;
            }
            if (start < 0)
            {
                start = 0;
            }
            if (rewrites == null || rewrites.IsEmpty())
            {
                // no instructions to execute
                return tokens.GetText(interval);
            }
            StringBuilder buf = new StringBuilder();
            // First, optimize instruction stream
            IDictionary<int, TokenStreamRewriter.RewriteOperation> indexToOp = ReduceToSingleOperationPerIndex(rewrites);
            // Walk buffer, executing instructions and emitting tokens
            int i = start;
            while (i <= stop && i < tokens.Size)
            {
                TokenStreamRewriter.RewriteOperation op = indexToOp.Get(i);
                // remove so any left have index size-1
                Sharpen.Collections.Remove(indexToOp, i);
                IToken t = tokens.Get(i);
                if (op == null)
                {
                    // no operation at that index, just dump token
                    if (t.Type != IToken.Eof)
                    {
                        buf.Append(t.Text);
                    }
                    // move to next token
                    i++;
                }
                else
                {
                    // execute operation and skip
                    i = op.Execute(buf);
                }
            }
            // include stuff after end if it's last index in buffer
            // So, if they did an insertAfter(lastValidIndex, "foo"), include
            // foo if end==lastValidIndex.
            if (stop == tokens.Size - 1)
            {
                // Scan any remaining operations after last token
                // should be included (they will be inserts).
                foreach (TokenStreamRewriter.RewriteOperation op in indexToOp.Values)
                {
                    if (op.index >= tokens.Size - 1)
                    {
                        buf.Append(op.text);
                    }
                }
            }
            return buf.ToString();
        }

        /// We need to combine operations and report invalid operations (like
        /// overlapping replaces that are not completely nested). Inserts to
        /// same index need to be combined etc... Here are the cases:
        ///
        ///   I.i.u I.j.v                            leave alone, nonoverlapping
        ///   I.i.u I.i.v                            combine: Iivu
        ///
        ///   R.i-j.u R.x-y.v | i-j in x-y           delete first R
        ///   R.i-j.u R.i-j.v                        delete first R
        ///   R.i-j.u R.x-y.v | x-y in i-j           ERROR
        ///   R.i-j.u R.x-y.v | boundaries overlap   ERROR
        ///
        /// Delete special case of replace (text==null):
        ///   D.i-j.u D.x-y.v | boundaries overlap   combine to max(min)..max(right)
        ///
        ///   I.i.u R.x-y.v | i in (x+1)-y           delete I (since insert before
        ///                                          we're not deleting i)
        ///   I.i.u R.x-y.v | i not in (x+1)-y       leave alone, nonoverlapping
        ///   R.x-y.v I.i.u | i in x-y               ERROR
        ///   R.x-y.v I.x.u                          R.x-y.uv (combine, delete I)
        ///   R.x-y.v I.i.u | i not in x-y           leave alone, nonoverlapping
        ///
        ///   I.i.u = insert u before op @ index i
        ///   R.x-y.u = replace x-y indexed tokens with u
        ///
        /// First we need to examine replaces. For any replace op:
        ///   1. wipe out any insertions before op within that range.
        ///   2. Drop any replace op before that is contained completely within
        ///      that range.
        ///   3. Throw exception upon boundary overlap with any previous replace.
        ///
        /// Then we can deal with inserts:
        ///   1. for any inserts to same index, combine even if not adjacent.
        ///   2. for any prior replace with same left boundary, combine this
        ///      insert with replace and delete this insert.
        ///   3.
///      throw exception if index in same range as previous replace
        ///
        /// Don't actually delete; make op null in list. Easier to walk list.
        /// Later we can throw as we add to index -> op map.
        ///
        /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
        /// inserted stuff would be before the replace range. But, if you
        /// add tokens in front of a method body '{' and then delete the method
        /// body, I think the stuff before the '{' you added should disappear too.
        /// (As implemented below, an insert at exactly the replace's left
        /// boundary is removed as an instruction but its text is prepended to
        /// the replace text.)
        ///
        /// Return a map from token index to operation.
        protected internal virtual IDictionary<int, TokenStreamRewriter.RewriteOperation> ReduceToSingleOperationPerIndex(IList<TokenStreamRewriter.RewriteOperation> rewrites)
        {
            // WALK REPLACES
            for (int i = 0; i < rewrites.Count; i++)
            {
                TokenStreamRewriter.RewriteOperation op = rewrites[i];
                if (op == null)
                {
                    continue;
                }
                if (!(op is TokenStreamRewriter.ReplaceOp))
                {
                    continue;
                }
                TokenStreamRewriter.ReplaceOp rop = (TokenStreamRewriter.ReplaceOp)rewrites[i];
                // Wipe prior inserts within range
                IList<TokenStreamRewriter.InsertBeforeOp> inserts =
                    GetKindOfOps<TokenStreamRewriter.InsertBeforeOp, TokenStreamRewriter.RewriteOperation>(rewrites, i);
                foreach (TokenStreamRewriter.InsertBeforeOp iop in inserts)
                {
                    if (iop.index == rop.index)
                    {
                        // E.g., insert before 2, delete 2..2; update replace
                        // text to include insert before, kill insert
                        rewrites.Set(iop.instructionIndex, null);
                        // CatOpText tolerates a null on either side, matching the
                        // merge done in the inserts walk below.
                        rop.text = CatOpText(iop.text, rop.text);
                    }
                    else if (iop.index > rop.index && iop.index <= rop.lastIndex)
                    {
                        // delete insert as it's a no-op.
                        rewrites.Set(iop.instructionIndex, null);
                    }
                }
                // Drop any prior replaces contained within
                IList<TokenStreamRewriter.ReplaceOp> prevReplaces =
                    GetKindOfOps<TokenStreamRewriter.ReplaceOp, TokenStreamRewriter.RewriteOperation>(rewrites, i);
                foreach (TokenStreamRewriter.ReplaceOp prevRop in prevReplaces)
                {
                    if (prevRop.index >= rop.index && prevRop.lastIndex <= rop.lastIndex)
                    {
                        // delete replace as it's a no-op.
                        rewrites.Set(prevRop.instructionIndex, null);
                        continue;
                    }
                    // throw exception unless disjoint or identical
                    bool disjoint = prevRop.lastIndex < rop.index || prevRop.index > rop.lastIndex;
                    bool same = prevRop.index == rop.index && prevRop.lastIndex == rop.lastIndex;
                    // Delete special case of replace (text==null):
                    // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right)
                    if (prevRop.text == null && rop.text == null && !disjoint)
                    {
                        // kill first delete and widen the current one to cover both
                        rewrites.Set(prevRop.instructionIndex, null);
                        rop.index = Math.Min(prevRop.index, rop.index);
                        rop.lastIndex = Math.Max(prevRop.lastIndex, rop.lastIndex);
                    }
                    else if (!disjoint && !same)
                    {
                        throw new ArgumentException("replace op boundaries of " + rop + " overlap with previous " + prevRop);
                    }
                }
            }
            // WALK INSERTS
            for (int i = 0; i < rewrites.Count; i++)
            {
                TokenStreamRewriter.RewriteOperation op = rewrites[i];
                if (op == null)
                {
                    continue;
                }
                if (!(op is TokenStreamRewriter.InsertBeforeOp))
                {
                    continue;
                }
                TokenStreamRewriter.InsertBeforeOp iop = (TokenStreamRewriter.InsertBeforeOp)rewrites[i];
                // combine current insert with prior if any at same index
                IList<TokenStreamRewriter.InsertBeforeOp> prevInserts =
                    GetKindOfOps<TokenStreamRewriter.InsertBeforeOp, TokenStreamRewriter.RewriteOperation>(rewrites, i);
                foreach (TokenStreamRewriter.InsertBeforeOp prevIop in prevInserts)
                {
                    if (prevIop.index == iop.index)
                    {
                        // combine objects
                        // convert to strings...we're in process of toString'ing
                        // whole token buffer so no lazy eval issue with any templates
                        iop.text = CatOpText(iop.text, prevIop.text);
                        // delete redundant prior insert
                        rewrites.Set(prevIop.instructionIndex, null);
                    }
                }
                // look for replaces where iop.index is in range; error
                IList<TokenStreamRewriter.ReplaceOp> prevReplaces =
                    GetKindOfOps<TokenStreamRewriter.ReplaceOp, TokenStreamRewriter.RewriteOperation>(rewrites, i);
                foreach (TokenStreamRewriter.ReplaceOp rop in prevReplaces)
                {
                    if (iop.index == rop.index)
                    {
                        rop.text = CatOpText(iop.text, rop.text);
                        // delete current insert
                        rewrites.Set(i, null);
                        continue;
                    }
                    if (iop.index >= rop.index && iop.index <= rop.lastIndex)
                    {
                        throw new ArgumentException("insert op " + iop + " within boundaries of previous " + rop);
                    }
                }
            }
            IDictionary<int, TokenStreamRewriter.RewriteOperation> m = new Dictionary<int, TokenStreamRewriter.RewriteOperation>();
            for (int i = 0; i < rewrites.Count; i++)
            {
                TokenStreamRewriter.RewriteOperation op = rewrites[i];
                if (op == null)
                {
                    // ignore deleted ops
                    continue;
                }
                if (m.Get(op.index) != null)
                {
                    // NOTE(review): Error is not a BCL type; presumably it comes from
                    // the Sharpen compatibility layer - confirm.
                    throw new Error("should only be one op per index");
                }
                m.Put(op.index, op);
            }
            return m;
        }

        /// <summary>
        /// Concatenates the string forms of <paramref name="a"/> and
        /// <paramref name="b"/>, treating a null operand as the empty string.
        /// </summary>
        protected internal virtual string CatOpText(object a, object b)
        {
            string x = string.Empty;
            string y = string.Empty;
            if (a != null)
            {
                x = a.ToString();
            }
            if (b != null)
            {
                y = b.ToString();
            }
            return x + y;
        }

        /// <summary>Get all operations before an index of a particular kind</summary>
        /// <typeparam name="T">Operation kind to select.</typeparam>
        /// <typeparam name="_T1">Element type of the instruction list.</typeparam>
        /// <param name="rewrites">Instruction list; null entries (deleted ops) are skipped.</param>
        /// <param name="before">Exclusive upper bound on the list positions examined.</param>
        /// <returns>The matching operations, in list order.</returns>
        protected internal virtual IList<T> GetKindOfOps<T, _T1>(IList<_T1> rewrites, int before)
            where T : TokenStreamRewriter.RewriteOperation
            where _T1 : TokenStreamRewriter.RewriteOperation
        {
            IList<T> ops = new List<T>();
            for (int i = 0; i < before && i < rewrites.Count; i++)
            {
                // 'as' yields null both for deleted (null) slots and for
                // operations of a different kind, so one test covers both.
                T match = rewrites[i] as T;
                if (match != null)
                {
                    ops.AddItem(match);
                }
            }
            return ops;
        }
    }
}