Overhaul of the TokenStreamRewriter class.

Fixes bug #49.
This commit is contained in:
Mike Lischke 2016-11-16 15:17:46 +01:00
parent e5aa03c0b0
commit 9ac9c1c79a
2 changed files with 46 additions and 70 deletions

View File

@ -33,12 +33,10 @@
#include "misc/Interval.h"
#include "Token.h"
#include "TokenStream.h"
#include "support/CPPUtils.h"
#include "TokenStreamRewriter.h"
using namespace antlr4;
using namespace antlrcpp;
using antlr4::misc::Interval;
@ -113,7 +111,7 @@ void TokenStreamRewriter::ReplaceOp::InitializeInstanceFields() {
const std::string TokenStreamRewriter::DEFAULT_PROGRAM_NAME = "default";
TokenStreamRewriter::TokenStreamRewriter(TokenStream *tokens) : tokens(tokens) {
_programs.insert({ DEFAULT_PROGRAM_NAME, std::vector<RewriteOperation*>(PROGRAM_INIT_SIZE) });
_programs[DEFAULT_PROGRAM_NAME].reserve(PROGRAM_INIT_SIZE);
}
TokenStreamRewriter::~TokenStreamRewriter() {
@ -254,17 +252,16 @@ void TokenStreamRewriter::setLastRewriteTokenIndex(const std::string &programNam
}
std::vector<TokenStreamRewriter::RewriteOperation*>& TokenStreamRewriter::getProgram(const std::string &name) {
std::vector<TokenStreamRewriter::RewriteOperation*> &is = _programs[name];
if (is.empty()) {
is = initializeProgram(name);
auto iterator = _programs.find(name);
if (iterator == _programs.end()) {
return initializeProgram(name);
}
return is;
return iterator->second;
}
std::vector<TokenStreamRewriter::RewriteOperation*> TokenStreamRewriter::initializeProgram(const std::string &name) {
std::vector<TokenStreamRewriter::RewriteOperation*> is(PROGRAM_INIT_SIZE);
_programs.insert({ name, is });
return is;
std::vector<TokenStreamRewriter::RewriteOperation*>& TokenStreamRewriter::initializeProgram(const std::string &name) {
_programs[name].reserve(PROGRAM_INIT_SIZE);
return _programs[name];
}
std::string TokenStreamRewriter::getText() {
@ -339,16 +336,12 @@ std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> TokenStreamRe
// WALK REPLACES
for (size_t i = 0; i < rewrites.size(); ++i) {
TokenStreamRewriter::RewriteOperation *op = rewrites[i];
if (op == nullptr) {
ReplaceOp *rop = dynamic_cast<ReplaceOp *>(op);
if (rop == nullptr)
continue;
}
if (!is<ReplaceOp *>(op)) {
continue;
}
ReplaceOp *rop = static_cast<ReplaceOp*>(op);
// Wipe prior inserts within range
InsertBeforeOp *type = nullptr;
std::vector<InsertBeforeOp *> inserts = getKindOfOps(rewrites, type, i);
std::vector<InsertBeforeOp *> inserts = getKindOfOps<InsertBeforeOp>(rewrites, i);
for (auto iop : inserts) {
if (iop->index == rop->index) {
// E.g., insert before 2, delete 2..2; update replace
@ -364,8 +357,7 @@ std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> TokenStreamRe
}
}
// Drop any prior replaces contained within
ReplaceOp* type2 = nullptr;
std::vector<ReplaceOp*> prevReplaces = getKindOfOps(rewrites, type2, i);
std::vector<ReplaceOp*> prevReplaces = getKindOfOps<ReplaceOp>(rewrites, i);
for (auto prevRop : prevReplaces) {
if (prevRop->index >= rop->index && prevRop->lastIndex <= rop->lastIndex) {
// delete replace as it's a no-op.
@ -395,16 +387,13 @@ std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> TokenStreamRe
// WALK INSERTS
for (size_t i = 0; i < rewrites.size(); i++) {
RewriteOperation *op = rewrites[i];
if (op == nullptr) {
InsertBeforeOp *iop = dynamic_cast<InsertBeforeOp *>(rewrites[i]);
if (iop == nullptr)
continue;
}
if (!is<InsertBeforeOp*>(op)) {
continue;
}
InsertBeforeOp *iop = static_cast<InsertBeforeOp*>(rewrites[i]);
// combine current insert with prior if any at same index
std::vector<InsertBeforeOp*> prevInserts = getKindOfOps(rewrites, iop, i);
std::vector<InsertBeforeOp *> prevInserts = getKindOfOps<InsertBeforeOp>(rewrites, i);
for (auto prevIop : prevInserts) {
if (prevIop->index == iop->index) { // combine objects
// convert to strings...we're in process of toString'ing
@ -416,8 +405,7 @@ std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> TokenStreamRe
}
}
// look for replaces where iop.index is in range; error
ReplaceOp *type = nullptr;
std::vector<ReplaceOp*> prevReplaces = getKindOfOps(rewrites, type, i);
std::vector<ReplaceOp*> prevReplaces = getKindOfOps<ReplaceOp>(rewrites, i);
for (auto rop : prevReplaces) {
if (iop->index == rop->index) {
rop->text = catOpText(&iop->text, &rop->text);

View File

@ -122,17 +122,14 @@ namespace antlr4 {
virtual void rollback(size_t instructionIndex);
/// <summary>
/// Rollback the instruction stream for a program so that
/// the indicated instruction (via instructionIndex) is no
/// longer in the stream. UNTESTED!
/// </summary>
/// the indicated instruction (via instructionIndex) is no
/// longer in the stream. UNTESTED!
virtual void rollback(const std::string &programName, size_t instructionIndex);
virtual void deleteProgram();
/// <summary>
/// Reset the program so that no instructions exist </summary>
/// Reset the program so that no instructions exist.
virtual void deleteProgram(const std::string &programName);
virtual void insertAfter(Token *t, const std::string& text);
virtual void insertAfter(size_t index, const std::string& text);
@ -161,7 +158,7 @@ namespace antlr4 {
virtual size_t getLastRewriteTokenIndex();
/// Return the text from the original tokens altered per the
/// instructions given to this rewriter.
/// instructions given to this rewriter.
virtual std::string getText();
/** Return the text from the original tokens altered per the
@ -169,46 +166,40 @@ namespace antlr4 {
*/
std::string getText(std::string programName);
/// <summary>
/// Return the text associated with the tokens in the interval from the
/// original token stream but with the alterations given to this rewriter.
/// The interval refers to the indexes in the original token stream.
/// We do not alter the token stream in any way, so the indexes
/// and intervals are still consistent. Includes any operations done
/// to the first and last token in the interval. So, if you did an
/// insertBefore on the first token, you would get that insertion.
/// The same is true if you do an insertAfter the stop token.
/// </summary>
/// original token stream but with the alterations given to this rewriter.
/// The interval refers to the indexes in the original token stream.
/// We do not alter the token stream in any way, so the indexes
/// and intervals are still consistent. Includes any operations done
/// to the first and last token in the interval. So, if you did an
/// insertBefore on the first token, you would get that insertion.
/// The same is true if you do an insertAfter the stop token.
virtual std::string getText(const misc::Interval &interval);
virtual std::string getText(const std::string &programName, const misc::Interval &interval);
protected:
class RewriteOperation {
private:
TokenStreamRewriter *const outerInstance;
public:
/// What index into rewrites List are we?
virtual ~RewriteOperation() {};
/// Token buffer index.
size_t index;
std::string text;
RewriteOperation(TokenStreamRewriter *outerInstance, size_t index);
RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text);
/// Execute the rewrite operation by possibly adding to the buffer.
/// Return the index of the next token to operate on.
/// Position of this operation within its program's instruction list.
size_t instructionIndex;
virtual size_t execute(std::string *buf);
RewriteOperation(TokenStreamRewriter *outerInstance, size_t index);
RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text);
virtual ~RewriteOperation() {};
/// Execute the rewrite operation by possibly adding to the buffer.
/// Return the index of the next token to operate on.
virtual size_t execute(std::string *buf);
virtual std::string toString();
private:
TokenStreamRewriter *const outerInstance;
void InitializeInstanceFields();
};
@ -306,25 +297,22 @@ namespace antlr4 {
virtual std::string catOpText(std::string *a, std::string *b);
/// <summary>
/// Get all operations before an index of a particular kind </summary>
template <typename T, typename T1>
std::vector<T*> getKindOfOps(std::vector<T1*> rewrites, T * /*kind*/, size_t before) {
std::vector<T*> ops = std::vector<T*>();
/// Get all operations before an index of a particular kind.
template <typename T>
std::vector<T *> getKindOfOps(std::vector<RewriteOperation *> rewrites, size_t before) {
std::vector<T *> ops;
for (size_t i = 0; i < before && i < rewrites.size(); i++) {
TokenStreamRewriter::RewriteOperation *op = dynamic_cast<RewriteOperation*>(rewrites[i]);
if (op == nullptr) { // ignore deleted
T *op = dynamic_cast<T *>(rewrites[i]);
if (op == nullptr) { // ignore deleted or non matching entries
continue;
}
if (op != nullptr) {
ops.push_back(dynamic_cast<T*>(op));
}
ops.push_back(op);
}
return ops;
}
private:
std::vector<RewriteOperation*> initializeProgram(const std::string &name);
std::vector<RewriteOperation *>& initializeProgram(const std::string &name);
};