diff --git a/Database/Strategy.cpp b/Database/Strategy.cpp index 942aec5..c54f072 100644 --- a/Database/Strategy.cpp +++ b/Database/Strategy.cpp @@ -47,7 +47,7 @@ Strategy::~Strategy() //however, this can be dealed due to several basic queries and linking bool -Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter) +Strategy::handle(SPARQLquery& _query) { #ifdef MULTI_INDEX Util::logging("IN GeneralEvaluation::handle"); @@ -117,7 +117,7 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter) //BETTER: use function pointer array in C++ class case 0: //default:filter by vstree and then verified by join - this->handler0(*iter, result_list, _result_filter); + this->handler0(*iter, result_list); break; case 1: this->handler1(*iter, result_list); @@ -160,7 +160,7 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter) } void -Strategy::handler0(BasicQuery* _bq, vector& _result_list, ResultFilter* _result_filter) +Strategy::handler0(BasicQuery* _bq, vector& _result_list) { //long before_filter = Util::get_cur_time(); cout << "this BasicQuery use query strategy 0" << endl; @@ -205,10 +205,6 @@ Strategy::handler0(BasicQuery* _bq, vector& _result_list, ResultFilte long tv_retrieve = Util::get_cur_time(); cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl; - //between retrieve and join - if (_result_filter != NULL) - _result_filter->candFilterWithResultHashTable(*_bq); - Join *join = new Join(kvstore, pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity); join->join_basic(_bq); delete join; diff --git a/Database/Strategy.h b/Database/Strategy.h index a3f9917..923eaf6 100644 --- a/Database/Strategy.h +++ b/Database/Strategy.h @@ -17,7 +17,6 @@ #include "../Query/BasicQuery.h" #include "../KVstore/KVstore.h" #include "../VSTree/VSTree.h" -#include "../Query/ResultFilter.h" class Strategy { @@ -26,7 +25,7 @@ public: Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID,TYPE_ENTITY_LITERAL_ID); ~Strategy(); //select efficient strategy to do the sparql query - bool handle(SPARQLquery&, ResultFilter* _result_filter = NULL); + bool handle(SPARQLquery&); private: int method; @@ -39,7 +38,7 @@ private: //NOTICE: even the ID type is int, it is no problem and no waste that we use unsigned in answer //(because -1, -2 or other invalid IDs can not be in answer) - void handler0(BasicQuery*, vector&, ResultFilter* _result_filter = NULL); + void handler0(BasicQuery*, vector&); void handler1(BasicQuery*, vector&); void handler2(BasicQuery*, vector&); void handler3(BasicQuery*, vector&); diff --git a/Query/GeneralEvaluation.cpp b/Query/GeneralEvaluation.cpp index ceb59e5..40e64f3 100644 --- a/Query/GeneralEvaluation.cpp +++ b/Query/GeneralEvaluation.cpp @@ -64,7 +64,10 @@ bool GeneralEvaluation::doQuery() printf("=================\n"); this->rewriting_evaluation_stack.clear(); - this->rewriting_evaluation_stack.push_back(this->query_tree.getGroupPattern()); + this->rewriting_evaluation_stack.push_back(EvaluationStackStruct()); + this->rewriting_evaluation_stack.back().grouppattern = this->query_tree.getGroupPattern(); + this->rewriting_evaluation_stack.back().sparql_query = NULL; + this->temp_result = this->rewritingBasedQueryEvaluation(0); } else @@ -2013,7 +2016,7 @@ void GeneralEvaluation::TempResultSet::print() GeneralEvaluation::TempResultSet* GeneralEvaluation::semanticBasedQueryEvaluation(QueryTree::GroupPattern &grouppattern) { - TempResultSet* result = new TempResultSet(); + TempResultSet *result = new TempResultSet(); grouppattern.initPatternBlockid(); @@ -2262,7 +2265,7 @@ bool GeneralEvaluation::expanseFirstOuterUnionGroupPattern(QueryTree::GroupPatte GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluation(int dep) { deque queue; - queue.push_back(this->rewriting_evaluation_stack[dep]); + queue.push_back(this->rewriting_evaluation_stack[dep].grouppattern); vector grouppattern_union; while (!queue.empty()) @@ -2276,8 +2279,8 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati for (int i = 0; i < (int)grouppattern_union.size(); i++) { - this->rewriting_evaluation_stack[dep] = grouppattern_union[i]; - QueryTree::GroupPattern *grouppattern = &this->rewriting_evaluation_stack[dep]; + this->rewriting_evaluation_stack[dep].grouppattern = grouppattern_union[i]; + QueryTree::GroupPattern *grouppattern = &this->rewriting_evaluation_stack[dep].grouppattern; grouppattern->getVarset(); for (int j = 0; j < 80; j++) printf("="); printf("\n"); @@ -2287,7 +2290,6 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati TempResultSet *sub_result = new TempResultSet(); QueryTree::GroupPattern triplepattern; - int grouppattern_triple_num = 0; for (int j = 0; j < (int)grouppattern->sub_grouppattern.size(); j++) if (grouppattern->sub_grouppattern[j].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type) { @@ -2296,34 +2298,16 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati QueryTree::GroupPattern::Pattern::Element(grouppattern->sub_grouppattern[j].pattern.predicate.value), QueryTree::GroupPattern::Pattern::Element(grouppattern->sub_grouppattern[j].pattern.object.value) )); - grouppattern_triple_num++; } triplepattern.getVarset(); - for (int j = 0; j < dep; j++) - { - QueryTree::GroupPattern &parrent_grouppattern = this->rewriting_evaluation_stack[j]; - - for (int k = 0; k < (int)parrent_grouppattern.sub_grouppattern.size(); k++) - if (parrent_grouppattern.sub_grouppattern[k].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type) - if (triplepattern.grouppattern_subject_object_maximal_varset.hasCommonVar(parrent_grouppattern.sub_grouppattern[k].pattern.subject_object_varset)) - { - triplepattern.addOnePattern(QueryTree::GroupPattern::Pattern( - QueryTree::GroupPattern::Pattern::Element(parrent_grouppattern.sub_grouppattern[k].pattern.subject.value), - QueryTree::GroupPattern::Pattern::Element(parrent_grouppattern.sub_grouppattern[k].pattern.predicate.value), - QueryTree::GroupPattern::Pattern::Element(parrent_grouppattern.sub_grouppattern[k].pattern.object.value) - )); - } - } - triplepattern.getVarset(); - //get useful varset Varset useful = this->query_tree.getResultProjectionVarset() + this->query_tree.getGroupByVarset(); if (!this->query_tree.checkProjectionAsterisk()) { for (int j = 0; j < dep; j++) { - QueryTree::GroupPattern &parrent_grouppattern = this->rewriting_evaluation_stack[j]; + QueryTree::GroupPattern &parrent_grouppattern = this->rewriting_evaluation_stack[j].grouppattern; for (int k = 0; k < (int)parrent_grouppattern.sub_grouppattern.size(); k++) { @@ -2343,8 +2327,8 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati } } - SPARQLquery sparql_query; - vector > encode_varset; + this->rewriting_evaluation_stack[dep].sparql_query = new SPARQLquery(); + this->rewriting_evaluation_stack[dep].encode_varset.clear(); //get connected block triplepattern.initPatternBlockid(); @@ -2363,21 +2347,19 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati if (triplepattern.sub_grouppattern[j].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type) if (triplepattern.getRootPatternBlockID(j) == j) { - sparql_query.addBasicQuery(); + this->rewriting_evaluation_stack[dep].sparql_query->addBasicQuery(); Varset occur; for (int k = 0; k < (int)triplepattern.sub_grouppattern.size(); k++) if (triplepattern.sub_grouppattern[k].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type) if (triplepattern.getRootPatternBlockID(k) == j) { - sparql_query.addTriple(Triple( + this->rewriting_evaluation_stack[dep].sparql_query->addTriple(Triple( triplepattern.sub_grouppattern[k].pattern.subject.value, triplepattern.sub_grouppattern[k].pattern.predicate.value, triplepattern.sub_grouppattern[k].pattern.object.value )); - - if (j < grouppattern_triple_num) - occur += triplepattern.sub_grouppattern[k].pattern.varset; + occur += triplepattern.sub_grouppattern[k].pattern.varset; } //reduce return result vars @@ -2386,7 +2368,7 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati else useful = occur; - encode_varset.push_back(useful.vars); + this->rewriting_evaluation_stack[dep].encode_varset.push_back(useful.vars); printf("select vars: "); useful.print(); @@ -2404,27 +2386,68 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati } long tv_begin = Util::get_cur_time(); - sparql_query.encodeQuery(this->kvstore, encode_varset); + this->rewriting_evaluation_stack[dep].sparql_query->encodeQuery(this->kvstore, this->rewriting_evaluation_stack[dep].encode_varset); long tv_encode = Util::get_cur_time(); printf("after Encode, used %ld ms.\n", tv_encode - tv_begin); if (dep > 0) - this->strategy.handle(sparql_query, &this->result_filter); - else - this->strategy.handle(sparql_query); + { + SPARQLquery *this_sparql_query = this->rewriting_evaluation_stack[dep].sparql_query; + SPARQLquery *last_sparql_query = this->rewriting_evaluation_stack[dep - 1].sparql_query; + + for (int j = 0; j < this_sparql_query->getBasicQueryNum(); j++) + { + BasicQuery &this_basic_query = this_sparql_query->getBasicQuery(j); + vector &this_encode_varset = this->rewriting_evaluation_stack[dep].encode_varset[j]; + + for (int k = 0; k < last_sparql_query->getBasicQueryNum(); k++) + { + BasicQuery &last_basic_query = last_sparql_query->getBasicQuery(k); + vector &last_encode_varset = this->rewriting_evaluation_stack[dep - 1].encode_varset[k]; + + for (int p = 0; p < (int)this_encode_varset.size(); p++) + { + for (int q = 0; q < (int)last_encode_varset.size(); q++) + if (this_encode_varset[p] == last_encode_varset[q]) + { + vector &result = last_basic_query.getResultList(); + set result_set; + + for (int l = 0; l < (int)result.size(); l++) + result_set.insert(result[l][q]); + + vector result_vector; + result_vector.reserve(result_set.size()); + + for (set::iterator iter = result_set.begin(); iter != result_set.end(); iter++) + result_vector.push_back(*iter); + + this_basic_query.getCandidateList(p).copy(result_vector); + this_basic_query.setReady(p); + + printf("fill var %s CandidateList size %d\n", this_encode_varset[p].c_str(), (int)result_vector.size()); + } + } + } + } + } + long tv_fillcand = Util::get_cur_time(); + printf("after FillCand, used %ld ms.\n", tv_fillcand - tv_encode); + + this->strategy.handle(*this->rewriting_evaluation_stack[dep].sparql_query); long tv_handle = Util::get_cur_time(); - printf("after Handle, used %ld ms.\n", tv_handle - tv_encode); + printf("after Handle, used %ld ms.\n", tv_handle - tv_fillcand); //collect and join the result of each BasicQuery - for (int j = 0; j < sparql_query.getBasicQueryNum(); j++) + for (int j = 0; j < this->rewriting_evaluation_stack[dep].sparql_query->getBasicQueryNum(); j++) { TempResultSet *temp = new TempResultSet(); temp->results.push_back(TempResult()); - temp->results[0].id_varset = Varset(encode_varset[j]); - int varnum = (int)encode_varset[j].size(); + temp->results[0].id_varset = Varset(this->rewriting_evaluation_stack[dep].encode_varset[j]); + int varnum = (int)this->rewriting_evaluation_stack[dep].encode_varset[j].size(); - vector &basicquery_result = sparql_query.getBasicQuery(j).getResultList(); + vector &basicquery_result = this->rewriting_evaluation_stack[dep].sparql_query->getBasicQuery(j).getResultList(); int basicquery_result_num = (int)basicquery_result.size(); temp->results[0].result.reserve(basicquery_result_num); @@ -2494,24 +2517,17 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati if (!sub_result->results[0].result.empty()) { - bool has_changed = false; - for (int j = 0; j < (int)grouppattern->sub_grouppattern.size(); j++) if (grouppattern->sub_grouppattern[j].type == QueryTree::GroupPattern::SubGroupPattern::Optional_type) { - if (!has_changed) - { - this->result_filter.changeResultHashTable(sparql_query, 1); - has_changed = true; - } - if ((int)this->rewriting_evaluation_stack.size() == dep + 1) { - this->rewriting_evaluation_stack.push_back(QueryTree::GroupPattern()); - grouppattern = &this->rewriting_evaluation_stack[dep]; + this->rewriting_evaluation_stack.push_back(EvaluationStackStruct()); + this->rewriting_evaluation_stack.back().sparql_query = NULL; + grouppattern = &this->rewriting_evaluation_stack[dep].grouppattern; } - this->rewriting_evaluation_stack[dep + 1] = grouppattern->sub_grouppattern[j].optional; + this->rewriting_evaluation_stack[dep + 1].grouppattern = grouppattern->sub_grouppattern[j].optional; TempResultSet *temp = rewritingBasedQueryEvaluation(dep + 1); @@ -2525,9 +2541,6 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati sub_result = new_result; } - - if (has_changed) - this->result_filter.changeResultHashTable(sparql_query, -1); } for (int j = 0; j < (int)grouppattern->sub_grouppattern.size(); j++) @@ -2562,6 +2575,9 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati result = new_result; } + + delete this->rewriting_evaluation_stack[dep].sparql_query; + this->rewriting_evaluation_stack[dep].sparql_query = NULL; } return result; diff --git a/Query/GeneralEvaluation.h b/Query/GeneralEvaluation.h index 5f086fd..ea9f441 100644 --- a/Query/GeneralEvaluation.h +++ b/Query/GeneralEvaluation.h @@ -22,7 +22,6 @@ #include "QueryTree.h" #include "Varset.h" #include "RegexExpression.h" -#include "ResultFilter.h" #include "../Util/Triple.h" class GeneralEvaluation @@ -34,7 +33,6 @@ class GeneralEvaluation KVstore *kvstore; StringIndex *stringindex; Strategy strategy; - ResultFilter result_filter; TYPE_TRIPLE_NUM *pre2num; TYPE_PREDICATE_ID limitID_predicate; @@ -194,8 +192,15 @@ class GeneralEvaluation }; private: - TempResultSet* temp_result; - std::vector rewriting_evaluation_stack; + TempResultSet *temp_result; + + struct EvaluationStackStruct + { + QueryTree::GroupPattern grouppattern; + SPARQLquery *sparql_query; + vector > encode_varset; + }; + std::vector rewriting_evaluation_stack; public: TempResultSet* semanticBasedQueryEvaluation(QueryTree::GroupPattern &grouppattern); diff --git a/StringIndex/StringIndex.cpp b/StringIndex/StringIndex.cpp index bc593a8..ffe17a3 100644 --- a/StringIndex/StringIndex.cpp +++ b/StringIndex/StringIndex.cpp @@ -97,11 +97,6 @@ bool StringIndexFile::randomAccess(unsigned id, string *str) this->buffer[length] = '\0'; *str = string(this->buffer); - //cout<<"check: read from string index - "<request.size(); i++) + { + if (min_begin == -1) + min_begin = this->request[i].offset; + else + min_begin = min(min_begin, this->request[i].offset); + max_end = max(max_end, this->request[i].offset + long(this->request[i].length)); + } if (this->type == Entity) cout << "Entity StringIndex "; @@ -123,37 +125,43 @@ void StringIndexFile::trySequenceAccess() cout << "Literal StringIndex "; if (this->type == Predicate) cout << "Predicate StringIndex "; - if (max_end / (long)1e5 < (long)this->request.size()) + + if ((max_end - min_begin) / 800000L < (long)this->request.size()) { cout << "sequence access." << endl; sort(this->request.begin(), this->request.end()); int pos = 0; - fseek(this->value_file, 0, SEEK_SET); char *block = new char[MAX_BLOCK_SIZE]; - long current_block_begin = 0; + + long current_block_begin = min_begin; + fseek(this->value_file, current_block_begin, SEEK_SET); + while (current_block_begin < max_end) { long current_block_end = min(current_block_begin + MAX_BLOCK_SIZE, max_end); + + if (current_block_end <= this->request[pos].offset) + { + current_block_begin = this->request[pos].offset; + fseek(this->value_file, current_block_begin, SEEK_SET); + current_block_end = min(current_block_begin + MAX_BLOCK_SIZE, max_end); + } + fread(block, sizeof(char), current_block_end - current_block_begin, this->value_file); while (pos < (int)this->request.size()) { long offset = this->request[pos].offset; long length = this->request[pos].length; - if (offset >= current_block_end) - break; + if (current_block_begin <= offset && offset + length <= current_block_end) { allocBuffer(length); memcpy(this->buffer, &block[offset - current_block_begin], length); this->buffer[length] = '\0'; *this->request[pos].str = string(this->buffer); - //if(string(this->buffer) == "") - //{ - //cout<<"Error in StringIndex"<buffer, &block[offset - current_block_begin], length); this->buffer[length] = '\0'; *this->request[pos].str = string(this->buffer); - //if(string(this->buffer) == "") - //{ - //cout<<"Error in StringIndex"<request.size() && this->request[pos - 1].offset == this->request[pos].offset) { *this->request[pos].str = *this->request[pos - 1].str; - //if(*this->request[pos].str == "") - //{ - //cout<<"Error in StringIndex"<buffer, block, length); this->buffer[length] = '\0'; *this->request[pos].str += string(this->buffer); - //if(*this->request[pos].str == "") - //{ - //cout<<"Error in StringIndex"<type == Predicate) - //{ - //if (id < 0) return; - //} - //else - //{ - //if (id == INVALID) return; - //} if (this->num <= id) { @@ -271,14 +259,6 @@ void StringIndexFile::disable(unsigned id) { //DEBUG: for predicate, -1 when invalid if (id >= this->num) return ; - //if(this->type == Predicate) - //{ - //if (id < 0 || id >= this->num) return ; - //} - //else - //{ - //if (id == INVALID) return; - //} this->index_table[id] = IndexInfo(); @@ -346,10 +326,6 @@ bool StringIndex::randomAccess(unsigned id, string *str, bool is_entity_or_liter { return true; } - //else - //{ - //cout<<"check: not found in string buffer - "<literal.randomAccess(id - Util::LITERAL_FIRST_ID, str); } } diff --git a/makefile b/makefile index 0339276..e4fc0d2 100644 --- a/makefile +++ b/makefile @@ -78,7 +78,7 @@ kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstree utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \ - $(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o + $(objdir)Varset.o $(objdir)QueryTree.o $(objdir)GeneralEvaluation.o signatureobj = $(objdir)SigEntry.o $(objdir)Signature.o @@ -277,7 +277,7 @@ $(objdir)Join.o: Database/Join.cpp Database/Join.h $(objdir)IDList.o $(objdir)Ba $(CC) $(CFLAGS) Database/Join.cpp $(inc) -o $(objdir)Join.o $(objdir)Strategy.o: Database/Strategy.cpp Database/Strategy.h $(objdir)SPARQLquery.o $(objdir)BasicQuery.o \ - $(objdir)Triple.o $(objdir)IDList.o $(objdir)KVstore.o $(objdir)VSTree.o $(objdir)Util.o $(objdir)Join.o $(objdir)ResultFilter.o + $(objdir)Triple.o $(objdir)IDList.o $(objdir)KVstore.o $(objdir)VSTree.o $(objdir)Util.o $(objdir)Join.o $(CC) $(CFLAGS) Database/Strategy.cpp $(inc) -o $(objdir)Strategy.o #objects in Database/ end @@ -303,12 +303,9 @@ $(objdir)Varset.o: Query/Varset.cpp Query/Varset.h $(objdir)QueryTree.o: Query/QueryTree.cpp Query/QueryTree.h $(objdir)Varset.o $(CC) $(CFLAGS) Query/QueryTree.cpp $(inc) -o $(objdir)QueryTree.o -$(objdir)ResultFilter.o: Query/ResultFilter.cpp Query/ResultFilter.h $(objdir)BasicQuery.o $(objdir)SPARQLquery.o $(objdir)Util.o - $(CC) $(CFLAGS) Query/ResultFilter.cpp $(inc) -o $(objdir)ResultFilter.o - #no more using $(objdir)Database.o $(objdir)GeneralEvaluation.o: Query/GeneralEvaluation.cpp Query/GeneralEvaluation.h $(objdir)QueryParser.o $(objdir)QueryTree.o \ - $(objdir)SPARQLquery.o $(objdir)Varset.o $(objdir)KVstore.o $(objdir)ResultFilter.o $(objdir)Strategy.o $(objdir)StringIndex.o + $(objdir)SPARQLquery.o $(objdir)Varset.o $(objdir)KVstore.o $(objdir)Strategy.o $(objdir)StringIndex.o $(CC) $(CFLAGS) Query/GeneralEvaluation.cpp $(inc) -o $(objdir)GeneralEvaluation.o #objects in Query/ end