diff --git a/Database/Database.cpp b/Database/Database.cpp index 3f93e9b..c2f875e 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -602,10 +602,13 @@ Database::encodeRDF_new(const string _rdf_file) * objID 2 _list */ //this->o2s_o2ps_op2s(_p_id_tuples, _id_tuples_max); - this->s2p_s2po_sp2o(_p_id_tuples, _id_tuples_max); + //this->s2p_s2po_sp2o(_p_id_tuples, _id_tuples_max); + this->s2p_s2o_s2po_sp2o(_p_id_tuples, _id_tuples_max); this->o2p_o2s_o2ps_op2s(_p_id_tuples, _id_tuples_max); this->p2s_p2o_p2so(_p_id_tuples, _id_tuples_max); - this->so2p_s2o(_p_id_tuples, _id_tuples_max); + //WARN:thsi is too costly because s-o key num is too large + //100G+ for DBpedia2014 + //this->so2p_s2o(_p_id_tuples, _id_tuples_max); bool flag = this->saveDBInfoFile(); if (!flag) @@ -1438,15 +1441,21 @@ Database::o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max) //NOTICE: below are the the new ones bool -Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max) +Database::s2p_s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max) { qsort(_p_id_tuples, this->triples_num, sizeof(int*), Database:: _spo_cmp); + + int* _oidlist_s = NULL; int* _pidlist_s = NULL; int* _oidlist_sp = NULL; int* _pidoidlist_s = NULL; + + int _oidlist_s_len = 0; int _pidlist_s_len = 0; int _oidlist_sp_len = 0; int _pidoidlist_s_len = 0; + + // only _oidlist_s will be assigned with space, _oidlist_sp is always a part of _oidlist_s, just a pointer is enough int _pidlist_max = 0; int _pidoidlist_max = 0; int _oidlist_max = 0; @@ -1460,6 +1469,7 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max) Util::logging("finish s2p_sp2o_s2po initial"); + (this->kvstore)->open_subID2objIDlist(KVstore::CREATE_MODE); (this->kvstore)->open_subID2preIDlist(KVstore::CREATE_MODE); (this->kvstore)->open_subIDpreID2objIDlist(KVstore::CREATE_MODE); (this->kvstore)->open_subID2preIDobjIDlist(KVstore::CREATE_MODE); @@ -1474,6 +1484,17 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max) _pidlist_s = new int[_pidlist_max]; _pidlist_s_len = 0; //pidoidlist + //_pidoidlist_max = 1000 * 2; + //_pidoidlist_s = new int[_pidoidlist_max]; + //_pidoidlist_s_len = 0; + + //oidlist + _oidlist_max = 1000; + _oidlist_s = new int[_oidlist_max]; + _oidlist_sp = _oidlist_s; + _oidlist_s_len = 0; + _oidlist_sp_len = 0; + /* pidoidlist */ _pidoidlist_max = 1000 * 2; _pidoidlist_s = new int[_pidoidlist_max]; _pidoidlist_s_len = 0; @@ -1496,15 +1517,26 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max) _pidlist_s = _new_pidlist_s; } + //enlarge the space when needed + if(_oidlist_s_len == _oidlist_max) + { + _oidlist_max *= 10; + int * _new_oidlist_s = new int[_oidlist_max]; + memcpy(_new_oidlist_s, _oidlist_s, sizeof(int) * _oidlist_s_len); + /* (_oidlist_sp-_oidlist_s) is the offset of _oidlist_sp */ + _oidlist_sp = _new_oidlist_s + (_oidlist_sp-_oidlist_s); + delete[] _oidlist_s; + _oidlist_s = _new_oidlist_s; + } //enalrge the space when needed - if(_oidlist_sp_len == _oidlist_max) - { - _oidlist_max *= 10; - int* _new_oidlist_sp = new int[_oidlist_max]; - memcpy(_new_oidlist_sp, _oidlist_sp, sizeof(int) * _oidlist_sp_len); - delete[] _oidlist_sp; - _oidlist_sp = _new_oidlist_sp; - } + //if(_oidlist_sp_len == _oidlist_max) + //{ + // _oidlist_max *= 10; + // int* _new_oidlist_sp = new int[_oidlist_max]; + // memcpy(_new_oidlist_sp, _oidlist_sp, sizeof(int) * _oidlist_sp_len); + // delete[] _oidlist_sp; + // _oidlist_sp = _new_oidlist_sp; + //} //enlarge the space when needed if(_pidoidlist_s_len == _pidoidlist_max) @@ -1525,6 +1557,14 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max) // Util::logging(_ss.str()); // } + _oidlist_s[_oidlist_s_len] = _obj_id; + if(_sub_pre_change) + { + _oidlist_sp = _oidlist_s + _oidlist_s_len; + } + _oidlist_s_len ++; + _oidlist_sp_len ++; + //add objid to list _oidlist_sp[_oidlist_sp_len++] = _obj_id; @@ -1550,7 +1590,8 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max) _pidlist_s[_pidlist_s_len++] = _pre_id; (this->kvstore)->setobjIDlistBysubIDpreID(_sub_id, _pre_id, _oidlist_sp, _oidlist_sp_len); - delete[] _oidlist_sp; + //if not use s2o memory + //delete[] _oidlist_sp; _oidlist_sp = NULL; _oidlist_sp_len = 0; } @@ -1566,6 +1607,12 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max) delete[] _pidoidlist_s; _pidoidlist_s = NULL; _pidoidlist_s_len = 0; + + Util::sort(_oidlist_s, _oidlist_s_len); + (this->kvstore)->setobjIDlistBysubID(_sub_id, _oidlist_s, _oidlist_s_len); + delete[] _oidlist_s; + _oidlist_s = NULL; + _oidlist_s_len = 0; } }//end for( 0 to this->triple_num) diff --git a/Database/Database.h b/Database/Database.h index c4cdeef..1a901ee 100644 --- a/Database/Database.h +++ b/Database/Database.h @@ -132,7 +132,8 @@ private: bool s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max); bool o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max); //NOTICE: below is the new one - bool s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max); + //bool s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max); + bool s2p_s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max); bool o2p_o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max); bool p2s_p2o_p2so(int** _p_id_tuples, int _id_tuples_max); bool so2p_s2o(int** _p_id_tuples, int _id_tuples_max); diff --git a/Database/Join.cpp b/Database/Join.cpp index 1466046..2e4e509 100644 --- a/Database/Join.cpp +++ b/Database/Join.cpp @@ -154,9 +154,9 @@ Join::join_basic(BasicQuery* _basic_query) this->init(_basic_query); long begin = Util::get_cur_time(); bool ret1 = this->filter_before_join(); - long after_filter = Util::get_cur_time(); + long after_constant_filter = Util::get_cur_time(); //fprintf(stderr, "after filter_before_join: used %ld ms\n", after_filter - begin); - cerr<<"after filter_before_join: used "<<(after_filter-begin)<<" ms"<clear(); @@ -165,11 +165,25 @@ Join::join_basic(BasicQuery* _basic_query) this->add_literal_candidate(); long after_add_literal = Util::get_cur_time(); - cerr<<"after add_literal_candidate: used "<<(after_add_literal - after_filter)<<" ms"<join(); + bool ret2 = this->allFilterByPres(); + long after_pre_filter = Util::get_cur_time(); + cerr<<"after allFilterByPres: used "<<(after_pre_filter - after_add_literal)<<" ms"<clear(); + return false; + } + + bool ret3 = this->join(); long after_joinbasic = Util::get_cur_time(); - cerr<<"after join_basic: used "<<(after_joinbasic - after_add_literal)<<" ms"<clear(); + return false; + } //NOTICE:we do pre_var_handler first, and generate all satellites when coping to result list // @@ -184,7 +198,7 @@ Join::join_basic(BasicQuery* _basic_query) //2. generate candidates for satellites first using sp2o or s2o(op2s or o2s), later filtered by pre vars //the generating process had better been placed at the final, just before copying result this->pre_var_handler(); - + //TODO+BETTER:maybe also reduce to empty, return false long after_pre_var = Util::get_cur_time(); cerr<<"after pre var: used "<<(after_pre_var-after_joinbasic)<<" ms"<kvstore->getEntityByID((*it)[this->id2pos[var1]])<kvstore->getEntityByID((*it)[this->id2pos[var2]])<kvstore->getpreIDlistBysubIDobjID((*it)[this->id2pos[var1]], (*it)[this->id2pos[var2]], id_list, id_list_len); + //this->kvstore->getpreIDlistBysubIDobjID((*it)[this->id2pos[var1]], (*it)[this->id2pos[var2]], id_list, id_list_len); + int sid = (*it)[this->id2pos[var1]], oid = (*it)[this->id2pos[var2]]; +#ifdef SO2P + this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len); +#else + int *list1 = NULL, *list2 = NULL; + int len1 = 0, len2 = 0; + this->kvstore->getpreIDlistBysubID(sid, list1, len1); + this->kvstore->getpreIDlistByobjID(oid, list2, len2); + Util::intersect(id_list, id_list_len, list1, len1, list2, len2); +#endif //NOTICE:no need to add literals here because they are added when join using s2o } } @@ -352,7 +376,17 @@ Join::pre_var_handler() else if(sub_id != -1 && obj_id != -1) { //just use so2p in query graph to find predicates - this->kvstore->getpreIDlistBysubIDobjID(sub_id, obj_id, id_list, id_list_len); + //this->kvstore->getpreIDlistBysubIDobjID(sub_id, obj_id, id_list, id_list_len); + int sid = sub_id, oid = obj_id; +#ifdef SO2P + this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len); +#else + int *list1 = NULL, *list2 = NULL; + int len1 = 0, len2 = 0; + this->kvstore->getpreIDlistBysubID(sid, list1, len1); + this->kvstore->getpreIDlistByobjID(oid, list2, len2); + Util::intersect(id_list, id_list_len, list1, len1, list2, len2); +#endif } //sub is var while obj is constant else if(sub_id == -1 && obj_id != -1) @@ -364,6 +398,16 @@ Join::pre_var_handler() else { this->kvstore->getpreIDlistBysubIDobjID((*it)[this->id2pos[var1]], obj_id, id_list, id_list_len); + int sid = (*it)[this->id2pos[var1]], oid = obj_id; +#ifdef SO2P + this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len); +#else + int *list1 = NULL, *list2 = NULL; + int len1 = 0, len2 = 0; + this->kvstore->getpreIDlistBysubID(sid, list1, len1); + this->kvstore->getpreIDlistByobjID(oid, list2, len2); + Util::intersect(id_list, id_list_len, list1, len1, list2, len2); +#endif } } //sub is constant while obj is var @@ -376,7 +420,17 @@ Join::pre_var_handler() else { //NOTICE:no need to add literals here because they are added in add_literal_candidate using s2o - this->kvstore->getpreIDlistBysubIDobjID(sub_id, (*it)[this->id2pos[var2]], id_list, id_list_len); + //this->kvstore->getpreIDlistBysubIDobjID(sub_id, (*it)[this->id2pos[var2]], id_list, id_list_len); + int sid = sub_id, oid = (*it)[this->id2pos[var2]]; +#ifdef SO2P + this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len); +#else + int *list1 = NULL, *list2 = NULL; + int len1 = 0, len2 = 0; + this->kvstore->getpreIDlistBysubID(sid, list1, len1); + this->kvstore->getpreIDlistByobjID(oid, list2, len2); + Util::intersect(id_list, id_list_len, list1, len1, list2, len2); +#endif } } @@ -1097,7 +1151,7 @@ Join::multi_join() // //NOTICE:this should be done just once, so use it before pushing candidates //pruning the original candidates first(satellites only concerned with itself) - this->allFilterBySatellites(this->start_id); + //this->filterBySatellites(this->start_id); IDList& start_table = this->basic_query->getCandidateList(this->start_id); int start_size = this->basic_query->getCandidateSize(this->start_id); @@ -1156,7 +1210,7 @@ Join::multi_join() //fprintf(stderr, "the next node id to join: %d\n", id2); cerr<<"the next node id to join: "<allFilterBySatellites(id2); + //this->filterBySatellites(id2); #ifdef DEBUG_JOIN cerr<<"the start size "<basic_query->getCandidateSize(id2)<var_num; ++i) + { + if(this->basic_query->isSatelliteInJoin(i)) + continue; + if(this->filterBySatellites(i) == false) + return false; + } + return true; +} + +bool //false when no result for this basicquery +Join::filterBySatellites(int _var) +{ + IDList& cans = this->basic_query->getCandidateList(_var); + int size = this->basic_query->getCandidateSize(_var); + //cerr << "var " << "_var " << "size after pre_filter " << cans.size() <basic_query->getVarDegree(_var); vector in_edge_pre_id; vector out_edge_pre_id; @@ -2121,75 +2194,218 @@ Join::allFilterBySatellites(int _var) return true; } + //QUERY:maybe we can divide edges into two separate groups according to the size of p2s + //NOTICE+BETTER: the cost should be due to the cans size, p2s size and s2p size + //generally, size of p2s is larger than s2p, but smaller than size of cans + //The best way is to extract the features of dataset and keep + //but we may use a simple strategy here: use p2s if cans size is too large, i.e. > size of p2s + //(assuming 5000 here) + //WARN:different edge may corresponding different size of subjects, like is too large + //QUERY: erase is too costly, use an invalid[] array, maybe bitset due to large candidates size //only consider valid ones when join loop, but how about intersect and union? // //we build a new idlist with all valid ones, and update to the original idlist //(consider in current_table is not good, too many duplicates) - vector valid_idlist; - IDList& cans = this->basic_query->getCandidateList(_var); - int size = this->basic_query->getCandidateSize(_var); - for(int i = 0; i < size; ++i) - { - int ele = cans[i]; - int* list = NULL; - int list_len = 0; - bool exist_preid = true; + IDList* valid_list = NULL; + int *list = NULL; + int len = 0; - if(exist_preid && !in_edge_pre_id.empty()) - { - //(this->kvstore)->getpreIDsubIDlistByobjID(entity_id, pair_list, pair_len); - (this->kvstore)->getpreIDlistByobjID(ele, list, list_len); + if(!in_edge_pre_id.empty()) + { + int size2 = in_edge_pre_id.size(); + for(int i = 0; i < size2; ++i) + { + int preid = in_edge_pre_id[i]; + this->kvstore->getobjIDlistBypreID(preid, list, len); + if(i == 0) + { + if(size > len) + { + valid_list = IDList::intersect(cans, list, len); + } + else + { + valid_list = new IDList; + int* list2 = NULL; + int len2 = 0; + for(int j = 0; j < size; ++j) + { + this->kvstore->getpreIDlistByobjID(cans[j], list2, len2); + if(Util::bsearch_int_uporder(preid, list2, len2) != -1) + { + valid_list->addID(cans[j]); + } + delete[] list2; + } + } + } + else + { + if(valid_list->size() > len) + { + valid_list->intersectList(list, len); + } + else + { + int* list2 = NULL; + int len2 = 0; + IDList* new_list = new IDList; + int size3 = valid_list->size(); + for(int j = 0; j < size3; ++j) + { + this->kvstore->getpreIDlistByobjID(valid_list->getID(j), list2, len2); + if(Util::bsearch_int_uporder(preid, list2, len2) != -1) + { + new_list->addID(cans[j]); + } + delete[] list2; + } + delete valid_list; + valid_list = new_list; + } + } + delete[] list; + } + } - for(vector::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++) - { - int pre_id = (*itr_pre); - //the return value is pos, -1 if not found - if(Util::bsearch_int_uporder(pre_id, list, list_len) == -1) - exist_preid = false; - if(!exist_preid) - { - break; - } - } - delete[] list; - } + if(!is_literal_var(_var) && valid_list != NULL && valid_list->empty()) + { + //cerr << "quit when empty in edge"<kvstore->getsubIDlistBypreID(preid, list, len); + //cerr<<"p2s len "< len) + { + valid_list = IDList::intersect(cans, list, len); + } + else + { + valid_list = new IDList; + int* list2 = NULL; + int len2 = 0; + for(int j = 0; j < size; ++j) + { + this->kvstore->getpreIDlistBysubID(cans[j], list2, len2); + if(Util::bsearch_int_uporder(preid, list2, len2) != -1) + { + valid_list->addID(cans[j]); + } + delete[] list2; + } + } + } + else + { + if(valid_list->size() > len) + { + valid_list->intersectList(list, len); + } + else + { + int* list2 = NULL; + int len2 = 0; + IDList* new_list = new IDList; + int size3 = valid_list->size(); + for(int j = 0; j < size3; ++j) + { + this->kvstore->getpreIDlistBysubID(valid_list->getID(j), list2, len2); + if(Util::bsearch_int_uporder(preid, list2, len2) != -1) + { + new_list->addID(cans[j]); + } + delete[] list2; + } + delete valid_list; + valid_list = new_list; + } + } + delete[] list; + } + } - if(exist_preid && !out_edge_pre_id.empty()) - { - //(this->kvstore)->getpreIDobjIDlistBysubID(entity_id, pair_list, pair_len); - (this->kvstore)->getpreIDlistBysubID(ele, list, list_len); + if(!is_literal_var(_var) && valid_list->empty()) + { + //cerr << "quit when empty out edge"<::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++) - { - int pre_id = (*itr_pre); - if(Util::bsearch_int_uporder(pre_id, list, list_len) == -1) - exist_preid = false; - if(!exist_preid) - { - break; - } - } - delete[] list; - } + //vector valid_idlist; + //for(int i = 0; i < size; ++i) + //{ + // int ele = cans[i]; + // int* list = NULL; + // int list_len = 0; + // bool exist_preid = true; - //result sequence is illegal when there exists any missing filter predicate id. - if(exist_preid) - { - valid_idlist.push_back(ele); - } - } + // if(exist_preid && !in_edge_pre_id.empty()) + // { + // //(this->kvstore)->getpreIDsubIDlistByobjID(entity_id, pair_list, pair_len); + // (this->kvstore)->getpreIDlistByobjID(ele, list, list_len); + + // for(vector::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++) + // { + // int pre_id = (*itr_pre); + // //the return value is pos, -1 if not found + // if(Util::bsearch_int_uporder(pre_id, list, list_len) == -1) + // exist_preid = false; + // if(!exist_preid) + // { + // break; + // } + // } + // delete[] list; + // } + + // //NOTICE:we do not use intersect here because the case is a little different + // //first the pre num is not so much in a query + // //second once a pre in query is not found, break directly + + // if(exist_preid && !out_edge_pre_id.empty()) + // { + // //(this->kvstore)->getpreIDobjIDlistBysubID(entity_id, pair_list, pair_len); + // (this->kvstore)->getpreIDlistBysubID(ele, list, list_len); + + // for(vector::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++) + // { + // int pre_id = (*itr_pre); + // if(Util::bsearch_int_uporder(pre_id, list, list_len) == -1) + // exist_preid = false; + // if(!exist_preid) + // { + // break; + // } + // } + // delete[] list; + // } + + // //result sequence is illegal when there exists any missing filter predicate id. + // if(exist_preid) + // { + // valid_idlist.push_back(ele); + // } + //} //this is a core vertex, so if not literal var, exit when empty - if(!is_literal_var(_var) && valid_idlist.empty()) - { - return false; - } - cans.copy(valid_idlist); + //if(!is_literal_var(_var) && valid_idlist.empty()) + //{ + // return false; + //} + //cans.copy(valid_idlist); + return true; } diff --git a/Database/Join.h b/Database/Join.h index 195e653..79bda6f 100644 --- a/Database/Join.h +++ b/Database/Join.h @@ -162,7 +162,8 @@ private: void add_literal_candidate(); bool pre_var_handler(); //bool filterBySatellites(int _var, int _ele); - bool allFilterBySatellites(int _var); + bool filterBySatellites(int _var); + bool allFilterByPres(); void generateAllSatellites(); void cartesian(int pos, int end); diff --git a/Database/Strategy.cpp b/Database/Strategy.cpp index f473ab6..4faba7d 100644 --- a/Database/Strategy.cpp +++ b/Database/Strategy.cpp @@ -113,7 +113,16 @@ Strategy::handle(SPARQLquery& _query) { oid = (this->kvstore)->getIDByLiteral(triple.object); } + +#ifdef SO2P this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len); +#else + int *list1 = NULL, *list2 = NULL; + int len1 = 0, len2 = 0; + this->kvstore->getpreIDlistBysubID(sid, list1, len1); + this->kvstore->getpreIDlistByobjID(oid, list2, len2); + Util::intersect(id_list, id_list_len, list1, len1, list2, len2); +#endif //copy to result list for(int i = 0; i < id_list_len; ++i) diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index becf80e..e6f96eb 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -252,20 +252,20 @@ KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id) } //update so2p - { - int* _so2plist = NULL; - int _so2p_len = 0; - this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len); - bool _insert = this->insert_x(_so2plist, _so2p_len, _pre_id); - if(_insert) - { - this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len); - } - updateListLen += _so2p_len; - delete[] _so2plist; - _so2plist = NULL; - _so2p_len = 0; - } + //{ + // int* _so2plist = NULL; + // int _so2p_len = 0; + // this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len); + // bool _insert = this->insert_x(_so2plist, _so2p_len, _pre_id); + // if(_insert) + // { + // this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len); + // } + // updateListLen += _so2p_len; + // delete[] _so2plist; + // _so2plist = NULL; + // _so2p_len = 0; + //} //update p2so { @@ -638,25 +638,25 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id) } //update so2p - { - int* _so2plist = NULL; - int _so2p_len = 0; - this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len); - bool _remove = this->remove_x(_so2plist, _so2p_len, _pre_id); - if(_so2p_len == 0) - { - int* _so = new int[2]; - _so[0] = _sub_id; - _so[1] = _obj_id; - this->removeKey(this->subIDobjID2preIDlist, (char*)_so, sizeof(int)*2 ); - delete[] _so; - } - else if(_remove) - { - this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len); - } - delete[] _so2plist; - } + //{ + // int* _so2plist = NULL; + // int _so2p_len = 0; + // this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len); + // bool _remove = this->remove_x(_so2plist, _so2p_len, _pre_id); + // if(_so2p_len == 0) + // { + // int* _so = new int[2]; + // _so[0] = _sub_id; + // _so[1] = _obj_id; + // this->removeKey(this->subIDobjID2preIDlist, (char*)_so, sizeof(int)*2 ); + // delete[] _so; + // } + // else if(_remove) + // { + // this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len); + // } + // delete[] _so2plist; + //} //update p2so { @@ -1527,7 +1527,9 @@ void KVstore::open() this->open(this->preID2subIDlist, KVstore::s_pID2sIDlist, KVstore::READ_WRITE_MODE); this->open(this->objID2preIDlist, KVstore::s_oID2pIDlist, KVstore::READ_WRITE_MODE); this->open(this->preID2objIDlist, KVstore::s_pID2oIDlist, KVstore::READ_WRITE_MODE); +#ifdef SO2P this->open(this->subIDobjID2preIDlist, KVstore::s_sIDoID2pIDlist, KVstore::READ_WRITE_MODE); +#endif this->open(this->preID2subIDobjIDlist, KVstore::s_pID2sIDoIDlist, KVstore::READ_WRITE_MODE); } diff --git a/Query/IDList.cpp b/Query/IDList.cpp index 5e1a474..e161215 100644 --- a/Query/IDList.cpp +++ b/Query/IDList.cpp @@ -40,6 +40,12 @@ IDList::size()const return this->id_list.size(); } +bool +IDList::empty()const +{ + return this->id_list.size() == 0; +} + bool IDList::isExistID(int _id)const { @@ -99,11 +105,17 @@ IDList::clear() } void -IDList::copy(vector& _new_idlist) +IDList::copy(const vector& _new_idlist) { this->id_list = _new_idlist; } +void +IDList::copy(const IDList* _new_idlist) +{ + this->id_list = *(_new_idlist->getList()); +} + int IDList::intersectList(const int* _id_list, int _list_len) { diff --git a/Query/IDList.h b/Query/IDList.h index 298e3ba..663bb47 100644 --- a/Query/IDList.h +++ b/Query/IDList.h @@ -19,14 +19,16 @@ public: bool addID(int _id); //check whether _id exists in this IDList. - bool isExistID(int _id)const; - int size()const; + bool isExistID(int _id) const; + int size() const; + bool empty() const; const std::vector* getList()const; int& operator[] (const int & _i); std::string to_str(); int sort(); void clear(); - void copy(std::vector& _new_idlist); + void copy(const std::vector& _new_idlist); + void copy(const IDList* _new_idlist); // intersect/union _id_list to this IDList, note that the two list must be ordered before using these two functions. int intersectList(const int* _id_list, int _list_len); diff --git a/Util/Util.cpp b/Util/Util.cpp index 4f7c735..1462905 100644 --- a/Util/Util.cpp +++ b/Util/Util.cpp @@ -1091,3 +1091,100 @@ Util::logarithm(double _a, double _b) return -1.0; } +void +Util::intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2) +{ + vector res; + if(_list1 == NULL || _len1 == 0 || _list2 == NULL || _len2 == 0) + { + _id_list = NULL; + _id_list_len = 0; + } + + //when size is almost the same, intersect O(n) + //when one size is small ratio, search in the larger one O(mlogn) + // + //n>0 m=nk(02 + //k<=k0 binary search; k>k0 intersect + int method = -1; //0: intersect 1: search in list1 2: search in list2 + int n = _len1; + double k = 0; + if(n < _len2) + { + k = (double)n / (double)_len2; + n = _len2; + method = 2; + } + else + { + k = (double)_len2 / (double)n; + method = 1; + } + if(n <= 2) + method = 0; + else + { + double limit = Util::logarithm(n/2, 2); + if(k > limit) + method = 0; + } + + switch(method) + { + case 0: + { //this bracket is needed if vars are defined in case + int id_i = 0; + int num = _len1; + for(int i = 0; i < num; ++i) + { + int can_id = _list1[i]; + while((id_i < _len2) && (_list2[id_i] < can_id)) + { + id_i ++; + } + + if(id_i == _len2) + { + break; + } + + if(can_id == _list2[id_i]) + { + res.push_back(can_id); + id_i ++; + } + } + break; + } + case 1: + { + for(int i = 0; i < _len2; ++i) + { + if(Util::bsearch_int_uporder(_list2[i], _list1, _len1) != -1) + res.push_back(_list2[i]); + } + break; + } + case 2: + { + int m = _len1, i; + for(i = 0; i < m; ++i) + { + int t = _list1[i]; + if(Util::bsearch_int_uporder(t, _list2, _len2) != -1) + res.push_back(t); + } + break; + } + default: + cerr << "no such method in Util::intersect()" << endl; + break; + } + + _id_list_len = res.size(); + _id_list = new int[_id_list_len]; + for(int i = 0; i < _id_list_len; ++i) + _id_list[i] = res[i]; +} + diff --git a/Util/Util.h b/Util/Util.h index c859ffa..6414bd1 100644 --- a/Util/Util.h +++ b/Util/Util.h @@ -68,6 +68,7 @@ in the sparql query can point to the same node in data graph) #define STREAM_ON 1 #define READLINE_ON 1 #define MULTI_INDEX 1 +//#define SO2P 1 //indicate that in debug mode //#define DEBUG_STREAM @@ -200,6 +201,7 @@ public: static HashFunction hash[]; static double logarithm(double _a, double _b); + static void intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2); static char* l_trim(char * szOutput, const char *szInput); static char* r_trim(char *szOutput, const char *szInput);