remove wasteful so2p;use p2s or p2o instead of s2p s2o sometimes

This commit is contained in:
bookug 2016-05-20 21:57:15 +08:00
parent cb45d7cf45
commit 5aedbcd3aa
10 changed files with 509 additions and 120 deletions

View File

@ -602,10 +602,13 @@ Database::encodeRDF_new(const string _rdf_file)
* objID 2 <preIDsubID>_list */
//this->o2s_o2ps_op2s(_p_id_tuples, _id_tuples_max);
this->s2p_s2po_sp2o(_p_id_tuples, _id_tuples_max);
//this->s2p_s2po_sp2o(_p_id_tuples, _id_tuples_max);
this->s2p_s2o_s2po_sp2o(_p_id_tuples, _id_tuples_max);
this->o2p_o2s_o2ps_op2s(_p_id_tuples, _id_tuples_max);
this->p2s_p2o_p2so(_p_id_tuples, _id_tuples_max);
this->so2p_s2o(_p_id_tuples, _id_tuples_max);
//WARN: this is too costly because the number of s-o keys is too large
//100G+ for DBpedia2014
//this->so2p_s2o(_p_id_tuples, _id_tuples_max);
bool flag = this->saveDBInfoFile();
if (!flag)
@ -1438,15 +1441,21 @@ Database::o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max)
//NOTICE: below are the new ones
bool
Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max)
Database::s2p_s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max)
{
qsort(_p_id_tuples, this->triples_num, sizeof(int*), Database:: _spo_cmp);
int* _oidlist_s = NULL;
int* _pidlist_s = NULL;
int* _oidlist_sp = NULL;
int* _pidoidlist_s = NULL;
int _oidlist_s_len = 0;
int _pidlist_s_len = 0;
int _oidlist_sp_len = 0;
int _pidoidlist_s_len = 0;
// only _oidlist_s will be assigned with space, _oidlist_sp is always a part of _oidlist_s, just a pointer is enough
int _pidlist_max = 0;
int _pidoidlist_max = 0;
int _oidlist_max = 0;
@ -1460,6 +1469,7 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max)
Util::logging("finish s2p_sp2o_s2po initial");
(this->kvstore)->open_subID2objIDlist(KVstore::CREATE_MODE);
(this->kvstore)->open_subID2preIDlist(KVstore::CREATE_MODE);
(this->kvstore)->open_subIDpreID2objIDlist(KVstore::CREATE_MODE);
(this->kvstore)->open_subID2preIDobjIDlist(KVstore::CREATE_MODE);
@ -1474,6 +1484,17 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max)
_pidlist_s = new int[_pidlist_max];
_pidlist_s_len = 0;
//pidoidlist
//_pidoidlist_max = 1000 * 2;
//_pidoidlist_s = new int[_pidoidlist_max];
//_pidoidlist_s_len = 0;
//oidlist
_oidlist_max = 1000;
_oidlist_s = new int[_oidlist_max];
_oidlist_sp = _oidlist_s;
_oidlist_s_len = 0;
_oidlist_sp_len = 0;
/* pidoidlist */
_pidoidlist_max = 1000 * 2;
_pidoidlist_s = new int[_pidoidlist_max];
_pidoidlist_s_len = 0;
@ -1496,15 +1517,26 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max)
_pidlist_s = _new_pidlist_s;
}
//enalrge the space when needed
if(_oidlist_sp_len == _oidlist_max)
//enlarge the space when needed
if(_oidlist_s_len == _oidlist_max)
{
_oidlist_max *= 10;
int* _new_oidlist_sp = new int[_oidlist_max];
memcpy(_new_oidlist_sp, _oidlist_sp, sizeof(int) * _oidlist_sp_len);
delete[] _oidlist_sp;
_oidlist_sp = _new_oidlist_sp;
int * _new_oidlist_s = new int[_oidlist_max];
memcpy(_new_oidlist_s, _oidlist_s, sizeof(int) * _oidlist_s_len);
/* (_oidlist_sp-_oidlist_s) is the offset of _oidlist_sp */
_oidlist_sp = _new_oidlist_s + (_oidlist_sp-_oidlist_s);
delete[] _oidlist_s;
_oidlist_s = _new_oidlist_s;
}
//enalrge the space when needed
//if(_oidlist_sp_len == _oidlist_max)
//{
// _oidlist_max *= 10;
// int* _new_oidlist_sp = new int[_oidlist_max];
// memcpy(_new_oidlist_sp, _oidlist_sp, sizeof(int) * _oidlist_sp_len);
// delete[] _oidlist_sp;
// _oidlist_sp = _new_oidlist_sp;
//}
//enlarge the space when needed
if(_pidoidlist_s_len == _pidoidlist_max)
@ -1525,6 +1557,14 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max)
// Util::logging(_ss.str());
// }
_oidlist_s[_oidlist_s_len] = _obj_id;
if(_sub_pre_change)
{
_oidlist_sp = _oidlist_s + _oidlist_s_len;
}
_oidlist_s_len ++;
_oidlist_sp_len ++;
//add objid to list
_oidlist_sp[_oidlist_sp_len++] = _obj_id;
@ -1550,7 +1590,8 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max)
_pidlist_s[_pidlist_s_len++] = _pre_id;
(this->kvstore)->setobjIDlistBysubIDpreID(_sub_id, _pre_id, _oidlist_sp, _oidlist_sp_len);
delete[] _oidlist_sp;
//if not use s2o memory
//delete[] _oidlist_sp;
_oidlist_sp = NULL;
_oidlist_sp_len = 0;
}
@ -1566,6 +1607,12 @@ Database::s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max)
delete[] _pidoidlist_s;
_pidoidlist_s = NULL;
_pidoidlist_s_len = 0;
Util::sort(_oidlist_s, _oidlist_s_len);
(this->kvstore)->setobjIDlistBysubID(_sub_id, _oidlist_s, _oidlist_s_len);
delete[] _oidlist_s;
_oidlist_s = NULL;
_oidlist_s_len = 0;
}
}//end for( 0 to this->triple_num)

View File

@ -132,7 +132,8 @@ private:
bool s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
bool o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max);
//NOTICE: below is the new one
bool s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
//bool s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
bool s2p_s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
bool o2p_o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max);
bool p2s_p2o_p2so(int** _p_id_tuples, int _id_tuples_max);
bool so2p_s2o(int** _p_id_tuples, int _id_tuples_max);

View File

@ -154,9 +154,9 @@ Join::join_basic(BasicQuery* _basic_query)
this->init(_basic_query);
long begin = Util::get_cur_time();
bool ret1 = this->filter_before_join();
long after_filter = Util::get_cur_time();
long after_constant_filter = Util::get_cur_time();
//fprintf(stderr, "after filter_before_join: used %ld ms\n", after_filter - begin);
cerr<<"after filter_before_join: used "<<(after_filter-begin)<<" ms"<<endl;
cerr<<"after filter_before_join: used "<<(after_constant_filter - begin)<<" ms"<<endl;
if(!ret1)
{
this->clear();
@ -165,11 +165,25 @@ Join::join_basic(BasicQuery* _basic_query)
this->add_literal_candidate();
long after_add_literal = Util::get_cur_time();
cerr<<"after add_literal_candidate: used "<<(after_add_literal - after_filter)<<" ms"<<endl;
cerr<<"after add_literal_candidate: used "<<(after_add_literal - after_constant_filter)<<" ms"<<endl;
this->join();
bool ret2 = this->allFilterByPres();
long after_pre_filter = Util::get_cur_time();
cerr<<"after allFilterByPres: used "<<(after_pre_filter - after_add_literal)<<" ms"<<endl;
if(!ret2)
{
this->clear();
return false;
}
bool ret3 = this->join();
long after_joinbasic = Util::get_cur_time();
cerr<<"after join_basic: used "<<(after_joinbasic - after_add_literal)<<" ms"<<endl;
cerr<<"after join_basic: used "<<(after_joinbasic - after_pre_filter)<<" ms"<<endl;
if(!ret3)
{
this->clear();
return false;
}
//NOTICE:we do pre_var_handler first, and generate all satellites when coping to result list
//
@ -184,7 +198,7 @@ Join::join_basic(BasicQuery* _basic_query)
//2. generate candidates for satellites first using sp2o or s2o(op2s or o2s), later filtered by pre vars
//the generating process had better been placed at the final, just before copying result
this->pre_var_handler();
//TODO+BETTER:maybe also reduce to empty, return false
long after_pre_var = Util::get_cur_time();
cerr<<"after pre var: used "<<(after_pre_var-after_joinbasic)<<" ms"<<endl;
@ -344,7 +358,17 @@ Join::pre_var_handler()
//}
//cerr<<"sub str: "<<this->kvstore->getEntityByID((*it)[this->id2pos[var1]])<<endl;
//cerr<<"obj str: "<<this->kvstore->getEntityByID((*it)[this->id2pos[var2]])<<endl;
this->kvstore->getpreIDlistBysubIDobjID((*it)[this->id2pos[var1]], (*it)[this->id2pos[var2]], id_list, id_list_len);
//this->kvstore->getpreIDlistBysubIDobjID((*it)[this->id2pos[var1]], (*it)[this->id2pos[var2]], id_list, id_list_len);
int sid = (*it)[this->id2pos[var1]], oid = (*it)[this->id2pos[var2]];
#ifdef SO2P
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);
#else
int *list1 = NULL, *list2 = NULL;
int len1 = 0, len2 = 0;
this->kvstore->getpreIDlistBysubID(sid, list1, len1);
this->kvstore->getpreIDlistByobjID(oid, list2, len2);
Util::intersect(id_list, id_list_len, list1, len1, list2, len2);
#endif
//NOTICE:no need to add literals here because they are added when join using s2o
}
}
@ -352,7 +376,17 @@ Join::pre_var_handler()
else if(sub_id != -1 && obj_id != -1)
{
//just use so2p in query graph to find predicates
this->kvstore->getpreIDlistBysubIDobjID(sub_id, obj_id, id_list, id_list_len);
//this->kvstore->getpreIDlistBysubIDobjID(sub_id, obj_id, id_list, id_list_len);
int sid = sub_id, oid = obj_id;
#ifdef SO2P
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);
#else
int *list1 = NULL, *list2 = NULL;
int len1 = 0, len2 = 0;
this->kvstore->getpreIDlistBysubID(sid, list1, len1);
this->kvstore->getpreIDlistByobjID(oid, list2, len2);
Util::intersect(id_list, id_list_len, list1, len1, list2, len2);
#endif
}
//sub is var while obj is constant
else if(sub_id == -1 && obj_id != -1)
@ -364,6 +398,16 @@ Join::pre_var_handler()
else
{
this->kvstore->getpreIDlistBysubIDobjID((*it)[this->id2pos[var1]], obj_id, id_list, id_list_len);
int sid = (*it)[this->id2pos[var1]], oid = obj_id;
#ifdef SO2P
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);
#else
int *list1 = NULL, *list2 = NULL;
int len1 = 0, len2 = 0;
this->kvstore->getpreIDlistBysubID(sid, list1, len1);
this->kvstore->getpreIDlistByobjID(oid, list2, len2);
Util::intersect(id_list, id_list_len, list1, len1, list2, len2);
#endif
}
}
//sub is constant while obj is var
@ -376,7 +420,17 @@ Join::pre_var_handler()
else
{
//NOTICE:no need to add literals here because they are added in add_literal_candidate using s2o
this->kvstore->getpreIDlistBysubIDobjID(sub_id, (*it)[this->id2pos[var2]], id_list, id_list_len);
//this->kvstore->getpreIDlistBysubIDobjID(sub_id, (*it)[this->id2pos[var2]], id_list, id_list_len);
int sid = sub_id, oid = (*it)[this->id2pos[var2]];
#ifdef SO2P
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);
#else
int *list1 = NULL, *list2 = NULL;
int len1 = 0, len2 = 0;
this->kvstore->getpreIDlistBysubID(sid, list1, len1);
this->kvstore->getpreIDlistByobjID(oid, list2, len2);
Util::intersect(id_list, id_list_len, list1, len1, list2, len2);
#endif
}
}
@ -1097,7 +1151,7 @@ Join::multi_join()
//
//NOTICE:this should be done just once, so use it before pushing candidates
//pruning the original candidates first(satellites only concerned with itself)
this->allFilterBySatellites(this->start_id);
//this->filterBySatellites(this->start_id);
IDList& start_table = this->basic_query->getCandidateList(this->start_id);
int start_size = this->basic_query->getCandidateSize(this->start_id);
@ -1156,7 +1210,7 @@ Join::multi_join()
//fprintf(stderr, "the next node id to join: %d\n", id2);
cerr<<"the next node id to join: "<<id2<<endl;
#endif
this->allFilterBySatellites(id2);
//this->filterBySatellites(id2);
#ifdef DEBUG_JOIN
cerr<<"the start size "<<this->basic_query->getCandidateSize(id2)<<endl;
#endif
@ -2069,9 +2123,28 @@ Join::add_literal_candidate()
//(constants are not necessarily considered here)
//this check is fast because predicate num is small, but the performance can be very good
//(instead of filter when joining, we do a precheck first!)
bool //false when no result for this basicquery
Join::allFilterBySatellites(int _var)
//Apply filterBySatellites() to every variable index in [0, var_num) that is
//not reported as a satellite by isSatelliteInJoin().
//Returns false as soon as one of those calls returns false, true otherwise.
bool
Join::allFilterByPres()
{
	for(int var = 0; var < this->var_num; ++var)
	{
		//satellite variables are skipped; the filter only runs for the remaining ones
		if(!this->basic_query->isSatelliteInJoin(var) && !this->filterBySatellites(var))
		{
			return false;
		}
	}

	return true;
}
bool //false when no result for this basicquery
Join::filterBySatellites(int _var)
{
IDList& cans = this->basic_query->getCandidateList(_var);
int size = this->basic_query->getCandidateSize(_var);
//cerr << "var " << "_var " << "size after pre_filter " << cans.size() <<endl;
if(size == 0 && !is_literal_var(_var))
return false;
int var_degree = this->basic_query->getVarDegree(_var);
vector<int> in_edge_pre_id;
vector<int> out_edge_pre_id;
@ -2121,75 +2194,218 @@ Join::allFilterBySatellites(int _var)
return true;
}
//QUERY:maybe we can divide edges into two separate groups according to the size of p2s
//NOTICE+BETTER: the cost should be due to the cans size, p2s size and s2p size
//generally, size of p2s is larger than s2p, but smaller than size of cans
//The best way is to extract the features of dataset and keep
//but we may use a simple strategy here: use p2s if cans size is too large, i.e. > size of p2s
//(assuming 5000 here)
//WARN:different edge may corresponding different size of subjects, like <rdf:type> is too large
//QUERY: erase is too costly, use an invalid[] array, maybe bitset due to large candidates size
//only consider valid ones when join loop, but how about intersect and union?
//
//we build a new idlist with all valid ones, and update to the original idlist
//(consider in current_table is not good, too many duplicates)
vector<int> valid_idlist;
IDList& cans = this->basic_query->getCandidateList(_var);
int size = this->basic_query->getCandidateSize(_var);
for(int i = 0; i < size; ++i)
{
int ele = cans[i];
int* list = NULL;
int list_len = 0;
bool exist_preid = true;
IDList* valid_list = NULL;
int *list = NULL;
int len = 0;
if(exist_preid && !in_edge_pre_id.empty())
if(!in_edge_pre_id.empty())
{
//(this->kvstore)->getpreIDsubIDlistByobjID(entity_id, pair_list, pair_len);
(this->kvstore)->getpreIDlistByobjID(ele, list, list_len);
for(vector<int>::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++)
int size2 = in_edge_pre_id.size();
for(int i = 0; i < size2; ++i)
{
int pre_id = (*itr_pre);
//the return value is pos, -1 if not found
if(Util::bsearch_int_uporder(pre_id, list, list_len) == -1)
exist_preid = false;
if(!exist_preid)
int preid = in_edge_pre_id[i];
this->kvstore->getobjIDlistBypreID(preid, list, len);
if(i == 0)
{
break;
if(size > len)
{
valid_list = IDList::intersect(cans, list, len);
}
else
{
valid_list = new IDList;
int* list2 = NULL;
int len2 = 0;
for(int j = 0; j < size; ++j)
{
this->kvstore->getpreIDlistByobjID(cans[j], list2, len2);
if(Util::bsearch_int_uporder(preid, list2, len2) != -1)
{
valid_list->addID(cans[j]);
}
delete[] list2;
}
}
}
else
{
if(valid_list->size() > len)
{
valid_list->intersectList(list, len);
}
else
{
int* list2 = NULL;
int len2 = 0;
IDList* new_list = new IDList;
int size3 = valid_list->size();
for(int j = 0; j < size3; ++j)
{
this->kvstore->getpreIDlistByobjID(valid_list->getID(j), list2, len2);
if(Util::bsearch_int_uporder(preid, list2, len2) != -1)
{
new_list->addID(cans[j]);
}
delete[] list2;
}
delete valid_list;
valid_list = new_list;
}
}
delete[] list;
}
//NOTICE:we do not use intersect here because the case is a little different
//first the pre num is not so much in a query
//second once a pre in query is not found, break directly
if(exist_preid && !out_edge_pre_id.empty())
{
//(this->kvstore)->getpreIDobjIDlistBysubID(entity_id, pair_list, pair_len);
(this->kvstore)->getpreIDlistBysubID(ele, list, list_len);
for(vector<int>::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++)
{
int pre_id = (*itr_pre);
if(Util::bsearch_int_uporder(pre_id, list, list_len) == -1)
exist_preid = false;
if(!exist_preid)
{
break;
}
}
delete[] list;
}
//result sequence is illegal when there exists any missing filter predicate id.
if(exist_preid)
{
valid_idlist.push_back(ele);
}
}
//this is a core vertex, so if not literal var, exit when empty
if(!is_literal_var(_var) && valid_idlist.empty())
if(!is_literal_var(_var) && valid_list != NULL && valid_list->empty())
{
//cerr << "quit when empty in edge"<<endl;
return false;
}
cans.copy(valid_idlist);
if(!out_edge_pre_id.empty())
{
int size2 = out_edge_pre_id.size();
for(int i = 0; i < size2; ++i)
{
int preid = out_edge_pre_id[i];
this->kvstore->getsubIDlistBypreID(preid, list, len);
//cerr<<"p2s len "<<len<<endl;
if(valid_list == NULL && i == 0)
{
if(size > len)
{
valid_list = IDList::intersect(cans, list, len);
}
else
{
valid_list = new IDList;
int* list2 = NULL;
int len2 = 0;
for(int j = 0; j < size; ++j)
{
this->kvstore->getpreIDlistBysubID(cans[j], list2, len2);
if(Util::bsearch_int_uporder(preid, list2, len2) != -1)
{
valid_list->addID(cans[j]);
}
delete[] list2;
}
}
}
else
{
if(valid_list->size() > len)
{
valid_list->intersectList(list, len);
}
else
{
int* list2 = NULL;
int len2 = 0;
IDList* new_list = new IDList;
int size3 = valid_list->size();
for(int j = 0; j < size3; ++j)
{
this->kvstore->getpreIDlistBysubID(valid_list->getID(j), list2, len2);
if(Util::bsearch_int_uporder(preid, list2, len2) != -1)
{
new_list->addID(cans[j]);
}
delete[] list2;
}
delete valid_list;
valid_list = new_list;
}
}
delete[] list;
}
}
if(!is_literal_var(_var) && valid_list->empty())
{
//cerr << "quit when empty out edge"<<endl;
return false;
}
cans.copy(valid_list);
delete valid_list;
cerr << "var " << _var << "size after pre_filter " << cans.size() <<endl;
//vector<int> valid_idlist;
//for(int i = 0; i < size; ++i)
//{
// int ele = cans[i];
// int* list = NULL;
// int list_len = 0;
// bool exist_preid = true;
// if(exist_preid && !in_edge_pre_id.empty())
// {
// //(this->kvstore)->getpreIDsubIDlistByobjID(entity_id, pair_list, pair_len);
// (this->kvstore)->getpreIDlistByobjID(ele, list, list_len);
// for(vector<int>::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++)
// {
// int pre_id = (*itr_pre);
// //the return value is pos, -1 if not found
// if(Util::bsearch_int_uporder(pre_id, list, list_len) == -1)
// exist_preid = false;
// if(!exist_preid)
// {
// break;
// }
// }
// delete[] list;
// }
// //NOTICE:we do not use intersect here because the case is a little different
// //first the pre num is not so much in a query
// //second once a pre in query is not found, break directly
// if(exist_preid && !out_edge_pre_id.empty())
// {
// //(this->kvstore)->getpreIDobjIDlistBysubID(entity_id, pair_list, pair_len);
// (this->kvstore)->getpreIDlistBysubID(ele, list, list_len);
// for(vector<int>::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++)
// {
// int pre_id = (*itr_pre);
// if(Util::bsearch_int_uporder(pre_id, list, list_len) == -1)
// exist_preid = false;
// if(!exist_preid)
// {
// break;
// }
// }
// delete[] list;
// }
// //result sequence is illegal when there exists any missing filter predicate id.
// if(exist_preid)
// {
// valid_idlist.push_back(ele);
// }
//}
//this is a core vertex, so if not literal var, exit when empty
//if(!is_literal_var(_var) && valid_idlist.empty())
//{
// return false;
//}
//cans.copy(valid_idlist);
return true;
}

View File

@ -162,7 +162,8 @@ private:
void add_literal_candidate();
bool pre_var_handler();
//bool filterBySatellites(int _var, int _ele);
bool allFilterBySatellites(int _var);
bool filterBySatellites(int _var);
bool allFilterByPres();
void generateAllSatellites();
void cartesian(int pos, int end);

View File

@ -113,7 +113,16 @@ Strategy::handle(SPARQLquery& _query)
{
oid = (this->kvstore)->getIDByLiteral(triple.object);
}
#ifdef SO2P
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);
#else
int *list1 = NULL, *list2 = NULL;
int len1 = 0, len2 = 0;
this->kvstore->getpreIDlistBysubID(sid, list1, len1);
this->kvstore->getpreIDlistByobjID(oid, list2, len2);
Util::intersect(id_list, id_list_len, list1, len1, list2, len2);
#endif
//copy to result list
for(int i = 0; i < id_list_len; ++i)

View File

@ -252,20 +252,20 @@ KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
}
//update so2p
{
int* _so2plist = NULL;
int _so2p_len = 0;
this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
bool _insert = this->insert_x(_so2plist, _so2p_len, _pre_id);
if(_insert)
{
this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
}
updateListLen += _so2p_len;
delete[] _so2plist;
_so2plist = NULL;
_so2p_len = 0;
}
//{
// int* _so2plist = NULL;
// int _so2p_len = 0;
// this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
// bool _insert = this->insert_x(_so2plist, _so2p_len, _pre_id);
// if(_insert)
// {
// this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
// }
// updateListLen += _so2p_len;
// delete[] _so2plist;
// _so2plist = NULL;
// _so2p_len = 0;
//}
//update p2so
{
@ -638,25 +638,25 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
}
//update so2p
{
int* _so2plist = NULL;
int _so2p_len = 0;
this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
bool _remove = this->remove_x(_so2plist, _so2p_len, _pre_id);
if(_so2p_len == 0)
{
int* _so = new int[2];
_so[0] = _sub_id;
_so[1] = _obj_id;
this->removeKey(this->subIDobjID2preIDlist, (char*)_so, sizeof(int)*2 );
delete[] _so;
}
else if(_remove)
{
this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
}
delete[] _so2plist;
}
//{
// int* _so2plist = NULL;
// int _so2p_len = 0;
// this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
// bool _remove = this->remove_x(_so2plist, _so2p_len, _pre_id);
// if(_so2p_len == 0)
// {
// int* _so = new int[2];
// _so[0] = _sub_id;
// _so[1] = _obj_id;
// this->removeKey(this->subIDobjID2preIDlist, (char*)_so, sizeof(int)*2 );
// delete[] _so;
// }
// else if(_remove)
// {
// this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
// }
// delete[] _so2plist;
//}
//update p2so
{
@ -1527,7 +1527,9 @@ void KVstore::open()
this->open(this->preID2subIDlist, KVstore::s_pID2sIDlist, KVstore::READ_WRITE_MODE);
this->open(this->objID2preIDlist, KVstore::s_oID2pIDlist, KVstore::READ_WRITE_MODE);
this->open(this->preID2objIDlist, KVstore::s_pID2oIDlist, KVstore::READ_WRITE_MODE);
#ifdef SO2P
this->open(this->subIDobjID2preIDlist, KVstore::s_sIDoID2pIDlist, KVstore::READ_WRITE_MODE);
#endif
this->open(this->preID2subIDobjIDlist, KVstore::s_pID2sIDoIDlist, KVstore::READ_WRITE_MODE);
}

View File

@ -40,6 +40,12 @@ IDList::size()const
return this->id_list.size();
}
//Tell whether this IDList currently holds no ids.
bool
IDList::empty()const
{
	return this->id_list.empty();
}
bool
IDList::isExistID(int _id)const
{
@ -99,11 +105,17 @@ IDList::clear()
}
void
IDList::copy(vector<int>& _new_idlist)
IDList::copy(const vector<int>& _new_idlist)
{
this->id_list = _new_idlist;
}
void
IDList::copy(const IDList* _new_idlist)
{
this->id_list = *(_new_idlist->getList());
}
int
IDList::intersectList(const int* _id_list, int _list_len)
{

View File

@ -19,14 +19,16 @@ public:
bool addID(int _id);
//check whether _id exists in this IDList.
bool isExistID(int _id)const;
int size()const;
bool isExistID(int _id) const;
int size() const;
bool empty() const;
const std::vector<int>* getList()const;
int& operator[] (const int & _i);
std::string to_str();
int sort();
void clear();
void copy(std::vector<int>& _new_idlist);
void copy(const std::vector<int>& _new_idlist);
void copy(const IDList* _new_idlist);
// intersect/union _id_list to this IDList, note that the two list must be ordered before using these two functions.
int intersectList(const int* _id_list, int _list_len);

View File

@ -1091,3 +1091,100 @@ Util::logarithm(double _a, double _b)
return -1.0;
}
//Intersect two id lists and return the common ids in a newly allocated array.
//
//_id_list      [out] array allocated with new[] holding the common ids; this function
//              never frees it, so the caller has to delete[] it.
//              Set to NULL when either input list is NULL or has no elements.
//_id_list_len  [out] number of ids written to _id_list
//_list1,_len1  first input list and its length
//_list2,_len2  second input list and its length
//
//NOTICE: the merge scan below only works when both lists are in ascending order,
//and the other two strategies binary-search one list via Util::bsearch_int_uporder
void
Util::intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2)
{
	//nothing can be common if one side is missing or empty: report an empty result and stop here
	//(without this return the code below would go on and touch a possibly NULL list)
	if(_list1 == NULL || _len1 <= 0 || _list2 == NULL || _len2 <= 0)
	{
		_id_list = NULL;
		_id_list_len = 0;
		return;
	}

	vector<int> res;

	//when the sizes are almost the same, merge-intersect: O(n)
	//when one list is much smaller, binary search each of its ids in the larger one: O(m*logn)
	//
	//let n be the larger size and m = n*k (0 < k <= 1) the smaller one
	//merge costs about n*(k+1), searching costs about n*k*logn
	//they are equal at k0 = 1 / (log2(n) - 1), i.e. the logarithm of 2 with base n/2 (requires n > 2)
	//k <= k0: binary search; k > k0: merge
	int method = -1; //0: merge  1: search in list1  2: search in list2
	int n = _len1;
	double k = 0;
	if(n < _len2)
	{
		//list1 is the smaller one, so its ids are looked up in list2
		k = (double)n / (double)_len2;
		n = _len2;
		method = 2;
	}
	else
	{
		//list2 is the smaller one (or both are equal), so its ids are looked up in list1
		k = (double)_len2 / (double)n;
		method = 1;
	}

	if(n <= 2)
	{
		method = 0;
	}
	else
	{
		//NOTE(review): presumably Util::logarithm(a, b) yields the logarithm of b with base a,
		//which is the threshold k0 described above -- confirm against its definition
		double limit = Util::logarithm(n/2, 2);
		if(k > limit)
		{
			method = 0;
		}
	}

	switch(method)
	{
	case 0:
	{   //this bracket is needed if vars are defined in case
		//walk list1 and advance a cursor in list2 that never moves backwards
		int pos2 = 0;
		for(int pos1 = 0; pos1 < _len1; ++pos1)
		{
			int can_id = _list1[pos1];
			while((pos2 < _len2) && (_list2[pos2] < can_id))
			{
				pos2 ++;
			}
			if(pos2 == _len2)
			{
				//list2 is exhausted, no more common ids
				break;
			}
			if(can_id == _list2[pos2])
			{
				res.push_back(can_id);
				pos2 ++;
			}
		}
		break;
	}
	case 1:
	{
		//bsearch_int_uporder returns -1 when the id is not found
		for(int i = 0; i < _len2; ++i)
		{
			if(Util::bsearch_int_uporder(_list2[i], _list1, _len1) != -1)
			{
				res.push_back(_list2[i]);
			}
		}
		break;
	}
	case 2:
	{
		for(int i = 0; i < _len1; ++i)
		{
			int t = _list1[i];
			if(Util::bsearch_int_uporder(t, _list2, _len2) != -1)
			{
				res.push_back(t);
			}
		}
		break;
	}
	default:
		cerr << "no such method in Util::intersect()" << endl;
		break;
	}

	//copy the collected ids into a plain array for the caller
	_id_list_len = static_cast<int>(res.size());
	_id_list = new int[_id_list_len];
	for(int i = 0; i < _id_list_len; ++i)
	{
		_id_list[i] = res[i];
	}
}

View File

@ -68,6 +68,7 @@ in the sparql query can point to the same node in data graph)
#define STREAM_ON 1
#define READLINE_ON 1
#define MULTI_INDEX 1
//#define SO2P 1
//indicate that in debug mode
//#define DEBUG_STREAM
@ -200,6 +201,7 @@ public:
static HashFunction hash[];
static double logarithm(double _a, double _b);
static void intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2);
static char* l_trim(char * szOutput, const char *szInput);
static char* r_trim(char *szOutput, const char *szInput);