Merge branch 'hulin' into bookug

Merge hulin's code to handle the special predicate-variable structure in Join
This commit is contained in:
bookug 2017-07-17 21:07:47 +08:00
commit 72526284fd
7 changed files with 52 additions and 23 deletions

View File

@ -1164,7 +1164,7 @@ Database::getPreNum()
int int
Database::query(const string _query, ResultSet& _result_set, FILE* _fp) Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
{ {
GeneralEvaluation general_evaluation(this->vstree, this->kvstore, this->stringindex, this->pre2num, this->limitID_predicate, this->limitID_literal); GeneralEvaluation general_evaluation(this->vstree, this->kvstore, this->stringindex, this->pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity);
long tv_begin = Util::get_cur_time(); long tv_begin = Util::get_cur_time();

View File

@ -16,13 +16,15 @@ Join::Join()
this->result_list = NULL; this->result_list = NULL;
} }
Join::Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal) Join::Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,
TYPE_ENTITY_LITERAL_ID _limitID_entity)
{ {
this->kvstore = _kvstore; this->kvstore = _kvstore;
this->result_list = NULL; this->result_list = NULL;
this->pre2num = _pre2num; this->pre2num = _pre2num;
this->limitID_predicate = _limitID_predicate; this->limitID_predicate = _limitID_predicate;
this->limitID_literal = _limitID_literal; this->limitID_literal = _limitID_literal;
this->limitID_entity = _limitID_entity;
} }
Join::~Join() Join::~Join()
@ -1043,6 +1045,17 @@ Join::toStartJoin()
{ {
cout<<"Special Case: star graph whose pres are all var"<<endl; cout<<"Special Case: star graph whose pres are all var"<<endl;
//get all literals in this db //get all literals in this db
for(TYPE_ENTITY_LITERAL_ID i = 0; i < this->limitID_entity; ++i)
{
TYPE_ENTITY_LITERAL_ID id = i;
string literal = this->kvstore->getEntityByID(id);
if(literal == "")
{
continue;
}
//BETTER:cache the whole literal id list to improve the query throughput
literal_candidate_list.addID(id);
}
for(TYPE_ENTITY_LITERAL_ID i = 0; i < this->limitID_literal; ++i) for(TYPE_ENTITY_LITERAL_ID i = 0; i < this->limitID_literal; ++i)
{ {
TYPE_ENTITY_LITERAL_ID id = i + Util::LITERAL_FIRST_ID; TYPE_ENTITY_LITERAL_ID id = i + Util::LITERAL_FIRST_ID;
@ -1054,17 +1067,17 @@ Join::toStartJoin()
//BETTER:cache the whole literal id list to improve the query throughput //BETTER:cache the whole literal id list to improve the query throughput
literal_candidate_list.addID(id); literal_candidate_list.addID(id);
} }
}
IDList& origin_candidate_list = this->basic_query->getCandidateList(var_id); IDList& origin_candidate_list = this->basic_query->getCandidateList(var_id);
//int origin_candidate_list_len = origin_candidate_list.size(); //int origin_candidate_list_len = origin_candidate_list.size();
//origin_candidate_list.unionList(literal_candidate_list, true); origin_candidate_list.unionList(literal_candidate_list, false);
}
//int after_add_literal_candidate_list_len = origin_candidate_list.size(); //int after_add_literal_candidate_list_len = origin_candidate_list.size();
this->basic_query->setReady(var_id); this->basic_query->setReady(var_id);
cout<<"the prepared var id: "<<var_id<<endl; cout<<"the prepared var id: "<<var_id<<endl;
//cout<<"add literals num: "<<literal_candidate_list.size()<<endl; //cout<<"add literals num: "<<literal_candidate_list.size()<<endl;
cout<<"current can size: "<<origin_candidate_list.size()<<endl; //cout<<"current can size: "<<origin_candidate_list.size()<<endl;
} }
// use the appropriate method to join candidates // use the appropriate method to join candidates
@ -1212,11 +1225,19 @@ Join::add_new_to_results(TableIterator it, unsigned id)
//after remove VSTREE, modify here //after remove VSTREE, modify here
void void
Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal) Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_ready)
{ {
if (valid_ans_list == NULL) if (valid_ans_list == NULL)
{ {
if(_is_ready)
valid_ans_list = IDList::intersect(_can_list, id_list, id_list_len); valid_ans_list = IDList::intersect(_can_list, id_list, id_list_len);
else
{
valid_ans_list = new IDList();
for(int i = 0; i < id_list_len; i++)
valid_ans_list->addID(id_list[i]);
}
} }
else else
{ {
@ -1239,10 +1260,10 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* i
//However, the case is really rare in our test(the reason may be that the web graph is always very sparse) //However, the case is really rare in our test(the reason may be that the web graph is always very sparse)
//If we add a buffer for this case, will cause worse performance //If we add a buffer for this case, will cause worse performance
bool bool
Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal) Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_ready)
{ {
//if(_can_list_size == 0 && !_is_literal) //if(_can_list_size == 0 && !_is_literal)
if(_can_list_size == 0) if(_can_list_size == 0 && _is_ready)
{ {
return false; //empty result return false; //empty result
} }
@ -1370,7 +1391,7 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_l
//only can occur the first time, means cnt == 0 //only can occur the first time, means cnt == 0
//if(valid_ans_list.size() == 0) //if(valid_ans_list.size() == 0)
update_answer_list(valid_ans_list, _can_list, id_list, id_list_len, _is_literal); update_answer_list(valid_ans_list, _can_list, id_list, id_list_len, _is_ready);
delete[] id_list; delete[] id_list;
if (valid_ans_list->size() == 0) if (valid_ans_list->size() == 0)
{ {
@ -1395,7 +1416,7 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_l
unsigned* id_list2; unsigned* id_list2;
unsigned id_list2_len; unsigned id_list2_len;
this->kvstore->getobjIDlistBysubID(ele, id_list2, id_list2_len, true); this->kvstore->getobjIDlistBysubID(ele, id_list2, id_list2_len, true);
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal); update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_ready);
delete[] id_list2; delete[] id_list2;
if (valid_ans_list->size() == 0) if (valid_ans_list->size() == 0)
{ {
@ -1408,7 +1429,7 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_l
unsigned* id_list2; unsigned* id_list2;
unsigned id_list2_len; unsigned id_list2_len;
this->kvstore->getsubIDlistByobjID(ele, id_list2, id_list2_len, true); this->kvstore->getsubIDlistByobjID(ele, id_list2, id_list2_len, true);
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal); update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_ready);
delete[] id_list2; delete[] id_list2;
if (valid_ans_list->size() == 0) if (valid_ans_list->size() == 0)
{ {
@ -1584,6 +1605,7 @@ Join::multi_join()
//NOTICE: not cancle the followings, to be used for later //NOTICE: not cancle the followings, to be used for later
//TODO: if queries contain predicate variables, it may be hard to prepare candidates for a node //TODO: if queries contain predicate variables, it may be hard to prepare candidates for a node
//(so it is not ready, can also be represented by is_literal_var()) //(so it is not ready, can also be represented by is_literal_var())
/*
bool is_literal = this->is_literal_var(id2); bool is_literal = this->is_literal_var(id2);
if(is_literal) if(is_literal)
{ {
@ -1598,12 +1620,12 @@ Join::multi_join()
cout << "this var not contain literals: " << id2 << endl; cout << "this var not contain literals: " << id2 << endl;
#endif #endif
} }
*/
bool flag = false; bool flag = false;
#ifdef DEBUG_PRECISE #ifdef DEBUG_PRECISE
cout << "this edge uses not-prepared-join way" << endl; cout << "this edge uses not-prepared-join way" << endl;
#endif #endif
flag = this->join_two(edges, can_list, can_list_size, id2, is_literal); flag = this->join_two(edges, can_list, can_list_size, id2, this->basic_query->isReady(id2));
//if current_table is empty, ends directly //if current_table is empty, ends directly
if (!flag) if (!flag)

View File

@ -50,6 +50,7 @@ private:
TYPE_TRIPLE_NUM* pre2num; TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate; TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal; TYPE_ENTITY_LITERAL_ID limitID_literal;
TYPE_ENTITY_LITERAL_ID limitID_entity;
//used by score_node for parameters //used by score_node for parameters
static const unsigned PARAM_DEGREE = 1; static const unsigned PARAM_DEGREE = 1;
static const unsigned PARAM_SIZE = 1000000; static const unsigned PARAM_SIZE = 1000000;
@ -128,7 +129,7 @@ private:
//BETTER?:change these params to members in class //BETTER?:change these params to members in class
//void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, unsigned _can_list_size); //void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, unsigned _can_list_size);
void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal); void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal);
bool join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal); bool join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_ready);
bool multi_join(); bool multi_join();
//NOTICE:this is only used to join a BasicQuery //NOTICE:this is only used to join a BasicQuery
@ -136,7 +137,8 @@ private:
public: public:
Join(); Join();
Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal); Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,
TYPE_ENTITY_LITERAL_ID _limitID_entity);
//these functions can be called by Database //these functions can be called by Database
bool join_sparql(SPARQLquery& _sparql_query); bool join_sparql(SPARQLquery& _sparql_query);
bool join_basic(BasicQuery* _basic_query); bool join_basic(BasicQuery* _basic_query);

View File

@ -18,7 +18,7 @@ Strategy::Strategy()
//this->prepare_handler(); //this->prepare_handler();
} }
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal) Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,TYPE_ENTITY_LITERAL_ID _limitID_entity)
{ {
this->method = 0; this->method = 0;
this->kvstore = _kvstore; this->kvstore = _kvstore;
@ -26,6 +26,8 @@ Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num
this->pre2num = _pre2num; this->pre2num = _pre2num;
this->limitID_predicate = _limitID_predicate; this->limitID_predicate = _limitID_predicate;
this->limitID_literal = _limitID_literal; this->limitID_literal = _limitID_literal;
this->limitID_entity = _limitID_entity;
//this->prepare_handler(); //this->prepare_handler();
} }
@ -207,7 +209,7 @@ Strategy::handler0(BasicQuery* _bq, vector<unsigned*>& _result_list, ResultFilte
if (_result_filter != NULL) if (_result_filter != NULL)
_result_filter->candFilterWithResultHashTable(*_bq); _result_filter->candFilterWithResultHashTable(*_bq);
Join *join = new Join(kvstore, pre2num, this->limitID_predicate, this->limitID_literal); Join *join = new Join(kvstore, pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity);
join->join_basic(_bq); join->join_basic(_bq);
delete join; delete join;

View File

@ -23,7 +23,7 @@ class Strategy
{ {
public: public:
Strategy(); Strategy();
Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID); Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID,TYPE_ENTITY_LITERAL_ID);
~Strategy(); ~Strategy();
//select efficient strategy to do the sparql query //select efficient strategy to do the sparql query
bool handle(SPARQLquery&, ResultFilter* _result_filter = NULL); bool handle(SPARQLquery&, ResultFilter* _result_filter = NULL);
@ -35,6 +35,7 @@ private:
TYPE_TRIPLE_NUM* pre2num; TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate; TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal; TYPE_ENTITY_LITERAL_ID limitID_literal;
TYPE_ENTITY_LITERAL_ID limitID_entity;
//NOTICE: even the ID type is int, it is no problem and no waste that we use unsigned in answer //NOTICE: even the ID type is int, it is no problem and no waste that we use unsigned in answer
//(because -1, -2 or other invalid IDs can not be in answer) //(because -1, -2 or other invalid IDs can not be in answer)

View File

@ -56,7 +56,7 @@ bool GeneralEvaluation::doQuery()
return false; return false;
} }
this->strategy = Strategy(this->kvstore, this->vstree, this->pre2num, this->limitID_predicate, this->limitID_literal); this->strategy = Strategy(this->kvstore, this->vstree, this->pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity);
if (this->query_tree.checkWellDesigned()) if (this->query_tree.checkWellDesigned())
{ {
printf("=================\n"); printf("=================\n");

View File

@ -39,10 +39,12 @@ class GeneralEvaluation
TYPE_TRIPLE_NUM *pre2num; TYPE_TRIPLE_NUM *pre2num;
TYPE_PREDICATE_ID limitID_predicate; TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal; TYPE_ENTITY_LITERAL_ID limitID_literal;
TYPE_ENTITY_LITERAL_ID limitID_entity;
public: public:
GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM *_pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal): GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM *_pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,TYPE_ENTITY_LITERAL_ID _limitID_entity):
vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), temp_result(NULL) vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), limitID_entity(_limitID_entity),temp_result(NULL)
{ {
} }