Merge branch 'hulin' into bookug

Merge hulin's code to handle the special case in Join where all predicates are variables
This commit is contained in:
bookug 2017-07-17 21:07:47 +08:00
commit 72526284fd
7 changed files with 52 additions and 23 deletions

View File

@ -1164,7 +1164,7 @@ Database::getPreNum()
int
Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
{
GeneralEvaluation general_evaluation(this->vstree, this->kvstore, this->stringindex, this->pre2num, this->limitID_predicate, this->limitID_literal);
GeneralEvaluation general_evaluation(this->vstree, this->kvstore, this->stringindex, this->pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity);
long tv_begin = Util::get_cur_time();

View File

@ -16,13 +16,15 @@ Join::Join()
this->result_list = NULL;
}
//Construct a Join bound to a key-value store and the database ID limits.
//Each argument is copied into the member of the same name (without the leading underscore);
//result_list starts as NULL.
//_kvstore: key-value store handle used for ID/string lookups
//_pre2num: per-predicate counts (presumably triple numbers indexed by predicate ID -- TODO confirm)
//_limitID_predicate, _limitID_literal, _limitID_entity: ID limits for predicates, literals and entities
Join::Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal)
Join::Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,
TYPE_ENTITY_LITERAL_ID _limitID_entity)
{
this->kvstore = _kvstore;
this->result_list = NULL;
this->pre2num = _pre2num;
this->limitID_predicate = _limitID_predicate;
this->limitID_literal = _limitID_literal;
//used by toStartJoin() as the exclusive upper bound when scanning entity IDs
this->limitID_entity = _limitID_entity;
}
Join::~Join()
@ -1043,6 +1045,17 @@ Join::toStartJoin()
{
cout<<"Special Case: star graph whose pres are all var"<<endl;
//get all entities and then all literals in this db
for(TYPE_ENTITY_LITERAL_ID i = 0; i < this->limitID_entity; ++i)
{
TYPE_ENTITY_LITERAL_ID id = i;
string literal = this->kvstore->getEntityByID(id);
if(literal == "")
{
continue;
}
//BETTER:cache the whole literal id list to improve the query throughput
literal_candidate_list.addID(id);
}
for(TYPE_ENTITY_LITERAL_ID i = 0; i < this->limitID_literal; ++i)
{
TYPE_ENTITY_LITERAL_ID id = i + Util::LITERAL_FIRST_ID;
@ -1054,17 +1067,17 @@ Join::toStartJoin()
//BETTER:cache the whole literal id list to improve the query throughput
literal_candidate_list.addID(id);
}
IDList& origin_candidate_list = this->basic_query->getCandidateList(var_id);
//int origin_candidate_list_len = origin_candidate_list.size();
origin_candidate_list.unionList(literal_candidate_list, false);
}
IDList& origin_candidate_list = this->basic_query->getCandidateList(var_id);
//int origin_candidate_list_len = origin_candidate_list.size();
//origin_candidate_list.unionList(literal_candidate_list, true);
//int after_add_literal_candidate_list_len = origin_candidate_list.size();
this->basic_query->setReady(var_id);
cout<<"the prepared var id: "<<var_id<<endl;
//cout<<"add literals num: "<<literal_candidate_list.size()<<endl;
cout<<"current can size: "<<origin_candidate_list.size()<<endl;
//cout<<"current can size: "<<origin_candidate_list.size()<<endl;
}
// use the appropriate method to join candidates
@ -1212,11 +1225,19 @@ Join::add_new_to_results(TableIterator it, unsigned id)
//after remove VSTREE, modify here
void
Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal)
Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_ready)
{
if (valid_ans_list == NULL)
{
valid_ans_list = IDList::intersect(_can_list, id_list, id_list_len);
if(_is_ready)
valid_ans_list = IDList::intersect(_can_list, id_list, id_list_len);
else
{
valid_ans_list = new IDList();
for(int i = 0; i < id_list_len; i++)
valid_ans_list->addID(id_list[i]);
}
}
else
{
@ -1239,10 +1260,10 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* i
//However, this case is really rare in our tests (possibly because the web graph is always very sparse)
//Adding a buffer for this case would cause worse performance
bool
Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal)
Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_ready)
{
//if(_can_list_size == 0 && !_is_literal)
if(_can_list_size == 0)
if(_can_list_size == 0 && _is_ready)
{
return false; //empty result
}
@ -1370,7 +1391,7 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_l
//only can occur the first time, means cnt == 0
//if(valid_ans_list.size() == 0)
update_answer_list(valid_ans_list, _can_list, id_list, id_list_len, _is_literal);
update_answer_list(valid_ans_list, _can_list, id_list, id_list_len, _is_ready);
delete[] id_list;
if (valid_ans_list->size() == 0)
{
@ -1395,7 +1416,7 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_l
unsigned* id_list2;
unsigned id_list2_len;
this->kvstore->getobjIDlistBysubID(ele, id_list2, id_list2_len, true);
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal);
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_ready);
delete[] id_list2;
if (valid_ans_list->size() == 0)
{
@ -1408,7 +1429,7 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_l
unsigned* id_list2;
unsigned id_list2_len;
this->kvstore->getsubIDlistByobjID(ele, id_list2, id_list2_len, true);
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal);
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_ready);
delete[] id_list2;
if (valid_ans_list->size() == 0)
{
@ -1584,6 +1605,7 @@ Join::multi_join()
//NOTICE: do not remove the following code; it is kept for later use
//TODO: if queries contain predicate variables, it may be hard to prepare candidates for a node
//(so it is not ready, can also be represented by is_literal_var())
/*
bool is_literal = this->is_literal_var(id2);
if(is_literal)
{
@ -1598,12 +1620,12 @@ Join::multi_join()
cout << "this var not contain literals: " << id2 << endl;
#endif
}
*/
bool flag = false;
#ifdef DEBUG_PRECISE
cout << "this edge uses not-prepared-join way" << endl;
#endif
flag = this->join_two(edges, can_list, can_list_size, id2, is_literal);
flag = this->join_two(edges, can_list, can_list_size, id2, this->basic_query->isReady(id2));
//if current_table is empty, ends directly
if (!flag)

View File

@ -50,6 +50,7 @@ private:
TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
TYPE_ENTITY_LITERAL_ID limitID_entity;
//used by score_node for parameters
static const unsigned PARAM_DEGREE = 1;
static const unsigned PARAM_SIZE = 1000000;
@ -128,7 +129,7 @@ private:
//BETTER?:change these params to members in class
//void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, unsigned _can_list_size);
void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal);
bool join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal);
bool join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_ready);
bool multi_join();
//NOTICE:this is only used to join a BasicQuery
@ -136,7 +137,8 @@ private:
public:
Join();
Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal);
Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,
TYPE_ENTITY_LITERAL_ID _limitID_entity);
//these functions can be called by Database
bool join_sparql(SPARQLquery& _sparql_query);
bool join_basic(BasicQuery* _basic_query);

View File

@ -18,7 +18,7 @@ Strategy::Strategy()
//this->prepare_handler();
}
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal)
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,TYPE_ENTITY_LITERAL_ID _limitID_entity)
{
this->method = 0;
this->kvstore = _kvstore;
@ -26,6 +26,8 @@ Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num
this->pre2num = _pre2num;
this->limitID_predicate = _limitID_predicate;
this->limitID_literal = _limitID_literal;
this->limitID_entity = _limitID_entity;
//this->prepare_handler();
}
@ -207,7 +209,7 @@ Strategy::handler0(BasicQuery* _bq, vector<unsigned*>& _result_list, ResultFilte
if (_result_filter != NULL)
_result_filter->candFilterWithResultHashTable(*_bq);
Join *join = new Join(kvstore, pre2num, this->limitID_predicate, this->limitID_literal);
Join *join = new Join(kvstore, pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity);
join->join_basic(_bq);
delete join;

View File

@ -23,7 +23,7 @@ class Strategy
{
public:
Strategy();
Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID);
Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID,TYPE_ENTITY_LITERAL_ID);
~Strategy();
//select efficient strategy to do the sparql query
bool handle(SPARQLquery&, ResultFilter* _result_filter = NULL);
@ -35,6 +35,7 @@ private:
TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
TYPE_ENTITY_LITERAL_ID limitID_entity;
//NOTICE: even the ID type is int, it is no problem and no waste that we use unsigned in answer
//(because -1, -2 or other invalid IDs can not be in answer)

View File

@ -56,7 +56,7 @@ bool GeneralEvaluation::doQuery()
return false;
}
this->strategy = Strategy(this->kvstore, this->vstree, this->pre2num, this->limitID_predicate, this->limitID_literal);
this->strategy = Strategy(this->kvstore, this->vstree, this->pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity);
if (this->query_tree.checkWellDesigned())
{
printf("=================\n");

View File

@ -39,10 +39,12 @@ class GeneralEvaluation
TYPE_TRIPLE_NUM *pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
TYPE_ENTITY_LITERAL_ID limitID_entity;
public:
//Constructor: stores the given index/store handles and ID limits in the members of the same name
//through the initializer list, and sets temp_result to NULL. The body is intentionally empty.
GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM *_pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal):
vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), temp_result(NULL)
GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM *_pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,TYPE_ENTITY_LITERAL_ID _limitID_entity):
vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), limitID_entity(_limitID_entity),temp_result(NULL)
{
}