From 029f8cfdf7f757d91e2303c4f289bd20adb2c2a9 Mon Sep 17 00:00:00 2001 From: hulin12138 Date: Mon, 17 Jul 2017 16:27:41 +0800 Subject: [PATCH] add handler for pre vars --- Database/Database.cpp | 2 +- Database/Join.cpp | 50 ++++++++++++++++++++++++++----------- Database/Join.h | 6 +++-- Database/Strategy.cpp | 6 +++-- Database/Strategy.h | 3 ++- Query/GeneralEvaluation.cpp | 2 +- Query/GeneralEvaluation.h | 6 +++-- 7 files changed, 52 insertions(+), 23 deletions(-) diff --git a/Database/Database.cpp b/Database/Database.cpp index 7a79652..eb14e90 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -876,7 +876,7 @@ Database::getPreNum() int Database::query(const string _query, ResultSet& _result_set, FILE* _fp) { - GeneralEvaluation general_evaluation(this->vstree, this->kvstore, this->stringindex, this->pre2num, this->limitID_predicate, this->limitID_literal); + GeneralEvaluation general_evaluation(this->vstree, this->kvstore, this->stringindex, this->pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity); long tv_begin = Util::get_cur_time(); diff --git a/Database/Join.cpp b/Database/Join.cpp index 292bc64..475b19f 100644 --- a/Database/Join.cpp +++ b/Database/Join.cpp @@ -16,13 +16,15 @@ Join::Join() this->result_list = NULL; } -Join::Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal) +Join::Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal, + TYPE_ENTITY_LITERAL_ID _limitID_entity) { this->kvstore = _kvstore; this->result_list = NULL; this->pre2num = _pre2num; this->limitID_predicate = _limitID_predicate; this->limitID_literal = _limitID_literal; + this->limitID_entity = _limitID_entity; } Join::~Join() @@ -1043,6 +1045,17 @@ Join::toStartJoin() { cout<<"Special Case: star graph whose pres are all var"<limitID_entity; ++i) + { + TYPE_ENTITY_LITERAL_ID id = i; + string literal = this->kvstore->getEntityByID(id); + if(literal == "") + { + continue; + } + //BETTER:cache the whole literal id list to improve the query throughput + literal_candidate_list.addID(id); + } for(TYPE_ENTITY_LITERAL_ID i = 0; i < this->limitID_literal; ++i) { TYPE_ENTITY_LITERAL_ID id = i + Util::LITERAL_FIRST_ID; @@ -1054,17 +1067,17 @@ Join::toStartJoin() //BETTER:cache the whole literal id list to improve the query throughput literal_candidate_list.addID(id); } + IDList& origin_candidate_list = this->basic_query->getCandidateList(var_id); + //int origin_candidate_list_len = origin_candidate_list.size(); + origin_candidate_list.unionList(literal_candidate_list, false); } - IDList& origin_candidate_list = this->basic_query->getCandidateList(var_id); - //int origin_candidate_list_len = origin_candidate_list.size(); - //origin_candidate_list.unionList(literal_candidate_list, true); //int after_add_literal_candidate_list_len = origin_candidate_list.size(); this->basic_query->setReady(var_id); cout<<"the prepared var id: "<addID(id_list[i]); + } + } else { @@ -1239,10 +1260,10 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* i //However, the case is really rare in our test(the reason may be that the web graph is always very sparse) //If we add a buffer for this case, will cause worse performance bool -Join::join_two(vector< vector >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal) +Join::join_two(vector< vector >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_ready) { //if(_can_list_size == 0 && !_is_literal) - if(_can_list_size == 0) + if(_can_list_size == 0 && _is_ready) { return false; //empty result } @@ -1370,7 +1391,7 @@ Join::join_two(vector< vector >& _edges, IDList& _can_list, unsigned _can_l //only can occur the first time, means cnt == 0 //if(valid_ans_list.size() == 0) - update_answer_list(valid_ans_list, _can_list, id_list, id_list_len, _is_literal); + update_answer_list(valid_ans_list, _can_list, id_list, id_list_len, _is_ready); delete[] id_list; if (valid_ans_list->size() == 0) { @@ -1395,7 +1416,7 @@ Join::join_two(vector< vector >& _edges, IDList& _can_list, unsigned _can_l unsigned* id_list2; unsigned id_list2_len; this->kvstore->getobjIDlistBysubID(ele, id_list2, id_list2_len, true); - update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal); + update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_ready); delete[] id_list2; if (valid_ans_list->size() == 0) { @@ -1408,7 +1429,7 @@ Join::join_two(vector< vector >& _edges, IDList& _can_list, unsigned _can_l unsigned* id_list2; unsigned id_list2_len; this->kvstore->getsubIDlistByobjID(ele, id_list2, id_list2_len, true); - update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal); + update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_ready); delete[] id_list2; if (valid_ans_list->size() == 0) { @@ -1584,6 +1605,7 @@ Join::multi_join() //NOTICE: not cancle the followings, to be used for later //TODO: if queries contain predicate variables, it may be hard to prepare candidates for a node //(so it is not ready, can also be represented by is_literal_var()) + /* bool is_literal = this->is_literal_var(id2); if(is_literal) { @@ -1598,12 +1620,12 @@ Join::multi_join() cout << "this var not contain literals: " << id2 << endl; #endif } - + */ bool flag = false; #ifdef DEBUG_PRECISE cout << "this edge uses not-prepared-join way" << endl; #endif - flag = this->join_two(edges, can_list, can_list_size, id2, is_literal); + flag = this->join_two(edges, can_list, can_list_size, id2, this->basic_query->isReady(id2)); //if current_table is empty, ends directly if (!flag) diff --git a/Database/Join.h b/Database/Join.h index 415e626..1662cdb 100644 --- a/Database/Join.h +++ b/Database/Join.h @@ -50,6 +50,7 @@ private: TYPE_TRIPLE_NUM* pre2num; TYPE_PREDICATE_ID limitID_predicate; TYPE_ENTITY_LITERAL_ID limitID_literal; + TYPE_ENTITY_LITERAL_ID limitID_entity; //used by score_node for parameters static const unsigned PARAM_DEGREE = 1; static const unsigned PARAM_SIZE = 1000000; @@ -128,7 +129,7 @@ private: //BETTER?:change these params to members in class //void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector& _edges, int _id, unsigned _can_list_size); void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal); - bool join_two(vector< vector >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal); + bool join_two(vector< vector >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_ready); bool multi_join(); //NOTICE:this is only used to join a BasicQuery @@ -136,7 +137,8 @@ private: public: Join(); - Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal); + Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal, + TYPE_ENTITY_LITERAL_ID _limitID_entity); //these functions can be called by Database bool join_sparql(SPARQLquery& _sparql_query); bool join_basic(BasicQuery* _basic_query); diff --git a/Database/Strategy.cpp b/Database/Strategy.cpp index f588648..942aec5 100644 --- a/Database/Strategy.cpp +++ b/Database/Strategy.cpp @@ -18,7 +18,7 @@ Strategy::Strategy() //this->prepare_handler(); } -Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal) +Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,TYPE_ENTITY_LITERAL_ID _limitID_entity) { this->method = 0; this->kvstore = _kvstore; @@ -26,6 +26,8 @@ Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num this->pre2num = _pre2num; this->limitID_predicate = _limitID_predicate; this->limitID_literal = _limitID_literal; + this->limitID_entity = _limitID_entity; + //this->prepare_handler(); } @@ -207,7 +209,7 @@ Strategy::handler0(BasicQuery* _bq, vector& _result_list, ResultFilte if (_result_filter != NULL) _result_filter->candFilterWithResultHashTable(*_bq); - Join *join = new Join(kvstore, pre2num, this->limitID_predicate, this->limitID_literal); + Join *join = new Join(kvstore, pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity); join->join_basic(_bq); delete join; diff --git a/Database/Strategy.h b/Database/Strategy.h index 05e498b..a3f9917 100644 --- a/Database/Strategy.h +++ b/Database/Strategy.h @@ -23,7 +23,7 @@ class Strategy { public: Strategy(); - Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID); + Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID,TYPE_ENTITY_LITERAL_ID); ~Strategy(); //select efficient strategy to do the sparql query bool handle(SPARQLquery&, ResultFilter* _result_filter = NULL); @@ -35,6 +35,7 @@ private: TYPE_TRIPLE_NUM* pre2num; TYPE_PREDICATE_ID limitID_predicate; TYPE_ENTITY_LITERAL_ID limitID_literal; + TYPE_ENTITY_LITERAL_ID limitID_entity; //NOTICE: even the ID type is int, it is no problem and no waste that we use unsigned in answer //(because -1, -2 or other invalid IDs can not be in answer) diff --git a/Query/GeneralEvaluation.cpp b/Query/GeneralEvaluation.cpp index 841085c..ceb59e5 100644 --- a/Query/GeneralEvaluation.cpp +++ b/Query/GeneralEvaluation.cpp @@ -56,7 +56,7 @@ bool GeneralEvaluation::doQuery() return false; } - this->strategy = Strategy(this->kvstore, this->vstree, this->pre2num, this->limitID_predicate, this->limitID_literal); + this->strategy = Strategy(this->kvstore, this->vstree, this->pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity); if (this->query_tree.checkWellDesigned()) { printf("=================\n"); diff --git a/Query/GeneralEvaluation.h b/Query/GeneralEvaluation.h index 41ba0aa..5f086fd 100644 --- a/Query/GeneralEvaluation.h +++ b/Query/GeneralEvaluation.h @@ -39,10 +39,12 @@ class GeneralEvaluation TYPE_TRIPLE_NUM *pre2num; TYPE_PREDICATE_ID limitID_predicate; TYPE_ENTITY_LITERAL_ID limitID_literal; + TYPE_ENTITY_LITERAL_ID limitID_entity; + public: - GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM *_pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal): - vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), temp_result(NULL) + GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM *_pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal,TYPE_ENTITY_LITERAL_ID _limitID_entity): + vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), limitID_entity(_limitID_entity),temp_result(NULL) { }