fix bugs in Database::join() and Database::only_pre_filter_after_join().

The bugs were found by jialonghan@RUC.

author: hanshuo
This commit is contained in:
zengli 2015-04-15 15:38:51 +08:00
parent 257e90ffa6
commit 57a89e57e4
3 changed files with 66 additions and 63 deletions

View File

@ -1630,18 +1630,21 @@ bool Database::join
} }
else else
/* pre_id == -1 means the predicate could not be found in the RDF file, so the result set of this SPARQL query should be empty.
* Note that SPARQL queries with predicate variables (?p) are not supported.
*/
{ {
if (_edge_type == BasicQuery::EDGE_IN) id_list_len = 0;
{ // if (_edge_type == BasicQuery::EDGE_IN)
// std::cout << "\t\to2s" << std::endl; // {
kvstore->getsubIDlistByobjID(itr_result[_var_id], // kvstore->getsubIDlistByobjID(itr_result[_var_id],
id_list, id_list_len); // id_list, id_list_len);
} // }
else // else
{ // {
kvstore->getobjIDlistBysubID(itr_result[_var_id], // kvstore->getobjIDlistBysubID(itr_result[_var_id],
id_list, id_list_len); // id_list, id_list_len);
} // }
} }
if (id_list_len == 0) if (id_list_len == 0)
@ -1779,17 +1782,21 @@ bool Database::select(vector<int*>& _result_list,int _var_id,int _pre_id,int _va
} }
} }
else else
/* pre_id == -1 means the predicate could not be found in the RDF file, so the result set of this SPARQL query should be empty.
* Note that SPARQL queries with predicate variables (?p) are not supported.
*/
{ {
if (_edge_type == BasicQuery::EDGE_IN) id_list_len = 0;
{ // if (_edge_type == BasicQuery::EDGE_IN)
kvstore->getsubIDlistByobjID(itr_result[_var_id], // {
id_list, id_list_len); // kvstore->getsubIDlistByobjID(itr_result[_var_id],
} // id_list, id_list_len);
else // }
{ // else
kvstore->getobjIDlistBysubID(itr_result[_var_id], // {
id_list, id_list_len); // kvstore->getobjIDlistBysubID(itr_result[_var_id],
} // id_list, id_list_len);
// }
} }
if (id_list_len == 0) if (id_list_len == 0)
@ -1849,6 +1856,10 @@ bool Database::join(SPARQLquery& _sparql_query)
this->only_pre_filter_after_join(basic_query); this->only_pre_filter_after_join(basic_query);
long after_pre_filter_after_join = util::get_cur_time(); long after_pre_filter_after_join = util::get_cur_time();
cout << "after only_pre_filter_after_join : used " << (after_pre_filter_after_join-after_joinbasic) << " ms" << endl; cout << "after only_pre_filter_after_join : used " << (after_pre_filter_after_join-after_joinbasic) << " ms" << endl;
// remove invalid and duplicate result at the end.
basic_query->dupRemoval_invalidRemoval();
std::cout << "Final result:" << (basic_query->getResultList()).size() << std::endl;
} }
return true; return true;
} }
@ -1946,12 +1957,6 @@ bool Database::join_basic(BasicQuery* basic_query)
} }
} }
basic_query->dupRemoval_invalidRemoval();
vector<int*> &result = basic_query->getResultList();
int result_size = result.size();
std::cout << "\t\tFinal result:" << result_size << std::endl;
cout << "OOOOOUT join basic" << endl; cout << "OOOOOUT join basic" << endl;
return true; return true;
} }
@ -2050,13 +2055,13 @@ void Database::literal_edge_filter(BasicQuery* basic_query, int _var_i)
// std::cout << "\t\tedge[" << j << "] "<< lit_string << " has id " << lit_id << ""; // std::cout << "\t\tedge[" << j << "] "<< lit_string << " has id " << lit_id << "";
// std::cout << " preid:" << pre_id << " type:" << edge_type // std::cout << " preid:" << pre_id << " type:" << edge_type
// << std::endl; // << std::endl;
{ // {
// stringstream _ss; // stringstream _ss;
// _ss << "\t\tedge[" << j << "] "<< lit_string << " has id " << lit_id << ""; // _ss << "\t\tedge[" << j << "] "<< lit_string << " has id " << lit_id << "";
// _ss << " preid:" << pre_id << " type:" << edge_type // _ss << " preid:" << pre_id << " type:" << edge_type
// << std::endl; // << std::endl;
// Database::log(_ss.str()); // Database::log(_ss.str());
} // }
int id_list_len = 0; int id_list_len = 0;
int* id_list = NULL; int* id_list = NULL;
@ -2088,16 +2093,16 @@ void Database::literal_edge_filter(BasicQuery* basic_query, int _var_i)
} }
//debug //debug
// { // {
// stringstream _ss; // stringstream _ss;
// _ss << "id_list: "; // _ss << "id_list: ";
// for (int i=0;i<id_list_len;i++) // for (int i=0;i<id_list_len;i++)
// { // {
// _ss << "[" << id_list[i] << "]\t"; // _ss << "[" << id_list[i] << "]\t";
// } // }
// _ss<<endl; // _ss<<endl;
// Database::log(_ss.str()); // Database::log(_ss.str());
// } // }
if(id_list_len == 0) if(id_list_len == 0)
{ {
@ -2286,7 +2291,7 @@ void Database::only_pre_filter_after_join(BasicQuery* basic_query)
for (vector<int>::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++) for (vector<int>::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++)
{ {
int pre_id = (*itr_pre); int pre_id = (*itr_pre);
bool exist_preid = util::bsearch_preid_uporder(pre_id, pair_list, pair_len); exist_preid = util::bsearch_preid_uporder(pre_id, pair_list, pair_len);
if (!exist_preid) if (!exist_preid)
{ {
break; break;
@ -2300,12 +2305,13 @@ void Database::only_pre_filter_after_join(BasicQuery* basic_query)
for (vector<int>::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++) for (vector<int>::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++)
{ {
int pre_id = (*itr_pre); int pre_id = (*itr_pre);
bool exist_preid = util::bsearch_preid_uporder(pre_id, pair_list, pair_len); exist_preid = util::bsearch_preid_uporder(pre_id, pair_list, pair_len);
if (!exist_preid) if (!exist_preid)
{ {
break; break;
} }
} }
} }
delete []pair_list; delete []pair_list;

View File

@ -462,7 +462,6 @@ void BasicQuery::initial()
void BasicQuery::addInVarNotInSelect() void BasicQuery::addInVarNotInSelect()
{ {
/* all vars in this set is met before at least once */ /* all vars in this set is met before at least once */
int _v_n_i_s_next_id = this->var_str2id.size() + 0; int _v_n_i_s_next_id = this->var_str2id.size() + 0;
for(int i = 0; i < this->triple_vt.size(); i ++) for(int i = 0; i < this->triple_vt.size(); i ++)
{ {
@ -671,6 +670,9 @@ int BasicQuery::cmp_result(const void* _a, const void* _b)
return 0; return 0;
} }
/*
* I think this function is inefficient and inferior; we should rewrite it later. -- hanshuo
*/
bool BasicQuery::dupRemoval_invalidRemoval() bool BasicQuery::dupRemoval_invalidRemoval()
{ {
int result_size = this->result_list.size(); int result_size = this->result_list.size();

View File

@ -113,20 +113,15 @@ void VSTree::retrieve(SPARQLquery& _query)
this->retrieveEntity(entityBitSet, idListPtr); this->retrieveEntity(entityBitSet, idListPtr);
//debug //debug
{
std::stringstream _ss;
_ss << "candidate num: " << idListPtr->size() << endl;
// if (i == 0)
// { // {
// for (int j=0;j<idListPtr->size();j++) // std::stringstream _ss;
// _ss << idListPtr->getID(j) << " "; // _ss << "candidate num: " << idListPtr->size() << endl;
// _ss << endl;
// _ss << "isExist 473738: " << (idListPtr->isExistID(473738)?"true":"false") <<endl;
// _ss << "isExist 473472: " << (idListPtr->isExistID(473472)?"true":"false") <<endl;
// _ss << "isExist 473473: " << (idListPtr->isExistID(473473)?"true":"false") <<endl;
// Database::log(_ss.str());
// } // }
_ss << endl;
_ss << "isExist 473738: " << (idListPtr->isExistID(473738)?"true":"false") <<endl;
_ss << "isExist 473472: " << (idListPtr->isExistID(473472)?"true":"false") <<endl;
_ss << "isExist 473473: " << (idListPtr->isExistID(473473)?"true":"false") <<endl;
Database::log(_ss.str());
}
} }
} }