/*
 * Database.cpp
 *
 *  Created on: 2014-6-20
 *      Author: liyouhuan
 */
#include "Database.h"

/*
 * Create a handle for the database named _name.
 * The name doubles as the root directory: the KVstore is created over
 * "<name>/kv_store" and the VSTree over "<name>/vs_store".
 * The shared debug log "db.log" is opened here if no instance opened it yet.
 */
Database::Database(std::string _name){
    this->name = _name;
    std::string store_path = this->name;

    this->signature_binary_file = "signature.binary";
    this->six_tuples_file = "six_tuples";

    std::string kv_store_path = store_path + "/kv_store";
    this->kvstore = new KVstore(kv_store_path);

    std::string vstree_store_path = store_path + "/vs_store";
    this->vstree = new VSTree(vstree_store_path);

    this->encode_mode = Database::STRING_MODE;
    this->sub_num = 0;
    this->pre_num = 0;
    this->literal_num = 0;

    if(Database::fp_debug == NULL)
    {
        fp_debug = fopen("db.log", "w");
        if(fp_debug == NULL){
            cerr << "debug_log open failed" << endl;
        }
    }
}

/*
 * Release the index objects and the debug log.
 * vstree may already be NULL if unload() ran; deleting NULL is a no-op.
 * fp_debug is shared by every Database, so it is closed at most once and
 * reset to NULL; this also lets a later constructor reopen the log.
 */
Database::~Database()
{
    delete this->vstree;
    this->vstree = NULL;
    delete this->kvstore;
    this->kvstore = NULL;

    if(Database::fp_debug != NULL)
    {
        fclose(Database::fp_debug);
        Database::fp_debug = NULL;
    }
}

/*
 * Load the VS-tree and open the key-value store.
 * Returns false if the tree was dropped by unload() or fails to load.
 */
bool Database::load()
{
    /* after unload() there is no tree object left to load into */
    if (this->vstree == NULL)
    {
        return false;
    }

    bool flag = (this->vstree)->loadTree();
    if (!flag)
    {
        return false;
    }

    (this->kvstore)->open();
    cout << "finish load" << endl;

    return true;
}

/*
 * Release the key-value store and destroy the VS-tree.
 * The pointer is cleared so that the destructor does not delete it again.
 */
bool Database::unload()
{
    (this->kvstore)->release();
    delete this->vstree;
    this->vstree = NULL;

    return true;
}

/* name (and root directory) of this database */
string Database::getName()
{
    return this->name;
}

/*
 * Answer the SPARQL text _query and fill _result_set.
 * Stages: parse -> encode against kvstore -> retrieve from vstree ->
 * join -> build final result. The elapsed time of each stage is printed.
 */
bool Database::query(const string _query, ResultSet& _result_set)
{
    long tv_begin = util::get_cur_time();

    DBparser _parser;
    SPARQLquery _sparql_q(_query);
    _parser.sparqlParser(_query, _sparql_q);

    long tv_parse = util::get_cur_time();
    cout << "after Parsing, used " << (tv_parse - tv_begin) << endl;
    cout << "after Parsing..." << endl << _sparql_q.triple_str() << endl;

    _sparql_q.encodeQuery(this->kvstore);

    cout << "sparqlSTR:\t" << _sparql_q.to_str() << endl;

    long tv_encode = util::get_cur_time();
    cout << "after Encode, used " << (tv_encode - tv_parse) << "ms." << endl;

    _result_set.select_var_num = _sparql_q.getQueryVarNum();

    (this->vstree)->retrieve(_sparql_q);

    long tv_retrieve = util::get_cur_time();
    cout << "after Retrieve, used " << (tv_retrieve - tv_encode) << "ms."
<< endl;

    /* join the candidates retrieved for each query variable */
    this->join(_sparql_q);

    long tv_join = util::get_cur_time();
    cout << "after Join, used " << (tv_join - tv_retrieve) << "ms." << endl;

    this->getFinalResult(_sparql_q, _result_set);

    long tv_final = util::get_cur_time();
    cout << "after finalResult, used " << (tv_final - tv_join) << "ms." << endl;

    cout << "Total time used: " << (tv_final - tv_begin) << "ms." << endl;

    //testing...
    cout << "final result is : " << endl;
    cout << _result_set.to_str() << endl;

    return true;
}

/*
 * Insert one triple.
 * Unknown subject / predicate / object strings are assigned fresh ids first
 * (an unknown object is always registered as a literal).
 * Returns false when all three parts were already known and exist_triple()
 * reports the triple as present; otherwise the tuple lists and the subject's
 * VS-tree entry are updated and true is returned.
 */
bool Database::insert(const Triple& _triple)
{
    int _sub_id = (this->kvstore)->getIDByEntity(_triple.subject);
    bool _is_new_sub = false;
    /* if sub does not exist */
    if(_sub_id == -1){
        _is_new_sub = true;
        int _new_subid = this->sub_num;
        this->sub_num ++;
        _sub_id = _new_subid;
        (this->kvstore)->setIDByEntity(_triple.subject, _sub_id);
        (this->kvstore)->setEntityByID(_sub_id, _triple.subject);
    }

    int _pre_id = (this->kvstore)->getIDByPredicate(_triple.predicate);
    bool _is_new_pre = false;
    /* if predicate does not exist */
    if(_pre_id == -1){
        _is_new_pre = true;
        int _new_pre_id = this->pre_num;
        this->pre_num ++;
        _pre_id = _new_pre_id;
        (this->kvstore)->setIDByPredicate(_triple.predicate, _pre_id);
        (this->kvstore)->setPredicateByID(_pre_id, _triple.predicate);
    }

    /* object is either entity or literal */
    int _obj_id = (this->kvstore)->getIDByEntity(_triple.object);
    if(_obj_id == -1)
    {
        _obj_id = (this->kvstore)->getIDByLiteral(_triple.object);
    }
    bool _is_new_obj = false;
    /* neither a known entity nor a known literal: register a new literal,
     * literal ids start at Database::LITERAL_FIRST_ID */
    if(_obj_id == -1)
    {
        _is_new_obj = true;
        int _new_literal_id = Database::LITERAL_FIRST_ID + this->literal_num;
        this->literal_num ++;
        _obj_id = _new_literal_id;
        (this->kvstore)->setIDByLiteral(_triple.object, _obj_id);
        (this->kvstore)->setLiteralByID(_obj_id, _triple.object);
    }

    /* NOTE(review): _entity_id is not read anywhere in this function */
    int _entity_id = _sub_id;

    /* if this is not a new triple, return directly */
    bool _triple_exist = false;
    /* the triple can only exist already if none of its parts is new */
    if(!_is_new_sub && !_is_new_pre && !_is_new_obj )
    {
        _triple_exist = this->exist_triple(_sub_id, _pre_id, _obj_id);
    }
    if(_triple_exist)
    {
        return false;
    }

    /* update sp2o op2s s2po o2ps s2o o2s */
(this->kvstore)->updateTupleslist_insert(_sub_id, _pre_id, _obj_id);

    /* signature contribution of this triple to its subject */
    EntityBitSet _entity_bitset;
    _entity_bitset.reset();
    this->encodeTriple2EntityBitSet(_entity_bitset, &_triple);

    /* if new entity then insert it, else update it */
    if(_is_new_sub)
    {
        SigEntry _sig(_sub_id, _entity_bitset);
        (this->vstree)->insertEntry(_sig);
    }
    else
    {
        (this->vstree)->updateEntry(_sub_id, _entity_bitset);
    }

    return true;
}

/*
 * Remove one triple.
 * Returns false if any of subject / predicate / object is unknown or the
 * triple is not stored. Otherwise the tuple lists are updated and the
 * subject's VS-tree entry is either removed (subject has no edges left)
 * or replaced by a freshly computed signature.
 */
bool Database::remove(const Triple& _triple)
{
    int _sub_id = (this->kvstore)->getIDByEntity(_triple.subject);
    int _pre_id = (this->kvstore)->getIDByPredicate(_triple.predicate);
    /* object may be an entity or a literal: try entity first */
    int _obj_id = (this->kvstore)->getIDByEntity(_triple.object);
    if(_obj_id == -1){
        _obj_id = (this->kvstore)->getIDByLiteral(_triple.object);
    }

    if(_sub_id == -1 || _pre_id == -1 || _obj_id == -1)
    {
        return false;
    }
    bool _exist_triple = this->exist_triple(_sub_id, _pre_id, _obj_id);
    if(! _exist_triple)
    {
        return false;
    }

    /* remove from sp2o op2s s2po o2ps s2o o2s
     * sub2id, pre2id and obj2id will not be updated */
    (this->kvstore)->updateTupleslist_remove(_sub_id, _pre_id, _obj_id);

    int _sub_degree = (this->kvstore)->getEntityDegree(_sub_id);

    /* if subject become an isolated point, remove its corresponding entry */
    if(_sub_degree == 0)
    {
        (this->vstree)->removeEntry(_sub_id);
    }
    /* else re-calculate the signature of subject & replace that in vstree */
    else
    {
        EntityBitSet _entity_bitset;
        _entity_bitset.reset();
        this->calculateEntityBitSet(_sub_id, _entity_bitset);
        (this->vstree)->replaceEntry(_sub_id, _entity_bitset);
    }

    return true;
}

/*
 * Build the database from the RDF file _rdf_file:
 * create the directory layout, encode the RDF data, then build the VS-tree
 * from the signature file written during encoding.
 */
bool Database::build(const string& _rdf_file)
{
    /* directory layout: <name>/, <name>/kv_store, <name>/vs_store */
    std::string store_path = this->name;
    util::create_dir(store_path);

    std::string kv_store_path = store_path + "/kv_store";
    util::create_dir(kv_store_path);

    std::string vstree_store_path = store_path + "/vs_store";
    util::create_dir(vstree_store_path);

    cout << "begin encode RDF from : " << _rdf_file << " ..." << endl;
    this->encodeRDF(_rdf_file);
    cout << "finish encode."
<< endl; std::string _entry_file = this->getSignatureBFile(); (this->kvstore)->open(); cout << "begin build VS-Tree on " << _rdf_file << "..." << endl; (this->vstree)->buildTree(_entry_file); cout << "finish build VS-Tree." << endl; return true; } /* root Path of this DB + sixTuplesFile */ string Database::getSixTuplesFile() { return this->getStorePath() + "/" + this->six_tuples_file; } /* root Path of this DB + signatureBFile */ string Database::getSignatureBFile() { return this->getStorePath() + "/" + this->signature_binary_file; } /* * private methods: */ string Database::getStorePath() { return this->name; } /* encode relative signature data of the query graph */ void Database::buildSparqlSignature(SPARQLquery & _sparql_q) { std::vector& _query_union = _sparql_q.getBasicQueryVec(); for(unsigned int i_bq = 0; i_bq < _query_union.size(); i_bq ++) { BasicQuery* _basic_q = _query_union[i_bq]; _basic_q->encodeBasicQuery(this->kvstore, _sparql_q.getQueryVar()); } } bool Database::calculateEntityBitSet(int _sub_id, EntityBitSet & _bitset) { int* _polist = NULL; int _list_len = 0; (this->kvstore)->getpreIDobjIDlistBysubID(_sub_id, _polist, _list_len); Triple _triple; _triple.subject = (this->kvstore)->getEntityByID(_sub_id); for(int i = 0; i < _list_len; i += 2) { int _pre_id = _polist[i]; int _obj_id = _polist[i+1]; _triple.object = (this->kvstore)->getEntityByID(_obj_id); if(_triple.object == "") { _triple.object = (this->kvstore)->getLiteralByID(_obj_id); } _triple.predicate = (this->kvstore)->getPredicateByID(_pre_id); this->encodeTriple2EntityBitSet(_bitset, &_triple); } return true; } /* encode Triple into SigEntry */ bool Database::encodeTriple2EntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple) { int _pre_id = -1; { _pre_id = (this->kvstore)->getIDByPredicate(_p_triple->predicate); /* checking whether _pre_id is -1 or not will be more reliable */ } Signature::encodePredicate2Entity(_pre_id, _bitset, BasicQuery::EDGE_OUT); if(this->encode_mode == 
Database::ID_MODE) { /* TBD */ } else if(this->encode_mode == Database::STRING_MODE) { Signature::encodeStr2Entity( (_p_triple->subject).c_str(), _bitset); Signature::encodeStr2Entity( (_p_triple->object ).c_str(), _bitset); } return true; } /* check whether the relative 3-tuples exist * usually, through sp2olist */ bool Database::exist_triple(int _sub_id, int _pre_id, int _obj_id) { int* _objidlist = NULL; int _list_len = 0; (this->kvstore)->getobjIDlistBysubIDpreID(_sub_id, _pre_id, _objidlist, _list_len); for(int i = 0; i < _list_len; i ++) { if(_objidlist[i] == _obj_id) { delete[] _objidlist; return true; } } return false; } /* * _rdf_file denotes the path of the RDF file, where stores the rdf data * there are many step will be finished in this function: * 1. assign tuples of RDF data with id, and store the map into KVstore * 2. build signature of each entity * * multi-thread implementation may save lots of time */ bool Database::encodeRDF(const string _rdf_file) { Database::log("In encodeRDF"); int ** _p_id_tuples = NULL; int _id_tuples_max = 0; /* map sub2id and pre2id, storing in kvstore */ this->sub2id_pre2id(_rdf_file, _p_id_tuples, _id_tuples_max); /* map literal2id, and encode RDF data into signature in the meantime */ this->literal2id_RDFintoSignature(_rdf_file, _p_id_tuples, _id_tuples_max); /* map subid 2 objid_list & * subIDpreID 2 objid_list & * subID 2 _list */ this->s2o_sp2o_s2po(_p_id_tuples, _id_tuples_max); /* map objid 2 subid_list & * objIDpreID 2 subid_list & * objID 2 _list */ this->o2s_op2s_o2ps(_p_id_tuples, _id_tuples_max); Database::log("finish encodeRDF"); return true; } /* * only after we determine the entityID(subid), * we can determine the literalID(objid) */ bool Database::sub2id_pre2id(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max) { int _id_tuples_size;; {/* initial */ _id_tuples_max = 10*1000*1000; _p_id_tuples = new int*[_id_tuples_max]; _id_tuples_size = 0; this->sub_num = 0; this->pre_num = 0; 
this->triples_num = 0; (this->kvstore)->open_entity2id(KVstore::CREATE_MODE); (this->kvstore)->open_id2entity(KVstore::CREATE_MODE); (this->kvstore)->open_predicate2id(KVstore::CREATE_MODE); (this->kvstore)->open_id2predicate(KVstore::CREATE_MODE); } Database::log("finish initial sub2id_pre2id"); {/* map sub2id and pre2id */ ifstream _fin(_rdf_file.c_str()); if(!_fin){ cerr << "sub2id&pre2id: Fail to open : " << _rdf_file << endl; exit(0); } Triple* triple_array = new Triple[DBparser::TRIPLE_NUM_PER_GROUP]; DBparser _parser; /* In while(true): For sub2id and pre2id. * parsed all RDF triples one group by one group * when parsed out an group RDF triples * for each triple * assign subject with subid, and predicate with preid * when get all sub2id, * we can assign object with objid in next while(true) * so that we can differentiate subject and object by their id * */ Database::log("==> while(true)"); while(true) { int parse_triple_num = 0; _parser.rdfParser(_fin, triple_array, parse_triple_num); { stringstream _ss; _ss << "finish rdfparser" << this->triples_num << endl; Database::log(_ss.str()); cout << _ss.str() << endl; } if(parse_triple_num == 0){ break; } /* Process the Triple one by one */ for(int i = 0; i < parse_triple_num; i ++) { this->triples_num ++; /* if the _id_tuples exceeds, double the space */ if(_id_tuples_size == _id_tuples_max){ int _new_tuples_len = _id_tuples_max * 2; int** _new_id_tuples = new int*[_new_tuples_len]; memcpy(_new_id_tuples, _p_id_tuples, sizeof(int*) * _id_tuples_max); delete[] _p_id_tuples; _p_id_tuples = _new_id_tuples; _id_tuples_max = _new_tuples_len; } /* * For subject * (all subject is entity, some object is entity, the other is literal) * */ std::string _sub = triple_array[i].subject; int _sub_id = (this->kvstore)->getIDByEntity(_sub); if(_sub_id == -1){ _sub_id = this->sub_num; (this->kvstore)->setIDByEntity(_sub, _sub_id); (this->kvstore)->setEntityByID(_sub_id, _sub); this->sub_num ++; } /* * For predicate * */ std::string 
_pre = triple_array[i].predicate; int _pre_id = (this->kvstore)->getIDByPredicate(_pre); if(_pre_id == -1){ _pre_id = this->pre_num; (this->kvstore)->setIDByPredicate(_pre, _pre_id); (this->kvstore)->setPredicateByID(_pre_id, _pre); this->pre_num ++; } //debug // { // stringstream _ss; // _ss << "sub: " << _sub << "\tpre: " << _pre <entity_num = this->sub_num; (this->kvstore)->release(); } { std::stringstream _ss; _ss << "finish sub2id pre2id" << endl; _ss << "tripleNum is " << this->triples_num << endl; _ss << "subNum is " << this->sub_num << endl; _ss << "preNum is " << this->pre_num << endl; Database::log(_ss.str()); cout << _ss.str() << endl; } return true; } /* map literal2id and encode RDF data into signature in the meantime * literal id begin with Database::LITERAL_FIRST_ID */ bool Database::literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max) { Database::log("IN literal2id..."); EntityBitSet* _entity_bitset = new EntityBitSet[this->sub_num]; for(int i = 0; i < this->sub_num; i ++){ _entity_bitset[i].reset(); } (this->kvstore)->open_id2literal(KVstore::CREATE_MODE); (this->kvstore)->open_literal2id(KVstore::CREATE_MODE); (this->kvstore)->open_entity2id(KVstore::READ_WRITE_MODE); /* map obj2id */ ifstream _fin(_rdf_file.c_str()); if(!_fin){ cerr << "obj2id: Fail to open : " << _rdf_file << endl; exit(0); } std::string _six_tuples_file = this->getSixTuplesFile(); std::ofstream _six_tuples_fout(_six_tuples_file.c_str()); if(! _six_tuples_fout){ cerr << "obj2id: failed to open: " << _six_tuples_file << endl; exit(0); } Triple* triple_array = new Triple[DBparser::TRIPLE_NUM_PER_GROUP]; DBparser _parser; this->entity_num = this->sub_num; int _i_tuples = 0; EntityBitSet _tmp_bitset; /* In while(true): For obj2id . 
* parsed all RDF triples one group by one group * when parsed out an group RDF triples * for each triple * assign object with objid * */ Database::log("literal2id: while(true)"); while(true) { /* get next group of triples from rdfParser */ int parse_triple_num = 0; _parser.rdfParser(_fin, triple_array, parse_triple_num); { stringstream _ss; _ss << "finish rdfparser" << _i_tuples << endl; Database::log(_ss.str()); cout << _ss.str() << endl; } if(parse_triple_num == 0){ break; } /* Process the Triple one by one */ for(int i = 0; i < parse_triple_num; i ++) { /* * For object(literal) * */ std::string _obj = triple_array[i].object; /* check whether obj is an entity or not * if not, obj is a literal and assign it with a literal id */ int _obj_id = (this->kvstore)->getIDByEntity(_obj); if(Database::debug_2) { } /* if obj is an literal */ if(_obj_id == -1) { int _literal_id = (this->kvstore)->getIDByLiteral(_obj); /* if this literal does not exist before */ if(_literal_id == -1) { int _new_literal_id = Database::LITERAL_FIRST_ID + (this->literal_num); (this->kvstore)->setIDByLiteral(_obj, _new_literal_id); (this->kvstore)->setLiteralByID(_new_literal_id, _obj); this->literal_num ++; _obj_id = _new_literal_id; } else { _obj_id = _literal_id; } } // { // stringstream _ss; // _ss << "object: " << _obj << " has id " << _obj_id << endl; // Database::log(_ss.str()); // } _p_id_tuples[_i_tuples][2] = _obj_id; /* * save six tuples * */ { _six_tuples_fout << _p_id_tuples[_i_tuples][0] << '\t' << _p_id_tuples[_i_tuples][1] << '\t' << _p_id_tuples[_i_tuples][2] << '\t' << triple_array[i].subject << '\t' << triple_array[i].predicate << '\t' << triple_array[i].object << endl; } /* * calculate entity signature */ int _sub_id = _p_id_tuples[_i_tuples][0]; int _pre_id = _p_id_tuples[_i_tuples][1]; _tmp_bitset.reset(); Signature::encodePredicate2Entity(_pre_id, _tmp_bitset, BasicQuery::EDGE_OUT); Signature::encodeStr2Entity((triple_array[i].object).c_str(), _tmp_bitset); 
_entity_bitset[_sub_id] |= _tmp_bitset; if(this->objIDIsEntityID(_obj_id)) { _tmp_bitset.reset(); Signature::encodePredicate2Entity(_pre_id, _tmp_bitset, BasicQuery::EDGE_IN); Signature::encodeStr2Entity((triple_array[i].subject).c_str(), _tmp_bitset); _entity_bitset[_obj_id] |= _tmp_bitset; } _i_tuples ++; } }/* end for while(true) */ //debug { string debug_entity_name = ""; int debug_entity_id = (this->kvstore)->getIDByEntity(debug_entity_name); stringstream _ss; _ss << "display [" << debug_entity_name << ":" << debug_entity_id << "] bitset:" <kvstore)->release(); {/* save all entity_signature into binary file */ string _sig_binary_file = this->getSignatureBFile(); FILE* _sig_fp = fopen(_sig_binary_file.c_str(), "wb"); if(_sig_fp == NULL){ cerr << "Failed to open : " << _sig_binary_file << endl; } EntityBitSet _all_bitset; for(int i = 0; i < this->sub_num; i ++) { SigEntry* _sig = new SigEntry(EntitySig(_entity_bitset[i]), i); //debug // if(i == 0 || i == 2) // { // stringstream _ss; // _ss << "encodeRDF: " << i << " =" << _sig->getEntitySig().entityBitSet << endl; // Database::log(_ss.str()); // } fwrite(_sig, sizeof(SigEntry), 1, _sig_fp); _all_bitset |= _entity_bitset[i]; delete _sig; } fclose(_sig_fp); //debug // { // int sub_id = (this->kvstore)->getIDByEntity(""); // stringstream _ss; // _ss << "sub: " << "(" << sub_id << ")" << endl; // _ss << "bitset: " << Signature::BitSet2str(_entity_bitset[sub_id]) << endl; // Database::log(_ss.str()); // } delete[] _entity_bitset; } Database::log("OUT literal2id..."); return true; } /* map subid 2 objid_list & * subIDpreID 2 objid_list & * subID 2 _list */ bool Database::s2o_sp2o_s2po(int** _p_id_tuples, int _id_tuples_max) { qsort(_p_id_tuples, this->triples_num, sizeof(int*), Database:: _spo_cmp); int* _oidlist_s = NULL; int* _oidlist_sp = NULL; int* _pidoidlist_s = NULL; int _oidlist_s_len = 0; int _oidlist_sp_len = 0; int _pidoidlist_s_len = 0; /* only _oidlist_s will be assigned with space * _oidlist_sp is always 
a part of _oidlist_s
     * just a pointer is enough
     * */
    int _oidlist_max = 0;
    int _pidoidlist_max = 0;

    /* true means next sub is a different one from the previous one */
    bool _sub_change = true;

    /* true means next (sub, pre) pair is different from the previous pair */
    bool _sub_pre_change = true;

    /* true means next pre is different from the previous one */
    bool _pre_change = true;

    Database::log("finish s2p_sp2o_s2po initial");

    (this->kvstore)->open_subid2objidlist(KVstore::CREATE_MODE);
    (this->kvstore)->open_subIDpreID2objIDlist(KVstore::CREATE_MODE);
    (this->kvstore)->open_subID2preIDobjIDlist(KVstore::CREATE_MODE);

    /* one pass over the sorted tuples; a new pair of buffers is started
     * whenever the previous tuple closed a subject group */
    for(int i = 0; i < this->triples_num; i ++)
    {
        if(_sub_change)
        {
            /* oidlist */
            _oidlist_max = 1000;
            _oidlist_s = new int[_oidlist_max];
            _oidlist_sp = _oidlist_s;
            _oidlist_s_len = 0;
            _oidlist_sp_len = 0;
            /* pidoidlist */
            _pidoidlist_max = 1000 * 2;
            _pidoidlist_s = new int[_pidoidlist_max];
            _pidoidlist_s_len = 0;
        }
        /* enlarge the space when needed */
        if(_oidlist_s_len == _oidlist_max)
        {
            _oidlist_max *= 10;
            int * _new_oidlist_s = new int[_oidlist_max];
            memcpy(_new_oidlist_s, _oidlist_s, sizeof(int) * _oidlist_s_len);
            /* (_oidlist_sp-_oidlist_s) is the offset of _oidlist_sp */
            /* NOTE(review): _oidlist_sp is NULL here when the previous tuple
             * closed a (sub, pre) group; the value computed is then
             * meaningless but is overwritten below because
             * _sub_pre_change is true in that case */
            _oidlist_sp = _new_oidlist_s + (_oidlist_sp-_oidlist_s);
            delete[] _oidlist_s;
            _oidlist_s = _new_oidlist_s;
        }

        /* enlarge the space when needed */
        /* capacity and length are both even, so the equality test is hit
         * before the two writes of this iteration */
        if(_pidoidlist_s_len == _pidoidlist_max)
        {
            _pidoidlist_max *= 10;
            int* _new_pidoidlist_s = new int[_pidoidlist_max];
            memcpy(_new_pidoidlist_s, _pidoidlist_s, sizeof(int) * _pidoidlist_s_len);
            delete[] _pidoidlist_s;
            _pidoidlist_s = _new_pidoidlist_s;
        }

        int _sub_id = _p_id_tuples[i][0];
        int _pre_id = _p_id_tuples[i][1];
        int _obj_id = _p_id_tuples[i][2];
//      {
//          stringstream _ss;
//          _ss << _sub_id << "\t" << _pre_id << "\t" << _obj_id << endl;
//          Database::log(_ss.str());
//      }

        /* add objid to list */
        _oidlist_s[_oidlist_s_len] = _obj_id;

        /* if (sub, pre) changes, _oidlist_sp should be adjusted */
        if(_sub_pre_change){
            _oidlist_sp = _oidlist_s + _oidlist_s_len;
        }

_oidlist_s_len ++;
        _oidlist_sp_len ++;

        /* add (pre, obj) to list */
        _pidoidlist_s[_pidoidlist_s_len] = _pre_id;
        _pidoidlist_s[_pidoidlist_s_len+1] = _obj_id;
        _pidoidlist_s_len += 2;

        /* whether sub in new triple changes or not */
        /* the last tuple always closes its group; the first operand also
         * keeps [i+1] from being read past the end */
        _sub_change = (i+1 == this->triples_num) ||
                (_p_id_tuples[i][0] != _p_id_tuples[i+1][0]);

        /* whether pre in new triple changes or not */
        _pre_change = (i+1 == this->triples_num) ||
                (_p_id_tuples[i][1] != _p_id_tuples[i+1][1]);

        /* whether (sub, pre) in new triple changes or not */
        _sub_pre_change = _sub_change || _pre_change;

        /* (sub, pre) group finished: store its slice of the object list */
        if(_sub_pre_change)
        {
            (this->kvstore)->setobjIDlistBysubIDpreID(_sub_id, _pre_id, _oidlist_sp, _oidlist_sp_len);
            _oidlist_sp = NULL;
            _oidlist_sp_len = 0;
        }

        /* subject group finished: store both per-subject lists, free buffers */
        if(_sub_change)
        {
            /* map subid 2 objidlist */
            util::sort(_oidlist_s, _oidlist_s_len);
            (this->kvstore)->setobjIDlistBysubID(_sub_id, _oidlist_s, _oidlist_s_len);
            delete[] _oidlist_s;
            _oidlist_s = NULL;
            _oidlist_sp = NULL;
            _oidlist_s_len = 0;

            /* map subid 2 preid&objidlist */
            (this->kvstore)->setpreIDobjIDlistBysubID(_sub_id, _pidoidlist_s, _pidoidlist_s_len);
            delete[] _pidoidlist_s;
            _pidoidlist_s = NULL;
            _pidoidlist_s_len = 0;
        }

    }/* end for( 0 to this->triple_num) */

    (this->kvstore)->release();

    Database::log("OUT s2po...");

    return true;
}

/* map objid 2 subid_list &
 * objIDpreID 2 subid_list &
 * objID 2 (preID, subID)_list
 *
 * Sorts _p_id_tuples with _ops_cmp and then walks the tuples once, grouping
 * by object and by (object, predicate). _id_tuples_max is not used here. */
bool Database::o2s_op2s_o2ps(int** _p_id_tuples, int _id_tuples_max)
{
    Database::log("IN o2ps...");

    /* NOTE(review): the elements are int*; sizeof(int**) has the same value
     * on the usual platforms but sizeof(int*) is what is meant */
    qsort(_p_id_tuples, this->triples_num, sizeof(int**), Database::_ops_cmp);
    int* _sidlist_o = NULL;
    int* _sidlist_op = NULL;
    int* _pidsidlist_o = NULL;
    int _sidlist_o_len = 0;
    int _sidlist_op_len = 0;
    int _pidsidlist_o_len = 0;
    /* only _sidlist_o will be assigned with space
     * _sidlist_op is always a part of _sidlist_o
     * just a pointer is enough */
    int _sidlist_max = 0;
    int _pidsidlist_max = 0;

    /* true means next obj is a different one from the previous one */
    bool _obj_change = true;

    /* true means next (obj, pre) pair is different from the previous pair */
    bool _obj_pre_change = true;

    /* true means
next pre is a different one from the previous one */
    bool _pre_change = true;

    (this->kvstore)->open_objid2subidlist(KVstore::CREATE_MODE);
    (this->kvstore)->open_objIDpreID2subIDlist(KVstore::CREATE_MODE);
    (this->kvstore)->open_objID2preIDsubIDlist(KVstore::CREATE_MODE);

    /* one pass over the sorted tuples; a new pair of buffers is started
     * whenever the previous tuple closed an object group */
    for(int i = 0; i < this->triples_num; i ++)
    {
        if(_obj_change)
        {
            /* sidlist */
            _sidlist_max = 1000;
            _sidlist_o = new int[_sidlist_max];
            _sidlist_op = _sidlist_o;
            _sidlist_o_len = 0;
            _sidlist_op_len = 0;
            /* pidsidlist */
            _pidsidlist_max = 1000 * 2;
            _pidsidlist_o = new int[_pidsidlist_max];
            _pidsidlist_o_len = 0;
        }
        /* enlarge the space when needed */
        if(_sidlist_o_len == _sidlist_max)
        {
            _sidlist_max *= 10;
            int * _new_sidlist_o = new int[_sidlist_max];
            memcpy(_new_sidlist_o, _sidlist_o, sizeof(int)*_sidlist_o_len);
            /* (_sidlist_op-_sidlist_o) is the offset of _sidlist_op */
            _sidlist_op = _new_sidlist_o + (_sidlist_op-_sidlist_o);
            delete[] _sidlist_o;
            _sidlist_o = _new_sidlist_o;
        }

        /* enlarge the space when needed */
        if(_pidsidlist_o_len == _pidsidlist_max)
        {
            _pidsidlist_max *= 10;
            int* _new_pidsidlist_o = new int[_pidsidlist_max];
            memcpy(_new_pidsidlist_o, _pidsidlist_o, sizeof(int) * _pidsidlist_o_len);
            delete[] _pidsidlist_o;
            _pidsidlist_o = _new_pidsidlist_o;
        }

        int _sub_id = _p_id_tuples[i][0];
        int _pre_id = _p_id_tuples[i][1];
        int _obj_id = _p_id_tuples[i][2];

        /* add subid to list */
        _sidlist_o[_sidlist_o_len] = _sub_id;

        /* if (obj, pre) changes, _sidlist_op should be adjusted */
        if(_obj_pre_change){
            _sidlist_op = _sidlist_o + _sidlist_o_len;
        }

        _sidlist_o_len ++;
        _sidlist_op_len ++;

        /* add (pre, sub) to list */
        _pidsidlist_o[_pidsidlist_o_len] = _pre_id;
        _pidsidlist_o[_pidsidlist_o_len+1] = _sub_id;;
        _pidsidlist_o_len += 2;

        /* whether obj in new triple changes or not */
        /* the last tuple always closes its group; the first operand also
         * keeps [i+1] from being read past the end */
        _obj_change = (i+1 == this->triples_num) ||
                (_p_id_tuples[i][2] != _p_id_tuples[i+1][2]);

        /* whether pre in new triple changes or not */
        _pre_change = (i+1 == this->triples_num) ||
                (_p_id_tuples[i][1] != _p_id_tuples[i+1][1]);

        /* whether (obj, pre) in new triple
changes or not */
        _obj_pre_change = _obj_change || _pre_change;

        /* (obj, pre) group finished: store its slice of the subject list */
        if(_obj_pre_change)
        {
            (this->kvstore)->setsubIDlistByobjIDpreID(_obj_id, _pre_id, _sidlist_op, _sidlist_op_len);
            _sidlist_op = NULL;
            _sidlist_op_len = 0;
        }

        /* object group finished: store both per-object lists, free buffers */
        if(_obj_change)
        {
            /* map objid 2 subidlist */
            util::sort(_sidlist_o, _sidlist_o_len);
            (this->kvstore)->setsubIDlistByobjID(_obj_id, _sidlist_o, _sidlist_o_len);
            delete[] _sidlist_o;
            _sidlist_o = NULL;
            _sidlist_op = NULL;
            _sidlist_o_len = 0;

            /* map objid 2 preid&subidlist */
            (this->kvstore)->setpreIDsubIDlistByobjID(_obj_id, _pidsidlist_o, _pidsidlist_o_len);
            delete[] _pidsidlist_o;
            _pidsidlist_o = NULL;
            _pidsidlist_o_len = 0;
        }

    }/* end for( 0 to this->triple_num) */

    (this->kvstore)->release();

    Database::log("OUT o2ps...");

    return true;
}

/* compare function for qsort
 * _a and _b point at array elements of type int*, each referring to a
 * tuple [sub, pre, obj]; order is by sub, then pre, then obj.
 * NOTE(review): the result is a plain difference of two ids - fine as long
 * as ids are non-negative, which is what the id assignment in this file
 * appears to produce; confirm. */
int Database::_spo_cmp(const void* _a, const void* _b)
{
    int** _p_a = (int**)_a;
    int** _p_b = (int**)_b;

    {/* compare subid first */
        int _sub_id_a = (*_p_a)[0];
        int _sub_id_b = (*_p_b)[0];
        if(_sub_id_a != _sub_id_b){
            return _sub_id_a - _sub_id_b;
        }
    }

    {/* then preid */
        int _pre_id_a = (*_p_a)[1];
        int _pre_id_b = (*_p_b)[1];
        if(_pre_id_a != _pre_id_b){
            return _pre_id_a - _pre_id_b;
        }
    }

    {/* objid at last */
        int _obj_id_a = (*_p_a)[2];
        int _obj_id_b = (*_p_b)[2];
        if(_obj_id_a != _obj_id_b){
            return _obj_id_a - _obj_id_b;
        }
    }
    return 0;
}

/* compare function for qsort
 * same element layout as _spo_cmp; order is by obj, then pre, then sub */
int Database::_ops_cmp(const void* _a, const void* _b)
{
    int** _p_a = (int**)_a;
    int** _p_b = (int**)_b;
    {/* compare objid first */
        int _obj_id_a = (*_p_a)[2];
        int _obj_id_b = (*_p_b)[2];
        if(_obj_id_a != _obj_id_b){
            return _obj_id_a - _obj_id_b;
        }
    }

    {/* then preid */
        int _pre_id_a = (*_p_a)[1];
        int _pre_id_b = (*_p_b)[1];
        if(_pre_id_a != _pre_id_b){
            return _pre_id_a - _pre_id_b;
        }
    }

    {/* subid at last */
        int _sub_id_a = (*_p_a)[0];
        int _sub_id_b = (*_p_b)[0];
        if(_sub_id_a != _sub_id_b){
            return _sub_id_a - _sub_id_b;
        }
    }

    return 0;
}

/* ids below Database::LITERAL_FIRST_ID are entity ids, the rest literal ids */
bool Database::objIDIsEntityID(int _id)
{
    return _id < Database::LITERAL_FIRST_ID;
}

bool
Database::join
(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type,
 int _var_num, bool shouldAddLiteral, IDList& _can_list)
{
    /*
     * Extend every valid partial result by variable _var_id2.
     *
     * Each row of _result_list is an int array of _var_num + 1 slots: one id
     * per variable plus a validity slot at index _var_num (-1 = invalid).
     * For each valid row the neighbours of row[_var_id] are fetched from the
     * kvstore (restricted to predicate _pre_id when it is >= 0; subjects for
     * EDGE_IN, objects otherwise) and matched against _can_list.
     * Rows without any match are marked invalid.
     */
//  std::cout << "*****Join [" << _var_id << "]\tpre:" << _pre_id << "\t[" << _var_id2 << "]\t"
//          << "result before: " << _result_list.size() << "\t etype:" << _edge_type
//          << std::endl;
    {
//      stringstream _ss;
//      _ss << "\n\n\n\n*****Join [" << _var_id << "]\tpre:" << _pre_id << "\t[" << _var_id2 << "]\t"
//              << "result before: " << _result_list.size() << "\t etype:" << _edge_type
//              << std::endl;
//      Database::log(_ss.str());
    }
//  cout << _can_list.to_str() << endl;

    int* id_list = NULL;
    int id_list_len = 0;
    /* rows created for the 2nd, 3rd, ... match of a row; appended after the
     * loop so that _result_list is not modified while it is iterated */
    vector<int*> new_result_list;
    new_result_list.clear();

    vector<int*>::iterator itr = _result_list.begin();

    bool has_preid = (_pre_id >= 0);

    for ( ; itr != _result_list.end(); itr++)
    {
        int* itr_result = (*itr);
        if (itr_result[_var_num] == -1)
        {
            continue;
        }

        /* nothing can match: no candidates and no free literals allowed */
        if (_can_list.size()==0 && !shouldAddLiteral)
        {
            itr_result[_var_num] = -1;
            continue;
        }
//      std::string _can_str = (this->kvstore)->getEntityByID((itr_result[_var_id]));
//      std::cout << "\t\t v[" << _var_id << "] has: ["
//              << _can_str << ", " << (*itr)[_var_id] << "]"
//              << std::endl;
//      {
//          stringstream _ss;
//          _ss << "\t\t v[" << _var_id << "] has: ["
//                  << _can_str << ", " << (*itr)[_var_id] << "]"
//                  << std::endl;
//          Database::log(_ss.str());
//      }

        /* start every lookup from a clean state: the previous row's list has
         * been freed, so a lookup that leaves its outputs untouched must not
         * be mistaken for a fresh result */
        id_list = NULL;
        id_list_len = 0;

        if (has_preid)
        {
            if (_edge_type == BasicQuery::EDGE_IN)
            {
                kvstore->getsubIDlistByobjIDpreID(itr_result[_var_id],
                        _pre_id, id_list, id_list_len);
            }
            else
            {
                kvstore->getobjIDlistBysubIDpreID(itr_result[_var_id],
                        _pre_id, id_list, id_list_len);
            }
        }
        else
        {
            if (_edge_type == BasicQuery::EDGE_IN)
            {
//              std::cout << "\t\to2s" << std::endl;
                kvstore->getsubIDlistByobjID(itr_result[_var_id],
                        id_list, id_list_len);
            }
            else
            {
                kvstore->getobjIDlistBysubID(itr_result[_var_id],
                        id_list, id_list_len);
            }
        }

        if (id_list_len == 0)
        {
            /* release a possibly allocated empty list; no-op on NULL */
            delete[] id_list;
            id_list = NULL;
            itr_result[_var_num] = -1;
            continue;
        }
//      std::cout << "\t\tid_list_len: " << id_list_len << std::endl << "\t\t";
//
for(int i = 0; i < id_list_len; i ++){ // cout << "[" << id_list[i] << "] "; // } // cout << endl; { // stringstream _ss; // _ss << "\t\tid_list_len: " << id_list_len << std::endl << "\t\t"; // for(int i = 0; i < id_list_len; i ++){ // _ss << "[" << id_list[i] << ", " << this->kvstore->getEntityByID(id_list[i])<< "] "; // } // _ss << endl; // Database::log(_ss.str()); } bool no_any_match_yet = true; stringstream _tmp_ss; for (int i = 0; i < id_list_len; i++) { bool found_in_id_list = _can_list.bsearch_uporder(id_list[i]) >= 0; bool should_add_this_literal = shouldAddLiteral && !this->objIDIsEntityID(id_list[i]); // if we found this element(entity/literal) in var1's candidate list, // or this is a literal element and var2 is a free literal variable, // we should add this one to result array. if (found_in_id_list || should_add_this_literal) { if (no_any_match_yet) { no_any_match_yet = false; itr_result[_var_id2] = id_list[i]; // _tmp_ss << "[first, " << id_list[i] << ", " // << this->kvstore->getEntityByID(id_list[i]) << "\n"; // cout << "\t\tfirst" ; // cout << "\t\tpair : " << id_list[i] << endl; } else { int* result = new int[_var_num + 1]; memcpy(result, itr_result, sizeof(int) * (_var_num + 1)); result[_var_id2] = id_list[i]; new_result_list.push_back(result); // cout << "\t\tpair : " << result[_var_id2] << endl; // cout << "\t\t new result has size " << new_result_list.size() << endl; { // _tmp_ss << "\t\tp: [" << result[_var_id2] << ", " << this->kvstore->getEntityByID(result[_var_id2])<< "]\t"; } } } } if(no_any_match_yet) { // _tmp_ss << "no-match" << endl; itr_result[_var_num] = -1; // Database::log(_tmp_ss.str()); } // _tmp_ss << "match" << endl; // Database::log(_tmp_ss.str()); delete[] id_list; } if (!new_result_list.empty()) { vector::iterator _begin = new_result_list.begin(); vector::iterator _end = new_result_list.end(); _result_list.insert(_result_list.end(), _begin, _end); } int invalid_num = 0; for(int i = 0; i < _result_list.size(); i ++) { 
if(_result_list[i][_var_num] == -1) { invalid_num ++; } } // cout << "\t\tresult size: " << _result_list.size() << " invalid:" << invalid_num << endl; { // stringstream _ss; // _ss << "\t\tresult size: " << _result_list.size() << " invalid:" << invalid_num << endl; // for(int i = 0; i < _result_list.size(); i ++) // { // for(int j = 0; j <= _var_num; j ++){ // _ss << "[" << this->kvstore->getEntityByID(_result_list[i][j]) << "(" // << _result_list[i][j] << ")] "; // } // _ss << "\n"; // } // Database::log(_ss.str()); } // cout << _result_list[0][0] << " & " << _result_list[0][1] << endl; std::cout << "*****Join done" << std::endl; return true; } bool Database::select(vector& _result_list,int _var_id,int _pre_id,int _var_id2,const char _edge_type,int _var_num) { cout << "*****In select" << endl; int* id_list; int id_list_len; vector::iterator itr = _result_list.begin(); for ( ; itr != _result_list.end(); itr++) { int* itr_result = (*itr); if (itr_result[_var_num] == -1) { continue; } bool ret = false; if (_pre_id >= 0) { if (_edge_type == BasicQuery::EDGE_IN) { kvstore->getsubIDlistByobjIDpreID(itr_result[_var_id], _pre_id, id_list, id_list_len); } else { kvstore->getobjIDlistBysubIDpreID(itr_result[_var_id], _pre_id, id_list, id_list_len); } } else { if (_edge_type == BasicQuery::EDGE_IN) { kvstore->getsubIDlistByobjID(itr_result[_var_id], id_list, id_list_len); } else { kvstore->getobjIDlistBysubID(itr_result[_var_id], id_list, id_list_len); } } if (id_list_len == 0) { itr_result[_var_num] = -1; continue; } if (util::bsearch_int_uporder(itr_result[_var_id2], id_list, id_list_len) == -1) { itr_result[_var_num] = -1; } delete[] id_list; } int invalid_num = 0; for(int i = 0; i < _result_list.size(); i ++) { if(_result_list[i][_var_num] == -1) { invalid_num ++; } } cout << "\t\tresult size: " << _result_list.size() << " invalid:" << invalid_num << endl; // cout << "*****Select done" << endl; return true; } /* * join on the vector of CandidateList, available after 
retrieve from the VSTREE * and store the resut in _result_set * */ bool Database::join(SPARQLquery& _sparql_query) { int basic_query_num=_sparql_query.getBasicQueryNum(); //join each basic query for (int i=0; i< basic_query_num; i++){ cout<<"Basic query "<filter_before_join(basic_query); long after_filter = util::get_cur_time(); cout << "after filter_before_join: used " << (after_filter-begin) << " ms" << endl; this->add_literal_candidate(basic_query); long after_add_literal = util::get_cur_time(); cout << "after add_literal_candidate: used " << (after_add_literal-after_filter) << " ms" << endl; this->join_basic(basic_query); long after_joinbasic = util::get_cur_time(); cout << "after join_basic : used " << (after_joinbasic-after_add_literal) << " ms" << endl; this->only_pre_filter_after_join(basic_query); long after_pre_filter_after_join = util::get_cur_time(); cout << "after only_pre_filter_after_join : used " << (after_pre_filter_after_join-after_joinbasic) << " ms" << endl; } return true; } bool Database::join_basic(BasicQuery* basic_query) { cout << "IIIIIIN join basic" << endl; int var_num = basic_query->getVarNum(); int triple_num = basic_query->getTripleNum(); //mark dealed_id_list and dealed_triple, 0 not processed, 1 for processed bool* dealed_id_list = new bool[var_num]; bool* dealed_triple = new bool[triple_num]; memset(dealed_id_list, 0, sizeof(bool) * var_num); memset(dealed_triple, 0, sizeof(bool) * triple_num); int start_var_id = basic_query->getVarID_FirstProcessWhenJoin(); int start_var_size = basic_query->getCandidateSize(start_var_id); // initial p_result_list, push min_var_list in vector* p_result_list = &basic_query->getResultList(); p_result_list->clear(); //start_var_size == 0 no answer in this basic query if (start_var_size == 0) { return false; } //debug { stringstream _ss; _ss << "start_var_size=" << start_var_size << endl; _ss << "star_var=" << basic_query->getVarName(start_var_id) << "(var[" << start_var_id << "])" << endl; 
Database::log(_ss.str()); } IDList* p_min_var_list = &(basic_query->getCandidateList(start_var_id)); for (int i = 0; i < start_var_size; i++) { int* result_var = new int[var_num + 1]; memset(result_var, 0, sizeof(int) * (var_num + 1)); result_var[start_var_id] = p_min_var_list->getID(i); p_result_list->push_back(result_var); } //BFS search stack var_stack; var_stack.push(start_var_id); dealed_id_list[start_var_id] = true; while (!var_stack.empty()) { int var_id = var_stack.top(); var_stack.pop(); int var_degree = basic_query->getVarDegree(var_id); for (int i = 0; i < var_degree; i++) { // each triple/edge need to be processed only once. int edge_id = basic_query->getEdgeID(var_id, i); if (dealed_triple[edge_id]) { continue; } int var_id2 = basic_query->getEdgeNeighborID(var_id, i); if (var_id2 == -1) { continue; } int pre_id = basic_query->getEdgePreID(var_id, i); char edge_type = basic_query->getEdgeType(var_id, i); IDList& can_list = basic_query->getCandidateList(var_id2); if (!dealed_id_list[var_id2]) { //join bool shouldVar2AddLiteralCandidateWhenJoin = basic_query->isFreeLiteralVariable(var_id2) && !basic_query->isAddedLiteralCandidate(var_id2); join(*p_result_list, var_id, pre_id, var_id2, edge_type, var_num, shouldVar2AddLiteralCandidateWhenJoin, can_list); var_stack.push(var_id2); basic_query->setAddedLiteralCandidate(var_id2); dealed_id_list[var_id2] = true; } else { //select select(*p_result_list, var_id, pre_id, var_id2, edge_type,var_num); } dealed_triple[edge_id] = true; } } basic_query->dupRemoval_invalidRemoval(); vector &result = basic_query->getResultList(); int result_size = result.size(); std::cout << "\t\tFinal result:" << result_size << std::endl; cout << "OOOOOUT join basic" << endl; return true; } void Database::filter_before_join(BasicQuery* basic_query) { cout << "*****IIIIIIN filter_before_join" << endl; int var_num = 0; var_num = basic_query->getVarNum(); for (int i = 0; i < var_num; i++) { std::cout << "\tVar" << i << " " << 
basic_query->getVarName(i) << std::endl; IDList &can_list = basic_query->getCandidateList(i); cout << "\t\tsize of canlist before filter: " << can_list.size() << endl; // must sort before using binary search. can_list.sort(); long begin = util::get_cur_time(); this->literal_edge_filter(basic_query, i); long after_literal_edge_filter = util::get_cur_time(); cout << "\t\tliteral_edge_filter: used " << (after_literal_edge_filter-begin) << " ms" << endl; // this->preid_filter(basic_query, i); // long after_preid_filter = util::get_cur_time(); // cout << "\t\tafter_preid_filter: used " << (after_preid_filter-after_literal_edge_filter) << " ms" << endl; cout << "\t\t[" << i << "] after filter, candidate size = " << can_list.size() << endl << endl << endl; //debug // { // stringstream _ss; // for(int i = 0; i < can_list.size(); i ++) // { // string _can = this->kvstore->getEntityByID(can_list[i]); // _ss << "[" << _can << ", " << can_list[i] << "]\t"; // } // _ss << endl; // Database::log(_ss.str()); // cout << can_list.to_str() << endl; // } } cout << "OOOOOOUT filter_before_join" << endl; } void Database::literal_edge_filter(BasicQuery* basic_query, int _var_i) { Database::log("IN literal_edge_filter"); //debug int var_degree = basic_query->getVarDegree(_var_i); for(int j = 0; j < var_degree; j ++) { int neighbor_id = basic_query->getEdgeNeighborID(_var_i, j); // continue; cout << "\t\t\tneighbor_id=" << neighbor_id << endl; if (neighbor_id != -1) { continue; } char edge_type = basic_query->getEdgeType(_var_i, j); int triple_id = basic_query->getEdgeID(_var_i, j); Triple triple = basic_query->getTriple(triple_id); std::string neighbor_name; if (edge_type == BasicQuery::EDGE_OUT) { neighbor_name = triple.object; } else { neighbor_name = triple.subject; } /* if neightbor is an var, but not in select * then, if its degree is 1, it has none contribution to filter * only its sole edge property(predicate) makes sense * we should make sure that current candidateVar has an edge 
matching the predicate * */ bool only_preid_filter = (basic_query->isOneDegreeNotSelectVar(neighbor_name)); if(only_preid_filter) { continue; } int pre_id = basic_query->getEdgePreID(_var_i, j); IDList &_list = basic_query->getCandidateList(_var_i); int lit_id = (this->kvstore)->getIDByEntity(neighbor_name); if(lit_id == -1) { lit_id = (this->kvstore)->getIDByLiteral(neighbor_name); } // std::cout << "\t\tedge[" << j << "] "<< lit_string << " has id " << lit_id << ""; // std::cout << " preid:" << pre_id << " type:" << edge_type // << std::endl; { // stringstream _ss; // _ss << "\t\tedge[" << j << "] "<< lit_string << " has id " << lit_id << ""; // _ss << " preid:" << pre_id << " type:" << edge_type // << std::endl; // Database::log(_ss.str()); } int id_list_len = 0; int* id_list = NULL; if (pre_id >= 0) { if (edge_type == BasicQuery::EDGE_OUT) { (this->kvstore)->getsubIDlistByobjIDpreID(lit_id, pre_id, id_list, id_list_len); } else { (this->kvstore)->getobjIDlistBysubIDpreID(lit_id, pre_id, id_list, id_list_len); } } else { if (edge_type == BasicQuery::EDGE_OUT) { (this->kvstore)->getsubIDlistByobjID(lit_id, id_list, id_list_len); } else { (this->kvstore)->getobjIDlistBysubID(lit_id, id_list, id_list_len); } } /* //debug { stringstream _ss; _ss << "id_list: "; for (int i=0;igetVarDegree(_var_i); j++) { int neighbor_id = basic_query->getEdgeNeighborID(_var_i, j); // continue; cout << "\t\t\tneighbor_id=" << neighbor_id << endl; if (neighbor_id != -1) { continue; } char edge_type = basic_query->getEdgeType(_var_i, j); int triple_id = basic_query->getEdgeID(_var_i, j); Triple triple = basic_query->getTriple(triple_id); std::string neighbor_name; if (edge_type == BasicQuery::EDGE_OUT) { neighbor_name = triple.object; } else { neighbor_name = triple.subject; } /* if neightbor is an var, but not in select * then, if its degree is 1, it has none contribution to filter * only its sole edge property(predicate) makes sense * we should make sure that current candidateVar has 
an edge matching the predicate * */ bool only_preid_filter = (basic_query->isOneDegreeNotSelectVar(neighbor_name)); if (!only_preid_filter) { continue; } int pre_id = basic_query->getEdgePreID(_var_i, j); IDList& _list = basic_query->getCandidateList(_var_i); int* remain_list = new int[_list.size()]; int remain_len = 0; int _entity_id = -1; int* pair_list = NULL; int pair_len = 0; for (int i = 0; i < _list.size(); i++) { _entity_id = _list[i]; if (edge_type == BasicQuery::EDGE_IN) { (this->kvstore)->getpreIDsubIDlistByobjID (_entity_id, pair_list, pair_len); } else { (this->kvstore)->getpreIDobjIDlistBysubID (_entity_id, pair_list, pair_len); } bool exist_preid = util::bsearch_preid_uporder (pre_id, pair_list, pair_len); if (exist_preid) { remain_list[remain_len] = _entity_id; remain_len++; } delete[] pair_list; pair_len = 0; }/* end for i 0 to _list.size */ _list.intersectList(remain_list, remain_len); /* can be imported */ delete[] remain_list; }/* end for j : varDegree */ } void Database::only_pre_filter_after_join(BasicQuery* basic_query) { int var_num = basic_query->getVarNum(); vector& result_list = basic_query->getResultList(); for (int var_id = 0; var_id < var_num; var_id++) { int var_degree = basic_query->getVarDegree(var_id); // get all the only predicate filter edges for this variable. 
vector in_edge_pre_id; vector out_edge_pre_id; for (int i = 0; i < var_degree; i++) { char edge_type = basic_query->getEdgeType(var_id, i); int triple_id = basic_query->getEdgeID(var_id, i); Triple triple = basic_query->getTriple(triple_id); std::string neighbor_name; if (edge_type == BasicQuery::EDGE_OUT) { neighbor_name = triple.object; } else { neighbor_name = triple.subject; } bool only_preid_filter = (basic_query->isOneDegreeNotSelectVar(neighbor_name)); if (!only_preid_filter) { continue; } int pre_id = basic_query->getEdgePreID(var_id, i); if (edge_type == BasicQuery::EDGE_OUT) { out_edge_pre_id.push_back(pre_id); } else { in_edge_pre_id.push_back(pre_id); } } if (in_edge_pre_id.empty() && out_edge_pre_id.empty()) { continue; } for (vector::iterator itr = result_list.begin(); itr != result_list.end(); itr++) { int* res_seq = (*itr); if (res_seq[var_num] == -1) { continue; } int entity_id = res_seq[var_id]; int* pair_list = NULL; int pair_len = 0; bool exist_preid = true; if (exist_preid && !in_edge_pre_id.empty()) { (this->kvstore)->getpreIDsubIDlistByobjID(entity_id, pair_list, pair_len); for (vector::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++) { int pre_id = (*itr_pre); bool exist_preid = util::bsearch_preid_uporder(pre_id, pair_list, pair_len); if (!exist_preid) { break; } } } if (exist_preid && !out_edge_pre_id.empty()) { (this->kvstore)->getpreIDobjIDlistBysubID(entity_id, pair_list, pair_len); for (vector::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++) { int pre_id = (*itr_pre); bool exist_preid = util::bsearch_preid_uporder(pre_id, pair_list, pair_len); if (!exist_preid) { break; } } } delete []pair_list; // result sequence is illegal when there exists any missing filter predicate id. if (!exist_preid) { res_seq[var_num] = -1; } } } } /* add literal candidates to these variables' candidate list which may include literal results. 
*/ void Database::add_literal_candidate(BasicQuery* basic_query) { Database::log("IN add_literal_candidate"); int var_num = basic_query->getVarNum(); // deal with literal variable candidate list. // because we do not insert any literal elements into VSTree, we can not retrieve them from VSTree. // for these variable which may include some literal results, we should add all possible literal candidates to the candidate list. for (int i = 0; i < var_num; i++) { //debug { stringstream _ss; _ss << "var[" << i << "]\t"; if (basic_query->isLiteralVariable(i)) { _ss << "may have literal result."; } else { _ss << "do not have literal result."; } _ss << endl; Database::log(_ss.str()); } if (!basic_query->isLiteralVariable(i)) { // if this variable is not literal variable, we can assume that its literal candidates have been added. basic_query->setAddedLiteralCandidate(i); continue; } // for these literal variable without any linking entities(we call free literal variable), // we will add their literal candidates when join-step. if (basic_query->isFreeLiteralVariable(i)) { continue; } int var_id = i; int var_degree = basic_query->getVarDegree(var_id); IDList literal_candidate_list; // intersect each edge's literal candidate. for (int j = 0; j < var_degree; j ++) { int neighbor_id = basic_query->getEdgeNeighborID(var_id, j); int predicate_id = basic_query->getEdgePreID(var_id, j); int triple_id = basic_query->getEdgeID(var_id, j); Triple triple = basic_query->getTriple(triple_id); std::string neighbor_name = triple.subject; IDList this_edge_literal_list; // if the neighbor of this edge is an entity, we can add all literals which has an exact predicate edge linking to this entity. 
if (neighbor_id == -1) { int subject_id = (this->kvstore)->getIDByEntity(neighbor_name); int* object_list = NULL; int object_list_len = 0; (this->kvstore)->getobjIDlistBysubIDpreID(subject_id, predicate_id, object_list, object_list_len); this_edge_literal_list.unionList(object_list, object_list_len); delete []object_list; } // if the neighbor of this edge is variable, then the neighbor variable can not have any literal results, // we should add literals when join these two variables, see the Database::join function for details. // deprecated... // if the neighbor of this edge is variable, we should add all this neighbor variable's candidate entities' neighbor literal, // which has one corresponding predicate edge linking to this variable. else { /* IDList& neighbor_candidate_list = basic_query->getCandidateList(neighbor_id); int neighbor_candidate_list_size = neighbor_candidate_list.size(); for (int k = 0;k < neighbor_candidate_list_size; k ++) { int subject_id = neighbor_candidate_list.getID(k); int* object_list = NULL; int object_list_len = 0; (this->kvstore)->getobjIDlistBysubIDpreID(subject_id, predicate_id, object_list, object_list_len); this_edge_literal_list.unionList(object_list, object_list_len); delete []object_list; } */ } if (j == 0) { literal_candidate_list.unionList(this_edge_literal_list); } else { literal_candidate_list.intersectList(this_edge_literal_list); } } // add the literal_candidate_list to the original candidate list. IDList& origin_candidate_list = basic_query->getCandidateList(var_id); int origin_candidate_list_len = origin_candidate_list.size(); origin_candidate_list.unionList(literal_candidate_list); int after_add_literal_candidate_list_len = origin_candidate_list.size(); // this variable's literal candidates have been added. 
basic_query->setAddedLiteralCandidate(var_id); //debug { stringstream _ss; _ss << "var[" << var_id << "] candidate list after add literal:\t" << origin_candidate_list_len << "-->" << after_add_literal_candidate_list_len << endl; /* for (int i = 0; i < after_add_literal_candidate_list_len; i ++) { int candidate_id = origin_candidate_list.getID(i); std::string candidate_name; if (i < origin_candidate_list_len) { candidate_name = (this->kvstore)->getEntityByID(origin_candidate_list.getID(i)); } else { candidate_name = (this->kvstore)->getLiteralByID(origin_candidate_list.getID(i)); } _ss << candidate_name << "(" << candidate_id << ")\t"; } */ Database::log(_ss.str()); } } Database::log("OUT add_literal_candidate"); } /* get the final string result_set from SPARQLquery */ bool Database::getFinalResult(SPARQLquery& _sparql_q, ResultSet& _result_set) { int _var_num = _sparql_q.getQueryVarNum(); _result_set.setVar(_sparql_q.getQueryVar()); std::vector& query_vec = _sparql_q.getBasicQueryVec(); /* sum the answer number */ int _ans_num = 0; for(int i = 0; i < query_vec.size(); i ++) { _ans_num += query_vec[i]->getResultList().size(); } _result_set.ansNum = _ans_num; _result_set.answer = new string*[_ans_num]; for(int i = 0; i < _result_set.ansNum; i ++) { _result_set.answer[i] = NULL; } int tmp_ans_count = 0; /* map int ans into string ans * union every basic result into total result * */ for(int i = 0; i < query_vec.size(); i ++) { std::vector& tmp_vec = query_vec[i]->getResultList(); std::vector::iterator itr = tmp_vec.begin(); /* for every result group in resultlist */ for(; itr != tmp_vec.end(); itr ++) { _result_set.answer[tmp_ans_count] = new string[_var_num]; /* map every ans_id into ans_str */ for(int v = 0; v < _var_num; v ++) { int ans_id = (*itr)[v]; string ans_str; if (this->objIDIsEntityID(ans_id)) { ans_str = (this->kvstore)->getEntityByID(ans_id); } else { ans_str = (this->kvstore)->getLiteralByID(ans_id); } _result_set.answer[tmp_ans_count][v] = ans_str; } 
tmp_ans_count ++; } } return true; } FILE* Database::fp_debug = NULL; void Database::log(std::string _str) { _str += "\n"; if(Database::debug_1) { fputs(_str.c_str(), fp_debug); fflush(fp_debug); } if(Database::debug_vstree) { fputs(_str.c_str(), fp_debug); fflush(fp_debug); } } void Database::printIDlist(int _i, int* _list, int _len, std::string _log) { std::stringstream _ss; _ss << "[" << _i << "] "; for(int i = 0; i < _len; i ++){ _ss << _list[i] << "\t"; } Database::log("=="+_log + ":"); Database::log(_ss.str()); } void Database::printPairList(int _i, int* _list, int _len, std::string _log) { std::stringstream _ss; _ss << "[" << _i << "] "; for(int i = 0; i < _len; i += 2){ _ss << "[" << _list[i] << "," << _list[i+1] << "]\t"; } Database::log("=="+_log + ":"); Database::log(_ss.str()); } void Database::test() { int subNum = 9, preNum = 20, objNum = 90; int* _id_list = NULL; int _list_len = 0; {/* x2ylist */ for (int i = 0; i < subNum; i++) { (this->kvstore)->getobjIDlistBysubID(i, _id_list, _list_len); if (_list_len != 0) { stringstream _ss; this->printIDlist(i, _id_list, _list_len, "s2olist["+_ss.str()+"]"); delete[] _id_list; } /* o2slist */ (this->kvstore)->getsubIDlistByobjID(i, _id_list, _list_len); if (_list_len != 0) { stringstream _ss; this->printIDlist(i, _id_list, _list_len, "o(sub)2slist["+_ss.str()+"]"); delete[] _id_list; } } for (int i = 0; i < objNum; i++) { int _i = Database::LITERAL_FIRST_ID + i; (this->kvstore)->getsubIDlistByobjID(_i, _id_list, _list_len); if (_list_len != 0) { stringstream _ss; this->printIDlist(_i, _id_list, _list_len, "o(literal)2slist["+_ss.str()+"]"); delete[] _id_list; } } } {/* xy2zlist */ for (int i = 0; i < subNum; i++) { for (int j = 0; j < preNum; j++) { (this->kvstore)->getobjIDlistBysubIDpreID(i, j, _id_list, _list_len); if (_list_len != 0) { stringstream _ss; _ss << "preid:" << j ; this->printIDlist(i, _id_list, _list_len, "sp2olist["+_ss.str()+"]"); delete[] _id_list; } 
(this->kvstore)->getsubIDlistByobjIDpreID(i, j, _id_list, _list_len); if (_list_len != 0) { stringstream _ss; _ss << "preid:" << j ; this->printIDlist(i, _id_list, _list_len, "o(sub)p2slist["+_ss.str()+"]"); delete[] _id_list; } } } for (int i = 0; i < objNum; i++) { int _i = Database::LITERAL_FIRST_ID + i; for (int j = 0; j < preNum; j++) { (this->kvstore)->getsubIDlistByobjIDpreID(_i, j, _id_list, _list_len); if (_list_len != 0) { stringstream _ss; _ss << "preid:" << j ; this->printIDlist(_i, _id_list, _list_len, "*o(literal)p2slist["+_ss.str()+"]"); delete[] _id_list; } } } } {/* x2yzlist */ for (int i = 0; i < subNum; i++) { (this->kvstore)->getpreIDobjIDlistBysubID(i, _id_list, _list_len); if (_list_len != 0) { this->printPairList(i, _id_list, _list_len, "s2polist"); delete[] _id_list; _list_len = 0; } } for (int i = 0; i < subNum; i++) { (this->kvstore)->getpreIDsubIDlistByobjID(i, _id_list, _list_len); if (_list_len != 0) { this->printPairList(i, _id_list, _list_len, "o(sub)2pslist"); delete[] _id_list; } } for (int i = 0; i < objNum; i++) { int _i = Database::LITERAL_FIRST_ID + i; (this->kvstore)->getpreIDsubIDlistByobjID(_i, _id_list, _list_len); if (_list_len != 0) { this->printPairList(_i, _id_list, _list_len, "o(literal)2pslist"); delete[] _id_list; } } } } void Database::test_build_sig() { BasicQuery* _bq = new BasicQuery(""); /* * y:created . * y:created . * y:hasSuccessor * rdf:type * * id of is 0 * id of is 2 * * * ?x1 y:created ?x2. * ?x1 y:created . * ?x2 y:hasSuccessor . 
* ?x2 rdf:type */ { Triple _triple("?x1", "y:created", "?x2"); _bq->addTriple(_triple); } { Triple _triple("?x1", "y:created", ""); _bq->addTriple(_triple); } { Triple _triple("?x2", "y:hasSuccessor", ""); _bq->addTriple(_triple); } { Triple _triple("?x2", "rdf:type", ""); _bq->addTriple(_triple); } std::vector _v; _v.push_back("?x1"); _v.push_back("?x2"); _bq->encodeBasicQuery(this->kvstore, _v); Database::log(_bq->to_str()); SPARQLquery _q; _q.addBasicQuery(_bq); (this->vstree)->retrieve(_q); Database::log("\n\n"); Database::log("candidate:\n\n"+_q.candidate_str()); } void Database::test_join() { BasicQuery* _bq = new BasicQuery(""); /* * y:created . * y:created . * y:hasSuccessor * rdf:type * * id of is 0 * id of is 2 * * * ?x1 y:created ?x2. * ?x1 y:created . * ?x2 y:hasSuccessor . * ?x2 rdf:type */ { Triple _triple("?x1", "y:created", "?x2"); _bq->addTriple(_triple); } { Triple _triple("?x1", "y:created", ""); _bq->addTriple(_triple); } { Triple _triple("?x2", "y:hasSuccessor", ""); _bq->addTriple(_triple); } { Triple _triple("?x2", "rdf:type", ""); _bq->addTriple(_triple); } std::vector _v; _v.push_back("?x1"); _v.push_back("?x2"); _bq->encodeBasicQuery(this->kvstore, _v); Database::log(_bq->to_str()); SPARQLquery _q; _q.addBasicQuery(_bq); (this->vstree)->retrieve(_q); Database::log("\n\n"); Database::log("candidate:\n\n"+_q.candidate_str()); _q.print(std::cout); this->join(_q); ResultSet _rs; this->getFinalResult(_q, _rs); cout << _rs.to_str() << endl; }