/*=============================================================================
# Filename: KVstore.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-23 14:23
# Description: Modified by Wang Libo
=============================================================================*/

#ifndef _KVSTORE_KVSTORE_H
#define _KVSTORE_KVSTORE_H

#include "../Util/Util.h"
#include "../Util/VList.h"
#include "Tree.h"

//TODO: is it needed to keep a length in Bstr?? especially for IVTree?
//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment)
//add a \0 in tail: only add 1 char
//QUERY: but to count the length each time maybe very costly?
//No, because triple num is stored in char* now!!!! we do not need to save it again
//
//QUERY: but to implement vlist, we need an unsigned flag
//What is more, we need to store the string in disk, how can we store it if without the length?
//unsigned type stored as chars, maybe will have '\0'
//In memory, we do not know when the oidlist ends if without the original length (but the triple num will answer this!)
//
//TODO: entity_border in s2values list is not needed!!! not waste memory here

//STRUCT:
//1. s2xx
//Triple Num Pre Num Entity Num p1 offset1 p2 offset2 ... pn offsetn (olist-p1) (olist-p2) ... (olist-pn)
//2. o2xx
//Triple Num Pre Num p1 offset1 p2 offset2 ... pn offsetn (slist-p1) (slist-p2) ... (slist-pn)
//3. 
p2xx //Triple Num (sid list) (oid list) (not sorted, matched with sid one by one) class KVstore { public: static const int READ_WRITE_MODE = 1; //Open a B tree, which must exist static const int CREATE_MODE = 2; //Build a new B tree and delete existing ones (if any) KVstore(std::string _store_path = "."); ~KVstore(); void flush(); void release(); void open(); std::string getStringByID(TYPE_ENTITY_LITERAL_ID _id); TYPE_ENTITY_LITERAL_ID getIDByString(std::string _str); //=============================================================================== //including IN-neighbor & OUT-neighbor unsigned getEntityDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; unsigned getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; unsigned getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; unsigned getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const; unsigned getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const; unsigned getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid) const; unsigned getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid) const; //=============================================================================== //Before calling these functions, we are sure that the triples doesn't exist. 
bool updateTupleslist_insert(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); bool updateTupleslist_remove(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist); bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist); bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist); bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist); bool updateInsert_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); bool updateRemove_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); bool updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist); bool updateRemove_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist); //=============================================================================== //for entity2id bool open_entity2id(int _mode); bool close_entity2id(); bool subIDByEntity(std::string _entity); TYPE_ENTITY_LITERAL_ID getIDByEntity(std::string _entity) const; bool setIDByEntity(std::string _entity, TYPE_ENTITY_LITERAL_ID _id); //for id2entity bool open_id2entity(int _mode); bool close_id2entity(); bool subEntityByID(TYPE_ENTITY_LITERAL_ID _id); std::string getEntityByID(TYPE_ENTITY_LITERAL_ID _id) 
const; bool setEntityByID(TYPE_ENTITY_LITERAL_ID _id, std::string _entity); //for predicate2id bool open_predicate2id(int _mode); bool close_predicate2id(); bool subIDByPredicate(std::string _predicate); TYPE_PREDICATE_ID getIDByPredicate(std::string _predicate) const; bool setIDByPredicate(std::string _predicate, TYPE_PREDICATE_ID _id); //for id2predicate bool open_id2predicate(int _mode); bool close_id2predicate(); bool subPredicateByID(TYPE_PREDICATE_ID _id); std::string getPredicateByID(TYPE_PREDICATE_ID _id) const; bool setPredicateByID(TYPE_PREDICATE_ID _id, std::string _predicate); //for literal2id bool open_literal2id(int _mode); bool close_literal2id(); bool subIDByLiteral(std::string _literal); TYPE_ENTITY_LITERAL_ID getIDByLiteral(std::string _literal) const; bool setIDByLiteral(std::string _literal, TYPE_ENTITY_LITERAL_ID _id); //for id2literal bool open_id2literal(int _mode); bool close_id2literal(); bool subLiteralByID(TYPE_ENTITY_LITERAL_ID _id); std::string getLiteralByID(TYPE_ENTITY_LITERAL_ID _id) const; bool setLiteralByID(TYPE_ENTITY_LITERAL_ID _id, std::string _literal); //=============================================================================== //for subID2values bool open_subID2values(int _mode); bool close_subID2values(); bool build_subID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); bool getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; bool getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; bool getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; bool getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const; //for objID2values bool open_objID2values(int _mode); bool 
close_objID2values(); bool build_objID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); bool getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; bool getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; bool getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; bool getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preid_subidlist, unsigned& _list_len, bool _no_duplicate = false) const; //for preID2values bool open_preID2values(int _mode); bool close_preID2values(); bool build_preID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); bool getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; bool getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; bool getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const; //for so2p bool getpreIDlistBysubIDobjID(TYPE_ENTITY_LITERAL_ID _subID, TYPE_ENTITY_LITERAL_ID _objID, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; private: std::string store_path; SITree* entity2id; ISTree* id2entity; static std::string s_entity2id; static std::string s_id2entity; static unsigned short buffer_entity2id_build; static unsigned short buffer_id2entity_build; static unsigned short buffer_entity2id_query; static unsigned short buffer_id2entity_query; SITree* predicate2id; ISTree* id2predicate; static std::string s_predicate2id; static std::string s_id2predicate; static unsigned short buffer_predicate2id_build; static unsigned short buffer_id2predicate_build; static unsigned short 
buffer_predicate2id_query; static unsigned short buffer_id2predicate_query; SITree* literal2id; ISTree* id2literal; static std::string s_literal2id; static std::string s_id2literal; static unsigned short buffer_literal2id_build; static unsigned short buffer_id2literal_build; static unsigned short buffer_literal2id_query; static unsigned short buffer_id2literal_query; IVTree* subID2values; IVTree* objID2values; IVTree* preID2values; static std::string s_sID2values; static std::string s_oID2values; static std::string s_pID2values; static unsigned short buffer_sID2values_build; static unsigned short buffer_oID2values_build; static unsigned short buffer_pID2values_build; static unsigned short buffer_sID2values_query; static unsigned short buffer_oID2values_query; static unsigned short buffer_pID2values_query; //=============================================================================== bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); bool open(IVTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); void flush(SITree* _p_btree); void flush(ISTree* _p_btree); void flush(IVTree* _p_btree); bool addValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val); bool addValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); bool setValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val); bool setValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); bool setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); bool getValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned* _val) const; bool getValueByKey(ISTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const; bool getValueByKey(IVTree* _p_btree, 
unsigned _key, char*& _val, unsigned& _vlen) const; TYPE_ENTITY_LITERAL_ID getIDByStr(SITree* _p_btree, const char* _key, unsigned _klen) const; bool removeKey(SITree* _p_btree, const char* _key, unsigned _klen); bool removeKey(ISTree* _p_btree, unsigned _key); bool removeKey(IVTree* _p_btree, unsigned _key); static std::vector intersect(const unsigned* _list1, const unsigned* _list2, unsigned _len1, unsigned _len2); static unsigned binarySearch(unsigned key, const unsigned* _list, unsigned _list_len, int step = 1); static bool isEntity(TYPE_ENTITY_LITERAL_ID id); }; #endif //_KVSTORE_KVSTORE_H