/*============================================================================= # Filename: VSTree.h # Author: Bookug Lobert # Mail: 1181955272@qq.com # Last Modified: 2015-11-01 13:02 # Description: originally written by liyouhuan, modified by zengli =============================================================================*/ #ifndef _VSTREE_VSTREE_H #define _VSTREE_VSTREE_H #include "../Util/Util.h" #include "../Query/SPARQLquery.h" #include "VNode.h" #include "LRUCache.h" #include "EntryBuffer.h" //NOTICE:R/W more than 4G class VSTree { friend class VNode; public: VSTree(std::string _store_path); ~VSTree(); int getHeight()const; //build the VSTree from the _entity_signature_file. bool buildTree(std::string _entity_signature_file); bool deleteTree(); //if the tree is empty bool isEmpty() const; //Incrementally update bitset of _entity_id conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset //Entry of _entity_id must exists bool updateEntry(int _entity_id, const EntityBitSet& _bitset); //Replace the Entry(_enitty_id)'s EntityBitSet with _bitset Entry of _entity_id must exists bool replaceEntry(int _entity_id, const EntityBitSet& _bitset); //insert an new Entry, whose entity doesn't exist before bool insertEntry(const SigEntry& _entry); //remove an existed Entry(_entity_id) from VSTree bool removeEntry(int _entity_id); //save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path. bool saveTree(); //load tree from tree_info_file_path and tree_node_file_path files. bool loadTree(); //get the tree's root node pointer. VNode* getRoot(); //get the node pointer by its file line. VNode* getNode(int _line); //retrieve candidate result set by the var_sig in the _query. void retrieve(SPARQLquery& _query); //retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list. void retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list); private: //TODO:add a tree lock(read-write), if a thread is writing, lock the whole tree //NOTICE: all updates occur in one-thread, the buffer ensures that 3*h nodes can be loaded is ok //However, only-read queries can occur in many threads, but a query at a time only need to keep one node //But, how can you ensure that for a thread, its original node is at the top of the list? we must keep a lock for a node(but no need to write to disk)!!! //(and each time select a unlocked node to swap out) int root_file_line; int node_num; int entry_num; int height; LRUCache* node_buffer; EntryBuffer* entry_buffer; map entityID2FileLineMap; // record the mapping from entityID to their node's file line. static std::string tree_file_foler_path; static std::string tree_node_file_path; static std::string tree_info_file_path; //manage the node id to deal with insert/delete(only when node is created or removed). //To create node, if free list is empty, then max_nid_alloc++;else, get one from free list //To remove node, add its ID to free list std::list free_nid_list; //max_nid_alloc-1 is the maxium using ID(need to maintain) int max_nid_alloc; //choose the best leaf node to insert the _entry, return the choosed leaf node's pointer. VNode* chooseNode(VNode* _p_node, const SigEntry& _entry); //split the _p_full_node to two new node when it is full. //the parameter _insert_entry and _p_insert_node are the entry/node //need to be insert to the _p_full_node. void split(VNode* _p_full_node, const SigEntry& _insert_entry, VNode* _p_insert_node); //deal when _child key num not enough void coalesce(VNode*& _child, int _entry_index); //create a new node when one node need splitting. VNode* createNode(bool _is_leaf = true); //swap two nodes' file line, their related nodes(father and children nodes) will also be updated. void swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b); //save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc. bool saveTreeInfo(); //load VSTree's information from tree_info_file_path. bool loadTreeInfo(); //traverse the tree_node_file_path file, load the mapping from entity id to file line. bool loadEntityID2FileLineMap(); //update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node. void updateEntityID2FileLineMap(VNode* _p_node); //get the leaf node pointer by the given _entityID VNode* getLeafNodeByEntityID(int _entityID); //delete node and update the LRUCache and file storage void removeNode(VNode* _vp); std::string to_str(); }; #endif // _VSTREE_VSTREE_H