/*=============================================================================
# Filename: VSTree.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-11-01 13:02
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/
|
|
|
|
#ifndef _VSTREE_VSTREE_H
|
|
#define _VSTREE_VSTREE_H
|
|
|
|
#include <list>
#include <map>
#include <string>

#include "../Util/Util.h"
#include "../Query/SPARQLquery.h"
#include "VNode.h"
#include "LRUCache.h"
#include "EntryBuffer.h"
|
|
|
|
//NOTICE: R/W more than 4G

//TODO: in the multi-threaded case, to keep the vstree and its cache correct, maybe lock the whole vstree
//(i.e. at any one time, only one thread can query/update the vstree)
|
|
|
|
class VSTree
|
|
{
|
|
friend class VNode;
|
|
public:
|
|
VSTree(std::string _store_path);
|
|
~VSTree();
|
|
int getHeight()const;
|
|
//build the VSTree from the _entity_signature_file.
|
|
bool buildTree(std::string _entity_signature_file);
|
|
bool deleteTree();
|
|
|
|
//if the tree is empty
|
|
bool isEmpty() const;
|
|
|
|
//Incrementally update bitset of _entity_id conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
|
|
//Entry of _entity_id must exists
|
|
bool updateEntry(int _entity_id, const EntityBitSet& _bitset);
|
|
|
|
//Replace the Entry(_enitty_id)'s EntityBitSet with _bitset Entry of _entity_id must exists
|
|
bool replaceEntry(int _entity_id, const EntityBitSet& _bitset);
|
|
|
|
//insert an new Entry, whose entity doesn't exist before
|
|
bool insertEntry(const SigEntry& _entry);
|
|
|
|
//remove an existed Entry(_entity_id) from VSTree
|
|
bool removeEntry(int _entity_id);
|
|
|
|
//save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path.
|
|
bool saveTree();
|
|
//load tree from tree_info_file_path and tree_node_file_path files.
|
|
bool loadTree();
|
|
//get the tree's root node pointer.
|
|
VNode* getRoot();
|
|
//get the node pointer by its file line.
|
|
VNode* getNode(int _line);
|
|
//retrieve candidate result set by the var_sig in the _query.
|
|
void retrieve(SPARQLquery& _query);
|
|
//retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list.
|
|
void retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list);
|
|
|
|
private:
|
|
//TODO:add a tree lock(read-write), if a thread is writing, lock the whole tree
|
|
//NOTICE: all updates occur in one-thread, the buffer ensures that 3*h nodes can be loaded is ok
|
|
//However, only-read queries can occur in many threads, but a query at a time only need to keep one node
|
|
//But, how can you ensure that for a thread, its original node is at the top of the list? we must keep a lock for a node(but no need to write to disk)!!!
|
|
//(and each time select a unlocked node to swap out)
|
|
int root_file_line;
|
|
int node_num;
|
|
int entry_num;
|
|
int height;
|
|
|
|
LRUCache* node_buffer;
|
|
EntryBuffer* entry_buffer;
|
|
map<int, int> entityID2FileLineMap; // record the mapping from entityID to their node's file line.
|
|
|
|
static std::string tree_file_foler_path;
|
|
static std::string tree_node_file_path;
|
|
static std::string tree_info_file_path;
|
|
|
|
//manage the node id to deal with insert/delete(only when node is created or removed).
|
|
//To create node, if free list is empty, then max_nid_alloc++;else, get one from free list
|
|
//To remove node, add its ID to free list
|
|
std::list<int> free_nid_list;
|
|
//max_nid_alloc-1 is the maxium using ID(need to maintain)
|
|
int max_nid_alloc;
|
|
|
|
//choose the best leaf node to insert the _entry, return the choosed leaf node's pointer.
|
|
VNode* chooseNode(VNode* _p_node, const SigEntry& _entry);
|
|
//split the _p_full_node to two new node when it is full.
|
|
//the parameter _insert_entry and _p_insert_node are the entry/node
|
|
//need to be insert to the _p_full_node.
|
|
void split(VNode* _p_full_node, const SigEntry& _insert_entry, VNode* _p_insert_node);
|
|
//deal when _child key num not enough
|
|
void coalesce(VNode*& _child, int _entry_index);
|
|
//create a new node when one node need splitting.
|
|
VNode* createNode(bool _is_leaf = true);
|
|
//swap two nodes' file line, their related nodes(father and children nodes) will also be updated.
|
|
void swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b);
|
|
//save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc.
|
|
bool saveTreeInfo();
|
|
//load VSTree's information from tree_info_file_path.
|
|
bool loadTreeInfo();
|
|
//traverse the tree_node_file_path file, load the mapping from entity id to file line.
|
|
bool loadEntityID2FileLineMap();
|
|
//update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node.
|
|
void updateEntityID2FileLineMap(VNode* _p_node);
|
|
//get the leaf node pointer by the given _entityID
|
|
VNode* getLeafNodeByEntityID(int _entityID);
|
|
|
|
//delete node and update the LRUCache and file storage
|
|
void removeNode(VNode* _vp);
|
|
|
|
std::string to_str();
|
|
};
|
|
|
|
#endif // _VSTREE_VSTREE_H
|
|
|