diff --git a/Database/Database.cpp b/Database/Database.cpp index ade436d..875d140 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -562,7 +562,9 @@ Database::load() return true; } - bool flag = (this->vstree)->loadTree(); + //TODO: acquire this arg from memory manager + unsigned vstree_cache = LRUCache::DEFAULT_CAPACITY; + bool flag = (this->vstree)->loadTree(vstree_cache); if (!flag) { cout << "load tree error. @Database::load()" << endl; @@ -806,9 +808,12 @@ Database::build(const string& _rdf_file) string _entry_file = this->getSignatureBFile(); cout << "begin build VS-Tree on " << ret << "..." << endl; - //TODO: we can use larger buffer for vstree in building process, because it does not compete with others + //NOTICE: we can use larger buffer for vstree in building process, because it does not compete with others //we only need to build vstree in this phase(no need for id tuples anymore) - (this->vstree)->buildTree(_entry_file); + //TODO: acquire this arg from memory manager + unsigned vstree_cache_size = 4 * LRUCache::DEFAULT_CAPACITY; + //BETTER: we should set the parameter according to current memory usage + (this->vstree)->buildTree(_entry_file, vstree_cache_size); long tv_build_end = Util::get_cur_time(); diff --git a/VSTree/LRUCache.cpp b/VSTree/LRUCache.cpp index 9b15054..ceac195 100644 --- a/VSTree/LRUCache.cpp +++ b/VSTree/LRUCache.cpp @@ -27,6 +27,8 @@ int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000; //about 20G memory for vstree //int LRUCache::DEFAULT_CAPACITY = 1000; //NOTICE:10^6 is a good parameter, at most use 20G +//NOTICE: it is ok to set it 4000000 when building!!! better to adjust according to the current memory usage +//also use 2000000 or smaller for query() LRUCache::LRUCache(int _capacity) { //initialize the lock @@ -39,7 +41,7 @@ LRUCache::LRUCache(int _capacity) cout << "LRUCache initial..." << endl; this->capacity = _capacity > 0 ? 
_capacity : LRUCache::DEFAULT_CAPACITY; - // TODO+DEBUG:it seems that a minium size is required, for example, multiple path down(the height?) + //DEBUG:it seems that a minimum size is required, for example, multiple path down(the height?) //at least 3*h // // we should guarantee the cache is big enough. diff --git a/VSTree/VSTree.cpp b/VSTree/VSTree.cpp index a2ee983..9fea1a9 100644 --- a/VSTree/VSTree.cpp +++ b/VSTree/VSTree.cpp @@ -160,14 +160,18 @@ void VSTree::retrieve(SPARQLquery& _query) //NOTICE:this can only be done by one thread //build the VSTree from the _entity_signature_file. bool -VSTree::buildTree(std::string _entry_file_path) +VSTree::buildTree(std::string _entry_file_path, int _cache_size) { Util::logging("IN VSTree::buildTree"); + + //NOTICE: entry buffer doesn't need to store all entities, just loop, read and deal + //not so much memory: 2 * 10^6 * (4+800/8) < 1G // create the entry buffer and node buffer. this->entry_buffer = new EntryBuffer(EntryBuffer::DEFAULT_CAPACITY); //cout<<"entry buffer newed"<<endl; - this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY); + this->node_buffer = new LRUCache(_cache_size); + //this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY); // create the root node. //VNode* rootNodePtr = new VNode(); @@ -643,10 +647,11 @@ VSTree::saveTree() } bool -VSTree::loadTree() +VSTree::loadTree(int _cache_size) { cout << "load VSTree..." << endl; - (this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY); + (this->node_buffer) = new LRUCache(_cache_size); + //(this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY); cout<<"LRU cache built"<loadTreeInfo(); diff --git a/VSTree/VSTree.h b/VSTree/VSTree.h index 8ab1c3b..9cedc2a 100644 --- a/VSTree/VSTree.h +++ b/VSTree/VSTree.h @@ -28,7 +28,7 @@ public: ~VSTree(); int getHeight()const; //build the VSTree from the _entity_signature_file. 
- bool buildTree(std::string _entity_signature_file); + bool buildTree(std::string _entity_signature_file, int _cache_size = -1); bool deleteTree(); //if the tree is empty @@ -50,7 +50,7 @@ public: //save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path. bool saveTree(); //load tree from tree_info_file_path and tree_node_file_path files. - bool loadTree(); + bool loadTree(int _cache_size = -1); //get the tree's root node pointer. VNode* getRoot(); //get the node pointer by its file line.