refactor: use different lrucache size for build and query

by zengli, no influence on others
This commit is contained in:
bookug 2017-04-06 23:48:45 +08:00
parent 6f49685360
commit 9d760ed643
4 changed files with 22 additions and 10 deletions

View File

@ -562,7 +562,9 @@ Database::load()
return true; return true;
} }
bool flag = (this->vstree)->loadTree(); //TODO: acquire this arg from memory manager
unsigned vstree_cache = LRUCache::DEFAULT_CAPACITY;
bool flag = (this->vstree)->loadTree(vstree_cache);
if (!flag) if (!flag)
{ {
cout << "load tree error. @Database::load()" << endl; cout << "load tree error. @Database::load()" << endl;
@ -806,9 +808,12 @@ Database::build(const string& _rdf_file)
string _entry_file = this->getSignatureBFile(); string _entry_file = this->getSignatureBFile();
cout << "begin build VS-Tree on " << ret << "..." << endl; cout << "begin build VS-Tree on " << ret << "..." << endl;
//TODO: we can use larger buffer for vstree in building process, because it does not compete with others //NOTICE: we can use larger buffer for vstree in building process, because it does not compete with others
//we only need to build vstree in this phase(no need for id tuples anymore) //we only need to build vstree in this phase(no need for id tuples anymore)
(this->vstree)->buildTree(_entry_file); //TODO: acquire this arg from memory manager
unsigned vstree_cache_size = 4 * LRUCache::DEFAULT_CAPACITY;
//BETTER: we should set the parameter according to current memory usage
(this->vstree)->buildTree(_entry_file, vstree_cache_size);
long tv_build_end = Util::get_cur_time(); long tv_build_end = Util::get_cur_time();

View File

@ -27,6 +27,8 @@ int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000; //about 20G memory for vstree
//int LRUCache::DEFAULT_CAPACITY = 1000; //int LRUCache::DEFAULT_CAPACITY = 1000;
//NOTICE:10^6 is a good parameter, at most use 20G //NOTICE:10^6 is a good parameter, at most use 20G
//NOTICE: it is OK to set it to 4000000 when building; it is better to adjust it according to the current memory usage
//also, use 2000000 or smaller for query()
LRUCache::LRUCache(int _capacity) LRUCache::LRUCache(int _capacity)
{ {
//initialize the lock //initialize the lock
@ -39,7 +41,7 @@ LRUCache::LRUCache(int _capacity)
cout << "LRUCache initial..." << endl; cout << "LRUCache initial..." << endl;
this->capacity = _capacity > 0 ? _capacity : LRUCache::DEFAULT_CAPACITY; this->capacity = _capacity > 0 ? _capacity : LRUCache::DEFAULT_CAPACITY;
// TODO+DEBUG:it seems that a minimum size is required, for example, multiple path down(the height?) //DEBUG:it seems that a minimum size is required, for example, multiple path down(the height?)
//at least 3*h //at least 3*h
// //
// we should guarantee the cache is big enough. // we should guarantee the cache is big enough.

View File

@ -160,14 +160,18 @@ void VSTree::retrieve(SPARQLquery& _query)
//NOTICE:this can only be done by one thread //NOTICE:this can only be done by one thread
//build the VSTree from the _entity_signature_file. //build the VSTree from the _entity_signature_file.
bool bool
VSTree::buildTree(std::string _entry_file_path) VSTree::buildTree(std::string _entry_file_path, int _cache_size)
{ {
Util::logging("IN VSTree::buildTree"); Util::logging("IN VSTree::buildTree");
//NOTICE: the entry buffer doesn't need to store all entities; it just loops, reads and processes them
//not so much memory: 2 * 10^6 * (4+800/8) < 1G
// create the entry buffer and node buffer. // create the entry buffer and node buffer.
this->entry_buffer = new EntryBuffer(EntryBuffer::DEFAULT_CAPACITY); this->entry_buffer = new EntryBuffer(EntryBuffer::DEFAULT_CAPACITY);
//cout<<"entry buffer newed"<<endl; //cout<<"entry buffer newed"<<endl;
this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY); this->node_buffer = new LRUCache(_cache_size);
//this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY);
// create the root node. // create the root node.
//VNode* rootNodePtr = new VNode(); //VNode* rootNodePtr = new VNode();
@ -643,10 +647,11 @@ VSTree::saveTree()
} }
bool bool
VSTree::loadTree() VSTree::loadTree(int _cache_size)
{ {
cout << "load VSTree..." << endl; cout << "load VSTree..." << endl;
(this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY); (this->node_buffer) = new LRUCache(_cache_size);
//(this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY);
cout<<"LRU cache built"<<endl; cout<<"LRU cache built"<<endl;
bool flag = this->loadTreeInfo(); bool flag = this->loadTreeInfo();

View File

@ -28,7 +28,7 @@ public:
~VSTree(); ~VSTree();
int getHeight()const; int getHeight()const;
//build the VSTree from the _entity_signature_file. //build the VSTree from the _entity_signature_file.
bool buildTree(std::string _entity_signature_file); bool buildTree(std::string _entity_signature_file, int _cache_size = -1);
bool deleteTree(); bool deleteTree();
//if the tree is empty //if the tree is empty
@ -50,7 +50,7 @@ public:
//save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path. //save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path.
bool saveTree(); bool saveTree();
//load tree from tree_info_file_path and tree_node_file_path files. //load tree from tree_info_file_path and tree_node_file_path files.
bool loadTree(); bool loadTree(int _cache_size = -1);
//get the tree's root node pointer. //get the tree's root node pointer.
VNode* getRoot(); VNode* getRoot();
//get the node pointer by its file line. //get the node pointer by its file line.