From ceff3544aecd1b903825e866d658a17e40ae8123 Mon Sep 17 00:00:00 2001 From: bookug Date: Fri, 31 Mar 2017 00:23:16 +0800 Subject: [PATCH] refactor: add value list for IVTree; lower the copy cost of string not done, waiting to be debugged by zengli, long list must be cleared after got, no changes besides KVstore --- KVstore/IVTree/IVTree.cpp | 52 ++++++------ KVstore/IVTree/IVTree.h | 16 ++-- KVstore/IVTree/node/IVLeafNode.cpp | 118 ++++++++++++++++++++++----- KVstore/IVTree/node/IVLeafNode.h | 2 + KVstore/IVTree/node/IVNode.h | 1 + KVstore/IVTree/storage/IVStorage.cpp | 10 ++- KVstore/KVstore.cpp | 86 ++++++++++++------- KVstore/KVstore.h | 11 ++- KVstore/Tree.h | 2 +- NOTES.md | 5 +- Util/Bstr.cpp | 9 +- Util/Bstr.h | 3 + Util/VList.cpp | 90 +++++++++++++++----- Util/VList.h | 16 ++-- makefile | 9 +- 15 files changed, 306 insertions(+), 124 deletions(-) diff --git a/KVstore/IVTree/IVTree.cpp b/KVstore/IVTree/IVTree.cpp index eac36a1..e1eb236 100644 --- a/KVstore/IVTree/IVTree.cpp +++ b/KVstore/IVTree/IVTree.cpp @@ -115,13 +115,13 @@ IVTree::prepare(IVNode* _np) } bool -IVTree::search(int _key, char*& _str, int& _len) +IVTree::search(unsigned _key, char*& _str, unsigned& _len) { - if (_key < 0) - { - printf("error in IVTree-search: empty string\n"); - return false; - } + //if (_key < 0) + //{ + //printf("error in IVTree-search: empty string\n"); + //return false; + //} this->request = 0; int store; @@ -142,13 +142,13 @@ IVTree::search(int _key, char*& _str, int& _len) } bool -IVTree::insert(int _key, char* _str, unsigned _len) +IVTree::insert(unsigned _key, char* _str, unsigned _len) { - if (_key < 0) - { - printf("error in IVTree-insert: empty string\n"); - return false; - } + //if (_key < 0) + //{ + //printf("error in IVTree-insert: empty string\n"); + //return false; + //} //this->CopyToTransfer(_str, _len, 2); //const Bstr* val = &(this->transfer[2]); @@ -250,13 +250,13 @@ IVTree::insert(int _key, char* _str, unsigned _len) } bool -IVTree::modify(int _key, char* _str, unsigned _len) +IVTree::modify(unsigned _key, char* _str, unsigned _len) { - if (_key < 0) - { - printf("error in IVTree-modify: empty string\n"); - return false; - } + //if (_key < 0) + //{ + //printf("error in IVTree-modify: empty string\n"); + //return false; + //} //this->CopyToTransfer(_str, _len, 2); //not check value //const Bstr* val = &(this->transfer[2]); @@ -291,7 +291,7 @@ IVTree::modify(int _key, char* _str, unsigned _len) //this function is useful for search and modify, and range-query IVNode* //return the first key's position that >= *_key -IVTree::find(int _key, int* _store, bool ifmodify) +IVTree::find(unsigned _key, int* _store, bool ifmodify) { //to assign value for this->bstr, function shouldn't be const! if (this->root == NULL) return NULL; //IVTree Is Empty @@ -334,13 +334,13 @@ IVTree::find(unsigned _len, const char* _str, int* store) const */ bool -IVTree::remove(int _key) +IVTree::remove(unsigned _key) { - if (_key < 0) - { - printf("error in IVTree-remove: empty string\n"); - return false; - } + //if (_key < 0) + //{ + //printf("error in IVTree-remove: empty string\n"); + //return false; + //} this->request = 0; IVNode* ret; @@ -468,7 +468,7 @@ IVTree::resetStream() //TODO: change to using value list, getValue() maybe not get real long list bool //special case: not exist, one-edge-case -IVTree::range_query(int _key1, int _key2) +IVTree::range_query(unsigned _key1, unsigned _key2) { //the range is: *_key1 <= x < *_key2 //if(_key1 <0 && _key2 <0) //return false; diff --git a/KVstore/IVTree/IVTree.h b/KVstore/IVTree/IVTree.h index 3ae3897..11184fe 100644 --- a/KVstore/IVTree/IVTree.h +++ b/KVstore/IVTree/IVTree.h @@ -29,7 +29,7 @@ class IVTree { protected: - unsigned int height; //0 indicates an empty tree + unsigned height; //0 indicates an empty tree IVNode* root; IVNode* leaves_head; //the head of LeafNode-list IVNode* leaves_tail; //the tail of LeafNode-list @@ -74,19 +74,19 @@ protected: public: IVTree(); //always need to initial transfer IVTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size); - unsigned int getHeight() const; + unsigned getHeight() const; void setHeight(unsigned _h); IVNode* getRoot() const; //void setRoot(Node* _root); //insert, search, remove, set - bool search(int _key, char*& _str, int& _len); - bool insert(int _key, char* _str, unsigned _len); - bool modify(int _key, char* _str, unsigned _len); - IVNode* find(int _key, int* store, bool ifmodify); - bool remove(int _key); + bool search(unsigned _key, char*& _str, unsigned& _len); + bool insert(unsigned _key, char* _str, unsigned _len); + bool modify(unsigned _key, char* _str, unsigned _len); + IVNode* find(unsigned _key, int* store, bool ifmodify); + bool remove(unsigned _key); const Bstr* getRangeValue(); void resetStream(); - bool range_query(int _key1, int _key2); + bool range_query(unsigned _key1, unsigned _key2); bool save(); ~IVTree(); void print(std::string s); //DEBUG(print the tree) diff --git a/KVstore/IVTree/node/IVLeafNode.cpp b/KVstore/IVTree/node/IVLeafNode.cpp index bba580b..eb8de5c 100644 --- a/KVstore/IVTree/node/IVLeafNode.cpp +++ b/KVstore/IVTree/node/IVLeafNode.cpp @@ -89,25 +89,74 @@ IVLeafNode::getValue(int _index) const return this->values + _index; } -//TODO!!! bool -IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const +IVLeafNode::setValue(const Bstr* _value, int _index, bool _ifcopy) { - //TODO: read long list - return true; -} - -bool -IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) -{ - //TODO: consider the long list, how to cancel and reset int num = this->getNum(); if (_index < 0 || _index >= num) { print(string("error in setValue: Invalid index ") + Util::int2string(_index)); return false; } + this->values[_index].release(); //NOTICE: only used in modify + + if(_ifcopy) + { + this->values[_index].copy(_value); + } + else + { + this->values[_index] = *_value; + } + + return true; +} + +bool +IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getValue: Invalid index ") + Util::int2string(_index)); + return NULL; + } + + //read long list + if(this->values[_index].isBstrLongList()) + { + unsigned block_num = this->values[_index].getLen(); + _vlist->readValue(block_num, _str, _len); + } + else + { + _str = this->values[_index].getStr(); + _len = this->values[_index].getLen(); + } + + return true; +} + +bool +IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + if(this->values[_index].isBstrLongList()) + { + unsigned block_num = this->values[_index].getLen(); + _vlist->removeValue(block_num); + } + else + { + this->values[_index].release(); //NOTICE: only used in modify + } //DEBUG: we do not need to copy here //we just need to ensure that the pointer's memory is not released @@ -119,8 +168,17 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool //else //{ //this->values[_index] = *_value; + if(VList::isLongList(_len)) + { + unsigned block_num = _vlist->writeValue(_str, _len); + this->values[_index].setStr(NULL); + this->values[_index].setLen(block_num); + } + else + { this->values[_index].setStr(_str); this->values[_index].setLen(_len); + } //} return true; } @@ -128,23 +186,34 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool bool IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) { - //TODO:if the list is too large int num = this->getNum(); if (_index < 0 || _index > num) { print(string("error in addValue: Invalid index ") + Util::int2string(_index)); return false; } - int i; - for (i = num - 1; i >= _index; --i) + + for (int i = num - 1; i >= _index; --i) this->values[i + 1] = this->values[i]; //if (ifcopy) //this->values[_index].copy(_value); //else //this->values[_index] = *_value; - this->values[_index].setStr(_str); - this->values[_index].setLen(_len); + + if(VList::isLongList(_len)) + { + unsigned block_num = _vlist->writeValue(_str, _len); + this->values[_index].setStr(NULL); + this->values[_index].setLen(block_num); + } + else + { + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + } + //this->values[_index].setStr(_str); + //this->values[_index].setLen(_len); return true; } @@ -152,7 +221,6 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool bool IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) { - //TODO: if is to sub long list int num = this->getNum(); if (_index < 0 || _index >= num) { @@ -160,10 +228,20 @@ IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) return false; } - int i; - if (ifdel) - values[_index].release(); - for (i = _index; i < num - 1; ++i) + if(this->values[_index].isBstrLongList()) + { + unsigned block_num = this->values[_index].getLen(); + _vlist->removeValue(block_num); + } + else + { + if (ifdel) + { + values[_index].release(); + } + } + + for (int i = _index; i < num - 1; ++i) this->values[i] = this->values[i + 1]; return true; diff --git a/KVstore/IVTree/node/IVLeafNode.h b/KVstore/IVTree/node/IVLeafNode.h index 56638bd..6b2439a 100644 --- a/KVstore/IVTree/node/IVLeafNode.h +++ b/KVstore/IVTree/node/IVLeafNode.h @@ -28,6 +28,8 @@ public: IVNode* getPrev() const; IVNode* getNext() const; const Bstr* getValue(int _index) const; + bool setValue(const Bstr* _value, int _index, bool _ifcopy=false); + bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const; bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false); diff --git a/KVstore/IVTree/node/IVNode.h b/KVstore/IVTree/node/IVNode.h index a7d6b59..a942042 100644 --- a/KVstore/IVTree/node/IVNode.h +++ b/KVstore/IVTree/node/IVNode.h @@ -83,6 +83,7 @@ public: virtual IVNode* getNext() const { return NULL; }; virtual const Bstr* getValue(int _index) const { return NULL; }; + virtual bool setValue(const Bstr* _value, int _index, bool _ifcopy=false) { return true; }; virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; }; virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; diff --git a/KVstore/IVTree/storage/IVStorage.cpp b/KVstore/IVTree/storage/IVStorage.cpp index d203d81..2c5b72a 100644 --- a/KVstore/IVTree/storage/IVStorage.cpp +++ b/KVstore/IVTree/storage/IVStorage.cpp @@ -348,6 +348,11 @@ IVStorage::createNode(IVNode*& _np) //cretae virtual nodes, not in-mem return true; } +//BETTER: Does SpecialBlock really needed? why can't we place next before flag?? +// +//NOTICE: root num begins from 1, if root num is 0, then it is invalid, i.e. the tree is NULL +//(and ftell(root address) will be 0 either) + bool IVStorage::writeNode(IVNode* _np) { @@ -446,7 +451,10 @@ IVStorage::readBstr(Bstr* _bp, unsigned* _next) } //this->request(len); - char* s = (char*)malloc(len); + + //NOTICE: we use new for all, consistent with Bstr and KVstore + //char* s = (char*)malloc(len); + char* s = new char[len]; _bp->setLen(len); for (i = 0; i + 4 < len; i += 4) { diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index 9b57917..e9c4e2a 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -99,8 +99,8 @@ int KVstore::getEntityDegree(int _entity_id) const { int KVstore::getEntityInDegree(int _entity_id) const { //cout << "In getEntityInDegree " << _entity_id << endl; - int* _tmp = NULL; - int _len = 0; + unsigned* _tmp = NULL; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len); if (!_get) { return 0; @@ -110,8 +110,8 @@ int KVstore::getEntityInDegree(int _entity_id) const { int KVstore::getEntityOutDegree(int _entity_id) const { //cout << "In getEntityOutDegree " << _entity_id << endl; - int* _tmp = NULL; - int _len = 0; + unsigned* _tmp = NULL; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len); if (!_get) { return 0; @@ -121,8 +121,8 @@ int KVstore::getEntityOutDegree(int _entity_id) const { int KVstore::getLiteralDegree(int _literal_id) const { //cout << "In getLiteralDegree " << _literal_id << endl; - int* _tmp = NULL; - int _len = 0; + unsigned* _tmp = NULL; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len); if (!_get) { return 0; @@ -132,8 +132,8 @@ int KVstore::getLiteralDegree(int _literal_id) const { int KVstore::getPredicateDegree(int _predicate_id) const { //cout << "In getPredicate Degree " << _predicate_id << endl; - int* _tmp = NULL; - int _len = 0; + unsigned* _tmp = NULL; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len); if (!_get) { return 0; @@ -143,8 +143,10 @@ int KVstore::getPredicateDegree(int _predicate_id) const { int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const { //cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl; + + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) { return 0; @@ -166,8 +168,10 @@ int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const { int KVstore::getObjectPredicateDegree(int _objid, int _preid) const { //cout << "In getObjectPredicateDegree " << _objid << _preid << endl; + + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { return 0; @@ -352,8 +356,9 @@ bool KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id) { } bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); bool _is_entity = Util::is_entity_ele(_obj_id); @@ -453,8 +458,9 @@ bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) { } bool KVstore::updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); bool _is_entity = Util::is_entity_ele(_obj_id); @@ -564,8 +570,9 @@ bool KVstore::updateRemove_s2values(int _subid, const std::vector& _pidoidl } bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); //objID doesn't exist @@ -659,8 +666,9 @@ bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) { } bool KVstore::updateRemove_o2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); if (!_get) { @@ -763,8 +771,9 @@ bool KVstore::updateRemove_o2values(int _objid, const std::vector& _pidsidl } bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); //preid doesn't exist @@ -804,8 +813,9 @@ bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) { } bool KVstore::updateRemove_p2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); if (!_get) { @@ -1405,8 +1415,9 @@ KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _list_len = 0; return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) @@ -1442,8 +1453,9 @@ KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _list_len = 0; return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) { @@ -1481,8 +1493,9 @@ KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) { _objidlist = NULL; @@ -1531,8 +1544,9 @@ KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) { _preid_objidlist = NULL; @@ -1682,8 +1696,10 @@ bool KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getpreIDlistByobjID " << _objid << endl; + + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { _preidlist = NULL; @@ -1711,8 +1727,10 @@ bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getsubIDlistByobjID " << _objid << endl; + + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { _subidlist = NULL; @@ -1743,8 +1761,9 @@ bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getsubIDlistByobjIDpreID " << _objid << ' ' << _preid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { _subidlist = NULL; @@ -1786,8 +1805,9 @@ bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getpreIDsubIDlistByobjID " << _objid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { _preid_subidlist = NULL; @@ -1915,8 +1935,9 @@ bool KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getsubIDlistBypreID " << _preid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); if (!_get) { _subidlist = NULL; @@ -1946,8 +1967,9 @@ bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getobjIDlistBypreID " << _preid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); if (!_get) { _objidlist = NULL; @@ -1978,8 +2000,9 @@ bool KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getsubIDobjIDlistBypreID " << _preid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); if (!_get) { _subid_objidlist = NULL; @@ -2032,8 +2055,9 @@ KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); _list_len = len; int _result = 0; @@ -2199,7 +2223,7 @@ KVstore::addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen) } bool -KVstore::addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen) +KVstore::addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen) { return _p_btree->insert(_key, _val, _vlen); } @@ -2217,7 +2241,7 @@ KVstore::setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen) } bool -KVstore::setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen) +KVstore::setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen) { return _p_btree->modify(_key, _val, _vlen); } @@ -2235,7 +2259,7 @@ KVstore::getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) cons } bool -KVstore::getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const +KVstore::getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const { return _p_btree->search(_key, _val, _vlen); } diff --git a/KVstore/KVstore.h b/KVstore/KVstore.h index 5372e63..abea790 100644 --- a/KVstore/KVstore.h +++ b/KVstore/KVstore.h @@ -19,6 +19,11 @@ //QUERY: but to count the length each time maybe very costly? //No, because triple num is stored in char* now!!!! we do not need to save it again //TODO: entity_border in s2values list is not needed!!! not waste memory here +// +//QUERY: but to implement vlist, we need a unsigned flag +//What is more, we need to store the string in disk, how can we store it if without the length? +//unsigned type stored as chars, maybe will have '\0' +//In memory, we do not know when the oidlist ends if without the original length (butthe triple num will answer this!) class KVstore { @@ -197,15 +202,15 @@ private: bool addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val); bool addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen); - bool addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen); + bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); bool setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val); bool setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen); - bool setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen); + bool setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const; bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const; - bool getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const; + bool getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const; int getIDByStr(SITree* _p_btree, const char* _key, int _klen) const; diff --git a/KVstore/Tree.h b/KVstore/Tree.h index b73612b..2574b2f 100644 --- a/KVstore/Tree.h +++ b/KVstore/Tree.h @@ -1,5 +1,5 @@ //headers wrapper for all kinds of BPlusTree -#include "IVTree/IVTree.h" #include "ISTree/ISTree.h" #include "SITree/SITree.h" +#include "IVTree/IVTree.h" diff --git a/NOTES.md b/NOTES.md index deef0fb..d797830 100644 --- a/NOTES.md +++ b/NOTES.md @@ -88,9 +88,8 @@ http://blog.csdn.net/infoworld/article/details/8670951 要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned) 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧 -type分支中query过程可能还有问题,需要修改Query/里面的类型 -去掉tree里面的复制,另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现,避免每次都重新new,但这会影响多线程程序 -而且在kvstore中往往需要对原始list做一些额外处理 +type分支中query过程可能还有问题,需要修改Query/里面的类型,另外stringindex中也要修改,分界线已经是20亿且非法不再是-1 +vstree在build和query时可以用不同大小的缓存,来加速build过程 --- UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍 解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用unsigned long long*和unsigned来表示,这样最高可支持到40亿triple diff --git a/Util/Bstr.cpp b/Util/Bstr.cpp index 8157700..a3282f2 100644 --- a/Util/Bstr.cpp +++ b/Util/Bstr.cpp @@ -122,7 +122,8 @@ unsigned Bstr::getLen() const { //NOTICE: this is for VList - if(this->str == NULL) + if(this->isBstrLongList()) + //if(this->str == NULL) { return 0; } @@ -214,3 +215,9 @@ Bstr::print(string s) const //#endif } +bool +Bstr::isBstrLongList() const +{ + return this->str == NULL; +} + diff --git a/Util/Bstr.h b/Util/Bstr.h index fc2cd9f..aaaf84a 100644 --- a/Util/Bstr.h +++ b/Util/Bstr.h @@ -48,6 +48,9 @@ public: //int write(FILE* _fp); ~Bstr(); void print(std::string s) const; //DEBUG + + //judge if this Bstr represent a long list value, and waiting to be each time on need + bool isBstrLongList() const; }; #endif // _UTIL_BSTR_H diff --git a/Util/VList.cpp b/Util/VList.cpp index 7b772f3..261d887 100644 --- a/Util/VList.cpp +++ b/Util/VList.cpp @@ -149,6 +149,10 @@ VList::FreeBlock(unsigned _blocknum) //NOTICE: all reads are aligned to 4 bytes(including a string) //a string may acrossseveral blocks +// +//NOTICE: not use buffer, read/write on need, update at once, so no need to write back at last +//NOTICE: the next is placed at the begin of a block + void VList::ReadAlign(unsigned* _next) @@ -161,38 +165,68 @@ VList::ReadAlign(unsigned* _next) } void -VList::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +VList::WriteAlign(unsigned* _curnum) { if (ftell(valfp) % BLOCK_SIZE == 0) { unsigned blocknum = this->AllocBlock(); fseek(valfp, Address(*_curnum), SEEK_SET); - if (_SpecialBlock) - { - fseek(valfp, 4, SEEK_CUR); - _SpecialBlock = false; - } fwrite(&blocknum, sizeof(unsigned), 1, valfp); fseek(valfp, Address(blocknum) + 4, SEEK_SET); *_curnum = blocknum; } } -//TODO: check , read/write a long list, across several blocks -//not use buffer, read/write on need, update at once, so no need to write back at last +bool +VList::readValue(unsigned _block_num, char*& _str, unsigned& _len) +{ + fseek(valfp, Address(_block_num), SEEK_SET); + unsigned next; + fread(&next, sizeof(unsigned), 1, valfp); + this->readBstr(_str, _len, &next); + + return true; +} + +unsigned +VList::writeValue(const char* _str, unsigned _len) +{ + unsigned blocknum = this->AllocBlock(); + unsigned curnum = blocknum; + this->writeBstr(_str, _len, &curnum); + + return blocknum; +} + +bool +VList::removeValue(unsigned _block_num) +{ + unsigned store = _block_num, next; + fseek(this->valfp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, valfp); + + while (store != 0) + { + this->FreeBlock(store); + store = next; + fseek(valfp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, valfp); + } + + return true; +} -//TODO: still use Bstr?? how can we get the next pointer?? use NULL to init -//NOTICE: the next is placed at the begin of a block bool -VList::readBstr(Bstr* _bp, unsigned* _next) +VList::readBstr(char*& _str, unsigned& _len, unsigned* _next) { //long address; unsigned len, i, j; fread(&len, sizeof(unsigned), 1, this->valfp); this->ReadAlign(_next); - //this->request(len); + char* s = (char*)malloc(len); - _bp->setLen(len); + _len = len; + for (i = 0; i + 4 < len; i += 4) { fread(s + i, sizeof(char), 4, valfp); @@ -203,38 +237,52 @@ VList::readBstr(Bstr* _bp, unsigned* _next) fread(s + i, sizeof(char), 1, valfp); //BETTER i++; } + j = len % 4; if (j > 0) j = 4 - j; fseek(valfp, j, SEEK_CUR); - this->ReadAlign(_next); - _bp->setStr(s); + //NOTICE+DEBUG: I think no need to align here, later no data to read + //(if need to read, then fseek again to find a new value) + //this->ReadAlign(_next); + + _str = s; return true; } bool -VList::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum) { - unsigned i, j, len = _bp->getLen(); + unsigned i, j, len = _len; fwrite(&len, sizeof(unsigned), 1, valfp); - this->WriteAlign(_curnum, _SpecialBlock); - char* s = _bp->getStr(); + this->WriteAlign(_curnum); + + //BETTER: compute this need how many blocks first, then write a block a time + + const char* s = _str; for (i = 0; i + 4 < len; i += 4) { fwrite(s + i, sizeof(char), 4, valfp); - this->WriteAlign(_curnum, _SpecialBlock); + this->WriteAlign(_curnum); } while (i < len) { fwrite(s + i, sizeof(char), 1, valfp); i++; } + j = len % 4; if (j > 0) j = 4 - j; fseek(valfp, j, SEEK_CUR); - this->WriteAlign(_curnum, _SpecialBlock); + + //NOTICE+DEBUG: I think no need to align here, later no data to write + //(if need to write, then fseek again to write a new value) + //this->WriteAlign(_curnum); + fseek(valfp, Address(*_curnum), SEEK_SET); + unsigned t = 0; + fwrite(&t, sizeof(unsigned), 1, valfp); return true; } diff --git a/Util/VList.h b/Util/VList.h index a328b83..2719bf5 100644 --- a/Util/VList.h +++ b/Util/VList.h @@ -12,7 +12,10 @@ #include "Util.h" #include "Bstr.h" -//TODO: not keep long list in memory, read each time +//TODO: all use new/delete for Bstr, KVstore and trees, including Stream +//then give a full test, including valgrind + +//NOTICE: not keep long list in memory, read each time //but when can you free the long list(kvstore should release it after parsing) // //CONSIDER: if to keep long list in memory, should adjust the bstr in memory: @@ -61,15 +64,16 @@ private: unsigned AllocBlock(); void FreeBlock(unsigned _blocknum); void ReadAlign(unsigned* _next); - void WriteAlign(unsigned* _next, bool& _SpecialBlock); + void WriteAlign(unsigned* _next); + bool readBstr(char*& _bp, unsigned& _len, unsigned* _next); + bool writeBstr(const char* _str, unsigned _len, unsigned* _curnum); public: VList(); VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence - bool readBstr(Bstr* _bp, unsigned* _next); - bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); - bool readValue(unsigned _block_num); - bool writeValue(const Bstr* _bp); + bool readValue(unsigned _block_num, char*& _str, unsigned& _len); + unsigned writeValue(const char* _str, unsigned _len); + bool removeValue(unsigned _block_num); ~VList(); static bool isLongList(unsigned _len); diff --git a/makefile b/makefile index da62f17..7fb6524 100644 --- a/makefile +++ b/makefile @@ -72,9 +72,9 @@ sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SI istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o -kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj) +kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstreeobj) -utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o +utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \ $(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o @@ -219,7 +219,7 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $ #objects in istree/ end #objects in ivtree/ begin -$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o +$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o $(objdir)VList.o $(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o $(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o @@ -323,6 +323,9 @@ $(objdir)Triple.o: Util/Triple.cpp Util/Triple.h $(objdir)Util.o $(objdir)BloomFilter.o: Util/BloomFilter.cpp Util/BloomFilter.h $(objdir)Util.o $(CC) $(CFLAGS) Util/BloomFilter.cpp -o $(objdir)BloomFilter.o +$(objdir)VList.o: Util/VList.cpp Util/VList.h + $(CC) $(CFLAGS) Util/VList.cpp -o $(objdir)VList.o + #objects in util/ end