From 939e84c8bff7160e90a8ceeff31a0ddbfda16113 Mon Sep 17 00:00:00 2001 From: bookug Date: Wed, 29 Mar 2017 23:57:09 +0800 Subject: [PATCH] refactor: to add long list value TODO: vlist support and IVLeafNode by zengli, long list need to be freed each time --- KVstore/ISTree/ISTree.cpp | 89 ++++++++-------- KVstore/ISTree/ISTree.h | 10 +- KVstore/ISTree/node/ISLeafNode.cpp | 38 ++++++- KVstore/ISTree/node/ISLeafNode.h | 7 +- KVstore/ISTree/node/ISNode.h | 8 +- KVstore/IVTree/IVTree.cpp | 17 ++- KVstore/IVTree/IVTree.h | 6 +- KVstore/IVTree/node/IVLeafNode.cpp | 41 +++---- KVstore/IVTree/node/IVNode.h | 5 +- KVstore/IVTree/storage/IVStorage.h | 2 +- KVstore/KVstore.cpp | 109 +++++++++++++++++-- KVstore/KVstore.h | 20 ++-- KVstore/SITree/SITree.cpp | 2 +- KVstore/SITree/SITree.h | 2 +- KVstore/SITree/node/SILeafNode.cpp | 2 +- KVstore/SITree/node/SILeafNode.h | 8 +- KVstore/SITree/node/SINode.cpp | 76 ++++++++++++- KVstore/SITree/node/SINode.h | 11 +- NOTES.md | 2 +- Util/Bstr.h | 1 + {KVstore/IVTree/vlist => Util}/VList.cpp | 129 +++++++++++------------ {KVstore/IVTree/vlist => Util}/VList.h | 22 ++-- 22 files changed, 436 insertions(+), 171 deletions(-) rename {KVstore/IVTree/vlist => Util}/VList.cpp (56%) rename {KVstore/IVTree/vlist => Util}/VList.h (76%) diff --git a/KVstore/ISTree/ISTree.cpp b/KVstore/ISTree/ISTree.cpp index 5a23fcb..02aab9f 100644 --- a/KVstore/ISTree/ISTree.cpp +++ b/KVstore/ISTree/ISTree.cpp @@ -20,7 +20,7 @@ ISTree::ISTree() TSM = NULL; storepath = ""; filename = ""; - transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; + //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; this->stream = NULL; this->request = 0; } @@ -37,10 +37,10 @@ ISTree::ISTree(string _storepath, string _filename, string _mode, unsigned long this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail); else this->root = NULL; - this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M + //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M this->stream = NULL; this->request = 0; } @@ -51,30 +51,30 @@ ISTree::getFilePath() return storepath + "/" + filename; } -void //WARN: not check _str and _len -ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) -{ - if (_index > 2) - return; - /* - if(_str == NULL || _len == 0) - { - printf("error in CopyToTransfer: empty string\n"); - return; - } - */ - //unsigned length = _bstr->getLen(); - unsigned length = _len; - if (length + 1 > this->transfer_size[_index]) - { - transfer[_index].release(); - transfer[_index].setStr((char*)malloc(length + 1)); - this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 - } - memcpy(this->transfer[_index].getStr(), _str, length); - this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore - this->transfer[_index].setLen(length); -} +//void //WARN: not check _str and _len +//ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + //transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} unsigned ISTree::getHeight() const @@ -121,15 +121,18 @@ ISTree::search(int _key, char*& _str, int& _len) } const Bstr* val = ret->getValue(store); - this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request - _str = this->transfer[0].getStr(); - _len = this->transfer[0].getLen(); + //this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request + //_str = this->transfer[0].getStr(); + //_len = this->transfer[0].getLen(); + _str = val->getStr(); + _len = val->getLen(); + this->TSM->request(request); return true; } bool -ISTree::insert(int _key, const char* _str, unsigned _len) +ISTree::insert(int _key, char* _str, unsigned _len) { if (_key < 0) { @@ -137,8 +140,8 @@ ISTree::insert(int _key, const char* _str, unsigned _len) return false; } - this->CopyToTransfer(_str, _len, 2); - const Bstr* val = &(this->transfer[2]); + //this->CopyToTransfer(_str, _len, 2); + //const Bstr* val = &(this->transfer[2]); this->request = 0; ISNode* ret; if (this->root == NULL) //tree is empty @@ -222,9 +225,9 @@ ISTree::insert(int _key, const char* _str, unsigned _len) else { p->addKey(_key, i); - p->addValue(val, i, true); + p->addValue(_str, _len, i, true); p->addNum(); - request += val->getLen(); + request += _len; p->setDirty(); this->TSM->updateHeap(p, p->getRank(), true); //_key->clear(); @@ -235,7 +238,7 @@ ISTree::insert(int _key, const char* _str, unsigned _len) } bool -ISTree::modify(int _key, const char* _str, unsigned _len) +ISTree::modify(int _key, char* _str, unsigned _len) { if (_key < 0) { @@ -243,8 +246,8 @@ ISTree::modify(int _key, const char* _str, unsigned _len) return false; } - this->CopyToTransfer(_str, _len, 2); //not check value - const Bstr* val = &(this->transfer[2]); + //this->CopyToTransfer(_str, _len, 2); //not check value + //const Bstr* val = &(this->transfer[2]); this->request = 0; int store; ISNode* ret = this->find(_key, &store, true); @@ -255,16 +258,17 @@ ISTree::modify(int _key, const char* _str, unsigned _len) } //cout<<"ISTree::modify() - key is found, now to remove"<getValue(store)->getLen(); - ret->setValue(val, store, true); + ret->setValue(_str, _len, store, true); //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); - this->request = val->getLen(); + this->request = _len; this->request -= len; ret->setDirty(); //cout<<"to request"<TSM->request(request); //cout<<"memory requested"<getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + this->values[_index].release(); //NOTICE: only used in modify + + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + + return true; +} + +bool +ISLeafNode::addValue(char* _str, unsigned _len, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + + return true; +} + bool ISLeafNode::subValue(int _index, bool ifdel) { @@ -373,4 +409,4 @@ ISLeafNode::print(string s) } else; #endif -} \ No newline at end of file +} diff --git a/KVstore/ISTree/node/ISLeafNode.h b/KVstore/ISTree/node/ISLeafNode.h index 0d965f2..d288bfb 100644 --- a/KVstore/ISTree/node/ISLeafNode.h +++ b/KVstore/ISTree/node/ISLeafNode.h @@ -27,6 +27,7 @@ public: void Normal(); ISNode* getPrev() const; ISNode* getNext() const; + const Bstr* getValue(int _index) const; bool setValue(const Bstr* _value, int _index, bool ifcopy = false); bool addValue(const Bstr* _value, int _index, bool ifcopy = false); @@ -34,6 +35,10 @@ public: void setPrev(ISNode* _prev); void setNext(ISNode* _next); unsigned getSize() const; + + bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false); + bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false); + ISNode* split(ISNode* _father, int _index); ISNode* coalesce(ISNode* _father, int _index); void release(); @@ -47,4 +52,4 @@ public: }; //BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next -#endif \ No newline at end of file +#endif diff --git a/KVstore/ISTree/node/ISNode.h b/KVstore/ISTree/node/ISNode.h index b25b544..7c8fc1d 100644 --- a/KVstore/ISTree/node/ISNode.h +++ b/KVstore/ISTree/node/ISNode.h @@ -80,12 +80,18 @@ public: virtual bool subChild(int _index) { return true; }; virtual ISNode* getPrev() const { return NULL; }; virtual ISNode* getNext() const { return NULL; }; + virtual const Bstr* getValue(int _index) const { return NULL; }; virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; }; virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; }; virtual bool subValue(int _index, bool ifdel = false) { return true; }; virtual void setPrev(ISNode* _prev) {}; virtual void setNext(ISNode* _next) {}; + + virtual bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; }; + virtual bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; }; + + //pure virtual function virtual void Virtual() = 0; virtual void Normal() = 0; virtual unsigned getSize() const = 0; //return all memory owned @@ -110,4 +116,4 @@ public: *to release the whole(pointer is invalid and rebuild problem) */ -#endif \ No newline at end of file +#endif diff --git a/KVstore/IVTree/IVTree.cpp b/KVstore/IVTree/IVTree.cpp index 1dee1cf..eac36a1 100644 --- a/KVstore/IVTree/IVTree.cpp +++ b/KVstore/IVTree/IVTree.cpp @@ -36,7 +36,7 @@ IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long string filepath = this->getFilePath(); string vlist_file = filepath + "_vlist"; - this->value_list = new VList(vlist_file, 1<<30); + this->value_list = new VList(vlist_file, this->mode, 1<<30); TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list); if (this->mode == "open") @@ -142,7 +142,7 @@ IVTree::search(int _key, char*& _str, int& _len) } bool -IVTree::insert(int _key, const char* _str, unsigned _len) +IVTree::insert(int _key, char* _str, unsigned _len) { if (_key < 0) { @@ -244,12 +244,13 @@ IVTree::insert(int _key, const char* _str, unsigned _len) //_key->clear(); //_value->clear(); } + this->TSM->request(request); return !ifexist; //QUERY(which case:return false) } bool -IVTree::modify(int _key, const char* _str, unsigned _len) +IVTree::modify(int _key, char* _str, unsigned _len) { if (_key < 0) { @@ -268,11 +269,14 @@ IVTree::modify(int _key, const char* _str, unsigned _len) return false; } //cout<<"IVTree::modify() - key is found, now to remove"<getValue(store)->getLen(); ret->setValue(this->value_list, store, _str, _len, true); //ret->setValue(val, store, true); //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); this->request = _len; //this->request = val->getLen(); @@ -386,6 +390,7 @@ IVTree::remove(int _key) this->TSM->updateHeap(p, p->getRank(), true); p = q; } + bool flag = false; //j = p->getNum(); //LeafNode(maybe root) //for(i = 0; i < j; ++i) @@ -414,7 +419,7 @@ IVTree::remove(int _key) { request -= p->getValue(i)->getLen(); p->subKey(i); //to release - p->subValue(i, true); //to release + p->subValue(this->value_list, i, true); //to release p->subNum(); if (p->getNum() == 0) //root leaf 0 key { @@ -461,6 +466,7 @@ IVTree::resetStream() this->stream->setEnd(); } +//TODO: change to using value list, getValue() maybe not get real long list bool //special case: not exist, one-edge-case IVTree::range_query(int _key1, int _key2) { //the range is: *_key1 <= x < *_key2 @@ -555,6 +561,7 @@ IVTree::range_query(int _key1, int _key2) for (i = l; i < r; ++i) { //NOTICE:Bstr* in an array, used as Bstr[] + //DEBUG+TODO: if long list?? clean this->stream->write(p->getValue(i)); } this->TSM->request(request); @@ -563,7 +570,9 @@ IVTree::range_query(int _key1, int _key2) else break; } + this->stream->setEnd(); + return true; } diff --git a/KVstore/IVTree/IVTree.h b/KVstore/IVTree/IVTree.h index 86fbf27..3ae3897 100644 --- a/KVstore/IVTree/IVTree.h +++ b/KVstore/IVTree/IVTree.h @@ -11,11 +11,11 @@ #include "../../Util/Util.h" #include "../../Util/Stream.h" +#include "../../Util/VList.h" #include "node/IVNode.h" #include "node/IVIntlNode.h" #include "node/IVLeafNode.h" #include "storage/IVStorage.h" -#include "./vlist/VList.h" //TODO: for long list, do not read in time, just on need //the memory is kept with the node, updat ewith node @@ -80,8 +80,8 @@ public: //void setRoot(Node* _root); //insert, search, remove, set bool search(int _key, char*& _str, int& _len); - bool insert(int _key, const char* _str, unsigned _len); - bool modify(int _key, const char* _str, unsigned _len); + bool insert(int _key, char* _str, unsigned _len); + bool modify(int _key, char* _str, unsigned _len); IVNode* find(int _key, int* store, bool ifmodify); bool remove(int _key); const Bstr* getRangeValue(); diff --git a/KVstore/IVTree/node/IVLeafNode.cpp b/KVstore/IVTree/node/IVLeafNode.cpp index a35bd6f..bba580b 100644 --- a/KVstore/IVTree/node/IVLeafNode.cpp +++ b/KVstore/IVTree/node/IVLeafNode.cpp @@ -89,6 +89,7 @@ IVLeafNode::getValue(int _index) const return this->values + _index; } +//TODO!!! bool IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { @@ -148,6 +149,26 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool return true; } +bool +IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) +{ + //TODO: if is to sub long list + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + int i; + if (ifdel) + values[_index].release(); + for (i = _index; i < num - 1; ++i) + this->values[i] = this->values[i + 1]; + + return true; +} + bool IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy) { @@ -169,26 +190,6 @@ IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy) return true; } -bool -IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) -{ - //TODO: if is to sub long list - int num = this->getNum(); - if (_index < 0 || _index >= num) - { - print(string("error in subValue: Invalid index ") + Util::int2string(_index)); - return false; - } - - int i; - if (ifdel) - values[_index].release(); - for (i = _index; i < num - 1; ++i) - this->values[i] = this->values[i + 1]; - - return true; -} - bool IVLeafNode::subValue(int _index, bool ifdel) { diff --git a/KVstore/IVTree/node/IVNode.h b/KVstore/IVTree/node/IVNode.h index 20d6cfe..a7d6b59 100644 --- a/KVstore/IVTree/node/IVNode.h +++ b/KVstore/IVTree/node/IVNode.h @@ -11,7 +11,7 @@ #include "../../../Util/Util.h" #include "../../../Util/Bstr.h" -#include "../vlist/VList.h" +#include "../../../Util/VList.h" class IVNode //abstract basic class { @@ -81,6 +81,7 @@ public: virtual bool subChild(int _index) { return true; }; virtual IVNode* getPrev() const { return NULL; }; virtual IVNode* getNext() const { return NULL; }; + virtual const Bstr* getValue(int _index) const { return NULL; }; virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; }; virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; @@ -92,6 +93,8 @@ public: virtual void setPrev(IVNode* _prev) {}; virtual void setNext(IVNode* _next) {}; + + //pure virtual functions virtual void Virtual() = 0; virtual void Normal() = 0; virtual unsigned getSize() const = 0; //return all memory owned diff --git a/KVstore/IVTree/storage/IVStorage.h b/KVstore/IVTree/storage/IVStorage.h index 88525e2..37e13ae 100644 --- a/KVstore/IVTree/storage/IVStorage.h +++ b/KVstore/IVTree/storage/IVStorage.h @@ -9,10 +9,10 @@ #ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H #define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H +#include "../../../Util/VList.h" #include "../node/IVIntlNode.h" #include "../node/IVLeafNode.h" #include "../heap/IVHeap.h" -#include "../vlist/VList.h" //It controls read, write, swap class IVStorage diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index bd9cd93..9b57917 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -1422,6 +1422,14 @@ KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _preidlist[i] = _tmp[2 * i + 3]; } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1452,6 +1460,14 @@ KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _list_len = Util::removeDuplicate(_objidlist, _list_len); } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1493,6 +1509,14 @@ KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _objidlist = new int[_list_len]; memcpy(_objidlist, _tmp + _offset, sizeof(int) * _list_len); + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1533,6 +1557,14 @@ KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list } } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1664,6 +1696,14 @@ KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _preidlist[i] = _tmp[2 * i + 2]; } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1688,6 +1728,14 @@ KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _list_len = Util::removeDuplicate(_subidlist, _list_len); } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1723,6 +1771,14 @@ KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _subidlist = new int[_list_len]; memcpy(_subidlist, _tmp + _offset, sizeof(int) * _list_len); + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1756,6 +1812,14 @@ KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list } } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1867,6 +1931,14 @@ KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _list_len = Util::removeDuplicate(_subidlist, _list_len); } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1891,6 +1963,14 @@ KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _list_len = Util::removeDuplicate(_objidlist, _list_len); } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1913,6 +1993,14 @@ KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list _subid_objidlist[2 * i + 1] = _tmp[1 + _tmp[0] + i]; } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1977,6 +2065,14 @@ KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& } } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + if (_list_len == 0) { _preidlist = NULL; return false; @@ -2091,37 +2187,37 @@ KVstore::flush(IVTree* _p_btree) } bool -KVstore::addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) +KVstore::addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val) { return _p_btree->insert(_key, _klen, _val); } bool -KVstore::addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) +KVstore::addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen) { return _p_btree->insert(_key, _val, _vlen); } bool -KVstore::addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen) +KVstore::addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen) { return _p_btree->insert(_key, _val, _vlen); } bool -KVstore::setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) +KVstore::setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val) { return _p_btree->modify(_key, _klen, _val); } bool -KVstore::setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) +KVstore::setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen) { return _p_btree->modify(_key, _val, _vlen); } bool -KVstore::setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen) +KVstore::setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen) { return _p_btree->modify(_key, _val, _vlen); } @@ -2218,6 +2314,7 @@ KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step) } //TODO: better to adjust these parameters according to memory usage and entity num +//need a memory manager first string KVstore::s_entity2id = "s_entity2id"; string KVstore::s_id2entity = "s_id2entity"; unsigned short KVstore::buffer_entity2id_build = 8; diff --git a/KVstore/KVstore.h b/KVstore/KVstore.h index c74940b..5372e63 100644 --- a/KVstore/KVstore.h +++ b/KVstore/KVstore.h @@ -10,8 +10,16 @@ #define _KVSTORE_KVSTORE_H #include "../Util/Util.h" +#include "../Util/VList.h" #include "Tree.h" +//TODO: is it needed to keep a length in Bstr?? especially for IVTree? +//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment) +//add a \0 in tail: only add 1 char +//QUERY: but to count the length each time maybe very costly? +//No, because triple num is stored in char* now!!!! we do not need to save it again +//TODO: entity_border in s2values list is not needed!!! not waste memory here + class KVstore { public: @@ -187,13 +195,13 @@ private: void flush(ISTree* _p_btree); void flush(IVTree* _p_btree); - bool addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val); - bool addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen); - bool addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen); + bool addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val); + bool addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen); + bool addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen); - bool setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val); - bool setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen); - bool setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen); + bool setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val); + bool setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen); + bool setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen); bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const; bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const; diff --git a/KVstore/SITree/SITree.cpp b/KVstore/SITree/SITree.cpp index 1dfef4d..3502611 100644 --- a/KVstore/SITree/SITree.cpp +++ b/KVstore/SITree/SITree.cpp @@ -134,7 +134,7 @@ SITree::search(const char* _str, unsigned _len, int* _val) } bool -SITree::insert(const char* _str, unsigned _len, int _val) +SITree::insert(char* _str, unsigned _len, int _val) { if (_str == NULL || _len == 0) { diff --git a/KVstore/SITree/SITree.h b/KVstore/SITree/SITree.h index 52c27ee..6041e65 100644 --- a/KVstore/SITree/SITree.h +++ b/KVstore/SITree/SITree.h @@ -65,7 +65,7 @@ public: SINode* getRoot() const; //insert, search, remove, set bool search(const char* _str, unsigned _len, int* _val); - bool insert(const char* _str, unsigned _len, int _val); + bool insert(char* _str, unsigned _len, int _val); bool modify(const char* _str, unsigned _len, int _val); SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify); bool remove(const char* _str, unsigned _len); diff --git a/KVstore/SITree/node/SILeafNode.cpp b/KVstore/SITree/node/SILeafNode.cpp index 1e70488..13a2bfe 100644 --- a/KVstore/SITree/node/SILeafNode.cpp +++ b/KVstore/SITree/node/SILeafNode.cpp @@ -362,4 +362,4 @@ SILeafNode::print(string s) } else; #endif -} \ No newline at end of file +} diff --git a/KVstore/SITree/node/SILeafNode.h b/KVstore/SITree/node/SILeafNode.h index 7aa05f4..605ff09 100644 --- a/KVstore/SITree/node/SILeafNode.h +++ b/KVstore/SITree/node/SILeafNode.h @@ -27,18 +27,24 @@ public: void Normal(); SINode* getPrev() const; SINode* getNext() const; + int getValue(int _index) const; bool setValue(int _val, int _index); bool addValue(int _val, int _index); bool subValue(int _index); + void setPrev(SINode* _prev); void setNext(SINode* _next); + unsigned getSize() const; + SINode* split(SINode* _father, int _index); SINode* coalesce(SINode* _father, int _index); + void release(); ~SILeafNode(); void print(std::string s); //DEBUG + /*non-sense virtual function Node* getChild(int _index) const; bool addChild(Node* _child, int _index); @@ -47,4 +53,4 @@ public: }; //BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next -#endif \ No newline at end of file +#endif diff --git a/KVstore/SITree/node/SINode.cpp b/KVstore/SITree/node/SINode.cpp index d97ee47..5c18727 100644 --- a/KVstore/SITree/node/SINode.cpp +++ b/KVstore/SITree/node/SINode.cpp @@ -254,6 +254,27 @@ SINode::addKey(const Bstr* _key, int _index, bool ifcopy) return true; } +bool +SINode::addKey(char* _str, unsigned _len, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + //NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!! + //however. tree operations ensure that: when node is full, not add but split first! + for (i = num - 1; i >= _index; --i) + keys[i + 1] = keys[i]; + + keys[_index].setStr(_str); + keys[_index].setLen(_len); + + return true; +} + bool SINode::subKey(int _index, bool ifdel) { @@ -325,4 +346,57 @@ SINode::searchKey_lessEqual(const Bstr& _bstr) const return ret - 1; else return ret; -} \ No newline at end of file +} + +int +SINode::searchKey_less(const char* _str, unsigned _len) const +{ + int num = this->getNum(); + + int low = 0, high = num - 1, mid = -1; + while (low <= high) + { + mid = (low + high) / 2; + //if (this->keys[mid] > _bstr) + if (Util::compare(this->keys[mid].getStr(), this->keys[mid].getLen(), _str, _len) > 0) + { + if (low == mid) + break; + high = mid; + } + else + { + low = mid + 1; + } + } + + return low; +} + +int +SINode::searchKey_equal(const char* _str, unsigned _len) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + // if(bstr == *(p->getKey(i))) + // { + + int ret = this->searchKey_less(_str, _len); + //if (ret > 0 && this->keys[ret - 1] == _bstr) + if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0) + return ret - 1; + else + return num; +} + +int +SINode::searchKey_lessEqual(const char* _str, unsigned _len) const +{ + int ret = this->searchKey_less(_str, _len); + //if (ret > 0 && this->keys[ret - 1] == _bstr) + if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0) + return ret - 1; + else + return ret; +} + diff --git a/KVstore/SITree/node/SINode.h b/KVstore/SITree/node/SINode.h index 83c1eb9..297e489 100644 --- a/KVstore/SITree/node/SINode.h +++ b/KVstore/SITree/node/SINode.h @@ -64,9 +64,11 @@ public: void setStore(unsigned _store); unsigned getFlag() const; void setFlag(unsigned _flag); + const Bstr* getKey(int _index) const; //need to check the index bool setKey(const Bstr* _key, int _index, bool ifcopy = false); bool addKey(const Bstr* _key, int _index, bool ifcopy = false); + bool addKey(char* _str, unsigned _len, int _index, bool ifcopy = false); bool subKey(int _index, bool ifdel = false); //several binary key search utilities @@ -74,7 +76,12 @@ public: int searchKey_equal(const Bstr& _bstr) const; int searchKey_lessEqual(const Bstr& _bstr) const; + int searchKey_less(const char* _str, unsigned _len) const; + int searchKey_equal(const char* _str, unsigned _len) const; + int searchKey_lessEqual(const char* _str, unsigned _len) const; + //virtual functions: polymorphic + //NOTICE: not pure-virtual, not required to be implemented again, can be used now virtual SINode* getChild(int _index) const { return NULL; }; virtual bool setChild(SINode* _child, int _index) { return true; }; virtual bool addChild(SINode* _child, int _index) { return true; }; @@ -87,6 +94,8 @@ public: virtual bool subValue(int _index) { return true; }; virtual void setPrev(SINode* _prev) {}; virtual void setNext(SINode* _next) {}; + + //NOTICE: pure-virtual, must to be implemented again in the sub-class virtual void Virtual() = 0; virtual void Normal() = 0; virtual unsigned getSize() const = 0; //return all memory owned @@ -111,4 +120,4 @@ public: *to release the whole(pointer is invalid and rebuild problem) */ -#endif \ No newline at end of file +#endif diff --git a/NOTES.md b/NOTES.md index cf80444..deef0fb 100644 --- a/NOTES.md +++ b/NOTES.md @@ -88,7 +88,7 @@ http://blog.csdn.net/infoworld/article/details/8670951 要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned) 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧 -在type分支中,sub2id_pre2id_obj2id函数中,每次double增长可能无法充分利用unsigned空间,只能利用到2560000000,超过后最好直接设置为最大 +type分支中query过程可能还有问题,需要修改Query/里面的类型 去掉tree里面的复制,另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现,避免每次都重新new,但这会影响多线程程序 而且在kvstore中往往需要对原始list做一些额外处理 --- diff --git a/Util/Bstr.h b/Util/Bstr.h index 63d8cd9..fc2cd9f 100644 --- a/Util/Bstr.h +++ b/Util/Bstr.h @@ -18,6 +18,7 @@ class Bstr { private: char* str; //pointers consume 8 byte in 64-bit system + //TODO: the length maybe not needed unsigned length; public: diff --git a/KVstore/IVTree/vlist/VList.cpp b/Util/VList.cpp similarity index 56% rename from KVstore/IVTree/vlist/VList.cpp rename to Util/VList.cpp index 798b553..7b772f3 100644 --- a/KVstore/IVTree/vlist/VList.cpp +++ b/Util/VList.cpp @@ -10,55 +10,64 @@ using namespace std; +bool +VList::isLongList(unsigned _len) +{ + return _len > VList::LENGTH_BORDER; +} + VList::VList() { //not use ../logs/, notice the location of program cur_block_num = SET_BLOCK_NUM; filepath = ""; freelist = NULL; - treefp = NULL; - minheap = NULL; max_buffer_size = Util::MAX_BUFFER_SIZE; - heap_size = max_buffer_size / IVNode::INTL_SIZE; freemem = max_buffer_size; } -VList::VList(string& _filepath, unsigned long long _buffer_size) +VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size) { cur_block_num = SET_BLOCK_NUM; //initialize this->filepath = _filepath; + if (_mode == string("build")) - treefp = fopen(_filepath.c_str(), "w+b"); + valfp = fopen(_filepath.c_str(), "w+b"); else if (_mode == string("open")) - treefp = fopen(_filepath.c_str(), "r+b"); + valfp = fopen(_filepath.c_str(), "r+b"); else { - print(string("error in IVStorage: Invalid mode ") + _mode); + cout<treeheight = _height; //originally set to 0 + this->max_buffer_size = _buffer_size; - this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE; this->freemem = this->max_buffer_size; this->freelist = new BlockInfo; //null-head + + //TODO: read/write by char is too slow, how about read all and deal , then clear? + // + //BETTER: hwo about assign IDs in a dynamic way? + //limitID freelist + //QUETY: can free id list consume very large memory?? + unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE BlockInfo* bp; if (_mode == "build") { //write basic information i = 0; - fwrite(&i, sizeof(unsigned), 1, this->treefp); //height - fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum - fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num - fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + fwrite(&cur_block_num, sizeof(unsigned), 1, this->valfp); //current block num + //NOTICE: use a 1M block for a unsigned?? not ok! + fseek(this->valfp, BLOCK_SIZE, SEEK_SET); bp = this->freelist; j = cur_block_num / 8; for (i = 0; i < j; ++i) { - fputc(0, this->treefp); + fputc(0, this->valfp); for (k = 0; k < 8; ++k) { bp->next = new BlockInfo(i * 8 + k + 1, NULL); @@ -69,17 +78,14 @@ VList::VList(string& _filepath, unsigned long long _buffer_size) else //_mode == "open" { //read basic information - int rootnum; char c; - fread(this->treeheight, sizeof(unsigned), 1, this->treefp); - fread(&rootnum, sizeof(unsigned), 1, this->treefp); - fread(&cur_block_num, sizeof(unsigned), 1, this->treefp); - fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + fread(&cur_block_num, sizeof(unsigned), 1, this->valfp); + fseek(this->valfp, BLOCK_SIZE, SEEK_SET); bp = this->freelist; j = cur_block_num / 8; for (i = 0; i < j; ++i) { - c = fgetc(treefp); + c = fgetc(valfp); for (k = 0; k < 8; ++k) { if ((c & (1 << k)) == 0) @@ -89,14 +95,13 @@ VList::VList(string& _filepath, unsigned long long _buffer_size) } } } - fseek(treefp, Address(rootnum), SEEK_SET); - //treefp is now ahead of root-block } - this->minheap = new IVHeap(this->heap_size); + + //NOTICE: we do not need to alloc the blocks for free block id list, AllocBlock is only for later blocks } long //8-byte in 64-bit machine -IVStorage::Address(unsigned _blocknum) const //BETTER: inline function +VList::Address(unsigned _blocknum) const //BETTER: inline function { if (_blocknum == 0) return 0; @@ -110,13 +115,13 @@ IVStorage::Address(unsigned _blocknum) const //BETTER: inline function } unsigned -IVStorage::Blocknum(long address) const +VList::Blocknum(long address) const { return (address / BLOCK_SIZE) + 1 - this->SuperNum; } unsigned -IVStorage::AllocBlock() +VList::AllocBlock() { BlockInfo* p = this->freelist->next; if (p == NULL) @@ -131,11 +136,12 @@ IVStorage::AllocBlock() unsigned t = p->num; this->freelist->next = p->next; delete p; + return t; } void -IVStorage::FreeBlock(unsigned _blocknum) +VList::FreeBlock(unsigned _blocknum) { //QUERY: head-sub and tail-add will be better? BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next); this->freelist->next = bp; @@ -145,93 +151,96 @@ IVStorage::FreeBlock(unsigned _blocknum) //a string may acrossseveral blocks void -IVStorage::ReadAlign(unsigned* _next) +VList::ReadAlign(unsigned* _next) { - if (ftell(treefp) % BLOCK_SIZE == 0) + if (ftell(valfp) % BLOCK_SIZE == 0) { - fseek(treefp, Address(*_next), SEEK_SET); - fread(_next, sizeof(unsigned), 1, treefp); + fseek(valfp, Address(*_next), SEEK_SET); + fread(_next, sizeof(unsigned), 1, valfp); } } void -IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +VList::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) { - if (ftell(treefp) % BLOCK_SIZE == 0) + if (ftell(valfp) % BLOCK_SIZE == 0) { unsigned blocknum = this->AllocBlock(); - fseek(treefp, Address(*_curnum), SEEK_SET); + fseek(valfp, Address(*_curnum), SEEK_SET); if (_SpecialBlock) { - fseek(treefp, 4, SEEK_CUR); + fseek(valfp, 4, SEEK_CUR); _SpecialBlock = false; } - fwrite(&blocknum, sizeof(unsigned), 1, treefp); - fseek(treefp, Address(blocknum) + 4, SEEK_SET); + fwrite(&blocknum, sizeof(unsigned), 1, valfp); + fseek(valfp, Address(blocknum) + 4, SEEK_SET); *_curnum = blocknum; } } +//TODO: check , read/write a long list, across several blocks +//not use buffer, read/write on need, update at once, so no need to write back at last + +//TODO: still use Bstr?? how can we get the next pointer?? use NULL to init +//NOTICE: the next is placed at the begin of a block bool -IVStorage::readBstr(Bstr* _bp, unsigned* _next) +VList::readBstr(Bstr* _bp, unsigned* _next) { //long address; unsigned len, i, j; - fread(&len, sizeof(unsigned), 1, this->treefp); + fread(&len, sizeof(unsigned), 1, this->valfp); this->ReadAlign(_next); //this->request(len); char* s = (char*)malloc(len); _bp->setLen(len); for (i = 0; i + 4 < len; i += 4) { - fread(s + i, sizeof(char), 4, treefp); + fread(s + i, sizeof(char), 4, valfp); this->ReadAlign(_next); } while (i < len) { - fread(s + i, sizeof(char), 1, treefp); //BETTER + fread(s + i, sizeof(char), 1, valfp); //BETTER i++; } j = len % 4; if (j > 0) j = 4 - j; - fseek(treefp, j, SEEK_CUR); + fseek(valfp, j, SEEK_CUR); this->ReadAlign(_next); _bp->setStr(s); + return true; } bool -IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +VList::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) { unsigned i, j, len = _bp->getLen(); - fwrite(&len, sizeof(unsigned), 1, treefp); + fwrite(&len, sizeof(unsigned), 1, valfp); this->WriteAlign(_curnum, _SpecialBlock); char* s = _bp->getStr(); for (i = 0; i + 4 < len; i += 4) { - fwrite(s + i, sizeof(char), 4, treefp); + fwrite(s + i, sizeof(char), 4, valfp); this->WriteAlign(_curnum, _SpecialBlock); } while (i < len) { - fwrite(s + i, sizeof(char), 1, treefp); + fwrite(s + i, sizeof(char), 1, valfp); i++; } j = len % 4; if (j > 0) j = 4 - j; - fseek(treefp, j, SEEK_CUR); + fseek(valfp, j, SEEK_CUR); this->WriteAlign(_curnum, _SpecialBlock); + return true; } VList::~VList() { - //release heap and freelist... -#ifdef DEBUG_KVSTORE - printf("now to release the kvstore!\n"); -#endif BlockInfo* bp = this->freelist; BlockInfo* next; while (bp != NULL) @@ -240,18 +249,6 @@ VList::~VList() delete bp; bp = next; } -#ifdef DEBUG_KVSTORE - printf("already empty the freelist!\n"); -#endif - delete this->minheap; -#ifdef DEBUG_KVSTORE - printf("already empty the buffer heap!\n"); -#endif - fclose(this->treefp); - //#ifdef DEBUG_KVSTORE - //NOTICE:there is more than one tree - //fclose(Util::debug_kvstore); //NULL is ok! - //Util::debug_kvstore = NULL; - //#endif + fclose(this->valfp); } diff --git a/KVstore/IVTree/vlist/VList.h b/Util/VList.h similarity index 76% rename from KVstore/IVTree/vlist/VList.h rename to Util/VList.h index 61911f1..a328b83 100644 --- a/KVstore/IVTree/vlist/VList.h +++ b/Util/VList.h @@ -6,11 +6,11 @@ # Description: =============================================================================*/ -#ifndef _KVSTORE_IVTREE_STORAGE_VLIST_H -#define _KVSTORE_IVTREE_STORAGE_VLIST_H +#ifndef _UTIL_VLIST_H +#define _UTIL_VLIST_H -#include "../../../Util/Util.h" -#include "../../../Util/Bstr.h" +#include "Util.h" +#include "Bstr.h" //TODO: not keep long list in memory, read each time //but when can you free the long list(kvstore should release it after parsing) @@ -22,15 +22,21 @@ //BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts) +//STRUCT: a long list real-address is the block ID in file2(only for long value lists, a list across several 1M blocks) +//tree-value Bstr: unsigned=the real address char*=NULL +//in disk: +//file1 is tree file, the long list is represented as: 0 real-address +//NOTICE: long list is not kept in mmeory for cache, it is read/update each time on need! + class VList { public: //NOTICE:the border is 10^6, but the block is larger, 1M static const unsigned LENGTH_BORDER = 1000000; static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block - static const unsigned MAX_BLOCK_NUM = 1 << 16; //max block-num + static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num //below two constants: must can be exactly divided by 8 - static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num + static const unsigned SET_BLOCK_NUM = 1 << 2; //initial blocks num static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; @@ -59,12 +65,14 @@ private: public: VList(); - VList(std::string& _filepath, unsigned long long _buffer_size);//create a fixed-size file or open an existence + VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence bool readBstr(Bstr* _bp, unsigned* _next); bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); bool readValue(unsigned _block_num); bool writeValue(const Bstr* _bp); ~VList(); + + static bool isLongList(unsigned _len); }; #endif