From 464d994ba432f428e26f2bd7acc0bc2862b1d771 Mon Sep 17 00:00:00 2001 From: bookug Date: Sun, 26 Mar 2017 21:10:37 +0800 Subject: [PATCH 1/6] use constexpr for c++11 --- Database/Database.cpp | 1 + Database/Database.h | 2 +- Database/Join.h | 5 +++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Database/Database.cpp b/Database/Database.cpp index 4a8816e..70754e9 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -1317,6 +1317,7 @@ Database::build_p2xx(int** _p_id_tuples) bool Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max) { + //TODO:unsigned double not to max?? set to max directly int _id_tuples_size; { //initial diff --git a/Database/Database.h b/Database/Database.h index b992de5..6d23d72 100644 --- a/Database/Database.h +++ b/Database/Database.h @@ -188,7 +188,7 @@ private: int remove(const TripleWithObjType* _triples, int _triple_num); //bool remove(const vector& _triples, vector& _vertices, vector& _predicates); - bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max); + bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, unsigned**& _p_id_tuples, unsigned & _id_tuples_max); bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max); bool objIDIsEntityID(int _id); diff --git a/Database/Join.h b/Database/Join.h index 41d8eca..aa913af 100644 --- a/Database/Join.h +++ b/Database/Join.h @@ -55,7 +55,12 @@ private: static const unsigned PARAM_SIZE = 1000000; static const unsigned PARAM_PRE = 10000; static const unsigned PARAM_DENSE = 1; + static const double JUDGE_LIMIT = 0.5; + //NOTICE+DEBUG: please use constexpr below instead of the phase above(constexpr is supported in C++11) + //http://www.cnblogs.com/wanyuanchun/p/4041080.html + //constexpr static const double JUDGE_LIMIT = 0.5; + static const int LIMIT_CANDIDATE_LIST_SIZE = 1000; //BETTER?:predefine size 
to avoid copy cost TableType current_table; From d64358b60687eba5d2fb11c1deb24559e3557d5b Mon Sep 17 00:00:00 2001 From: bookug Date: Tue, 28 Mar 2017 16:56:16 +0800 Subject: [PATCH 2/6] perf: add VList for IVTree; lower the copy cost in KVstore this work doe not ends by zengli, maybe conflicates with type branch --- .gitignore | 3 + Database/Database.cpp | 5 +- Database/Database.h | 2 +- Database/Join.cpp | 5 + KVstore/ISTree/ISTree.h | 4 +- KVstore/IVTree/IVTree.cpp | 677 +++++++++++++++++++++++++ KVstore/IVTree/IVTree.h | 98 ++++ KVstore/IVTree/heap/IVHeap.cpp | 186 +++++++ KVstore/IVTree/heap/IVHeap.h | 41 ++ KVstore/IVTree/node/IVIntlNode.cpp | 293 +++++++++++ KVstore/IVTree/node/IVIntlNode.h | 48 ++ KVstore/IVTree/node/IVLeafNode.cpp | 443 ++++++++++++++++ KVstore/IVTree/node/IVLeafNode.h | 56 +++ KVstore/IVTree/node/IVNode.cpp | 320 ++++++++++++ KVstore/IVTree/node/IVNode.h | 119 +++++ KVstore/IVTree/storage/IVStorage.cpp | 722 +++++++++++++++++++++++++++ KVstore/IVTree/storage/IVStorage.h | 84 ++++ KVstore/IVTree/vlist/VList.cpp | 257 ++++++++++ KVstore/IVTree/vlist/VList.h | 71 +++ KVstore/KVstore.cpp | 652 ++++++++++++++++++------ KVstore/KVstore.h | 12 +- KVstore/SITree/SITree.cpp | 167 ++++--- KVstore/SITree/SITree.h | 19 +- KVstore/SITree/storage/SIStorage.h | 8 + KVstore/Tree.h | 3 +- Main/gadd.cpp | 4 +- Main/gbuild.cpp | 4 +- Main/gclient.cpp | 6 +- Main/gconsole.cpp | 4 +- Main/gquery.cpp | 4 +- Main/gserver.cpp | 115 +++-- Main/gsub.cpp | 4 +- NOTES.md | 34 +- Query/BasicQuery.h | 4 +- Util/Bstr.cpp | 17 +- Util/Bstr.h | 2 +- Util/Util.cpp | 13 +- Util/Util.h | 11 +- logs/.gitignore | 1 + makefile | 21 + package.json | 12 + test/package.json | 15 + 42 files changed, 4267 insertions(+), 299 deletions(-) create mode 100644 KVstore/IVTree/IVTree.cpp create mode 100644 KVstore/IVTree/IVTree.h create mode 100644 KVstore/IVTree/heap/IVHeap.cpp create mode 100644 KVstore/IVTree/heap/IVHeap.h create mode 100644 KVstore/IVTree/node/IVIntlNode.cpp create 
mode 100644 KVstore/IVTree/node/IVIntlNode.h create mode 100644 KVstore/IVTree/node/IVLeafNode.cpp create mode 100644 KVstore/IVTree/node/IVLeafNode.h create mode 100644 KVstore/IVTree/node/IVNode.cpp create mode 100644 KVstore/IVTree/node/IVNode.h create mode 100644 KVstore/IVTree/storage/IVStorage.cpp create mode 100644 KVstore/IVTree/storage/IVStorage.h create mode 100644 KVstore/IVTree/vlist/VList.cpp create mode 100644 KVstore/IVTree/vlist/VList.h create mode 100644 logs/.gitignore create mode 100644 package.json create mode 100644 test/package.json diff --git a/.gitignore b/.gitignore index ceb77fe..70c39fd 100644 --- a/.gitignore +++ b/.gitignore @@ -91,3 +91,6 @@ tags *.out *.bak~ +# modules +node_modules + diff --git a/Database/Database.cpp b/Database/Database.cpp index 70754e9..d088fa9 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -1317,7 +1317,6 @@ Database::build_p2xx(int** _p_id_tuples) bool Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max) { - //TODO:unsigned double not to max?? 
set to max directly int _id_tuples_size; { //initial @@ -1477,6 +1476,10 @@ Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _ } } + //NOTICE: we assume that there is no duplicates in the dataset + //if not, this->triple_num will be not right, and _p_id_tuples will save useless triples + //However, we can not use exist_triple to detect duplicates here, because it is too time-costly + // For id_tuples _p_id_tuples[_id_tuples_size] = new int[3]; _p_id_tuples[_id_tuples_size][0] = _sub_id; diff --git a/Database/Database.h b/Database/Database.h index 6d23d72..b992de5 100644 --- a/Database/Database.h +++ b/Database/Database.h @@ -188,7 +188,7 @@ private: int remove(const TripleWithObjType* _triples, int _triple_num); //bool remove(const vector& _triples, vector& _vertices, vector& _predicates); - bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, unsigned**& _p_id_tuples, unsigned & _id_tuples_max); + bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max); bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max); bool objIDIsEntityID(int _id); diff --git a/Database/Join.cpp b/Database/Join.cpp index aa0e675..9e9a6a5 100644 --- a/Database/Join.cpp +++ b/Database/Join.cpp @@ -951,6 +951,11 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_lis } } +//TODO: multiple lists intersect, how about sort and intersect from small to big? +//but this need to generate all first, I think sort by pre2num if better! +// +//TODO: set the entity_literal border in kvstore, and intersect entity part and literal part respectively + //NOTICE: consider two directions according to table1 size and table2 size //1. -> add ID mapping record for the first linking column, whole(offset, size) zengli //2. 
<- join using inverted index for each column, offset and size for each column, hulin diff --git a/KVstore/ISTree/ISTree.h b/KVstore/ISTree/ISTree.h index 9620bd8..e0499dd 100644 --- a/KVstore/ISTree/ISTree.h +++ b/KVstore/ISTree/ISTree.h @@ -3,7 +3,7 @@ # Author: syzz # Mail: 1181955272@qq.com # Last Modified: 2015-04-26 16:44 -# Description: struct and interface of the B+ tree +# Description: ID2string, including id2entity, id2literal and id2predicate =============================================================================*/ #ifndef _KVSTORE_ISTREE_ISTREE_H @@ -76,4 +76,4 @@ public: //(problem range between two extremes: not-modified, totally-modified) //After saved, it's ok to continue operations on tree! -#endif \ No newline at end of file +#endif diff --git a/KVstore/IVTree/IVTree.cpp b/KVstore/IVTree/IVTree.cpp new file mode 100644 index 0000000..1dee1cf --- /dev/null +++ b/KVstore/IVTree/IVTree.cpp @@ -0,0 +1,677 @@ +/*============================================================================= +# Filename: IVTree.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:45 +# Description: achieve functions in IVTree.h +=============================================================================*/ + +#include "IVTree.h" + +using namespace std; + +IVTree::IVTree() +{ + height = 0; + mode = ""; + root = NULL; + leaves_head = NULL; + leaves_tail = NULL; + TSM = NULL; + storepath = ""; + filename = ""; + //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; + //transfer_size = 0; + this->stream = NULL; + this->request = 0; + this->value_list = NULL; +} + +IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long long _buffer_size) +{ + storepath = _storepath; + filename = _filename; + this->height = 0; + this->mode = string(_mode); + string filepath = this->getFilePath(); + + string vlist_file = filepath + "_vlist"; + this->value_list = new VList(vlist_file, 1<<30); + + TSM = new IVStorage(filepath, 
this->mode, &this->height, _buffer_size, this->value_list); + if (this->mode == "open") + this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail); + else + this->root = NULL; + + //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M + //this->transfer.setStr((char*)malloc(Util::TRANSFER_SIZE)); + + this->stream = NULL; + this->request = 0; +} + +string +IVTree::getFilePath() +{ + return storepath + "/" + filename; +} + +//void //WARN: not check _str and _len +//IVTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + //transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} + +unsigned +IVTree::getHeight() const +{ + return this->height; +} + +void +IVTree::setHeight(unsigned _h) +{ + this->height = _h; +} + +IVNode* +IVTree::getRoot() const +{ + return this->root; +} + +void +IVTree::prepare(IVNode* _np) +{ + //this->request = 0; + bool flag = _np->inMem(); + if (!flag) + { + this->TSM->readNode(_np, &request); //readNode deal with request + } +} + +bool +IVTree::search(int _key, char*& _str, int& _len) +{ + if (_key < 0) + { + printf("error in IVTree-search: empty string\n"); + return false; 
+ } + + this->request = 0; + int store; + IVNode* ret = this->find(_key, &store, false); + if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found + { + return false; + } + + ret->getValue(this->value_list, store, _str, _len); + //const Bstr* val = ret->getValue(store); + //this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request + //_str = this->transfer[0].getStr(); + //_len = this->transfer[0].getLen(); + + this->TSM->request(request); + return true; +} + +bool +IVTree::insert(int _key, const char* _str, unsigned _len) +{ + if (_key < 0) + { + printf("error in IVTree-insert: empty string\n"); + return false; + } + + //this->CopyToTransfer(_str, _len, 2); + //const Bstr* val = &(this->transfer[2]); + this->request = 0; + IVNode* ret; + if (this->root == NULL) //tree is empty + { + leaves_tail = leaves_head = root = new IVLeafNode; + request += IVNode::LEAF_SIZE; + this->height = 1; + root->setHeight(1); //add to heap later + } + + //this->prepare(this->root); //root must be in-mem + if (root->getNum() == IVNode::MAX_KEY_NUM) + { + IVNode* father = new IVIntlNode; + request += IVNode::INTL_SIZE; + father->addChild(root, 0); + ret = root->split(father, 0); + if (ret->isLeaf() && ret->getNext() == NULL) + this->leaves_tail = ret; + if (ret->isLeaf()) + request += IVNode::LEAF_SIZE; + else + request += IVNode::INTL_SIZE; + this->height++; //height rises only when root splits + //WARN: height area in Node: 4 bit! 
+ father->setHeight(this->height); //add to heap later + this->TSM->updateHeap(ret, ret->getRank(), false); + this->root = father; + } + + IVNode* p = this->root; + IVNode* q; + int i; + while (!p->isLeaf()) + { + //j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + //NOTICE: using binary search is better here + i = p->searchKey_less(_key); + + q = p->getChild(i); + this->prepare(q); + if (q->getNum() == IVNode::MAX_KEY_NUM) + { + ret = q->split(p, i); + if (ret->isLeaf() && ret->getNext() == NULL) + this->leaves_tail = ret; + if (ret->isLeaf()) + request += IVNode::LEAF_SIZE; + else + request += IVNode::INTL_SIZE; + //BETTER: in loop may update multiple times + this->TSM->updateHeap(ret, ret->getRank(), false); + this->TSM->updateHeap(q, q->getRank(), true); + this->TSM->updateHeap(p, p->getRank(), true); + if (_key < p->getKey(i)) + p = q; + else + p = ret; + } + else + { + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + p = q; + } + } + //j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + //insert existing key is ok, but not inserted in + //however, the tree-shape may change due to possible split in former code + bool ifexist = false; + if (i > 0 && _key == p->getKey(i - 1)) + ifexist = true; + else + { + p->addKey(_key, i); + p->addValue(this->value_list, i, _str, _len, true); + p->addNum(); + request += _len; + //request += val->getLen(); + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + //_key->clear(); + //_value->clear(); + } + this->TSM->request(request); + return !ifexist; //QUERY(which case:return false) +} + +bool +IVTree::modify(int _key, const char* _str, unsigned _len) +{ + if (_key < 0) + { + printf("error in IVTree-modify: empty string\n"); + return false; + } + + //this->CopyToTransfer(_str, _len, 2); //not check value + //const Bstr* val = &(this->transfer[2]); + this->request = 0; + int store; + IVNode* ret 
= this->find(_key, &store, true); + if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found + { + cerr << "tree is empty or not found" << endl; + return false; + } + //cout<<"IVTree::modify() - key is found, now to remove"<getValue(store)->getLen(); + ret->setValue(this->value_list, store, _str, _len, true); + //ret->setValue(val, store, true); + //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); + this->request = _len; + //this->request = val->getLen(); + this->request -= len; + ret->setDirty(); + //cout<<"to request"<TSM->request(request); + //cout<<"memory requested"<= *_key +IVTree::find(int _key, int* _store, bool ifmodify) +{ //to assign value for this->bstr, function shouldn't be const! + if (this->root == NULL) + return NULL; //IVTree Is Empty + + IVNode* p = root; + int i, j; + while (!p->isLeaf()) + { + if (ifmodify) + p->setDirty(); + //j = p->getNum(); + //for(i = 0; i < j; ++i) //BETTER(Binary-Search) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + p = p->getChild(i); + this->prepare(p); + } + + j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr <= *(p->getKey(i))) + //break; + i = p->searchKey_lessEqual(_key); + + if (i == j) + *_store = -1; //Not Found + else + *_store = i; + + return p; +} + +/* +Node* +IVTree::find(unsigned _len, const char* _str, int* store) const +{ +} +*/ + +bool +IVTree::remove(int _key) +{ + if (_key < 0) + { + printf("error in IVTree-remove: empty string\n"); + return false; + } + + this->request = 0; + IVNode* ret; + if (this->root == NULL) //tree is empty + return false; + + IVNode* p = this->root; + IVNode* q; + int i, j; + while (!p->isLeaf()) + { + j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + q = p->getChild(i); + this->prepare(q); + if (q->getNum() < IVNode::MIN_CHILD_NUM) //==MIN_KEY_NUM + { + if (i > 0) + this->prepare(p->getChild(i - 1)); + if (i < j) + 
this->prepare(p->getChild(i + 1)); + ret = q->coalesce(p, i); + if (ret != NULL) + this->TSM->updateHeap(ret, 0, true);//non-sense node + this->TSM->updateHeap(q, q->getRank(), true); + if (q->isLeaf()) + { + if (q->getPrev() == NULL) + this->leaves_head = q; + if (q->getNext() == NULL) + this->leaves_tail = q; + } + if (p->getNum() == 0) //root shrinks + { + //this->leaves_head = q; + this->root = q; + this->TSM->updateHeap(p, 0, true); //instead of delete p + this->height--; + } + } + else + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + p = q; + } + bool flag = false; + //j = p->getNum(); //LeafNode(maybe root) + //for(i = 0; i < j; ++i) + // if(bstr == *(p->getKey(i))) + // { + // request -= p->getKey(i)->getLen(); + // request -= p->getValue(i)->getLen(); + // p->subKey(i, true); //to release + // p->subValue(i, true); //to release + // p->subNum(); + // if(p->getNum() == 0) //root leaf 0 key + // { + // this->root = NULL; + // this->leaves_head = NULL; + // this->leaves_tail = NULL; + // this->height = 0; + // this->TSM->updateHeap(p, 0, true); //instead of delete p + // } + // p->setDirty(); + // flag = true; + // break; + // } + i = p->searchKey_equal(_key); + //WARN+NOTICE:here must check, because the key to remove maybe not exist + if (i != (int)p->getNum()) + { + request -= p->getValue(i)->getLen(); + p->subKey(i); //to release + p->subValue(i, true); //to release + p->subNum(); + if (p->getNum() == 0) //root leaf 0 key + { + this->root = NULL; + this->leaves_head = NULL; + this->leaves_tail = NULL; + this->height = 0; + this->TSM->updateHeap(p, 0, true); //instead of delete p + } + p->setDirty(); + flag = true; + } + + this->TSM->request(request); + return flag; //i == j, not found +} + +const Bstr* +IVTree::getRangeValue() +{ + if (this->stream == NULL) + { + fprintf(stderr, "IVTree::getRangeValue(): no results now!\n"); + return NULL; + } + if (this->stream->isEnd()) + { + fprintf(stderr, "IVTree::getRangeValue(): read till end 
now!\n"); + return NULL; + } + //NOTICE:this is one record, and donot free the memory! + //NOTICE:Bstr[] but only one element, used as Bstr* + return this->stream->read(); +} + +void +IVTree::resetStream() +{ + if (this->stream == NULL) + { + fprintf(stderr, "no results now!\n"); + return; + } + this->stream->setEnd(); +} + +bool //special case: not exist, one-edge-case +IVTree::range_query(int _key1, int _key2) +{ //the range is: *_key1 <= x < *_key2 + //if(_key1 <0 && _key2 <0) + //return false; + //ok to search one-edge, requiring only one be negative + //find and write value + int store1, store2; + IVNode *p1, *p2; + if (_key1 >= 0) + { + request = 0; + p1 = this->find(_key1, &store1, false); + if (p1 == NULL || store1 == -1) + return false; //no element + this->TSM->request(request); + } + else + { + p1 = this->leaves_head; + store1 = 0; + } + if (_key2 >= 0) + { //QUERY: another strategy is to getnext and compare every time to tell end + request = 0; + p2 = this->find(_key2, &store2, false); + if (p2 == NULL) + return false; + else if (store2 == -1) + store2 = p2->getNum(); + else if (store2 == 0) + { + p2 = p2->getPrev(); + if (p2 == NULL) + return false; //no element + store2 = p2->getNum(); + } + this->TSM->request(request); + } + else + { + p2 = this->leaves_tail; + store2 = p2->getNum(); + } + + IVNode* p = p1; + unsigned i, l, r; + //get the num of answers first, not need to prepare the node + unsigned ansNum = 0; + while (true) + { + //request = 0; + //this->prepare(p); + if (p == p1) + l = store1; + else + l = 0; + if (p == p2) + r = store2; + else + r = p->getNum(); + ansNum += (r - l); + //this->TSM->request(request); + if (p != p2) + p = p->getNext(); + else + break; + } + + if (this->stream != NULL) + { + delete this->stream; + this->stream = NULL; + } + vector keys; + vector desc; + this->stream = new Stream(keys, desc, ansNum, 1, false); + + p = p1; + while (1) + { + request = 0; + this->prepare(p); + if (p == p1) + l = store1; + else + l = 0; + 
if (p == p2) + r = store2; + else + r = p->getNum(); + for (i = l; i < r; ++i) + { + //NOTICE:Bstr* in an array, used as Bstr[] + this->stream->write(p->getValue(i)); + } + this->TSM->request(request); + if (p != p2) + p = p->getNext(); + else + break; + } + this->stream->setEnd(); + return true; +} + +bool +IVTree::save() //save the whole tree to disk +{ +#ifdef DEBUG_KVSTORE + printf("now to save tree!\n"); +#endif + if (TSM->writeTree(this->root)) + return true; + else + return false; +} + +void +IVTree::release(IVNode* _np) const +{ + if (_np == NULL) return; + if (_np->isLeaf()) + { + delete _np; + return; + } + int cnt = _np->getNum(); + for (; cnt >= 0; --cnt) + release(_np->getChild(cnt)); + delete _np; +} + +IVTree::~IVTree() +{ + delete this->stream; //maybe NULL + delete TSM; +#ifdef DEBUG_KVSTORE + printf("already empty the buffer, now to delete all nodes in tree!\n"); +#endif + //recursively delete each Node + release(root); +} + +void +IVTree::print(string s) +{ +#ifdef DEBUG_KVSTORE + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVTree\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); + fprintf(Util::debug_kvstore, "Height: %d\n", this->height); + if (s == "tree" || s == "TREE") + { + if (this->root == NULL) + { + fputs("Null IVTree\n", Util::debug_kvstore); + return; + } + IVNode** ns = new IVNode*[this->height]; + int* ni = new int[this->height]; + IVNode* np; + int i, pos = 0; + ns[pos] = this->root; + ni[pos] = this->root->getNum(); + pos++; + while (pos > 0) + { + np = ns[pos - 1]; + i = ni[pos - 1]; + this->prepare(np); + if (np->isLeaf() || i < 0) //LeafNode or ready IntlNode + { //child-num ranges: 0~num + if (s == "tree") + np->print("node"); + else + np->print("NODE"); //print full node-information + pos--; + continue; + } + else + { + ns[pos] = np->getChild(i); + ni[pos - 1]--; + ni[pos] = ns[pos]->getNum(); + pos++; + } 
+ } + delete[] ns; + delete[] ni; + } + else if (s == "LEAVES" || s == "leaves") + { + IVNode* np; + for (np = this->leaves_head; np != NULL; np = np->getNext()) + { + this->prepare(np); + if (s == "leaves") + np->print("node"); + else + np->print("NODE"); + } + } + else if (s == "check tree") + { + //check the tree, if satisfy B+ definition + //TODO + } + else; +#endif +} + diff --git a/KVstore/IVTree/IVTree.h b/KVstore/IVTree/IVTree.h new file mode 100644 index 0000000..86fbf27 --- /dev/null +++ b/KVstore/IVTree/IVTree.h @@ -0,0 +1,98 @@ +/*============================================================================= +# Filename: IVTree.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:44 +# Description: ID2valueList, including s2po, p2so and o2ps +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_IVTREE_H +#define _KVSTORE_IVTREE_IVTREE_H + +#include "../../Util/Util.h" +#include "../../Util/Stream.h" +#include "node/IVNode.h" +#include "node/IVIntlNode.h" +#include "node/IVLeafNode.h" +#include "storage/IVStorage.h" +#include "./vlist/VList.h" + +//TODO: for long list, do not read in time, just on need +//the memory is kept with the node, updat ewith node +//NOTICE: to release the node, maybe the value list is NULL +//value bstr: unsigned=address, NULL +//BETTER?: build a new block store for long list?? 
+ +//NOTICE: we do not need to use transfer bstr here, neithor for two directions +//when insert/query, we do not release the value in kvstore + +class IVTree +{ +protected: + unsigned int height; //0 indicates an empty tree + IVNode* root; + IVNode* leaves_head; //the head of LeafNode-list + IVNode* leaves_tail; //the tail of LeafNode-list + std::string mode; //BETTER(to use enum) + IVStorage* TSM; //Tree-Storage-Manage + //BETTER:multiple stream maybe needed:) + Stream* stream; + + //always alloc one more byte than length, then user can add a '\0' + //to get a real string, instead of new and copy + //other operations will be harmful to search, so store value in + //transfer temporally, while length adjusted. + //TODO: in multi-user case, multiple-search will cause problem, + //so lock is a must. Add lock to transfer is better than to add + //lock to every key/value. However, modify requires a lock for a + //key/value, and multiple search for different keys are ok!!! + //Bstr transfer; + //unsigned transfer_size; + //Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char* + //unsigned transfer_size[3]; + + //tree's operations should be atom(if read nodes) + //sum the request and send to IVStorage at last + //ensure that all nodes operated are in memory + long long request; + void prepare(IVNode* _np); + + std::string storepath; + std::string filename; //ok for user to change + /* some private functions */ + std::string getFilePath(); //in UNIX system + //void CopyToTransfer(const char* _str, unsigned _len, unsigned _index); + //void CopyToTransfer(const char* _str, unsigned _len); + void release(IVNode* _np) const; + + //very long value list are stored in a separate file(with large block) + // + //NOTICE: according to the summary result, 90% value lists are just below 100 bytes + //<10%: 5000000~100M bytes + VList* value_list; + +public: + IVTree(); //always need to initial transfer + IVTree(std::string 
_storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size); + unsigned int getHeight() const; + void setHeight(unsigned _h); + IVNode* getRoot() const; + //void setRoot(Node* _root); + //insert, search, remove, set + bool search(int _key, char*& _str, int& _len); + bool insert(int _key, const char* _str, unsigned _len); + bool modify(int _key, const char* _str, unsigned _len); + IVNode* find(int _key, int* store, bool ifmodify); + bool remove(int _key); + const Bstr* getRangeValue(); + void resetStream(); + bool range_query(int _key1, int _key2); + bool save(); + ~IVTree(); + void print(std::string s); //DEBUG(print the tree) +}; +//NOTICE: need to save tree manually before delete, otherwise will cause problem. +//(problem range between two extremes: not-modified, totally-modified) +//After saved, it's ok to continue operations on tree! + +#endif diff --git a/KVstore/IVTree/heap/IVHeap.cpp b/KVstore/IVTree/heap/IVHeap.cpp new file mode 100644 index 0000000..5cc291f --- /dev/null +++ b/KVstore/IVTree/heap/IVHeap.cpp @@ -0,0 +1,186 @@ +/*============================================================================= +# Filename: IVHeap.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:37 +# Description: achieve functions in IVHeap.h +=============================================================================*/ + +#include "IVHeap.h" + +using namespace std; + +IVHeap::IVHeap() +{ + this->length = this->size = 0; + this->heap = NULL; +} + +IVHeap::IVHeap(unsigned _size) +{ + this->length = 0; + this->size = _size; + //this->heap = (Node**)malloc(this->size * sizeof(Node*)); //not use 4 or 8 + this->heap = new IVNode*[this->size]; + if (this->heap == NULL) + { + this->print("error in IVHeap: Allocation fail!"); + exit(1); + } + /* + this->npmap = (Map*)malloc(this->size * sizeof(struct Map)); + if(this->npmap == NULL) + { + this->print("error in IVHeap: Allocation fail!"); + exit(1); + } + */ +} + +IVNode* 
+IVHeap::getTop() const +{ + if (this->length > 0) + return this->heap[0]; + else + return NULL; +} + +unsigned +IVHeap::getLen() const +{ + return this->length; +} + +unsigned +IVHeap::getSize() const +{ + return this->size; +} + +bool +IVHeap::isEmpty() const +{ + return this->length == 0; +} + +bool +IVHeap::insert(IVNode* _np) +{ + if (this->length == this->size) //when full, reallocate + { + this->heap = (IVNode**)realloc(this->heap, 2 * this->size * sizeof(IVNode*)); + if (this->heap == NULL) + { + print("error in isert: Reallocation fail!"); + return false; + } + /* + this->npmap = (struct Map*)realloc(this->npmap, 2 * this->size * sizeof(struct Map)); + if(this->npmap == NULL) + { + print("error in insert: Reallocation fail!"); + return false; + } + */ + this->size = 2 * this->size; + } + unsigned i = this->length, j; + while (i != 0) + { + j = (i - 1) / 2; + if (_np->getRank() >= this->heap[j]->getRank()) + break; + heap[i] = heap[j]; + //this->npmap[k].pos = i; //adjust the position + i = j; + } + this->heap[i] = _np; + this->length++; + return true; +} + +bool +IVHeap::remove() +{ + if (this->length == 0) + { + print("error in remove: remove from empty heap!"); + return false; + } + //Node* tp = this->heap[0]; + this->length--; + if (this->length == 0) + return true; + IVNode* xp = this->heap[this->length]; + unsigned i = 0, j = 1; + while (j < this->length) + { + if (j < this->length - 1 && this->heap[j]->getRank() > this->heap[j + 1]->getRank()) + j++; + if (xp->getRank() <= this->heap[j]->getRank()) + break; + this->heap[i] = this->heap[j]; + i = j; + j = 2 * i + 1; + } + this->heap[i] = xp; + return true; +} + +bool +IVHeap::modify(IVNode* _np, bool _flag) //control direction +{ + //search and adjust + unsigned i, j; + for (i = 0; i < this->length; ++i) + if (this->heap[i] == _np) + break; + if (_flag == true) //move up + { + while (i != 0) + { + j = (i - 1) / 2; + if (_np->getRank() < heap[j]->getRank()) + { + heap[i] = heap[j]; + heap[j] = _np; + i 
= j; + } + else + break; + } + } + else //move down + { + j = 2 * i + 1; + while (j < this->length) + { + if (j < this->length - 1 && heap[j]->getRank() > heap[j + 1]->getRank()) + j++; + if (heap[j]->getRank() < _np->getRank()) + { + heap[i] = heap[j]; + heap[j] = _np; + i = j; + } + else + break; + } + } + return true; +} + +IVHeap::~IVHeap() +{ + delete[] this->heap; + this->heap = NULL; + this->length = this->size = 0; +} + +void +IVHeap::print(string s) +{ +#ifdef DEBUG_KVSTORE +#endif +} diff --git a/KVstore/IVTree/heap/IVHeap.h b/KVstore/IVTree/heap/IVHeap.h new file mode 100644 index 0000000..0e418fd --- /dev/null +++ b/KVstore/IVTree/heap/IVHeap.h @@ -0,0 +1,41 @@ +/*============================================================================= +# Filename: IVHeap.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:37 +# Description: set and deal of IVNode*s in memory +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_HEAP_IVHEAP_H +#define _KVSTORE_IVTREE_HEAP_IVHEAP_H + +#include "../../../Util/Util.h" +#include "../node/IVNode.h" + +/* add, sub, modify: all can be done within O(logn) using adjust-function */ +//QUERY: when modified, finding right position consumes O(n). How about keeping smallest? 
+//(add O(1), sub O(2n), modify O(n) +//TODO: to solve this probem, use another hash: (pointer, pos), to find the right position of +//given p in O(lgn) time + +class IVHeap +{ +private: + IVNode** heap; //dynamic array + unsigned length; //valid elements num + unsigned size; //max-size of heap +public: + IVHeap(); + IVHeap(unsigned _size); + IVNode* getTop() const; //return the top element + unsigned getLen() const; + unsigned getSize() const; + bool isEmpty() const; + bool insert(IVNode* _np); //insert and adjust + bool remove(); //remove top and adjust + bool modify(IVNode* _np, bool _flag); //searech modified element and adjust + ~IVHeap(); + void print(std::string s); //DEBUG +}; + +#endif diff --git a/KVstore/IVTree/node/IVIntlNode.cpp b/KVstore/IVTree/node/IVIntlNode.cpp new file mode 100644 index 0000000..3f338b3 --- /dev/null +++ b/KVstore/IVTree/node/IVIntlNode.cpp @@ -0,0 +1,293 @@ +/*============================================================================= +# Filename: IVIntlNode.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:40 +# Description: achieve functions in IVIntlNode.h +=============================================================================*/ + +#include "IVIntlNode.h" + +using namespace std; + +/* +void +IVIntlNode::AllocChilds() +{ +childs = (Node**)malloc(sizeof(Node*) * MAX_CHILD_NUM); +} +*/ + +IVIntlNode::IVIntlNode() +{ + memset(childs, 0, sizeof(IVNode*) * MAX_CHILD_NUM); + //this->AllocChilds(); +} + +IVIntlNode::IVIntlNode(bool isVirtual) //call father-class's constructor automaticlly +{ + memset(childs, 0, sizeof(IVNode*) * MAX_CHILD_NUM); + //this->AllocChilds(); +} + +/* +IVIntlNode::IntlNode(Storage* TSM) //QUERY +{ +TSM->readNode(this, Storage::OVER); +} +*/ + +void +IVIntlNode::Virtual() +{ + //this->FreeKeys(); + this->release(); + this->delMem(); +} + +void +IVIntlNode::Normal() +{ + this->AllocKeys(); + this->setMem(); +} + +IVNode* +IVIntlNode::getChild(int _index) const +{ + int num 
= this->getNum(); + if (_index < 0 || _index > num) //num keys, num+1 childs + { + //print(string("error in getChild: Invalid index ") + Util::int2string(_index)); + return NULL; + } + else + return childs[_index]; +} + +bool +IVIntlNode::setChild(IVNode* _child, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in setChild: Invalid index ") + Util::int2string(_index)); + return false; + } + this->childs[_index] = _child; + return true; +} + +bool +IVIntlNode::addChild(IVNode* _child, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num + 1) + { + print(string("error in addChild: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num; i >= _index; --i) //DEBUG: right bounder!!! + childs[i + 1] = childs[i]; + childs[_index] = _child; + return true; +} + +bool +IVIntlNode::subChild(int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in subchild: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = _index; i < num; ++i) //DEBUG: right bounder!!! 
+ childs[i] = childs[i + 1]; + return true; +} + +unsigned +IVIntlNode::getSize() const +{ + //unsigned sum = INTL_SIZE, num = this->getNum(), i; + //return sum; + return INTL_SIZE; +} + +IVNode* +IVIntlNode::split(IVNode* _father, int _index) +{ + int num = this->getNum(); + IVNode* p = new IVIntlNode; //right child + p->setHeight(this->getHeight()); + int i, k; + for (i = MIN_CHILD_NUM, k = 0; i < num; ++i, ++k) + { + p->addKey(this->keys[i], k); + p->addChild(this->childs[i], k); + p->addNum(); + } + p->addChild(this->childs[i], k); + int tp = this->keys[MIN_KEY_NUM]; + this->setNum(MIN_KEY_NUM); + _father->addKey(tp, _index); + _father->addChild(p, _index + 1); //DEBUG(check the index) + _father->addNum(); + _father->setDirty(); + p->setDirty(); + this->setDirty(); + return p; +} + +IVNode* +IVIntlNode::coalesce(IVNode* _father, int _index) +{ + //int num = this->getNum(); + int i, j = _father->getNum(), k; //BETTER: unsigned? + IVNode* p = NULL; //the chosen neighbor; stays NULL when neither side is selected + int ccase = 0; + //const Bstr* bstr; + if (_index < j) //the right neighbor + { + p = _father->getChild(_index + 1); + k = p->getNum(); + if ((unsigned)k > MIN_KEY_NUM) + ccase = 2; + else //==MIN_KEY_NUM + ccase = 1; + } + if (_index > 0) //the left neighbor + { + IVNode* tp = _father->getChild(_index - 1); + unsigned tk = tp->getNum(); + if (ccase < 2) + { + if (ccase == 0) + ccase = 3; + if (tk > MIN_KEY_NUM) + ccase = 4; + } + if (ccase > 2) + { + p = tp; + k = tk; + } + } + + int tmp = 0; + switch (ccase) + { + case 1: //union right to this + this->addKey(_father->getKey(_index), this->getNum()); + this->addNum(); + for (i = 0; i < k; ++i) + { + this->addKey(p->getKey(i), this->getNum()); + this->addChild(p->getChild(i), this->getNum()); + this->addNum(); + } + this->setChild(p->getChild(i), this->getNum()); + _father->subKey(_index); + _father->subChild(_index + 1); + _father->subNum(); + p->setNum(0); + //delete p; + break; + case 2: //move one from right + this->addKey(_father->getKey(_index), this->getNum()); + 
_father->setKey(p->getKey(0), _index); + p->subKey(0); + this->addChild(p->getChild(0), this->getNum() + 1); + p->subChild(0); + this->addNum(); + p->subNum(); + break; + case 3: //union left to this + this->addKey(_father->getKey(_index - 1), 0); + this->addNum(); + for (i = k; i > 0; --i) + { + int t = i - 1; + this->addKey(p->getKey(t), 0); + this->addChild(p->getChild(i), 0); + this->addNum(); + } + this->addChild(p->getChild(0), 0); + _father->subKey(_index - 1); + _father->subChild(_index - 1); + _father->subNum(); + p->setNum(0); + //delete p; + break; + case 4: //move one from left + tmp = p->getKey(k - 1); + p->subKey(k - 1); + this->addKey(_father->getKey(_index - 1), 0); + _father->setKey(tmp, _index - 1); + this->addChild(p->getChild(k), 0); + p->subChild(k); + this->addNum(); + p->subNum(); + break; + default: + print("error in coalesce: Invalid case!"); + return NULL; //no neighbor was selected, so p is unset: nothing was merged and p must not be dereferenced below + } + _father->setDirty(); + p->setDirty(); + this->setDirty(); + if (ccase == 1 || ccase == 3) + return p; + else + return NULL; +} + +void +IVIntlNode::release() +{ + if (!this->inMem()) + return; + //unsigned num = this->getNum(); + delete[] keys; //this will release all!!! 
+} + +IVIntlNode::~IVIntlNode() +{ + release(); + //free(childs); +} + +void +IVIntlNode::print(string s) +{ +#ifdef DEBUG_KVSTORE + int num = this->getNum(); + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVIntlNode\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); + if (s == "node" || s == "NODE") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + /* + int i; + for (i = 0; i < num; ++i) + { + if (s == "node") + this->keys[i].print("bstr"); + else + this->keys[i].print("BSTR"); + } + */ + } + else if (s == "check node") + { + //TODO(check node, if satisfy B+ definition) + } + else; +#endif +} diff --git a/KVstore/IVTree/node/IVIntlNode.h b/KVstore/IVTree/node/IVIntlNode.h new file mode 100644 index 0000000..5d0932f --- /dev/null +++ b/KVstore/IVTree/node/IVIntlNode.h @@ -0,0 +1,48 @@ +/*============================================================================= +# Filename: IVIntlNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:40 +# Description: the internal-node of a B+ tree +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVINTLNODE_H +#define _KVSTORE_IVTREE_NODE_IVINTLNODE_H + +#include "IVNode.h" + +class IVIntlNode : public IVNode +{ +protected: + IVNode* childs[MAX_CHILD_NUM + 1]; + //Node** childs; + //void AllocChilds(); +public: + IVIntlNode(); + IVIntlNode(bool isVirtual); + //IntlNode(Storage* TSM); + void Virtual(); + void Normal(); + IVNode* getChild(int _index) const; + bool setChild(IVNode* _child, int _index); + bool addChild(IVNode* _child, int _index); + bool subChild(int _index); + unsigned getSize() const; + IVNode* split(IVNode* _father, int _index); + IVNode* coalesce(IVNode* _father, int _index); + void release(); + ~IVIntlNode(); + void print(std::string s); //DEBUG + 
/*non-sense functions: polymorphic + Node* getPrev() const; + Node* getNext() const; + const Bstr* getValue(int _index) const; + bool setValue(const Bstr* _value, int _index); + bool addValue(const Bstr* _value, int _index); + bool subValue(int _index); + void setPrev(Node* _prev); + void setNext(Node* _next); + */ +}; + +#endif diff --git a/KVstore/IVTree/node/IVLeafNode.cpp b/KVstore/IVTree/node/IVLeafNode.cpp new file mode 100644 index 0000000..a35bd6f --- /dev/null +++ b/KVstore/IVTree/node/IVLeafNode.cpp @@ -0,0 +1,443 @@ +/*============================================================================= +# Filename: IVLeafNode.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:40 +# Description: ahieve functions in IVLeafNode.h +=============================================================================*/ + +#include "IVLeafNode.h" + +using namespace std; + +void +IVLeafNode::AllocValues() +{ + values = new Bstr[MAX_KEY_NUM]; +} + +/* +void +IVLeafNode::FreeValues() +{ +delete[] values; +} +*/ + +IVLeafNode::IVLeafNode() +{ + flag |= NF_IL; //leaf flag + prev = next = NULL; + AllocValues(); +} + +IVLeafNode::IVLeafNode(bool isVirtual) +{ + flag |= NF_IL; + prev = next = NULL; + if (!isVirtual) + AllocValues(); +} + +/* +IVLeafNode::LeafNode(Storage* TSM) +{ +AllocValues(); +TSM->readNode(this, Storage::OVER); +} +*/ + +void +IVLeafNode::Virtual() +{ + //this->FreeKeys(); + //this->FreeValues(); + this->release(); + this->delMem(); +} + +void +IVLeafNode::Normal() +{ + this->AllocKeys(); + this->AllocValues(); + this->setMem(); +} + +IVNode* +IVLeafNode::getPrev() const +{ + return prev; +} + +IVNode* +IVLeafNode::getNext() const +{ + return next; +} + +const Bstr* +IVLeafNode::getValue(int _index) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getValue: Invalid index ") + Util::int2string(_index)); + return NULL; + } + else + return this->values + _index; +} + +bool 
+IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const +{ + //TODO: read long list + return true; +} + +bool +IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) +{ + //TODO: consider the long list, how to cancel and reset + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + this->values[_index].release(); //NOTICE: only used in modify + + //DEBUG: we do not need to copy here + //we just need to ensure that the pointer's memory is not released + + //if (ifcopy) + //{ + //this->values[_index].copy(_value); + //} + //else + //{ + //this->values[_index] = *_value; + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + //} + return true; +} + +bool +IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) +{ + //TODO:if the list is too large + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + //if (ifcopy) + //this->values[_index].copy(_value); + //else + //this->values[_index] = *_value; + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + + return true; +} + +bool +IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + if (ifcopy) + this->values[_index].copy(_value); + else + this->values[_index] = *_value; + + return true; +} + +bool +IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) +{ + //TODO: if is to sub long list + int num = 
this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + int i; + if (ifdel) + values[_index].release(); + for (i = _index; i < num - 1; ++i) + this->values[i] = this->values[i + 1]; + + return true; +} + +bool +IVLeafNode::subValue(int _index, bool ifdel) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + int i; + if (ifdel) + values[_index].release(); + for (i = _index; i < num - 1; ++i) + this->values[i] = this->values[i + 1]; + + return true; +} + +void +IVLeafNode::setPrev(IVNode* _prev) +{ + this->prev = _prev; +} + +void +IVLeafNode::setNext(IVNode* _next) +{ + this->next = _next; +} + +unsigned +IVLeafNode::getSize() const +{ + unsigned sum = LEAF_SIZE, num = this->getNum(), i; + for (i = 0; i < num; ++i) + { + sum += values[i].getLen(); + } + return sum; +} + +IVNode* +IVLeafNode::split(IVNode* _father, int _index) +{ + int num = this->getNum(); + IVNode* p = new IVLeafNode; //right child + p->setHeight(this->getHeight()); //NOTICE: assign height for new node + p->setNext(this->next); + this->setNext(p); + p->setPrev(this); + int i, k; + for (i = MIN_KEY_NUM, k = 0; i < num; ++i, ++k) + { + p->addKey(this->keys[i], k); + p->addValue(this->values + i, k); + p->addNum(); + } + int tp = this->keys[MIN_KEY_NUM]; + this->setNum(MIN_KEY_NUM); + _father->addKey(tp, _index); + _father->addChild(p, _index + 1); //DEBUG(check the index) + _father->addNum(); + _father->setDirty(); + p->setDirty(); + this->setDirty(); + return p; +} + +IVNode* +IVLeafNode::coalesce(IVNode* _father, int _index) +{ //add a key or coalesce a neighbor to this + int i, j = _father->getNum(), k; //BETTER: unsigned? 
+ IVNode* p = NULL; + int ccase = 0; + //const Bstr* bstr; + if (_index < j) //the right neighbor + { + p = _father->getChild(_index + 1); + k = p->getNum(); + if ((unsigned)k > MIN_KEY_NUM) + ccase = 2; + else //==MIN_KEY_NUM + ccase = 1; + } + if (_index > 0) //the left neighbor + { + IVNode* tp = _father->getChild(_index - 1); + unsigned tk = tp->getNum(); + if (ccase < 2) + { + if (ccase == 0) + ccase = 3; + if (tk > MIN_KEY_NUM) + ccase = 4; + } + if (ccase > 2) + { + p = tp; + k = tk; + } + } + + int tmp = 0; + switch (ccase) + { + case 1: //union right to this + for (i = 0; i < k; ++i) + { + this->addKey(p->getKey(i), this->getNum()); + this->addValue(p->getValue(i), this->getNum()); + this->addNum(); + } + _father->subKey(_index); + _father->subChild(_index + 1); + _father->subNum(); + this->next = p->getNext(); + if (this->next != NULL) + this->next->setPrev(this); + p->setNum(0); //NOTICE: adjust num before delete! + //delete p; + break; + case 2: //move one from right + this->addKey(p->getKey(0), this->getNum()); + _father->setKey(p->getKey(1), _index); + p->subKey(0); + this->addValue(p->getValue(0), this->getNum()); + p->subValue(0); + this->addNum(); + p->subNum(); + break; + case 3: //union left to this + //BETTER: move all keys/etc one time + for (i = k; i > 0; --i) + { + int t = i - 1; + this->addKey(p->getKey(t), 0); + this->addValue(p->getValue(t), 0); + this->addNum(); + } + _father->subKey(_index - 1); + _father->subChild(_index - 1); + _father->subNum(); + this->prev = p->getPrev(); + if (this->prev != NULL) //else: leaves-list + this->prev->setNext(this); + p->setNum(0); + //delete p; + break; + case 4: //move one from left + tmp = p->getKey(k - 1); + p->subKey(k - 1); + this->addKey(tmp, 0); + _father->setKey(tmp, _index - 1); + this->addValue(p->getValue(k - 1), 0); + p->subValue(k - 1); + this->addNum(); + p->subNum(); + break; + default: + print("error in coalesce: Invalid case!"); + return NULL; //no neighbor was selected, so p is still NULL: nothing was merged and p must not be dereferenced below + } + 
_father->setDirty(); + p->setDirty(); + this->setDirty(); + if (ccase == 1 || ccase == 3) + return p; + else + return NULL; +} + +void +IVLeafNode::release() +{ + if (!this->inMem()) + return; + unsigned num = this->getNum(); + /* + for(int i = 0; i < num; ++i) + { + keys[i].release(); + values[i].release(); + } + */ + for (unsigned i = num; i < MAX_KEY_NUM; ++i) + { + values[i].clear(); + } + delete[] keys; + delete[] values; +} + +IVLeafNode::~IVLeafNode() +{ + release(); +} + +void +IVLeafNode::print(string s) +{ +#ifdef DEBUG_KVSTORE + unsigned num = this->getNum(); + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVLeafNode\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); + unsigned i; + if (s == "NODE") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next); + for (i = 0; i < num; ++i) + { + //this->keys[i].print("BSTR"); + this->values[i].print("BSTR"); + } + } + else if (s == "node") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next); + } + else if (s == "check node") + { + //check the node, if satisfy B+ definition + bool flag = true; + if (num < MIN_KEY_NUM || num > MAX_KEY_NUM) + flag = false; + if (flag) + { + for (i = 1; i < num; ++i) + { + if (keys[i] > keys[i - 1]) + continue; + else + break; + } + if (i < num) + flag = false; + } + this->print("node"); + if (flag) + fprintf(Util::debug_kvstore, "This node is good\n"); + else + fprintf(Util::debug_kvstore, "This node is bad\n"); + } + else; +#endif +} + diff --git a/KVstore/IVTree/node/IVLeafNode.h b/KVstore/IVTree/node/IVLeafNode.h new file mode 100644 index 0000000..56638bd --- /dev/null +++ b/KVstore/IVTree/node/IVLeafNode.h @@ -0,0 
+1,56 @@ +/*============================================================================= +# Filename: IVLeafNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:39 +# Description: the leaf-node of a B+ tree +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVLEAFNODE_H +#define _KVSTORE_IVTREE_NODE_IVLEAFNODE_H + +#include "IVNode.h" + +class IVLeafNode : public IVNode +{ +protected: + IVNode* prev; //LeafNode + IVNode* next; + Bstr* values; + void AllocValues(); + //void FreeValues(); +public: + IVLeafNode(); + IVLeafNode(bool isVirtual); + //LeafNode(Storage* TSM); + void Virtual(); + void Normal(); + IVNode* getPrev() const; + IVNode* getNext() const; + const Bstr* getValue(int _index) const; + bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const; + bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false); + + bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false); + bool subValue(VList* _vlist, int _index, bool ifdel = false); + bool addValue(const Bstr* _val, int _index, bool ifcopy = false); + bool subValue(int _index, bool ifdel = false); + + void setPrev(IVNode* _prev); + void setNext(IVNode* _next); + unsigned getSize() const; + IVNode* split(IVNode* _father, int _index); + IVNode* coalesce(IVNode* _father, int _index); + void release(); + ~IVLeafNode(); + void print(std::string s); //DEBUG + /*non-sense virtual function + Node* getChild(int _index) const; + bool addChild(Node* _child, int _index); + bool subChild(int _index); + */ +}; +//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next + +#endif + diff --git a/KVstore/IVTree/node/IVNode.cpp b/KVstore/IVTree/node/IVNode.cpp new file mode 100644 index 0000000..f688303 --- /dev/null +++ b/KVstore/IVTree/node/IVNode.cpp @@ -0,0 +1,320 @@ 
+/*============================================================================= +# Filename: IVNode.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:39 +# Description: achieve functions in IVNode.h +=============================================================================*/ + +#include "IVNode.h" + +using namespace std; + +void +IVNode::AllocKeys() +{ + keys = new int[MAX_KEY_NUM]; +} + +/* +void +IVNode::FreeKeys() +{ +delete[] keys; +} +*/ + +IVNode::IVNode() +{ + store = flag = 0; + flag |= NF_IM; + AllocKeys(); +} + +IVNode::IVNode(bool isVirtual) +{ + store = flag = 0; + if (!isVirtual) + { + flag |= NF_IM; + AllocKeys(); + } +} + +/* +IVNode::Node(Storage* TSM) +{ +AllocKeys(); +TSM->readIVNode(this, Storage::OVER); +} +*/ +bool +IVNode::isLeaf() const +{ + return this->flag & NF_IL; +} + +bool +IVNode::isDirty() const +{ + return this->flag & NF_ID; +} + +void +IVNode::setDirty() +{ + this->flag |= NF_ID; +} + +void +IVNode::delDirty() +{ + this->flag &= ~NF_ID; +} + +bool +IVNode::inMem() const +{ + return this->flag & NF_IM; +} + +void +IVNode::setMem() +{ + this->flag |= NF_IM; +} + +void +IVNode::delMem() +{ + this->flag &= ~NF_IM; +} + +/* +bool +IVNode::isVirtual() const +{ +return this->flag & NF_IV; +} + +void +IVNode::setVirtual() +{ +this->flag |= NF_IV; +} + +void +IVNode::delVirtual() +{ +this->flag &= ~NF_IV; +} +*/ + +unsigned +IVNode::getRank() const +{ + return this->flag & NF_RK; +} + +void +IVNode::setRank(unsigned _rank) +{ + this->flag &= ~NF_RK; + this->flag |= _rank; +} + +unsigned +IVNode::getHeight() const +{ + return (this->flag & NF_HT) >> 20; +} + +void +IVNode::setHeight(unsigned _h) +{ + this->flag &= ~NF_HT; + this->flag |= (_h << 20); +} + +unsigned +IVNode::getNum() const +{ + return (this->flag & NF_KN) >> 12; +} + +bool +IVNode::setNum(int _num) +{ + if (_num < 0 || (unsigned)_num > MAX_KEY_NUM) + { + print(string("error in setNum: Invalid num ") + Util::int2string(_num)); + return false; + 
} + this->flag &= ~NF_KN; + this->flag |= (_num << 12); + return true; +} + +bool +IVNode::addNum() +{ + if (this->getNum() + 1 > MAX_KEY_NUM) + { + print("error in addNum: Invalid!"); + return false; + } + this->flag += (1 << 12); + return true; +} + +bool +IVNode::subNum() +{ + if (this->getNum() < 1) + { + print("error in subNum: Invalid!"); + return false; + } + this->flag -= (1 << 12); + return true; +} + +unsigned +IVNode::getStore() const +{ + return this->store; +} + +void +IVNode::setStore(unsigned _store) +{ + this->store = _store; +} + +unsigned +IVNode::getFlag() const +{ + return flag; +} + +void +IVNode::setFlag(unsigned _flag) +{ + this->flag = _flag; +} + +int +IVNode::getKey(int _index) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getKey: Invalid index ") + Util::int2string(_index)); + printf("error in getKey: Invalid index\n"); + return -1; + } + else + return this->keys[_index]; +} + +bool +IVNode::setKey(int _key, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setKey: Invalid index ") + Util::int2string(_index)); + return false; + } + keys[_index] = _key; + return true; +} + +bool +IVNode::addKey(int _key, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + //NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!! + //however. tree operations ensure that: when node is full, not add but split first! 
+ for (i = num - 1; i >= _index; --i) + keys[i + 1] = keys[i]; + keys[_index] = _key; + return true; +} + +bool +IVNode::subKey(int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = _index; i < num - 1; ++i) + keys[i] = keys[i + 1]; + return true; +} + +int +IVNode::searchKey_less(int _key) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + //if(bstr < *(p->getKey(i))) + //break; + + int low = 0, high = num - 1, mid = -1; + while (low <= high) + { + mid = (low + high) / 2; + if (this->keys[mid] > _key) + { + if (low == mid) + break; + high = mid; + } + else + { + low = mid + 1; + } + } + return low; +} + +int +IVNode::searchKey_equal(int _key) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + // if(bstr == *(p->getKey(i))) + // { + + int ret = this->searchKey_less(_key); + if (ret > 0 && this->keys[ret - 1] == _key) + return ret - 1; + else + return num; +} + +int +IVNode::searchKey_lessEqual(int _key) const +{ + //int num = this->getNum(); + //for(i = 0; i < num; ++i) + //if(bstr <= *(p->getKey(i))) + //break; + + int ret = this->searchKey_less(_key); + if (ret > 0 && this->keys[ret - 1] == _key) + return ret - 1; + else + return ret; +} diff --git a/KVstore/IVTree/node/IVNode.h b/KVstore/IVTree/node/IVNode.h new file mode 100644 index 0000000..20d6cfe --- /dev/null +++ b/KVstore/IVTree/node/IVNode.h @@ -0,0 +1,119 @@ +/*============================================================================= +# Filename: IVNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:38 +# Description: basic Node class, father of IVIntlNode and IVLeafNode +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVNODE_H +#define _KVSTORE_IVTREE_NODE_IVNODE_H + +#include "../../../Util/Util.h" +#include 
"../../../Util/Bstr.h" +#include "../vlist/VList.h" + +class IVNode //abstract basic class +{ +public: + static const unsigned DEGREE = 2 * 63; //the degree of B+ tree + static const unsigned MAX_CHILD_NUM = DEGREE; + static const unsigned MIN_CHILD_NUM = DEGREE >> 1; + static const unsigned MAX_KEY_NUM = MAX_CHILD_NUM - 1; //max key-num + static const unsigned MIN_KEY_NUM = MIN_CHILD_NUM - 1; //min key-num + /* diffrent flags for tree-nodes, 32-bit put rank in low-bits means no need to move*/ + static const unsigned NF_IL = 0x80000000; //is leaf + static const unsigned NF_ID = 0x00080000; //is dirty, in rank-area + static const unsigned NF_IM = 0x20000000; //in memory, not virtual + //static const unsigned NF_IV = 0x10000000; //is virtual + static const unsigned NF_RK = 0x00ffffff; //select-rank, in Storage + static const unsigned NF_HT = 0xf00000; //height area in rank + static const unsigned NF_KN = 0x07f000; //NOTICE: decided by DEGREE + static const unsigned INTL_SIZE = sizeof(int) * MAX_KEY_NUM; + static const unsigned LEAF_SIZE = INTL_SIZE + sizeof(Bstr) * MAX_KEY_NUM; +protected: + unsigned store; //store address, the BLock index + unsigned flag; //NF_RK, NF_IL,NF_ID, NF_IV, propety + //int num; //totle keys num + //Node* father; //point to father-node, which must be IntlNode + int* keys; + void AllocKeys(); + //void FreeKeys(); +public: + IVNode(); + IVNode(bool isVirtual); + bool isLeaf() const; + bool isDirty() const; + void setDirty(); + void delDirty(); + bool inMem() const; + void setMem(); + void delMem(); + //bool isVirtual() const; + //void setVirtual(); + //void delVirtual(); + unsigned getRank() const; + void setRank(unsigned _rank); + unsigned getHeight() const; + void setHeight(unsigned _h); + unsigned getNum() const; + bool setNum(int _num); + bool addNum(); + bool subNum(); + unsigned getStore() const; + void setStore(unsigned _store); + unsigned getFlag() const; + void setFlag(unsigned _flag); + int getKey(int _index) const; //need to check 
the index + bool setKey(int _key, int _index); + bool addKey(int _key, int _index); + bool subKey(int _index); + + //several binary key search utilities + int searchKey_less(int _key) const; + int searchKey_equal(int _key) const; + int searchKey_lessEqual(int _key) const; + + //virtual functions: polymorphic + virtual IVNode* getChild(int _index) const { return NULL; }; + virtual bool setChild(IVNode* _child, int _index) { return true; }; + virtual bool addChild(IVNode* _child, int _index) { return true; }; + virtual bool subChild(int _index) { return true; }; + virtual IVNode* getPrev() const { return NULL; }; + virtual IVNode* getNext() const { return NULL; }; + virtual const Bstr* getValue(int _index) const { return NULL; }; + virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return false; }; //bool result: false is what NULL converted to, now spelled with the right type + virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; + + virtual bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; + virtual bool subValue(VList* _vlist, int _index, bool ifdel = false) { return true; }; + virtual bool addValue(const Bstr* _val, int _index, bool ifcopy = false) { return true; }; + virtual bool subValue(int _index, bool ifdel = false) { return true; }; + + virtual void setPrev(IVNode* _prev) {}; + virtual void setNext(IVNode* _next) {}; + virtual void Virtual() = 0; + virtual void Normal() = 0; + virtual unsigned getSize() const = 0; //return all memory owned + virtual IVNode* split(IVNode* _father, int _index) = 0; + virtual IVNode* coalesce(IVNode* _father, int _index) = 0; + virtual void release() = 0; //release the node, only remain necessary information + virtual ~IVNode() {}; + virtual void print(std::string s) = 0; //DEBUG(print the Node) +}; + +/*NOTICE(operations in release()) +*To save memory, we can only remain store and flag(childs added for Leaf). 
+*However, in this way childs'pointers is ok to change, use Node** or Node*& is also nonsense +*because the pointer variable may die. +*Another way is only to release dynamic memory, and store thw whole, read the Bstr only to +*build. In this way nodes'pointer doesn't change, and operation is simplified, while memory +*is consumed a bit more. Because Bstrs consume the most memory, and memory-disk swapping is +*the most time-consuming thing, it seems to be a better way. +*WARN:when a node is in memory and not deleted, its basic content is always being! If nodes are +*really too many, this will cause disaster because we can't swap them out until tree is closed! +*To solve this problem, there should be two types of release-function: one to release Bstr, one +*to release the whole(pointer is invalid and rebuild problem) +*/ + +#endif diff --git a/KVstore/IVTree/storage/IVStorage.cpp b/KVstore/IVTree/storage/IVStorage.cpp new file mode 100644 index 0000000..d203d81 --- /dev/null +++ b/KVstore/IVTree/storage/IVStorage.cpp @@ -0,0 +1,722 @@ +/*============================================================================= +# Filename: IVStorage.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:43 +# Description: achieve functions in IVStorage.h +=============================================================================*/ + +#include "IVStorage.h" + +using namespace std; + +IVStorage::IVStorage() +{ //not use ../logs/, notice the location of program + cur_block_num = SET_BLOCK_NUM; + filepath = ""; + freelist = NULL; + treefp = NULL; + max_buffer_size = Util::MAX_BUFFER_SIZE; + heap_size = max_buffer_size / IVNode::INTL_SIZE; + freemem = max_buffer_size; + minheap = NULL; + this->value_list = NULL; +} + +IVStorage::IVStorage(string& _filepath, string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist) +{ + cur_block_num = SET_BLOCK_NUM; //initialize + this->filepath = _filepath; + if (_mode == string("build")) + treefp = 
fopen(_filepath.c_str(), "w+b"); + else if (_mode == string("open")) + treefp = fopen(_filepath.c_str(), "r+b"); + else + { + print(string("error in IVStorage: Invalid mode ") + _mode); + return; + } + if (treefp == NULL) + { + print(string("error in IVStorage: Open error ") + _filepath); + return; + } + this->treeheight = _height; //originally set to 0 + this->max_buffer_size = _buffer_size; + this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE; + this->freemem = this->max_buffer_size; + this->freelist = new BlockInfo; //null-head + unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE + BlockInfo* bp; + if (_mode == "build") + { //write basic information + i = 0; + fwrite(&i, sizeof(unsigned), 1, this->treefp); //height + fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum + fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + fputc(0, this->treefp); + for (k = 0; k < 8; ++k) + { + bp->next = new BlockInfo(i * 8 + k + 1, NULL); + bp = bp->next; + } + } + } + else //_mode == "open" + { + //read basic information + int rootnum; + char c; + fread(this->treeheight, sizeof(unsigned), 1, this->treefp); + fread(&rootnum, sizeof(unsigned), 1, this->treefp); + fread(&cur_block_num, sizeof(unsigned), 1, this->treefp); + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + c = fgetc(treefp); + for (k = 0; k < 8; ++k) + { + if ((c & (1 << k)) == 0) + { + bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL); + bp = bp->next; + } + } + } + fseek(treefp, Address(rootnum), SEEK_SET); + //treefp is now ahead of root-block + } + + this->minheap = new IVHeap(this->heap_size); + this->value_list = _vlist; +} + +bool +IVStorage::preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail) //pre-read and build whole tree +{ //set root(in memory) and 
leaves_head + //TODO: false when exceed memory + _leaves_tail = _leaves_head = _root = NULL; + if (ftell(this->treefp) == 0) //root is null + { + return true; + } + unsigned next, store, j, pos = 0; + unsigned h = *this->treeheight; + IVNode* p; + //read root node + this->createNode(p); + _root = p; + fread(&next, sizeof(unsigned), 1, treefp); + //use stack to achieve + long address[h]; //current address + unsigned used[h]; //used child num + unsigned total[h]; //total child num + unsigned block[h]; //next block num + IVNode* nodes[h]; + address[pos] = ftell(treefp); + used[pos] = 0; + total[pos] = p->getNum() + 1; + block[pos] = next; + nodes[pos] = p; + pos++; + IVNode* prev = NULL; + while (pos > 0) + { + j = pos - 1; + if (nodes[j]->isLeaf() || used[j] == total[j]) //LeafNode or ready IntlNode + { + if (nodes[j]->isLeaf()) + { + if (prev != NULL) + { + prev->setNext(nodes[j]); + nodes[j]->setPrev(prev); + } + prev = nodes[j]; + } + pos--; + continue; + } + fseek(this->treefp, address[j], SEEK_SET); + fread(&store, sizeof(unsigned), 1, treefp); + this->ReadAlign(block + j); + address[j] = ftell(treefp); + fseek(treefp, Address(store), SEEK_SET); + this->createNode(p); + nodes[j]->setChild(p, used[j]); + used[j]++; + fread(&next, sizeof(unsigned), 1, treefp); + address[pos] = ftell(treefp); + used[pos] = 0; + total[pos] = p->getNum() + 1; + block[pos] = next; + nodes[pos] = p; + pos++; + } + //set leaves and read root, which is always keeped in-mem + p = _root; + while (!p->isLeaf()) + { + p = p->getChild(0); + } + _leaves_head = p; + p = _root; + while (!p->isLeaf()) + { + p = p->getChild(p->getNum()); + } + _leaves_tail = p; + long long memory = 0; + this->readNode(_root, &memory); + this->request(memory); + return true; +} + +long //8-byte in 64-bit machine +IVStorage::Address(unsigned _blocknum) const //BETTER: inline function +{ + if (_blocknum == 0) + return 0; + else if (_blocknum > cur_block_num) + { + //print(string("error in Address: Invalid blocknum ") 
+ Util::int2string(_blocknum)); + return -1; //address should be non-negative + } + //NOTICE: here should explictly use long + return (long)(this->SuperNum + _blocknum - 1) * (long)BLOCK_SIZE; +} + +unsigned +IVStorage::Blocknum(long address) const +{ + return (address / BLOCK_SIZE) + 1 - this->SuperNum; +} + +unsigned +IVStorage::AllocBlock() +{ + BlockInfo* p = this->freelist->next; + if (p == NULL) + { + for (unsigned i = 0; i < SET_BLOCK_INC; ++i) + { + cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM + this->FreeBlock(cur_block_num); + } + p = this->freelist->next; + } + unsigned t = p->num; + this->freelist->next = p->next; + delete p; + return t; +} + +void +IVStorage::FreeBlock(unsigned _blocknum) +{ //QUERY: head-sub and tail-add will be better? + BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next); + this->freelist->next = bp; +} + +//NOTICE: all reads are aligned to 4 bytes(including a string) +//a string may acrossseveral blocks + +void +IVStorage::ReadAlign(unsigned* _next) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + fseek(treefp, Address(*_next), SEEK_SET); + fread(_next, sizeof(unsigned), 1, treefp); + } +} + +void +IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + unsigned blocknum = this->AllocBlock(); + fseek(treefp, Address(*_curnum), SEEK_SET); + if (_SpecialBlock) + { + fseek(treefp, 4, SEEK_CUR); + _SpecialBlock = false; + } + fwrite(&blocknum, sizeof(unsigned), 1, treefp); + fseek(treefp, Address(blocknum) + 4, SEEK_SET); + *_curnum = blocknum; + } +} + +bool +IVStorage::readNode(IVNode* _np, long long* _request) +{ + if (_np == NULL || _np->inMem()) + return false; //can't read or needn't + + fseek(treefp, Address(_np->getStore()), SEEK_SET); + bool flag = _np->isLeaf(); + unsigned next; + unsigned i, num = _np->getNum(); + Bstr bstr; + fseek(treefp, 4, SEEK_CUR); + fread(&next, sizeof(unsigned), 1, treefp); + + //read data, use readBstr... 
+ //fread(treefp, "%u", &num); + //_np->setNum(num); + if (flag) + *_request += IVNode::LEAF_SIZE; + else + *_request += IVNode::INTL_SIZE; + _np->Normal(); + if (!flag) + fseek(treefp, 4 * (num + 1), SEEK_CUR); + + //to read all keys + int tmp = -1; + for (i = 0; i < num; ++i) + { + fread(&tmp, sizeof(int), 1, treefp); + this->ReadAlign(&next); + _np->setKey(tmp, i); + } + + if (flag) + { + //to read all values + for (i = 0; i < num; ++i) + { + this->readBstr(&bstr, &next); + //if not long list value + if(bstr.getStr() != NULL) + { + *_request += bstr.getLen(); + } + _np->setValue(&bstr, i); + } + } + //_np->setFlag((_np->getFlag() & ~Node::NF_IV & ~Node::NF_ID) | Node::NF_IM); + //_np->delVirtual(); + _np->delDirty(); + //_np->setMem(); + this->updateHeap(_np, _np->getRank(), false); + bstr.clear(); + return true; +} + +bool +IVStorage::createNode(IVNode*& _np) //cretae virtual nodes, not in-mem +{ + /* + if(ftell(this->treefp)== 0) //null root + { + _np = NULL; + return false; + } + */ + unsigned t; //QUERY: maybe next-flag... will be better-storage? 
+ bool flag = false; //IntlNode + fread(&t, sizeof(unsigned), 1, treefp); + if ((t & IVNode::NF_IL) > 0) //WARN: according to setting + flag = true; //LeafNode + if (flag) + { + //this->request(sizeof(LeafNode)); + _np = new IVLeafNode(true); + } + else + { + //this->request(sizeof(IntlNode)); + _np = new IVIntlNode(true); + } + //fseek(treefp, -4, SEEK_CUR); + //_np->setFlag(_np->getFlag() | (t & Node::NF_RK)); + //_np->setRank(t); + _np->setFlag(t); + _np->delDirty(); + _np->delMem(); + _np->setStore(Blocknum(ftell(treefp) - 4)); + return true; +} + +bool +IVStorage::writeNode(IVNode* _np) +{ + if (_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty())) + return false; //not need to write back + + unsigned num = _np->getNum(), i; + bool flag = _np->isLeaf(), SpecialBlock = true; + /* + if(!flag) + { + for(i = 0; i <= num; ++i) + if(_np->getChild(i)->isDirty()) + return false; //NOTICE: all childs must be clean! + } + */ + //to release original blocks + unsigned store = _np->getStore(), next; + //if first store is 0, meaning a new node + fseek(this->treefp, Address(store) + 4, SEEK_SET); + fread(&next, sizeof(unsigned), 1, treefp); + while (store != 0) + { + this->FreeBlock(store); + store = next; + fseek(treefp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, treefp); + } + if (num == 0) + return true; //node is empty! 
+ unsigned t; + //write Node information + unsigned blocknum = this->AllocBlock(); + _np->setStore(blocknum); + long address = this->Address(blocknum); + fseek(this->treefp, address, SEEK_SET); + t = _np->getFlag(); + fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG + fseek(treefp, 4, SEEK_CUR); + if (!flag) + { + for (i = 0; i <= num; ++i) + { + t = _np->getChild(i)->getStore(); + fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG + this->WriteAlign(&blocknum, SpecialBlock); + } + } + + int tmp = 0; + //to write all keys + for (i = 0; i < num; ++i) + { + tmp = _np->getKey(i); + fwrite(&tmp, sizeof(int), 1, treefp); + this->WriteAlign(&blocknum, SpecialBlock); + } + + if (flag) + { + //to write all values + for (i = 0; i < num; ++i) + { + this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock); + } + } + fseek(treefp, Address(blocknum), SEEK_SET); + if (SpecialBlock) + fseek(treefp, 4, SEEK_CUR); + t = 0; + fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block + //_np->setFlag(_np->getFlag() & ~Node::NF_ID); + //NOTICE:we may store the dirty bit into the tree file, but that is ok + //Each time we read the tree file to construct a node, we always set the drity bit to 0 + _np->delDirty(); + return true; +} + +bool +IVStorage::readBstr(Bstr* _bp, unsigned* _next) +{ + //long address; + unsigned len, i, j; + fread(&len, sizeof(unsigned), 1, this->treefp); + this->ReadAlign(_next); + + //NOTICE: if this is a long list as value + if(len == 0) + { + unsigned addr = 0; + fread(&addr, sizeof(unsigned), 1, this->treefp); + _bp->setLen(addr); + _bp->setStr(NULL); + this->ReadAlign(_next); + return true; + } + + //this->request(len); + char* s = (char*)malloc(len); + _bp->setLen(len); + for (i = 0; i + 4 < len; i += 4) + { + fread(s + i, sizeof(char), 4, treefp); + this->ReadAlign(_next); + } + while (i < len) + { + fread(s + i, sizeof(char), 1, treefp); //BETTER + i++; + } + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->ReadAlign(_next); + 
_bp->setStr(s); + + return true; +} + +bool +IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +{ + unsigned i, j, len = _bp->getLen(); + + //NOTICE: to write long list value + if(_bp->getStr() == NULL) + { + unsigned flag = 0; + fwrite(&flag, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + //then this is the real block num + fwrite(&len, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + return true; + } + + fwrite(&len, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + + char* s = _bp->getStr(); + for (i = 0; i + 4 < len; i += 4) + { + fwrite(s + i, sizeof(char), 4, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + } + while (i < len) + { + fwrite(s + i, sizeof(char), 1, treefp); + i++; + } + + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->WriteAlign(_curnum, _SpecialBlock); + + return true; +} + +bool +IVStorage::writeTree(IVNode* _root) //write the whole tree back and close treefp +{ + fseek(this->treefp, 0, SEEK_SET); + fwrite(this->treeheight, sizeof(unsigned), 1, treefp); + //delete all nonsense-node in heap, otherwise will waste storage permanently + IVNode* p; + while (1) + { //all non-sense nodes will be in-head-area, due to minimal rank + p = minheap->getTop(); + if (p == NULL) //heap is empty, only when root==NULL + break; + if (p->getRank() == 0) //indicate non-sense node + { + this->minheap->remove(); + this->writeNode(p); + delete p; + } + else + break; + } + + unsigned i, j, t; + //QUERY: another way to write all nodes back is to print out all nodes in heap + //but this method will cause no node in heap any more, while operations may be + //afetr tree-saving. Which method is better? 
+ //write nodes recursively using stack, including root-num + if (_root != NULL) + { + IVNode* p = _root; + unsigned h = *this->treeheight, pos = 0; + IVNode* ns[h]; + int ni[h]; + ns[pos] = p; + ni[pos] = p->getNum(); + pos++; + while (pos > 0) + { + j = pos - 1; + p = ns[j]; + if (p->isLeaf() || ni[j] < 0) //leaf or all childs are ready + { + this->writeNode(p); + pos--; + continue; + } + ns[pos] = p->getChild(ni[j]); + ni[pos] = ns[pos]->getNum(); + pos++; + ni[j]--; + } + t = _root->getStore(); + } + else + t = 0; + + fseek(this->treefp, 4, SEEK_SET); + fwrite(&t, sizeof(unsigned), 1, treefp); //write the root num + fwrite(&cur_block_num, sizeof(unsigned), 1, treefp);//write current blocks num + fseek(treefp, BLOCK_SIZE, SEEK_SET); + j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE; + //reset to 1 first + for (i = 0; i < j; ++i) + { + fputc(0xff, treefp); + } + char c; + BlockInfo* bp = this->freelist->next; + while (bp != NULL) + { + //if not-use then set 0, aligned to byte! +#ifdef DEBUG_KVSTORE + if (bp->num > cur_block_num) + { + printf("blocks num exceed, cur_block_num: %u\n", cur_block_num); + exit(1); + } +#endif + j = bp->num - 1; + i = j / 8; + j = 7 - j % 8; + fseek(treefp, BLOCK_SIZE + i, SEEK_SET); + c = fgetc(treefp); + fseek(treefp, -1, SEEK_CUR); + fputc(c & ~(1 << j), treefp); + bp = bp->next; + } + //fclose(this->treefp); + return true; +} + +void +IVStorage::updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const +{ + if (_inheap) //already in heap, to modify + { + unsigned t = _np->getRank(); + _np->setRank(_rank); + if (t < _rank) + this->minheap->modify(_np, false); + else if (t > _rank) + this->minheap->modify(_np, true); + else; + } + else //not in heap, to add + { + _np->setRank(_rank); + this->minheap->insert(_np); + } +} + +bool +IVStorage::request(long long _needmem) //aligned to byte +{ //NOTICE: <0 means release + //cout<<"freemem: "<freemem<<" needmem: "<<_needmem< 0 && this->freemem < (unsigned long long)_needmem) + if 
(!this->handler(_needmem - freemem)) //disaster in buffer memory + { + print(string("error in request: out of buffer-mem, now to exit")); + //exit(1); + return false; + } + this->freemem -= _needmem; + return true; +} + +bool +IVStorage::handler(unsigned long long _needmem) //>0 +{ + //cout<<"swap happen"<minheap->getTop(); + //cout<<"get heap top"<minheap->remove(); + //cout<<"node removed in heap"<getSize(); + this->freemem += size; + this->writeNode(p); + //cout<<"node write back"<getNum() > 0) + p->Virtual(); + else + delete p; //non-sense node + //cout<<"node memory released"< size) + { + //cout<<"reduce the request"<freelist; + BlockInfo* next; + while (bp != NULL) + { + next = bp->next; + delete bp; + bp = next; + } +#ifdef DEBUG_KVSTORE + printf("already empty the freelist!\n"); +#endif + delete this->minheap; +#ifdef DEBUG_KVSTORE + printf("already empty the buffer heap!\n"); +#endif + fclose(this->treefp); + //#ifdef DEBUG_KVSTORE + //NOTICE:there is more than one tree + //fclose(Util::debug_kvstore); //NULL is ok! 
+ //Util::debug_kvstore = NULL; + //#endif +} + +void +IVStorage::print(string s) +{ +#ifdef DEBUG_KVSTORE + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVStorage\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); +#endif +} diff --git a/KVstore/IVTree/storage/IVStorage.h b/KVstore/IVTree/storage/IVStorage.h new file mode 100644 index 0000000..88525e2 --- /dev/null +++ b/KVstore/IVTree/storage/IVStorage.h @@ -0,0 +1,84 @@ +/*============================================================================= +# Filename: IVStorage.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:43 +# Description: swap between memory and disk, achieving system-like method +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H +#define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H + +#include "../node/IVIntlNode.h" +#include "../node/IVLeafNode.h" +#include "../heap/IVHeap.h" +#include "../vlist/VList.h" + +//It controls read, write, swap +class IVStorage +{ +public: + static const unsigned BLOCK_SIZE = Util::STORAGE_BLOCK_SIZE; //fixed size of disk-block + //there are 18 B+Tree indexes and one vstree index, so set 3G buffer size + //static const unsigned long long MAX_BUFFER_SIZE = Util::MAX_BUFFER_SIZE; //max buffer size + //static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size + //static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE / IVNode::INTL_SIZE; + + //DEBUG: maybe need to set larger, now the file size is 64G at most + static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num + //below two constants: must can be exactly divided by 8 + static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num + static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc + static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; + 
//static const unsigned TRANSFER_CAPACITY = BLOCK_SIZE; + //enum ReadType { OVER = 0, EXPAND, NORMAL }; + +private: + unsigned long long max_buffer_size; + unsigned heap_size; + unsigned cur_block_num; + std::string filepath; + unsigned* treeheight; + BlockInfo* freelist; + FILE* treefp; //file: tree nodes + IVHeap* minheap; //heap of Nodes's pointer, sorted in NF_RK + + //very long value list are stored in a separate file(with large block) + // + //NOTICE: according to the summary result, 90% value lists are just below 100 bytes + //<10%: 5000000~100M bytes + VList* value_list; + + //NOTICE: freemem's type is long long here, due to large memory in server. + //However, needmem in handler() and request() is ok to be int/unsigned. + //Because the bstr' size is controlled, so is the node. + unsigned long long freemem; //free memory to use, non-negative + //unsigned long long time; //QUERY(achieving an old-swap startegy?) + long Address(unsigned _blocknum) const; + unsigned Blocknum(long address) const; + unsigned AllocBlock(); + void FreeBlock(unsigned _blocknum); + void ReadAlign(unsigned* _next); + void WriteAlign(unsigned* _next, bool& _SpecialBlock); + +public: + IVStorage(); + IVStorage(std::string& _filepath, std::string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist); //create a fixed-size file or open an existence + bool preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail); //read and build all nodes, only root in memory + bool readNode(IVNode* _np, long long* _request); //read, if virtual + bool createNode(IVNode*& _np); //use fp to create a new node + //NOTICE(if children and child not exist, build children's Nodes) + bool writeNode(IVNode* _np); + bool readBstr(Bstr* _bp, unsigned* _next); + bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); + bool writeTree(IVNode* _np); + void updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const; + bool request(long long _needmem); //deal with memory 
request + bool handler(unsigned long long _needmem); //swap some nodes out + //bool update(); //update InMem Node's rank, with clock + ~IVStorage(); + void print(std::string s); //DEBUG +}; + +#endif + diff --git a/KVstore/IVTree/vlist/VList.cpp b/KVstore/IVTree/vlist/VList.cpp new file mode 100644 index 0000000..798b553 --- /dev/null +++ b/KVstore/IVTree/vlist/VList.cpp @@ -0,0 +1,257 @@ +/*============================================================================= +# Filename: VList.cpp +# Author: Bookug Lobert +# Mail: zengli-bookug@pku.edu.cn +# Last Modified: 2017-03-27 15:47 +# Description: +=============================================================================*/ + +#include "VList.h" + +using namespace std; + +VList::VList() +{ //not use ../logs/, notice the location of program + cur_block_num = SET_BLOCK_NUM; + filepath = ""; + freelist = NULL; + treefp = NULL; + minheap = NULL; + max_buffer_size = Util::MAX_BUFFER_SIZE; + heap_size = max_buffer_size / IVNode::INTL_SIZE; + freemem = max_buffer_size; +} + +VList::VList(string& _filepath, unsigned long long _buffer_size) +{ + cur_block_num = SET_BLOCK_NUM; //initialize + this->filepath = _filepath; + if (_mode == string("build")) + treefp = fopen(_filepath.c_str(), "w+b"); + else if (_mode == string("open")) + treefp = fopen(_filepath.c_str(), "r+b"); + else + { + print(string("error in IVStorage: Invalid mode ") + _mode); + return; + } + if (treefp == NULL) + { + print(string("error in IVStorage: Open error ") + _filepath); + return; + } + this->treeheight = _height; //originally set to 0 + this->max_buffer_size = _buffer_size; + this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE; + this->freemem = this->max_buffer_size; + this->freelist = new BlockInfo; //null-head + unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE + BlockInfo* bp; + if (_mode == "build") + { //write basic information + i = 0; + fwrite(&i, sizeof(unsigned), 1, this->treefp); //height + fwrite(&i, sizeof(unsigned), 1, 
this->treefp); //rootnum + fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + fputc(0, this->treefp); + for (k = 0; k < 8; ++k) + { + bp->next = new BlockInfo(i * 8 + k + 1, NULL); + bp = bp->next; + } + } + } + else //_mode == "open" + { + //read basic information + int rootnum; + char c; + fread(this->treeheight, sizeof(unsigned), 1, this->treefp); + fread(&rootnum, sizeof(unsigned), 1, this->treefp); + fread(&cur_block_num, sizeof(unsigned), 1, this->treefp); + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + c = fgetc(treefp); + for (k = 0; k < 8; ++k) + { + if ((c & (1 << k)) == 0) + { + bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL); + bp = bp->next; + } + } + } + fseek(treefp, Address(rootnum), SEEK_SET); + //treefp is now ahead of root-block + } + this->minheap = new IVHeap(this->heap_size); +} + +long //8-byte in 64-bit machine +IVStorage::Address(unsigned _blocknum) const //BETTER: inline function +{ + if (_blocknum == 0) + return 0; + else if (_blocknum > cur_block_num) + { + //print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum)); + return -1; //address should be non-negative + } + //NOTICE: here should explictly use long + return (long)(this->SuperNum + _blocknum - 1) * (long)BLOCK_SIZE; +} + +unsigned +IVStorage::Blocknum(long address) const +{ + return (address / BLOCK_SIZE) + 1 - this->SuperNum; +} + +unsigned +IVStorage::AllocBlock() +{ + BlockInfo* p = this->freelist->next; + if (p == NULL) + { + for (unsigned i = 0; i < SET_BLOCK_INC; ++i) + { + cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM + this->FreeBlock(cur_block_num); + } + p = this->freelist->next; + } + unsigned t = p->num; + this->freelist->next = p->next; + delete p; + return t; +} + +void +IVStorage::FreeBlock(unsigned 
_blocknum) +{ //QUERY: head-sub and tail-add will be better? + BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next); + this->freelist->next = bp; +} + +//NOTICE: all reads are aligned to 4 bytes(including a string) +//a string may acrossseveral blocks + +void +IVStorage::ReadAlign(unsigned* _next) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + fseek(treefp, Address(*_next), SEEK_SET); + fread(_next, sizeof(unsigned), 1, treefp); + } +} + +void +IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + unsigned blocknum = this->AllocBlock(); + fseek(treefp, Address(*_curnum), SEEK_SET); + if (_SpecialBlock) + { + fseek(treefp, 4, SEEK_CUR); + _SpecialBlock = false; + } + fwrite(&blocknum, sizeof(unsigned), 1, treefp); + fseek(treefp, Address(blocknum) + 4, SEEK_SET); + *_curnum = blocknum; + } +} + +bool +IVStorage::readBstr(Bstr* _bp, unsigned* _next) +{ + //long address; + unsigned len, i, j; + fread(&len, sizeof(unsigned), 1, this->treefp); + this->ReadAlign(_next); + //this->request(len); + char* s = (char*)malloc(len); + _bp->setLen(len); + for (i = 0; i + 4 < len; i += 4) + { + fread(s + i, sizeof(char), 4, treefp); + this->ReadAlign(_next); + } + while (i < len) + { + fread(s + i, sizeof(char), 1, treefp); //BETTER + i++; + } + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->ReadAlign(_next); + _bp->setStr(s); + return true; +} + +bool +IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +{ + unsigned i, j, len = _bp->getLen(); + fwrite(&len, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + char* s = _bp->getStr(); + for (i = 0; i + 4 < len; i += 4) + { + fwrite(s + i, sizeof(char), 4, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + } + while (i < len) + { + fwrite(s + i, sizeof(char), 1, treefp); + i++; + } + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->WriteAlign(_curnum, 
_SpecialBlock); + return true; +} + +VList::~VList() +{ + //release heap and freelist... +#ifdef DEBUG_KVSTORE + printf("now to release the kvstore!\n"); +#endif + BlockInfo* bp = this->freelist; + BlockInfo* next; + while (bp != NULL) + { + next = bp->next; + delete bp; + bp = next; + } +#ifdef DEBUG_KVSTORE + printf("already empty the freelist!\n"); +#endif + delete this->minheap; +#ifdef DEBUG_KVSTORE + printf("already empty the buffer heap!\n"); +#endif + fclose(this->treefp); + //#ifdef DEBUG_KVSTORE + //NOTICE:there is more than one tree + //fclose(Util::debug_kvstore); //NULL is ok! + //Util::debug_kvstore = NULL; + //#endif +} + diff --git a/KVstore/IVTree/vlist/VList.h b/KVstore/IVTree/vlist/VList.h new file mode 100644 index 0000000..61911f1 --- /dev/null +++ b/KVstore/IVTree/vlist/VList.h @@ -0,0 +1,71 @@ +/*============================================================================= +# Filename: VList.h +# Author: Bookug Lobert +# Mail: zengli-bookug@pku.edu.cn +# Last Modified: 2017-03-27 15:40 +# Description: +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_STORAGE_VLIST_H +#define _KVSTORE_IVTREE_STORAGE_VLIST_H + +#include "../../../Util/Util.h" +#include "../../../Util/Bstr.h" + +//TODO: not keep long list in memory, read each time +//but when can you free the long list(kvstore should release it after parsing) +// +//CONSIDER: if to keep long list in memory, should adjust the bstr in memory: +//unsigned: 0 char*: an object (if in memory, if modified, length, content, block num) +//when reading a long list in a node, generate the object first, and the object will tell you whether +//the list is in mmeory or not + +//BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts) + +class VList +{ +public: + //NOTICE:the border is 10^6, but the block is larger, 1M + static const unsigned LENGTH_BORDER = 1000000; + static const unsigned BLOCK_SIZE = 1 << 20; 
//fixed size of disk-block + static const unsigned MAX_BLOCK_NUM = 1 << 16; //max block-num + //below two constants: must can be exactly divided by 8 + static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num + static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc + static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; + +private: + unsigned long long max_buffer_size; + unsigned cur_block_num; + std::string filepath; + BlockInfo* freelist; + //very long value list are stored in a separate file(with large block) + // + //NOTICE: according to the summary result, 90% value lists are just below 100 bytes + //<10%: 5000000~100M bytes + FILE* valfp; + + //NOTICE: freemem's type is long long here, due to large memory in server. + //However, needmem in handler() and request() is ok to be int/unsigned. + //Because the bstr' size is controlled, so is the node. + unsigned long long freemem; //free memory to use, non-negative + //unsigned long long time; //QUERY(achieving an old-swap startegy?) 
+ long Address(unsigned _blocknum) const; + unsigned Blocknum(long address) const; + unsigned AllocBlock(); + void FreeBlock(unsigned _blocknum); + void ReadAlign(unsigned* _next); + void WriteAlign(unsigned* _next, bool& _SpecialBlock); + +public: + VList(); + VList(std::string& _filepath, unsigned long long _buffer_size);//create a fixed-size file or open an existence + bool readBstr(Bstr* _bp, unsigned* _next); + bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); + bool readValue(unsigned _block_num); + bool writeValue(const Bstr* _bp); + ~VList(); +}; + +#endif + diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index 1928799..bd9cd93 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -355,17 +355,19 @@ bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) { int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - bool _is_entity = KVstore::isEntity(_obj_id); + bool _is_entity = Util::is_entity_ele(_obj_id); //subID doesn't exist if (!_get) { - int _values[6]; + //int _values[6]; + int* _values = new int[6]; _values[0] = 1; _values[1] = 1; _values[2] = _is_entity ? 
1 : 0; _values[3] = _pre_id; _values[4] = 5; _values[5] = _obj_id; + //NOTICE: not use array in stack here, otherwise it will be freed, and data in B+Tree, too this->addValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(int) * 6); } @@ -444,7 +446,7 @@ bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) { } this->setValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -454,7 +456,7 @@ bool KVstore::updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id) { int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - bool _is_entity = KVstore::isEntity(_obj_id); + bool _is_entity = Util::is_entity_ele(_obj_id); if (!_get) { return false; @@ -529,7 +531,7 @@ bool KVstore::updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id) { } this->setValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -568,7 +570,8 @@ bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) { //objID doesn't exist if (!_get) { - int _values[5]; + //int _values[5]; + int* _values = new int[5]; _values[0] = 1; _values[1] = 1; _values[2] = _pre_id; @@ -649,7 +652,7 @@ bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) { } this->setValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -727,7 +730,7 @@ bool KVstore::updateRemove_o2values(int _sub_id, int _pre_id, int _obj_id) { } this->setValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -766,7 +769,8 @@ bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) { //preid doesn't exist if (!_get) { - int _values[3]; + //int _values[3]; 
+ int* _values = new int[3]; _values[0] = 1; _values[1] = _sub_id; _values[2] = _obj_id; @@ -793,7 +797,7 @@ bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) { _values[j + _tmp[0] + 1] = _tmp[i + _tmp[0]]; } this->setValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -831,7 +835,7 @@ bool KVstore::updateRemove_p2values(int _sub_id, int _pre_id, int _obj_id) { _values[j + _tmp[0] - 1] = _tmp[i + _tmp[0]]; } this->setValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -865,290 +869,452 @@ bool KVstore::updateRemove_p2values(int _preid, const std::vector& _sidoidl //for entity2id //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_entity2id(int _mode) { +bool +KVstore::open_entity2id(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_entity2id_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_entity2id_query; } - else { + else + { cerr << "Invalid open mode in open_entity2id, mode = " << _mode << endl; return false; } + return this->open(this->entity2id, KVstore::s_entity2id, _mode, buffer_size); } -bool KVstore::close_entity2id() { - if (this->entity2id == NULL) { +bool +KVstore::close_entity2id() +{ + if (this->entity2id == NULL) + { return true; } + this->entity2id->save(); delete this->entity2id; this->entity2id = NULL; + return true; } -bool KVstore::subIDByEntity(string _entity) { +bool +KVstore::subIDByEntity(string _entity) +{ + //NOTICE: no need to copy _entity to a char* buffer + //_entity will not be released befor ethis function ends + //so _entity.c_str() is a valid const char* return 
this->entity2id->remove(_entity.c_str(), _entity.length()); } -int KVstore::getIDByEntity(string _entity) const { +int +KVstore::getIDByEntity(string _entity) const +{ return this->getIDByStr(this->entity2id, _entity.c_str(), _entity.length()); } -bool KVstore::setIDByEntity(string _entity, int _id) { - return this->addValueByKey(this->entity2id, _entity.c_str(), _entity.length(), _id); +bool +KVstore::setIDByEntity(string _entity, int _id) +{ + //int len = _entity.length() + 1; + int len = _entity.length(); + char* str = new char[len]; + memcpy(str, _entity.c_str(), len); + return this->addValueByKey(this->entity2id, str, len, _id); } //for id2entity //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2entity(int _mode) { +bool +KVstore::open_id2entity(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2entity_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2entity_query; } - else { + else + { cerr << "Invalid open mode in open_id2entity, mode = " << _mode << endl; return false; } + return this->open(this->id2entity, KVstore::s_id2entity, _mode, buffer_size); } -bool KVstore::close_id2entity() { - if (this->id2entity == NULL) { +bool +KVstore::close_id2entity() +{ + if (this->id2entity == NULL) + { return true; } + this->id2entity->save(); delete this->id2entity; this->id2entity = NULL; + return true; } -bool KVstore::subEntityByID(int _id) { +bool +KVstore::subEntityByID(int _id) +{ return this->id2entity->remove(_id); } -string KVstore::getEntityByID(int _id) const { +string +KVstore::getEntityByID(int _id) const +{ char* _tmp = NULL; int _len = 0; + bool _get = this->getValueByKey(this->id2entity, _id, _tmp, _len); - if (!_get) { + if (!_get) + { return ""; } - string _ret = string(_tmp); + + 
//NOTICE: no need to add \0 at last if we indicate the length + string _ret = string(_tmp, _len); + return _ret; } -bool KVstore::setEntityByID(int _id, string _entity) { - return this->addValueByKey(this->id2entity, _id, _entity.c_str(), _entity.length()); +bool +KVstore::setEntityByID(int _id, string _entity) +{ + //int len = _entity.length() + 1; + int len = _entity.length(); + char* str = new char[len]; + memcpy(str, _entity.c_str(), len); + + return this->addValueByKey(this->id2entity, _id, str, len); } //for predicate2id //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_predicate2id(int _mode) { +bool +KVstore::open_predicate2id(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_predicate2id_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_predicate2id_query; } - else { + else + { cerr << "Invalid open mode in open_predicate2id, mode = " << _mode << endl; return false; } + return this->open(this->predicate2id, KVstore::s_predicate2id, _mode, buffer_size); } -bool KVstore::close_predicate2id() { - if (this->predicate2id == NULL) { +bool +KVstore::close_predicate2id() +{ + if (this->predicate2id == NULL) + { return true; } + this->predicate2id->save(); delete this->predicate2id; this->predicate2id = NULL; + return true; } -bool KVstore::subIDByPredicate(string _predicate) { +bool +KVstore::subIDByPredicate(string _predicate) +{ return this->predicate2id->remove(_predicate.c_str(), _predicate.length()); } -int KVstore::getIDByPredicate(string _predicate) const { +int +KVstore::getIDByPredicate(string _predicate) const +{ return this->getIDByStr(this->predicate2id, _predicate.c_str(), _predicate.length()); } -bool KVstore::setIDByPredicate(string _predicate, int _id) { - return 
this->addValueByKey(this->predicate2id, _predicate.c_str(), _predicate.length(), _id); +bool +KVstore::setIDByPredicate(string _predicate, int _id) +{ + //int len = _predicate.length() + 1; + int len = _predicate.length(); + char* str = new char[len]; + memcpy(str, _predicate.c_str(), len); + return this->addValueByKey(this->predicate2id, str, len, _id); } //for id2predicate //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2predicate(int _mode) { +bool +KVstore::open_id2predicate(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2predicate_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2predicate_query; } - else { + else + { cerr << "Invalid open mode in open_id2predicate, mode = " << _mode << endl; return false; } + return this->open(this->id2predicate, KVstore::s_id2predicate, _mode, buffer_size); } -bool KVstore::close_id2predicate() { - if (this->id2predicate == NULL) { +bool +KVstore::close_id2predicate() +{ + if (this->id2predicate == NULL) + { return true; } + this->id2predicate->save(); delete this->id2predicate; this->id2predicate = NULL; + return true; } -bool KVstore::subPredicateByID(int _id) { +bool +KVstore::subPredicateByID(int _id) +{ return this->id2predicate->remove(_id); } -string KVstore::getPredicateByID(int _id) const { +string +KVstore::getPredicateByID(int _id) const +{ char* _tmp = NULL; int _len = 0; + bool _get = this->getValueByKey(this->id2predicate, _id, _tmp, _len); - if (!_get) { + if (!_get) + { return ""; } - string _ret = string(_tmp); + string _ret = string(_tmp, _len); + return _ret; } -bool KVstore::setPredicateByID(int _id, string _predicate) { - return this->addValueByKey(this->id2predicate, _id, _predicate.c_str(), _predicate.length()); +bool 
+KVstore::setPredicateByID(int _id, string _predicate) +{ + //int len = _predicate.length() + 1; + int len = _predicate.length(); + char* str = new char[len]; + memcpy(str, _predicate.c_str(), len); + + return this->addValueByKey(this->id2predicate, _id, str, len); } //for literal2id //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_literal2id(int _mode) { +bool +KVstore::open_literal2id(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_literal2id_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_literal2id_query; } - else { + else + { cerr << "Invalid open mode in open_literal2id, mode = " << _mode << endl; return false; } + return this->open(this->literal2id, KVstore::s_literal2id, _mode, buffer_size); } -bool KVstore::close_literal2id() { - if (this->literal2id == NULL) { +bool +KVstore::close_literal2id() +{ + if (this->literal2id == NULL) + { return true; } + this->literal2id->save(); delete this->literal2id; this->literal2id = NULL; + return true; } -bool KVstore::subIDByLiteral(string _literal) { +bool +KVstore::subIDByLiteral(string _literal) +{ return this->literal2id->remove(_literal.c_str(), _literal.length()); } -int KVstore::getIDByLiteral(string _literal) const { +int +KVstore::getIDByLiteral(string _literal) const +{ return this->getIDByStr(this->literal2id, _literal.c_str(), _literal.length()); } -bool KVstore::setIDByLiteral(string _literal, int _id) { - return this->addValueByKey(this->literal2id, _literal.c_str(), _literal.length(), _id); +bool +KVstore::setIDByLiteral(string _literal, int _id) +{ + //int len = _literal.length() + 1; + int len = _literal.length(); + char* str = new char[len]; + memcpy(str, _literal.c_str(), len); + + return this->addValueByKey(this->literal2id, str, len, 
_id); } //for id2literal //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2literal(int _mode) { +bool +KVstore::open_id2literal(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2literal_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2literal_query; } - else { + else + { cerr << "Invalid open mode in open_id2literal, mode = " << _mode << endl; return false; } + return this->open(this->id2literal, KVstore::s_id2literal, _mode, buffer_size); } -bool KVstore::close_id2literal() { - if (this->id2literal == NULL) { +bool +KVstore::close_id2literal() +{ + if (this->id2literal == NULL) + { return true; } + this->id2literal->save(); delete this->id2literal; this->id2literal = NULL; + return true; } -bool KVstore::subLiteralByID(int _id) { +bool +KVstore::subLiteralByID(int _id) +{ return this->id2literal->remove(_id); } -string KVstore::getLiteralByID(int _id) const { +string +KVstore::getLiteralByID(int _id) const +{ char* _tmp = NULL; int _len = 0; + bool _get = this->getValueByKey(this->id2literal, _id, _tmp, _len); - if (!_get) { + if (!_get) + { //NOTICE:here assumes that all literals cannot be empty: "" return ""; } - string _ret = string(_tmp); + string _ret = string(_tmp, _len); + return _ret; } -bool KVstore::setLiteralByID(int _id, string _literal) { - return this->addValueByKey(this->id2literal, _id, _literal.c_str(), _literal.length()); +bool +KVstore::setLiteralByID(int _id, string _literal) +{ + //int len = _literal.length() + 1; + int len = _literal.length(); + char* str = new char[len]; + memcpy(str, _literal.c_str(), len); + + return this->addValueByKey(this->id2literal, _id, str, len); } -bool KVstore::open_subID2values(int _mode) { +bool +KVstore::open_subID2values(int _mode) +{ unsigned 
long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_sID2values_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_sID2values_query; } - else { + else + { cerr << "Invalid open mode in open_subID2values, mode = " << _mode << endl; return false; } + return this->open(this->subID2values, KVstore::s_sID2values, _mode, buffer_size); } -bool KVstore::close_subID2values() { - if (this->subID2values == NULL) { +bool +KVstore::close_subID2values() +{ + if (this->subID2values == NULL) + { return true; } + this->subID2values->save(); delete this->subID2values; this->subID2values = NULL; + return true; } -bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { +//STRUCT of s2xx: triple_number pre_num entity_border p1 offset1 p2 offset2 ... pn offsetn +//p1-list(in offset1) p2-list(in offset2) ... pn-list(in offsetn) +//(the final whole list is a unsorted olist) +bool +KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) +{ cout << "Begin building subID2values..." 
<< endl; //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_spo_cmp); vector _oidlist_s; vector _pidoffsetlist_s; + //NOTICE: this is used for entity-literal border, but not used now + //it is only set for the whole olist in s2po, not for sp2o int _entity_num = 0; //true means the next sub is a different one from the current one @@ -1160,10 +1326,12 @@ bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { this->open_subID2values(KVstore::CREATE_MODE); - for (int i = 0; i < _triples_num; i++) { - if (i + 1 == _triples_num || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0] - || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2]) { - if (_sub_change) { + for (int i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0] || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2]) + { + if (_sub_change) + { _pidoffsetlist_s.clear(); _oidlist_s.clear(); _entity_num = 0; @@ -1173,13 +1341,15 @@ bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { int _pre_id = _p_id_tuples[i][1]; int _obj_id = _p_id_tuples[i][2]; - if (_sub_pre_change) { + if (_sub_pre_change) + { _pidoffsetlist_s.push_back(_pre_id); _pidoffsetlist_s.push_back(_oidlist_s.size()); } _oidlist_s.push_back(_obj_id); - if (KVstore::isEntity(_obj_id)) { + if (Util::is_entity_ele(_obj_id)) + { _entity_num++; } @@ -1187,8 +1357,10 @@ bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { _pre_change = (i + 1 == _triples_num) || (_p_id_tuples[i][1] != _p_id_tuples[i + 1][1]); _sub_pre_change = _sub_change || _pre_change; - if (_sub_change) { - for (unsigned j = 1; j < _pidoffsetlist_s.size(); j += 2) { + if (_sub_change) + { + for (unsigned j = 1; j < _pidoffsetlist_s.size(); j += 2) + { _pidoffsetlist_s[j] += 3 + _pidoffsetlist_s.size(); } int* _entrylist_s = new int[3 + _pidoffsetlist_s.size() + _oidlist_s.size()]; @@ 
-1200,28 +1372,35 @@ bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { _entrylist_s[2] = _entity_num; unsigned j, k; //pidoffsetlist - for (j = 3, k = 0; k < _pidoffsetlist_s.size(); j++, k++) { + for (j = 3, k = 0; k < _pidoffsetlist_s.size(); j++, k++) + { _entrylist_s[j] = _pidoffsetlist_s[k]; } //unsorted oidlist - for (k = 0; k < _oidlist_s.size(); j++, k++) { + for (k = 0; k < _oidlist_s.size(); j++, k++) + { _entrylist_s[j] = _oidlist_s[k]; } this->addValueByKey(this->subID2values, _sub_id, (char*)_entrylist_s, sizeof(int) * j); - delete[] _entrylist_s; + //delete[] _entrylist_s; } } } this->close_subID2values(); cout << "Finished building subID2values" << endl; + return true; } -bool KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _no_duplicate) const { +//TODO: for long list in all get functions, should free the long list +//the 0th element can be used to identify if is the long list +bool +KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { + if (!Util::is_entity_ele(_subid)) { _preidlist = NULL; _list_len = 0; return false; @@ -1229,22 +1408,28 @@ bool KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { + + if (!_get) + { _preidlist = NULL; _list_len = 0; return false; } + _list_len = _tmp[1]; _preidlist = new int[_list_len]; for (int i = 0; i < _list_len; i++) { _preidlist[i] = _tmp[2 * i + 3]; } + return true; } -bool KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getobjIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { + if 
(!Util::is_entity_ele(_subid)) { _objidlist = NULL; _list_len = 0; return false; @@ -1252,11 +1437,13 @@ bool KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { + if (!_get) + { _objidlist = NULL; _list_len = 0; return false; } + _list_len = _tmp[0]; _objidlist = new int[_list_len]; memcpy(_objidlist, _tmp + 3 + 2 * _tmp[1], sizeof(int) * _list_len); @@ -1264,16 +1451,20 @@ bool KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, if (_no_duplicate) { _list_len = Util::removeDuplicate(_objidlist, _list_len); } + return true; } -bool KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getobjIDlistBysubIDpreID " << _subid << ' ' << _preid << endl; - if (!isEntity(_subid)) { + if (!Util::is_entity_ele(_subid)) { _objidlist = NULL; _list_len = 0; return false; } + int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); @@ -1282,12 +1473,14 @@ bool KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, _list_len = 0; return false; } + int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2); if (_result == -1) { _objidlist = NULL; _list_len = 0; return false; } + int _offset = _tmp[4 + 2 * _result]; int _offset_next; if (_result == _tmp[1] - 1) { @@ -1299,16 +1492,21 @@ bool KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, _list_len = _offset_next - _offset; _objidlist = new int[_list_len]; memcpy(_objidlist, _tmp + _offset, sizeof(int) * _list_len); + return true; } -bool KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len, bool _no_duplicate) 
const { +bool +KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDobjIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { + if (!Util::is_entity_ele(_subid)) + { _preid_objidlist = NULL; _list_len = 0; return false; } + int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); @@ -1317,6 +1515,7 @@ bool KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len = 0; return false; } + _list_len = 2 * _tmp[0]; _preid_objidlist = new int[_list_len]; int _offset_next; @@ -1333,35 +1532,53 @@ bool KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _preid_objidlist[2 * j + 1] = _tmp[3 + 2 * _tmp[1] + j]; } } + return true; } -bool KVstore::open_objID2values(int _mode) { +bool +KVstore::open_objID2values(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_oID2values_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_oID2values_query; } - else { + else + { cerr << "Invalid open mode in open_objID2values, mode = " << _mode << endl; return false; } + return this->open(this->objID2values, KVstore::s_oID2values, _mode, buffer_size); } -bool KVstore::close_objID2values() { - if (this->objID2values == NULL) { +bool +KVstore::close_objID2values() +{ + if (this->objID2values == NULL) + { return true; } + this->objID2values->save(); delete this->objID2values; this->objID2values = NULL; + return true; } -bool KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) { +//NOTICE: do not need entity border here, because no literal in o2pslist +//STRUCT of o2xx: triple_number pre_num p1 offset1 p2 offset2 ... pn offsetn +//p1-list(in offset1) p2-list(in offset2) ... 
pn-list(in offsetn) +//(the final whole list is a unsorted slist) +bool +KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) +{ cout << "Begin building objID2values..." << endl; //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_ops_cmp); vector _sidlist_o; @@ -1376,9 +1593,10 @@ bool KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) { this->open_objID2values(KVstore::CREATE_MODE); - for (int i = 0; i < _triples_num; i++) { - if (i + 1 == _triples_num || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2] - || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0]) { + for (int i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2] || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0]) + { if (_obj_change) { _pidoffsetlist_o.clear(); _sidlist_o.clear(); @@ -1418,7 +1636,7 @@ bool KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) { _entrylist_o[j] = _sidlist_o[k]; } this->addValueByKey(this->objID2values, _obj_id, (char*)_entrylist_o, sizeof(int) * j); - delete[] _entrylist_o; + //delete[] _entrylist_o; } } } @@ -1428,7 +1646,9 @@ bool KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) { return true; } -bool KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDlistByobjID " << _objid << endl; int* _tmp = NULL; int _len = 0; @@ -1443,10 +1663,13 @@ bool KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, for (int i = 0; i < _list_len; i++) { _preidlist[i] = _tmp[2 * i + 2]; } + return true; } -bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& 
_list_len, bool _no_duplicate) const +{ //cout << "In getsubIDlistByobjID " << _objid << endl; int* _tmp = NULL; int _len = 0; @@ -1456,6 +1679,7 @@ bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, _list_len = 0; return false; } + _list_len = _tmp[0]; _subidlist = new int[_list_len]; memcpy(_subidlist, _tmp + 2 + 2 * _tmp[1], sizeof(int) * _list_len); @@ -1463,10 +1687,13 @@ bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, if (_no_duplicate) { _list_len = Util::removeDuplicate(_subidlist, _list_len); } + return true; } -bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getsubIDlistByobjIDpreID " << _objid << ' ' << _preid << endl; int* _tmp = NULL; int _len = 0; @@ -1476,12 +1703,14 @@ bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, _list_len = 0; return false; } + int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2); if (_result == -1) { _subidlist = NULL; _list_len = 0; return false; } + int _offset = _tmp[3 + 2 * _result]; int _offset_next; if (_result == _tmp[1] - 1) { @@ -1493,10 +1722,13 @@ bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, _list_len = _offset_next - _offset; _subidlist = new int[_list_len]; memcpy(_subidlist, _tmp + _offset, sizeof(int) * _list_len); + return true; } -bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDsubIDlistByobjID " << _objid << endl; int* _tmp = NULL; int _len = 0; @@ -1506,6 +1738,7 @@ bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, 
int& _list_len = 0; return false; } + _list_len = 2 * _tmp[0]; _preid_subidlist = new int[_list_len]; int _offset_next; @@ -1522,10 +1755,13 @@ bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _preid_subidlist[2 * j + 1] = _tmp[2 + 2 * _tmp[1] + j]; } } + return true; } -bool KVstore::open_preID2values(int _mode) { +bool +KVstore::open_preID2values(int _mode) +{ unsigned long long buffer_size; if (_mode == KVstore::CREATE_MODE) { buffer_size = Util::MAX_BUFFER_SIZE * buffer_pID2values_build; @@ -1540,17 +1776,25 @@ bool KVstore::open_preID2values(int _mode) { return this->open(this->preID2values, KVstore::s_pID2values, _mode, buffer_size); } -bool KVstore::close_preID2values() { +bool +KVstore::close_preID2values() +{ if (this->preID2values == NULL) { return true; } + this->preID2values->save(); delete this->preID2values; this->preID2values = NULL; + return true; } -bool KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) { +//NOTICE: if we sort sidlist, then oidlist is not sorted; otherwise if we sort oidlist, then sidlist is not sorted +//STRUCT of p2xx: triple_number sidlist oidlist(not sorted, linked with sidlist one by one) +bool +KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) +{ cout << "Begin building preID2values..." 
<< endl; //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_pso_cmp); vector _sidlist_p; @@ -1561,9 +1805,10 @@ bool KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) { this->open_preID2values(KVstore::CREATE_MODE); - for (int i = 0; i < _triples_num; i++) { - if (i + 1 == _triples_num || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0] - || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2]) { + for (int i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0] || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2]) + { if (_pre_change) { _sidlist_p.clear(); _oidlist_p.clear(); @@ -1592,7 +1837,7 @@ bool KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) { _entrylist_p[j] = _oidlist_p[k]; } this->addValueByKey(this->preID2values, _pre_id, (char*)_entrylist_p, sizeof(int) * j); - delete[] _entrylist_p; + //delete[] _entrylist_p; } } } @@ -1602,7 +1847,9 @@ bool KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) { return true; } -bool KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getsubIDlistBypreID " << _preid << endl; int* _tmp = NULL; int _len = 0; @@ -1612,16 +1859,20 @@ bool KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, _list_len = 0; return false; } + _list_len = _tmp[0]; _subidlist = new int[_list_len]; memcpy(_subidlist, _tmp + 1, sizeof(int) * _list_len); if (_no_duplicate) { _list_len = Util::removeDuplicate(_subidlist, _list_len); } + return true; } -bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const +{ 
//cout << "In getobjIDlistBypreID " << _preid << endl; int* _tmp = NULL; int _len = 0; @@ -1631,6 +1882,7 @@ bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, _list_len = 0; return false; } + _list_len = _tmp[0]; _objidlist = new int[_list_len]; memcpy(_objidlist, _tmp + 1 + _tmp[0], sizeof(int) * _list_len); @@ -1638,10 +1890,13 @@ bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, if (_no_duplicate) { _list_len = Util::removeDuplicate(_objidlist, _list_len); } + return true; } -bool KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getsubIDobjIDlistBypreID " << _preid << endl; int* _tmp = NULL; int _len = 0; @@ -1657,10 +1912,13 @@ bool KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _subid_objidlist[2 * i] = _tmp[1 + i]; _subid_objidlist[2 * i + 1] = _tmp[1 + _tmp[0] + i]; } + return true; } -bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDlistBysubIDobjID " << _subid << ' ' << _objid << endl; int *list1 = NULL, *list2 = NULL; int len1 = 0, len2 = 0; @@ -1669,11 +1927,13 @@ bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, _list_len = 0; return false; } + if (!this->getpreIDlistByobjID(_objid, list2, len2, true)) { _preidlist = NULL; _list_len = 0; return false; } + vector list = KVstore::intersect(list1, list2, len1, len2); delete[] list1; delete[] list2; @@ -1683,6 +1943,7 @@ bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, _list_len = 0; return false; } + int* _tmp = NULL; int _len = 0; 
this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); @@ -1715,10 +1976,12 @@ bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, _list_len--; } } + if (_list_len == 0) { _preidlist = NULL; return false; } + _preidlist = new int[_list_len]; int i = 0, j = 0; while (i < len) { @@ -1731,11 +1994,14 @@ bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, i++; } } + return true; } -bool KVstore::open(SITree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) { +bool +KVstore::open(SITree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) +{ if (_p_btree != NULL) { return false; } @@ -1754,7 +2020,9 @@ bool KVstore::open(SITree*& _p_btree, string _tree_name, int _mode, unsigned lon return true; } -bool KVstore::open(ISTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) { +bool +KVstore::open(ISTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) +{ if (_p_btree != NULL) { return false; } @@ -1773,43 +2041,112 @@ bool KVstore::open(ISTree*& _p_btree, string _tree_name, int _mode, unsigned lon return true; } -void KVstore::flush(SITree* _p_btree) { +bool +KVstore::open(IVTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) +{ if (_p_btree != NULL) { + return false; + } + string smode; + if (_mode == KVstore::CREATE_MODE) { + smode = "build"; + } + else if (_mode == KVstore::READ_WRITE_MODE) { + smode = "open"; + } + else { + cerr << "Invalid open mode of: " << _tree_name << " mode = " << _mode << endl; + return false; + } + _p_btree = new IVTree(this->store_path, _tree_name, smode, _buffer_size); + + return true; +} + +void +KVstore::flush(SITree* _p_btree) +{ + if (_p_btree != NULL) + { _p_btree->save(); } } -void KVstore::flush(ISTree* _p_btree) { - if (_p_btree != NULL) { +void +KVstore::flush(ISTree* _p_btree) +{ + if (_p_btree != NULL) + { _p_btree->save(); } } -bool 
KVstore::addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) { +void +KVstore::flush(IVTree* _p_btree) +{ + if (_p_btree != NULL) + { + _p_btree->save(); + } +} + +bool +KVstore::addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) +{ return _p_btree->insert(_key, _klen, _val); } -bool KVstore::addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) { +bool +KVstore::addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) +{ return _p_btree->insert(_key, _val, _vlen); } -bool KVstore::setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) { +bool +KVstore::addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen) +{ + return _p_btree->insert(_key, _val, _vlen); +} + +bool +KVstore::setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) +{ return _p_btree->modify(_key, _klen, _val); } -bool KVstore::setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) { +bool +KVstore::setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) +{ return _p_btree->modify(_key, _val, _vlen); } -bool KVstore::getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const { +bool +KVstore::setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen) +{ + return _p_btree->modify(_key, _val, _vlen); +} + +bool +KVstore::getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const +{ return _p_btree->search(_key, _klen, _val); } -bool KVstore::getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const { +bool +KVstore::getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const +{ return _p_btree->search(_key, _val, _vlen); } -int KVstore::getIDByStr(SITree* _p_btree, const char* _key, int _klen) const { +bool +KVstore::getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const +{ + return _p_btree->search(_key, _val, _vlen); +} + +int +KVstore::getIDByStr(SITree* 
_p_btree, const char* _key, int _klen) const +{ int val = 0; bool ret = _p_btree->search(_key, _klen, &val); if (!ret) @@ -1819,15 +2156,27 @@ int KVstore::getIDByStr(SITree* _p_btree, const char* _key, int _klen) const { return val; } -bool KVstore::removeKey(SITree* _p_btree, const char* _key, int _klen) { +bool +KVstore::removeKey(SITree* _p_btree, const char* _key, int _klen) +{ return _p_btree->remove(_key, _klen); } -bool KVstore::removeKey(ISTree* _p_btree, int _key) { +bool +KVstore::removeKey(ISTree* _p_btree, int _key) +{ return _p_btree->remove(_key); } -vector KVstore::intersect(const int* _list1, const int* _list2, int _len1, int _len2) { +bool +KVstore::removeKey(IVTree* _p_btree, int _key) +{ + return _p_btree->remove(_key); +} + +vector +KVstore::intersect(const int* _list1, const int* _list2, int _len1, int _len2) +{ int i = 0, j = 0; vector ret; while (i < _len1 && j < _len2) { @@ -1846,7 +2195,9 @@ vector KVstore::intersect(const int* _list1, const int* _list2, int _len1, return ret; } -int KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step) { +int +KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step) +{ int _left = 0; int _right = _list_len - 1; int _mid; @@ -1862,13 +2213,11 @@ int KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step) _left = _mid + 1; } } + return -1; } -bool KVstore::isEntity(int id) { - return id < Util::LITERAL_FIRST_ID; -} - +//TODO: better to adjust these parameters according to memory usage and entity num string KVstore::s_entity2id = "s_entity2id"; string KVstore::s_id2entity = "s_id2entity"; unsigned short KVstore::buffer_entity2id_build = 8; @@ -1899,3 +2248,4 @@ unsigned short KVstore::buffer_pID2values_build = 16; unsigned short KVstore::buffer_sID2values_query = 16; unsigned short KVstore::buffer_oID2values_query = 16; unsigned short KVstore::buffer_pID2values_query = 8; + diff --git a/KVstore/KVstore.h b/KVstore/KVstore.h index dfdfd77..c74940b 
100644 --- a/KVstore/KVstore.h +++ b/KVstore/KVstore.h @@ -164,9 +164,9 @@ private: static unsigned short buffer_literal2id_query; static unsigned short buffer_id2literal_query; - ISTree* subID2values; - ISTree* objID2values; - ISTree* preID2values; + IVTree* subID2values; + IVTree* objID2values; + IVTree* preID2values; static std::string s_sID2values; static std::string s_oID2values; static std::string s_pID2values; @@ -181,23 +181,29 @@ private: bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); + bool open(IVTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); void flush(SITree* _p_btree); void flush(ISTree* _p_btree); + void flush(IVTree* _p_btree); bool addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val); bool addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen); + bool addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen); bool setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val); bool setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen); + bool setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen); bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const; bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const; + bool getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const; int getIDByStr(SITree* _p_btree, const char* _key, int _klen) const; bool removeKey(SITree* _p_btree, const char* _key, int _klen); bool removeKey(ISTree* _p_btree, int _key); + bool removeKey(IVTree* _p_btree, int _key); static std::vector intersect(const int* _list1, const int* _list2, int _len1, int _len2); static int binarySearch(int key, const int* _list, int _list_len, int step = 1); diff --git a/KVstore/SITree/SITree.cpp 
b/KVstore/SITree/SITree.cpp index 70245cf..1dfef4d 100644 --- a/KVstore/SITree/SITree.cpp +++ b/KVstore/SITree/SITree.cpp @@ -20,7 +20,7 @@ SITree::SITree() TSM = NULL; storepath = ""; filename = ""; - transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; + //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; this->request = 0; } @@ -36,10 +36,10 @@ SITree::SITree(string _storepath, string _filename, string _mode, unsigned long this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail); else this->root = NULL; - this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M + //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M this->request = 0; } @@ -49,30 +49,30 @@ SITree::getFilePath() return storepath + "/" + filename; } -void //WARN: not check _str and _len -SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) -{ - if (_index > 2) - return; - /* - if(_str == NULL || _len == 0) - { - printf("error in CopyToTransfer: empty string\n"); - return; - } - */ - //unsigned length = _bstr->getLen(); - unsigned length = _len; - if (length + 1 > this->transfer_size[_index]) - { - transfer[_index].release(); - transfer[_index].setStr((char*)malloc(length + 1)); - this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 - } - memcpy(this->transfer[_index].getStr(), _str, length); - this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore - this->transfer[_index].setLen(length); -} +//void 
//WARN: not check _str and _len +//SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + //transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} unsigned SITree::getHeight() const @@ -110,20 +110,26 @@ SITree::search(const char* _str, unsigned _len, int* _val) *_val = -1; return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); request = 0; - Bstr bstr = this->transfer[1]; //not to modify its memory + //Bstr bstr = this->transfer[1]; //not to modify its memory + //Bstr bstr(_str, _len, true); int store; - SINode* ret = this->find(&transfer[1], &store, false); - if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found + SINode* ret = this->find(_str, _len, &store, false); + if (ret == NULL || store == -1) //tree is empty or not found + { + //bstr.clear(); + return false; + } + const Bstr* tmp = ret->getKey(store); + if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found { - bstr.clear(); return false; } *_val = ret->getValue(store); this->TSM->request(request); - bstr.clear(); + //bstr.clear(); return true; } @@ -135,7 +141,7 @@ SITree::insert(const char* _str, unsigned _len, int _val) printf("error in SITree-insert: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); this->request = 
0; SINode* ret; @@ -170,8 +176,8 @@ SITree::insert(const char* _str, unsigned _len, int _val) SINode* p = this->root; SINode* q; int i; - const Bstr* _key = &transfer[1]; - Bstr bstr = *_key; + //const Bstr* _key = &transfer[1]; + //Bstr bstr = *_key; while (!p->isLeaf()) { //j = p->getNum(); @@ -179,7 +185,7 @@ SITree::insert(const char* _str, unsigned _len, int _val) //if(bstr < *(p->getKey(i))) //break; //NOTICE: using binary search is better here - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); q = p->getChild(i); this->prepare(q); @@ -196,7 +202,10 @@ SITree::insert(const char* _str, unsigned _len, int _val) this->TSM->updateHeap(ret, ret->getRank(), false); this->TSM->updateHeap(q, q->getRank(), true); this->TSM->updateHeap(p, p->getRank(), true); - if (bstr < *(p->getKey(i))) + //if (bstr < *(p->getKey(i))) + const Bstr* tmp = p->getKey(i); + int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen()); + if (cmp_res < 0) p = q; else p = ret; @@ -212,24 +221,35 @@ SITree::insert(const char* _str, unsigned _len, int _val) //for(i = 0; i < j; ++i) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); //insert existing key is ok, but not inserted in //however, the tree-shape may change due to possible split in former code bool ifexist = false; - if (i > 0 && bstr == *(p->getKey(i - 1))) - ifexist = true; - else + //if (i > 0 && bstr == *(p->getKey(i - 1))) + if (i > 0) { - p->addKey(_key, i, true); + const Bstr* tmp = p->getKey(i-1); + int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen()); + if(cmp_res == 0) + { + ifexist = true; + } + } + + if(!ifexist) + { + p->addKey(_str, _len, i, true); p->addValue(_val, i); p->addNum(); - request += _key->getLen(); + request += _len; p->setDirty(); this->TSM->updateHeap(p, p->getRank(), true); } + this->TSM->request(request); - bstr.clear(); //NOTICE: must be cleared! + //bstr.clear(); //NOTICE: must be cleared! 
+ return !ifexist; //QUERY(which case:return false) } @@ -241,34 +261,42 @@ SITree::modify(const char* _str, unsigned _len, int _val) printf("error in SITree-modify: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); this->request = 0; - const Bstr* _key = &transfer[1]; - Bstr bstr = *_key; + //const Bstr* _key = &transfer[1]; + //Bstr bstr = *_key; int store; - SINode* ret = this->find(_key, &store, true); - if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found + SINode* ret = this->find(_str, _len, &store, true); + if (ret == NULL || store == -1) //tree is empty or not found { - bstr.clear(); + //bstr.clear(); return false; } + const Bstr* tmp = ret->getKey(store); + if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found + { + return false; + } + ret->setValue(_val, store); ret->setDirty(); this->TSM->request(request); - bstr.clear(); + //bstr.clear(); + return true; } //this function is useful for search and modify, and range-query SINode* //return the first key's position that >= *_key -SITree::find(const Bstr* _key, int* _store, bool ifmodify) +SITree::find(const char* _str, unsigned _len, int* _store, bool ifmodify) { //to assign value for this->bstr, function shouldn't be const! 
if (this->root == NULL) return NULL; //SITree Is Empty + SINode* p = root; int i, j; - Bstr bstr = *_key; //local Bstr: multiple delete + //Bstr bstr = *_key; //local Bstr: multiple delete while (!p->isLeaf()) { if (ifmodify) @@ -277,7 +305,7 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify) //for(i = 0; i < j; ++i) //BETTER(Binary-Search) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); p = p->getChild(i); this->prepare(p); @@ -287,13 +315,15 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify) //for(i = 0; i < j; ++i) //if(bstr <= *(p->getKey(i))) //break; - i = p->searchKey_lessEqual(bstr); + i = p->searchKey_lessEqual(_str, _len); if (i == j) *_store = -1; //Not Found else *_store = i; - bstr.clear(); + + //bstr.clear(); + return p; } @@ -312,24 +342,25 @@ SITree::remove(const char* _str, unsigned _len) printf("error in SITree-remove: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); request = 0; - const Bstr* _key = &transfer[1]; + //const Bstr* _key = &transfer[1]; SINode* ret; if (this->root == NULL) //tree is empty return false; + SINode* p = this->root; SINode* q; int i, j; - Bstr bstr = *_key; + //Bstr bstr = *_key; while (!p->isLeaf()) { j = p->getNum(); //for(i = 0; i < j; ++i) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); q = p->getChild(i); this->prepare(q); @@ -343,6 +374,7 @@ SITree::remove(const char* _str, unsigned _len) if (ret != NULL) this->TSM->updateHeap(ret, 0, true);//non-sense node this->TSM->updateHeap(q, q->getRank(), true); + if (q->isLeaf()) { if (q->getPrev() == NULL) @@ -350,6 +382,7 @@ SITree::remove(const char* _str, unsigned _len) if (q->getNext() == NULL) this->leaves_tail = q; } + if (p->getNum() == 0) //root shrinks { //this->leaves_head = q; @@ -365,7 +398,7 @@ SITree::remove(const char* _str, unsigned _len) } bool flag = false; 
- i = p->searchKey_equal(bstr); + i = p->searchKey_equal(_str, _len); //WARN+NOTICE:here must check, because the key to remove maybe not exist if (i != (int)p->getNum()) { @@ -386,7 +419,8 @@ SITree::remove(const char* _str, unsigned _len) } this->TSM->request(request); - bstr.clear(); + //bstr.clear(); + return flag; //i == j, not found } @@ -495,4 +529,5 @@ SITree::print(string s) } else; #endif -} \ No newline at end of file +} + diff --git a/KVstore/SITree/SITree.h b/KVstore/SITree/SITree.h index 49aa75f..52c27ee 100644 --- a/KVstore/SITree/SITree.h +++ b/KVstore/SITree/SITree.h @@ -3,7 +3,7 @@ # Author: syzz # Mail: 1181955272@qq.com # Last Modified: 2015-04-26 16:44 -# Description: struct and interface of the B+ tree +# Description: string2ID, including entity2id, literal2id, predicate2id =============================================================================*/ #ifndef _KVSTORE_SITREE_SITREE_H @@ -36,13 +36,19 @@ private: //so lock is a must. Add lock to transfer is better than to add //lock to every key/value. However, modify requires a lock for a //key/value, and multiple search for different keys are ok!!! - Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char* - unsigned transfer_size[3]; + //Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char* + //unsigned transfer_size[3]; + + //TODO: in all B+ trees, updat eoperation should lock the whole tree, while search operations not + //However, the transfer bstr maybe cause the parallism error!!!! + //Why we need the transfer? 
It is ok to pass the original string pointer to return + //A problem is that before the caller ends, the tree can not be modified(so a read-writ elock is required) + std::string storepath; std::string filename; //ok for user to change /* some private functions */ std::string getFilePath(); //in UNIX system - void CopyToTransfer(const char* _str, unsigned _len, unsigned _index); + //void CopyToTransfer(const char* _str, unsigned _len, unsigned _index); void release(SINode* _np) const; //tree's operations should be atom(if read nodes) @@ -61,7 +67,7 @@ public: bool search(const char* _str, unsigned _len, int* _val); bool insert(const char* _str, unsigned _len, int _val); bool modify(const char* _str, unsigned _len, int _val); - SINode* find(const Bstr* _key, int* store, bool ifmodify); + SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify); bool remove(const char* _str, unsigned _len); bool save(); ~SITree(); @@ -71,4 +77,5 @@ public: //(problem range between two extremes: not-modified, totally-modified) //After saved, it's ok to continue operations on tree! -#endif \ No newline at end of file +#endif + diff --git a/KVstore/SITree/storage/SIStorage.h b/KVstore/SITree/storage/SIStorage.h index 3b454ba..4e9e0eb 100644 --- a/KVstore/SITree/storage/SIStorage.h +++ b/KVstore/SITree/storage/SIStorage.h @@ -13,6 +13,14 @@ #include "../node/SILeafNode.h" #include "../heap/SIHeap.h" +//TODO: whether to use heap or not, is a big question +//For single-query application, it seems that LRU list like VSTree is a better choice(no much cost in the buffer itself) +//But in multiple-queries case, things maybe different +//BETTER: +//add a heap position in node, to speed up the node-pointer searching +//lower the update times of heap, if the size is 128M, then each update is 27 at most +//if not update in time, then the heap maybe not be a heap, then why do we use heap? why not a simple array? 
+ //It controls read, write, swap class SIStorage { diff --git a/KVstore/Tree.h b/KVstore/Tree.h index 528d2eb..b73612b 100644 --- a/KVstore/Tree.h +++ b/KVstore/Tree.h @@ -1,4 +1,5 @@ //headers wrapper for all kinds of BPlusTree +#include "IVTree/IVTree.h" #include "ISTree/ISTree.h" -#include "SITree/SITree.h" \ No newline at end of file +#include "SITree/SITree.h" diff --git a/Main/gadd.cpp b/Main/gadd.cpp index 498f86d..c7e8cdc 100644 --- a/Main/gadd.cpp +++ b/Main/gadd.cpp @@ -7,9 +7,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif cout << "argc: " << argc << "\t"; cout << "DB_store:" << argv[1] << "\t"; diff --git a/Main/gbuild.cpp b/Main/gbuild.cpp index 23791c2..bca5c16 100644 --- a/Main/gbuild.cpp +++ b/Main/gbuild.cpp @@ -17,9 +17,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif if(argc < 3) //./gbuild { //output help info here diff --git a/Main/gclient.cpp b/Main/gclient.cpp index c42de4d..9946ebd 100644 --- a/Main/gclient.cpp +++ b/Main/gclient.cpp @@ -12,9 +12,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif std::string ip = Socket::DEFAULT_SERVER_IP; unsigned short port = Socket::DEFAULT_CONNECT_PORT; @@ -38,4 +38,4 @@ int main(int argc, char * argv[]) client.run(); return 0; -} \ No newline at end of file +} diff --git a/Main/gconsole.cpp b/Main/gconsole.cpp index f5252bb..2be9e35 100644 --- a/Main/gconsole.cpp +++ b/Main/gconsole.cpp @@ -122,9 +122,9 @@ main(int argc, char **argv) //NOTICE:this is needed to ensure the file path is the work path //chdir(dirname(argv[0])); //NOTICE:this is needed to set several debug files -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif db_home = Util::global_config["db_home"]; diff --git a/Main/gquery.cpp b/Main/gquery.cpp index 7121b40..70c91c1 100644 --- a/Main/gquery.cpp +++ 
b/Main/gquery.cpp @@ -38,9 +38,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif if (argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0) { diff --git a/Main/gserver.cpp b/Main/gserver.cpp index f04948b..d6ae3d2 100644 --- a/Main/gserver.cpp +++ b/Main/gserver.cpp @@ -11,9 +11,9 @@ using namespace std; -#define GSERVER_PORT_FILE "bin/.gserver_port" -#define GSERVER_PORT_SWAP "bin/.gserver_port.swap" -#define GSERVER_LOG "logs/gserver.log" +//#define GSERVER_PORT_FILE "bin/.gserver_port" +//#define GSERVER_PORT_SWAP "bin/.gserver_port.swap" +//#define GSERVER_LOG "logs/gserver.log" bool isOnlyProcess(const char* argv0); void checkSwap(); @@ -22,9 +22,9 @@ bool stopServer(); int main(int argc, char* argv[]) { -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif string mode; if (argc == 1) { @@ -61,7 +61,7 @@ int main(int argc, char* argv[]) unsigned short port = Socket::DEFAULT_CONNECT_PORT; if (argc == 3) { if (!Util::isValidPort(string(argv[2]))) { - cout << "Invalid port: " << argv[2] << endl; + cerr << "Invalid port: " << argv[2] << endl; return -1; } else { @@ -70,9 +70,9 @@ int main(int argc, char* argv[]) } } if (!isOnlyProcess(argv[0])) { - ofstream out(GSERVER_PORT_SWAP, ios::out); + ofstream out(Util::gserver_port_swap.c_str()); if (!out) { - cout << "Failed to change port!" << endl; + cerr << "Failed to change port!" << endl; return -1; } out << port; @@ -80,9 +80,9 @@ int main(int argc, char* argv[]) cout << "Port will be changed to " << port << " after the current server stops or restarts." << endl; return 0; } - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (!out) { - cout << "Failed to change port!" << endl; + cerr << "Failed to change port!" 
<< endl; return -1; } out << port; @@ -93,10 +93,15 @@ int main(int argc, char* argv[]) if (mode == "-s" || mode == "--start") { if (!isOnlyProcess(argv[0])) { - cout << "gServer already running!" << endl; + cerr << "gServer already running!" << endl; return -1; } if (startServer()) { + sleep(1); + if (isOnlyProcess(argv[0])) { + cerr << "Server stopped unexpectedly. Check for port conflicts!" << endl; + return -1; + } return 0; } else { @@ -106,7 +111,7 @@ int main(int argc, char* argv[]) if (mode == "-t" || mode == "--stop") { if (isOnlyProcess(argv[0])) { - cout << "gServer not running!" << endl; + cerr << "gServer not running!" << endl; return -1; } if (stopServer()) { @@ -119,7 +124,7 @@ int main(int argc, char* argv[]) if (mode == "-r" || mode == "--restart") { if (isOnlyProcess(argv[0])) { - cout << "gServer not running!" << endl; + cerr << "gServer not running!" << endl; return -1; } if (!stopServer()) { @@ -133,14 +138,14 @@ int main(int argc, char* argv[]) if (mode == "-P" || mode == "--printport") { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE); + ifstream in(Util::gserver_port_file.c_str()); if (in) { in >> port; in.close(); } cout << "Current connection port is " << port << '.' << endl; unsigned short portSwap = 0; - ifstream inSwap(GSERVER_PORT_SWAP); + ifstream inSwap(Util::gserver_port_swap.c_str()); if (inSwap) { inSwap >> portSwap; inSwap.close(); @@ -153,14 +158,14 @@ int main(int argc, char* argv[]) if (mode == "-k" || mode == "--kill") { if (isOnlyProcess(argv[0])) { - cout << "No process to kill!" << endl; + cerr << "No process to kill!" << endl; return -1; } execl("/usr/bin/killall", "killall", Util::getExactPath(argv[0]).c_str(), NULL); return 0; } - cout << "Invalid arguments! Input \"bin/gserver -h\" for help." << endl; + cerr << "Invalid arguments! Type \"bin/gserver -h\" for help." 
<< endl; return -1; } @@ -169,38 +174,38 @@ bool isOnlyProcess(const char* argv0) { } void checkSwap() { - if (access(GSERVER_PORT_SWAP, 00) != 0) { + if (access(Util::gserver_port_swap.c_str(), 00) != 0) { return; } - ifstream in(GSERVER_PORT_SWAP, ios::in); + ifstream in(Util::gserver_port_swap.c_str()); if (!in) { - cout << "Failed in checkSwap(), port may not be changed." << endl; + cerr << "Failed in checkSwap(), port may not be changed." << endl; return; } unsigned short port; in >> port; in.close(); - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (!out) { - cout << "Failed in checkSwap(), port may not be changed." << endl; + cerr << "Failed in checkSwap(), port may not be changed." << endl; return; } out << port; out.close(); - chmod(GSERVER_PORT_FILE, 0644); - string cmd = string("rm ") + GSERVER_PORT_SWAP; + chmod(Util::gserver_port_file.c_str(), 0644); + string cmd = string("rm ") + Util::gserver_port_swap; system(cmd.c_str()); } bool startServer() { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE, ios::in); + ifstream in(Util::gserver_port_file.c_str()); if (!in) { - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (out) { out << port; out.close(); - chmod(GSERVER_PORT_FILE, 0644); + chmod(Util::gserver_port_file.c_str(), 0644); } } else { @@ -215,47 +220,75 @@ bool startServer() { if (!Util::dir_exist("logs")) { Util::create_dir("logs"); } - freopen(GSERVER_LOG, "a", stdout); - freopen(GSERVER_LOG, "a", stderr); - Server server(port); - if (!server.createConnection()) { - cout << Util::getTimeString() << "Failed to create connection at port " << port << '.' 
<< endl; - return false; + freopen(Util::gserver_log.c_str(), "a", stdout); + freopen(Util::gserver_log.c_str(), "a", stderr); + + int status; + + while (true) { + fpid = fork(); + + // child, main process + if (fpid == 0) { + Server server(port); + if (!server.createConnection()) { + cerr << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl; + return false; + } + cout << Util::getTimeString() << "Server started at port " << port << '.' << endl; + server.listen(); + exit(0); + return true; + } + + // parent, deamon process + else if (fpid > 0) { + waitpid(fpid, &status, 0); + if (WIFEXITED(status)) { + exit(0); + return true; + } + cerr << Util::getTimeString() << "Server stopped abnormally, restarting server..." << endl; + } + + // fork failure + else { + cerr << Util::getTimeString() << "Failed to start server: deamon fork failure." << endl; + return false; + } } - cout << Util::getTimeString() << "Server started at port " << port << '.' << endl; - server.listen(); - exit(0); - return true; } + // parent else if (fpid > 0) { cout << "Server started at port " << port << '.' << endl; return true; } + // fork failure else { - cout << "Failed to start server at port " << port << '.' << endl; + cerr << "Failed to start server at port " << port << '.' << endl; return false; } } bool stopServer() { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE, ios::in); + ifstream in(Util::gserver_port_file.c_str()); if (in) { in >> port; in.close(); } Socket socket; if (!socket.create() || !socket.connect("127.0.0.1", port) || !socket.send("stop")) { - cout << "Failed to stop server at port " << port << '.' << endl; + cerr << "Failed to stop server at port " << port << '.' << endl; return false; } string recv_msg; socket.recv(recv_msg); socket.close(); if (recv_msg != "server stopped.") { - cout << "Failed to stop server at port " << port << '.' << endl; + cerr << "Failed to stop server at port " << port << '.' 
<< endl; return false; } cout << "Server stopped at port " << port << '.' << endl; diff --git a/Main/gsub.cpp b/Main/gsub.cpp index a7b1b5a..14a4938 100644 --- a/Main/gsub.cpp +++ b/Main/gsub.cpp @@ -12,9 +12,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif cout << "argc: " << argc << "\t"; cout << "DB_store:" << argv[1] << "\t"; diff --git a/NOTES.md b/NOTES.md index 8d1c549..cf80444 100644 --- a/NOTES.md +++ b/NOTES.md @@ -7,6 +7,8 @@ 在使用gserver时,不能在数据库没有unload时再用gbuild或其他命令修改数据库,仅限于C/S模式 将IRC聊天放到gstore文档上,freenode #gStore +storage中大量使用long类型,文件大小也可能达到64G,最好在64位机器上运行。 + # 推广 必须建立一个官方网站,可以展示下团队、demo,需要建立社区/论坛并维护 @@ -86,13 +88,13 @@ http://blog.csdn.net/infoworld/article/details/8670951 要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned) 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧 -同时将ID的编码改为unsigned,无效标志-1改为最大值的宏, triple数目的类型也要改为unsigned -注意pre的ID还可以为-2,或者对于pre仍然用int,或者改函数的返回值为long long (还有一些没有用-1而是>=0) +在type分支中,sub2id_pre2id_obj2id函数中,每次double增长可能无法充分利用unsigned空间,只能利用到2560000000,超过后最好直接设置为最大 +去掉tree里面的复制,另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现,避免每次都重新new,但这会影响多线程程序 +而且在kvstore中往往需要对原始list做一些额外处理 --- -将B+tree中叶节点的大的value分离出来,新建一套缓存,使用block机制,标记length为0表示未读取 -类型bstr的length问题也需要解决 -如果把类型直接改成long long,空间开销一下子就上升了一倍 +UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍 解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用unsigned long long*和unsigned来表示,这样最高可支持到40亿triple +(其实这个不是特别必要,很少会有这种情况,我们处理的triple数目一般限制在20亿,就算是type这种边,po对数也就是跟entity数目持平,很难达到5亿) --- 那么是否可以调整entity与literal的分界线,如果entity数目一般都比literal数目多的话 直接把literal从大到小编号,可在ID模块中指定顺序,这样每个Datbase模块应该有自己独特的分界线,其他模块用时也需要注意 @@ -518,6 +520,8 @@ http://www.oschina.net/question/188977_58777 # ADVICE +#### 考虑利用hdfs或者hbase,这样就可以利用各公司已有的数据库系统,但这是否会和已有的内外存交换冲突? + #### 数值型查询 实数域 [-bound, bound] 类型很难匹配,有必要单独编码么? 
数据集中不应有范围 Query中编码过滤后还需验证 x>a, x=, <=, a时不直接取字符串,而是转换为数值并编码 @@ -602,3 +606,23 @@ Consider the use of Bloom Filter and FM-sketches http://www.hprd.org/download/ + + +## GIT + +#### how to commit a message + +package.json +http://www.json.cn/ +https://www.oschina.net/news/69705/git-commit-message-and-changelog-guide +https://sanwen8.cn/p/44eCof7.html + +1. commit one by one, a commit just do one thing + +2. place a empty line between head and body, body and footer + +3. the first letter of header should be in uppercase, and the header should not be too long, just a wonderful summary +FIX: ... ADD:... REF:... 代码重构 SUB:... + +4. each line should not be too long, add your real name and the influence in footer(maybe cause the code struct to change) + diff --git a/Query/BasicQuery.h b/Query/BasicQuery.h index 94128b3..642b0c8 100644 --- a/Query/BasicQuery.h +++ b/Query/BasicQuery.h @@ -175,8 +175,8 @@ private: map selected_var_position; public: - static const int MAX_VAR_NUM = 10; - static const int MAX_PRE_VAR_NUM = 10; + static const int MAX_VAR_NUM = 20; + static const int MAX_PRE_VAR_NUM = 20; static const char NOT_JUST_SELECT = 'a'; static const char SELECT_VAR = 's'; diff --git a/Util/Bstr.cpp b/Util/Bstr.cpp index 2ddc7ae..8157700 100644 --- a/Util/Bstr.cpp +++ b/Util/Bstr.cpp @@ -17,12 +17,17 @@ Bstr::Bstr() this->str = NULL; } -Bstr::Bstr(const char* _str, unsigned _len) +Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy) { //WARN: if need a string .please add '\0' in your own! this->length = _len; - //DEBUG:if copy memory? 
- //this->str = _str; //not valid:const char* -> char* + + //if(_nocopy) + //{ + //this->str = _str; //not valid:const char* -> char* + //return; + //} + this->str = (char*)malloc(_len); memcpy(this->str, _str, sizeof(char) * _len); //this->str[_len]='\0'; @@ -116,6 +121,12 @@ Bstr::operator != (const Bstr& _bstr) unsigned Bstr::getLen() const { + //NOTICE: this is for VList + if(this->str == NULL) + { + return 0; + } + return length; } diff --git a/Util/Bstr.h b/Util/Bstr.h index 1d93bc6..63d8cd9 100644 --- a/Util/Bstr.h +++ b/Util/Bstr.h @@ -24,7 +24,7 @@ public: Bstr(); //if copy memory, then use const char*, but slow //else, can not use const char* -> char* - Bstr(const char* _str, unsigned _len); + Bstr(const char* _str, unsigned _len, bool _nocopy = false); //Bstr(char* _str, unsigned _len); Bstr(const Bstr& _bstr); //Bstr& operate = (const Bstr& _bstr); diff --git a/Util/Util.cpp b/Util/Util.cpp index 9a0a883..bbf6077 100644 --- a/Util/Util.cpp +++ b/Util/Util.cpp @@ -48,6 +48,10 @@ map Util::global_config; //================================================================================================================== +string Util::gserver_port_file = "bin/.gserver_port"; +string Util::gserver_port_swap = "bin/.gserver_port.swap"; +string Util::gserver_log = "logs/gserver.log"; + //NOTICE:used in Database, Join and Strategy //int Util::triple_num = 0; //int Util::pre_num = 0; @@ -441,11 +445,18 @@ Util::memoryLeft() } bool -Util::is_literal_ele(int _id) +Util::is_literal_ele(unsigned _id) { return _id >= Util::LITERAL_FIRST_ID; } +bool +Util::is_entity_ele(unsigned id) +{ + return id < Util::LITERAL_FIRST_ID; +} + + //NOTICE: require that the list is ordered int Util::removeDuplicate(int* _list, int _len) diff --git a/Util/Util.h b/Util/Util.h index b5c02af..1098da1 100644 --- a/Util/Util.h +++ b/Util/Util.h @@ -37,6 +37,7 @@ in the sparql query can point to the same node in data graph) #include #include #include +#include #include #include @@ -87,7 
+88,7 @@ in the sparql query can point to the same node in data graph) //#define DEBUG_STREAM //#define DEBUG_PRECISE 1 all information //#define DEBUG_KVSTORE 1 //in KVstore -#define DEBUG_VSTREE 1 //in Database +//#define DEBUG_VSTREE 1 //in Database //#define DEBUG_LRUCACHE 1 //#define DEBUG_DATABASE 1 //in Database // @@ -218,7 +219,9 @@ public: static std::string getTimeString(); static std::string node2string(const char* _raw_str); - static bool is_literal_ele(int); + static bool is_literal_ele(unsigned id); + static bool is_entity_ele(unsigned id); + static int removeDuplicate(int*, int); static std::string getQueryFromFile(const char* _file_path); static std::string getSystemOutput(std::string cmd); @@ -279,6 +282,10 @@ public: static FILE* debug_database; static FILE* debug_vstree; + static std::string gserver_port_file; + static std::string gserver_port_swap; + static std::string gserver_log; + private: static bool isValidIPV4(std::string); diff --git a/logs/.gitignore b/logs/.gitignore new file mode 100644 index 0000000..397b4a7 --- /dev/null +++ b/logs/.gitignore @@ -0,0 +1 @@ +*.log diff --git a/makefile b/makefile index 2d27bda..da62f17 100644 --- a/makefile +++ b/makefile @@ -70,6 +70,7 @@ api_java = api/java/lib/GstoreJavaAPI.jar #sstreeobj = $(objdir)Tree.o $(objdir)Storage.o $(objdir)Node.o $(objdir)IntlNode.o $(objdir)LeafNode.o $(objdir)Heap.o sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SIIntlNode.o $(objdir)SILeafNode.o $(objdir)SIHeap.o istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o +ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj) @@ -217,6 +218,26 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $ $(CC) $(CFLAGS) 
KVstore/ISTree/heap/ISHeap.cpp -o $(objdir)ISHeap.o #objects in istree/ end +#objects in ivtree/ begin +$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o + $(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o + +$(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/storage/IVStorage.cpp -o $(objdir)IVStorage.o $(def64IO) + +$(objdir)IVNode.o: KVstore/IVTree/node/IVNode.cpp KVstore/IVTree/node/IVNode.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/node/IVNode.cpp -o $(objdir)IVNode.o + +$(objdir)IVIntlNode.o: KVstore/IVTree/node/IVIntlNode.cpp KVstore/IVTree/node/IVIntlNode.h + $(CC) $(CFLAGS) KVstore/IVTree/node/IVIntlNode.cpp -o $(objdir)IVIntlNode.o + +$(objdir)IVLeafNode.o: KVstore/IVTree/node/IVLeafNode.cpp KVstore/IVTree/node/IVLeafNode.h + $(CC) $(CFLAGS) KVstore/IVTree/node/IVLeafNode.cpp -o $(objdir)IVLeafNode.o + +$(objdir)IVHeap.o: KVstore/IVTree/heap/IVHeap.cpp KVstore/IVTree/heap/IVHeap.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/heap/IVHeap.cpp -o $(objdir)IVHeap.o +#objects in ivtree/ end + $(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h KVstore/Tree.h $(CC) $(CFLAGS) KVstore/KVstore.cpp $(inc) -o $(objdir)KVstore.o diff --git a/package.json b/package.json new file mode 100644 index 0000000..fe9e70c --- /dev/null +++ b/package.json @@ -0,0 +1,12 @@ +{ + "config": { + "ghooks": { + "commit-msg": "validate-commit-msg" + } + }, + + "scripts": { + "changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0", + "changelog": "conventional-changelog -p angular -i CHANGELOG.md -w" + } +} diff --git a/test/package.json b/test/package.json new file mode 100644 index 0000000..7ba7f79 --- /dev/null +++ b/test/package.json @@ -0,0 +1,15 @@ +{ + "config": { + "ghooks": { + //"pre-commit": "gulp lint", + "commit-msg": "validate-commit-msg", + //"pre-push": "make test", + //"post-merge": "npm 
install", + //"post-rewrite": "npm install", + } + } + "scripts": { + "changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0", + "changelog": "conventional-changelog -p angular -i CHANGELOG.md -w", + } +} From 939e84c8bff7160e90a8ceeff31a0ddbfda16113 Mon Sep 17 00:00:00 2001 From: bookug Date: Wed, 29 Mar 2017 23:57:09 +0800 Subject: [PATCH 3/6] refactor: to add long list value TODO: vlist support and IVLeafNode by zengli, long list need to be freed each time --- KVstore/ISTree/ISTree.cpp | 89 ++++++++-------- KVstore/ISTree/ISTree.h | 10 +- KVstore/ISTree/node/ISLeafNode.cpp | 38 ++++++- KVstore/ISTree/node/ISLeafNode.h | 7 +- KVstore/ISTree/node/ISNode.h | 8 +- KVstore/IVTree/IVTree.cpp | 17 ++- KVstore/IVTree/IVTree.h | 6 +- KVstore/IVTree/node/IVLeafNode.cpp | 41 +++---- KVstore/IVTree/node/IVNode.h | 5 +- KVstore/IVTree/storage/IVStorage.h | 2 +- KVstore/KVstore.cpp | 109 +++++++++++++++++-- KVstore/KVstore.h | 20 ++-- KVstore/SITree/SITree.cpp | 2 +- KVstore/SITree/SITree.h | 2 +- KVstore/SITree/node/SILeafNode.cpp | 2 +- KVstore/SITree/node/SILeafNode.h | 8 +- KVstore/SITree/node/SINode.cpp | 76 ++++++++++++- KVstore/SITree/node/SINode.h | 11 +- NOTES.md | 2 +- Util/Bstr.h | 1 + {KVstore/IVTree/vlist => Util}/VList.cpp | 129 +++++++++++------------ {KVstore/IVTree/vlist => Util}/VList.h | 22 ++-- 22 files changed, 436 insertions(+), 171 deletions(-) rename {KVstore/IVTree/vlist => Util}/VList.cpp (56%) rename {KVstore/IVTree/vlist => Util}/VList.h (76%) diff --git a/KVstore/ISTree/ISTree.cpp b/KVstore/ISTree/ISTree.cpp index 5a23fcb..02aab9f 100644 --- a/KVstore/ISTree/ISTree.cpp +++ b/KVstore/ISTree/ISTree.cpp @@ -20,7 +20,7 @@ ISTree::ISTree() TSM = NULL; storepath = ""; filename = ""; - transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; + //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; this->stream = NULL; this->request = 0; } @@ -37,10 +37,10 @@ ISTree::ISTree(string _storepath, string _filename, string _mode, 
unsigned long this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail); else this->root = NULL; - this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M + //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M this->stream = NULL; this->request = 0; } @@ -51,30 +51,30 @@ ISTree::getFilePath() return storepath + "/" + filename; } -void //WARN: not check _str and _len -ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) -{ - if (_index > 2) - return; - /* - if(_str == NULL || _len == 0) - { - printf("error in CopyToTransfer: empty string\n"); - return; - } - */ - //unsigned length = _bstr->getLen(); - unsigned length = _len; - if (length + 1 > this->transfer_size[_index]) - { - transfer[_index].release(); - transfer[_index].setStr((char*)malloc(length + 1)); - this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 - } - memcpy(this->transfer[_index].getStr(), _str, length); - this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore - this->transfer[_index].setLen(length); -} +//void //WARN: not check _str and _len +//ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + 
//transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} unsigned ISTree::getHeight() const @@ -121,15 +121,18 @@ ISTree::search(int _key, char*& _str, int& _len) } const Bstr* val = ret->getValue(store); - this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request - _str = this->transfer[0].getStr(); - _len = this->transfer[0].getLen(); + //this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request + //_str = this->transfer[0].getStr(); + //_len = this->transfer[0].getLen(); + _str = val->getStr(); + _len = val->getLen(); + this->TSM->request(request); return true; } bool -ISTree::insert(int _key, const char* _str, unsigned _len) +ISTree::insert(int _key, char* _str, unsigned _len) { if (_key < 0) { @@ -137,8 +140,8 @@ ISTree::insert(int _key, const char* _str, unsigned _len) return false; } - this->CopyToTransfer(_str, _len, 2); - const Bstr* val = &(this->transfer[2]); + //this->CopyToTransfer(_str, _len, 2); + //const Bstr* val = &(this->transfer[2]); this->request = 0; ISNode* ret; if (this->root == NULL) //tree is empty @@ -222,9 +225,9 @@ ISTree::insert(int _key, const char* _str, unsigned _len) else { p->addKey(_key, i); - p->addValue(val, i, true); + p->addValue(_str, _len, i, true); p->addNum(); - request += val->getLen(); + request += _len; p->setDirty(); this->TSM->updateHeap(p, p->getRank(), true); //_key->clear(); @@ -235,7 +238,7 @@ ISTree::insert(int _key, const char* _str, unsigned _len) } bool -ISTree::modify(int _key, const char* _str, unsigned _len) +ISTree::modify(int _key, char* _str, unsigned _len) { if (_key < 0) { @@ -243,8 +246,8 @@ ISTree::modify(int _key, const char* _str, unsigned _len) return false; 
} - this->CopyToTransfer(_str, _len, 2); //not check value - const Bstr* val = &(this->transfer[2]); + //this->CopyToTransfer(_str, _len, 2); //not check value + //const Bstr* val = &(this->transfer[2]); this->request = 0; int store; ISNode* ret = this->find(_key, &store, true); @@ -255,16 +258,17 @@ ISTree::modify(int _key, const char* _str, unsigned _len) } //cout<<"ISTree::modify() - key is found, now to remove"<getValue(store)->getLen(); - ret->setValue(val, store, true); + ret->setValue(_str, _len, store, true); //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); - this->request = val->getLen(); + this->request = _len; this->request -= len; ret->setDirty(); //cout<<"to request"<TSM->request(request); //cout<<"memory requested"<getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + this->values[_index].release(); //NOTICE: only used in modify + + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + + return true; +} + +bool +ISLeafNode::addValue(char* _str, unsigned _len, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + + return true; +} + bool ISLeafNode::subValue(int _index, bool ifdel) { @@ -373,4 +409,4 @@ ISLeafNode::print(string s) } else; #endif -} \ No newline at end of file +} diff --git a/KVstore/ISTree/node/ISLeafNode.h b/KVstore/ISTree/node/ISLeafNode.h index 0d965f2..d288bfb 100644 --- a/KVstore/ISTree/node/ISLeafNode.h +++ b/KVstore/ISTree/node/ISLeafNode.h @@ -27,6 +27,7 @@ public: void Normal(); ISNode* getPrev() const; ISNode* getNext() const; + const Bstr* getValue(int _index) const; bool setValue(const Bstr* 
_value, int _index, bool ifcopy = false); bool addValue(const Bstr* _value, int _index, bool ifcopy = false); @@ -34,6 +35,10 @@ public: void setPrev(ISNode* _prev); void setNext(ISNode* _next); unsigned getSize() const; + + bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false); + bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false); + ISNode* split(ISNode* _father, int _index); ISNode* coalesce(ISNode* _father, int _index); void release(); @@ -47,4 +52,4 @@ public: }; //BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next -#endif \ No newline at end of file +#endif diff --git a/KVstore/ISTree/node/ISNode.h b/KVstore/ISTree/node/ISNode.h index b25b544..7c8fc1d 100644 --- a/KVstore/ISTree/node/ISNode.h +++ b/KVstore/ISTree/node/ISNode.h @@ -80,12 +80,18 @@ public: virtual bool subChild(int _index) { return true; }; virtual ISNode* getPrev() const { return NULL; }; virtual ISNode* getNext() const { return NULL; }; + virtual const Bstr* getValue(int _index) const { return NULL; }; virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; }; virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; }; virtual bool subValue(int _index, bool ifdel = false) { return true; }; virtual void setPrev(ISNode* _prev) {}; virtual void setNext(ISNode* _next) {}; + + virtual bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; }; + virtual bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; }; + + //pure virtual function virtual void Virtual() = 0; virtual void Normal() = 0; virtual unsigned getSize() const = 0; //return all memory owned @@ -110,4 +116,4 @@ public: *to release the whole(pointer is invalid and rebuild problem) */ -#endif \ No newline at end of file +#endif diff --git a/KVstore/IVTree/IVTree.cpp b/KVstore/IVTree/IVTree.cpp index 1dee1cf..eac36a1 100644 --- 
a/KVstore/IVTree/IVTree.cpp +++ b/KVstore/IVTree/IVTree.cpp @@ -36,7 +36,7 @@ IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long string filepath = this->getFilePath(); string vlist_file = filepath + "_vlist"; - this->value_list = new VList(vlist_file, 1<<30); + this->value_list = new VList(vlist_file, this->mode, 1<<30); TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list); if (this->mode == "open") @@ -142,7 +142,7 @@ IVTree::search(int _key, char*& _str, int& _len) } bool -IVTree::insert(int _key, const char* _str, unsigned _len) +IVTree::insert(int _key, char* _str, unsigned _len) { if (_key < 0) { @@ -244,12 +244,13 @@ IVTree::insert(int _key, const char* _str, unsigned _len) //_key->clear(); //_value->clear(); } + this->TSM->request(request); return !ifexist; //QUERY(which case:return false) } bool -IVTree::modify(int _key, const char* _str, unsigned _len) +IVTree::modify(int _key, char* _str, unsigned _len) { if (_key < 0) { @@ -268,11 +269,14 @@ IVTree::modify(int _key, const char* _str, unsigned _len) return false; } //cout<<"IVTree::modify() - key is found, now to remove"<getValue(store)->getLen(); ret->setValue(this->value_list, store, _str, _len, true); //ret->setValue(val, store, true); //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); this->request = _len; //this->request = val->getLen(); @@ -386,6 +390,7 @@ IVTree::remove(int _key) this->TSM->updateHeap(p, p->getRank(), true); p = q; } + bool flag = false; //j = p->getNum(); //LeafNode(maybe root) //for(i = 0; i < j; ++i) @@ -414,7 +419,7 @@ IVTree::remove(int _key) { request -= p->getValue(i)->getLen(); p->subKey(i); //to release - p->subValue(i, true); //to release + p->subValue(this->value_list, i, true); //to release p->subNum(); if (p->getNum() == 0) //root leaf 0 key { @@ -461,6 +466,7 @@ IVTree::resetStream() this->stream->setEnd(); } +//TODO: change to using value list, getValue() maybe not get real long list bool 
//special case: not exist, one-edge-case IVTree::range_query(int _key1, int _key2) { //the range is: *_key1 <= x < *_key2 @@ -555,6 +561,7 @@ IVTree::range_query(int _key1, int _key2) for (i = l; i < r; ++i) { //NOTICE:Bstr* in an array, used as Bstr[] + //DEBUG+TODO: if long list?? clean this->stream->write(p->getValue(i)); } this->TSM->request(request); @@ -563,7 +570,9 @@ IVTree::range_query(int _key1, int _key2) else break; } + this->stream->setEnd(); + return true; } diff --git a/KVstore/IVTree/IVTree.h b/KVstore/IVTree/IVTree.h index 86fbf27..3ae3897 100644 --- a/KVstore/IVTree/IVTree.h +++ b/KVstore/IVTree/IVTree.h @@ -11,11 +11,11 @@ #include "../../Util/Util.h" #include "../../Util/Stream.h" +#include "../../Util/VList.h" #include "node/IVNode.h" #include "node/IVIntlNode.h" #include "node/IVLeafNode.h" #include "storage/IVStorage.h" -#include "./vlist/VList.h" //TODO: for long list, do not read in time, just on need //the memory is kept with the node, updat ewith node @@ -80,8 +80,8 @@ public: //void setRoot(Node* _root); //insert, search, remove, set bool search(int _key, char*& _str, int& _len); - bool insert(int _key, const char* _str, unsigned _len); - bool modify(int _key, const char* _str, unsigned _len); + bool insert(int _key, char* _str, unsigned _len); + bool modify(int _key, char* _str, unsigned _len); IVNode* find(int _key, int* store, bool ifmodify); bool remove(int _key); const Bstr* getRangeValue(); diff --git a/KVstore/IVTree/node/IVLeafNode.cpp b/KVstore/IVTree/node/IVLeafNode.cpp index a35bd6f..bba580b 100644 --- a/KVstore/IVTree/node/IVLeafNode.cpp +++ b/KVstore/IVTree/node/IVLeafNode.cpp @@ -89,6 +89,7 @@ IVLeafNode::getValue(int _index) const return this->values + _index; } +//TODO!!! 
bool IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { @@ -148,6 +149,26 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool return true; } +bool +IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) +{ + //TODO: if is to sub long list + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + int i; + if (ifdel) + values[_index].release(); + for (i = _index; i < num - 1; ++i) + this->values[i] = this->values[i + 1]; + + return true; +} + bool IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy) { @@ -169,26 +190,6 @@ IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy) return true; } -bool -IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) -{ - //TODO: if is to sub long list - int num = this->getNum(); - if (_index < 0 || _index >= num) - { - print(string("error in subValue: Invalid index ") + Util::int2string(_index)); - return false; - } - - int i; - if (ifdel) - values[_index].release(); - for (i = _index; i < num - 1; ++i) - this->values[i] = this->values[i + 1]; - - return true; -} - bool IVLeafNode::subValue(int _index, bool ifdel) { diff --git a/KVstore/IVTree/node/IVNode.h b/KVstore/IVTree/node/IVNode.h index 20d6cfe..a7d6b59 100644 --- a/KVstore/IVTree/node/IVNode.h +++ b/KVstore/IVTree/node/IVNode.h @@ -11,7 +11,7 @@ #include "../../../Util/Util.h" #include "../../../Util/Bstr.h" -#include "../vlist/VList.h" +#include "../../../Util/VList.h" class IVNode //abstract basic class { @@ -81,6 +81,7 @@ public: virtual bool subChild(int _index) { return true; }; virtual IVNode* getPrev() const { return NULL; }; virtual IVNode* getNext() const { return NULL; }; + virtual const Bstr* getValue(int _index) const { return NULL; }; virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; }; virtual bool 
setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; @@ -92,6 +93,8 @@ public: virtual void setPrev(IVNode* _prev) {}; virtual void setNext(IVNode* _next) {}; + + //pure virtual functions virtual void Virtual() = 0; virtual void Normal() = 0; virtual unsigned getSize() const = 0; //return all memory owned diff --git a/KVstore/IVTree/storage/IVStorage.h b/KVstore/IVTree/storage/IVStorage.h index 88525e2..37e13ae 100644 --- a/KVstore/IVTree/storage/IVStorage.h +++ b/KVstore/IVTree/storage/IVStorage.h @@ -9,10 +9,10 @@ #ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H #define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H +#include "../../../Util/VList.h" #include "../node/IVIntlNode.h" #include "../node/IVLeafNode.h" #include "../heap/IVHeap.h" -#include "../vlist/VList.h" //It controls read, write, swap class IVStorage diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index bd9cd93..9b57917 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -1422,6 +1422,14 @@ KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _preidlist[i] = _tmp[2 * i + 3]; } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1452,6 +1460,14 @@ KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _list_len = Util::removeDuplicate(_objidlist, _list_len); } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1493,6 +1509,14 @@ KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _objidlist = new int[_list_len]; memcpy(_objidlist, _tmp + _offset, sizeof(int) * _list_len); + //if this is a long list, then we should remove itself after copying + 
//otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1533,6 +1557,14 @@ KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list } } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1664,6 +1696,14 @@ KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _preidlist[i] = _tmp[2 * i + 2]; } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1688,6 +1728,14 @@ KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _list_len = Util::removeDuplicate(_subidlist, _list_len); } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1723,6 +1771,14 @@ KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _subidlist = new int[_list_len]; memcpy(_subidlist, _tmp + _offset, sizeof(int) * _list_len); + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1756,6 +1812,14 @@ KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list } } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1867,6 +1931,14 @@ KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, 
bool _list_len = Util::removeDuplicate(_subidlist, _list_len); } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1891,6 +1963,14 @@ KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _list_len = Util::removeDuplicate(_objidlist, _list_len); } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1913,6 +1993,14 @@ KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list _subid_objidlist[2 * i + 1] = _tmp[1 + _tmp[0] + i]; } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + return true; } @@ -1977,6 +2065,14 @@ KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& } } + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + if (_list_len == 0) { _preidlist = NULL; return false; @@ -2091,37 +2187,37 @@ KVstore::flush(IVTree* _p_btree) } bool -KVstore::addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) +KVstore::addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val) { return _p_btree->insert(_key, _klen, _val); } bool -KVstore::addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) +KVstore::addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen) { return _p_btree->insert(_key, _val, _vlen); } bool -KVstore::addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen) +KVstore::addValueByKey(IVTree* _p_btree, int _key, char* 
_val, int _vlen) { return _p_btree->insert(_key, _val, _vlen); } bool -KVstore::setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) +KVstore::setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val) { return _p_btree->modify(_key, _klen, _val); } bool -KVstore::setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) +KVstore::setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen) { return _p_btree->modify(_key, _val, _vlen); } bool -KVstore::setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen) +KVstore::setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen) { return _p_btree->modify(_key, _val, _vlen); } @@ -2218,6 +2314,7 @@ KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step) } //TODO: better to adjust these parameters according to memory usage and entity num +//need a memory manager first string KVstore::s_entity2id = "s_entity2id"; string KVstore::s_id2entity = "s_id2entity"; unsigned short KVstore::buffer_entity2id_build = 8; diff --git a/KVstore/KVstore.h b/KVstore/KVstore.h index c74940b..5372e63 100644 --- a/KVstore/KVstore.h +++ b/KVstore/KVstore.h @@ -10,8 +10,16 @@ #define _KVSTORE_KVSTORE_H #include "../Util/Util.h" +#include "../Util/VList.h" #include "Tree.h" +//TODO: is it needed to keep a length in Bstr?? especially for IVTree? +//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment) +//add a \0 in tail: only add 1 char +//QUERY: but to count the length each time maybe very costly? +//No, because triple num is stored in char* now!!!! we do not need to save it again +//TODO: entity_border in s2values list is not needed!!! 
not waste memory here + class KVstore { public: @@ -187,13 +195,13 @@ private: void flush(ISTree* _p_btree); void flush(IVTree* _p_btree); - bool addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val); - bool addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen); - bool addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen); + bool addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val); + bool addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen); + bool addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen); - bool setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val); - bool setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen); - bool setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen); + bool setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val); + bool setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen); + bool setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen); bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const; bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const; diff --git a/KVstore/SITree/SITree.cpp b/KVstore/SITree/SITree.cpp index 1dfef4d..3502611 100644 --- a/KVstore/SITree/SITree.cpp +++ b/KVstore/SITree/SITree.cpp @@ -134,7 +134,7 @@ SITree::search(const char* _str, unsigned _len, int* _val) } bool -SITree::insert(const char* _str, unsigned _len, int _val) +SITree::insert(char* _str, unsigned _len, int _val) { if (_str == NULL || _len == 0) { diff --git a/KVstore/SITree/SITree.h b/KVstore/SITree/SITree.h index 52c27ee..6041e65 100644 --- a/KVstore/SITree/SITree.h +++ b/KVstore/SITree/SITree.h @@ -65,7 +65,7 @@ public: SINode* getRoot() const; //insert, search, remove, set bool search(const char* _str, unsigned _len, int* _val); - bool insert(const char* _str, unsigned _len, int _val); + bool insert(char* _str, unsigned _len, 
int _val); bool modify(const char* _str, unsigned _len, int _val); SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify); bool remove(const char* _str, unsigned _len); diff --git a/KVstore/SITree/node/SILeafNode.cpp b/KVstore/SITree/node/SILeafNode.cpp index 1e70488..13a2bfe 100644 --- a/KVstore/SITree/node/SILeafNode.cpp +++ b/KVstore/SITree/node/SILeafNode.cpp @@ -362,4 +362,4 @@ SILeafNode::print(string s) } else; #endif -} \ No newline at end of file +} diff --git a/KVstore/SITree/node/SILeafNode.h b/KVstore/SITree/node/SILeafNode.h index 7aa05f4..605ff09 100644 --- a/KVstore/SITree/node/SILeafNode.h +++ b/KVstore/SITree/node/SILeafNode.h @@ -27,18 +27,24 @@ public: void Normal(); SINode* getPrev() const; SINode* getNext() const; + int getValue(int _index) const; bool setValue(int _val, int _index); bool addValue(int _val, int _index); bool subValue(int _index); + void setPrev(SINode* _prev); void setNext(SINode* _next); + unsigned getSize() const; + SINode* split(SINode* _father, int _index); SINode* coalesce(SINode* _father, int _index); + void release(); ~SILeafNode(); void print(std::string s); //DEBUG + /*non-sense virtual function Node* getChild(int _index) const; bool addChild(Node* _child, int _index); @@ -47,4 +53,4 @@ public: }; //BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next -#endif \ No newline at end of file +#endif diff --git a/KVstore/SITree/node/SINode.cpp b/KVstore/SITree/node/SINode.cpp index d97ee47..5c18727 100644 --- a/KVstore/SITree/node/SINode.cpp +++ b/KVstore/SITree/node/SINode.cpp @@ -254,6 +254,27 @@ SINode::addKey(const Bstr* _key, int _index, bool ifcopy) return true; } +bool +SINode::addKey(char* _str, unsigned _len, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + //NOTICE: if num == MAX_KEY_NUM, will visit 
keys[MAX_KEY_NUM], not legal!!! + //however. tree operations ensure that: when node is full, not add but split first! + for (i = num - 1; i >= _index; --i) + keys[i + 1] = keys[i]; + + keys[_index].setStr(_str); + keys[_index].setLen(_len); + + return true; +} + bool SINode::subKey(int _index, bool ifdel) { @@ -325,4 +346,57 @@ SINode::searchKey_lessEqual(const Bstr& _bstr) const return ret - 1; else return ret; -} \ No newline at end of file +} + +int +SINode::searchKey_less(const char* _str, unsigned _len) const +{ + int num = this->getNum(); + + int low = 0, high = num - 1, mid = -1; + while (low <= high) + { + mid = (low + high) / 2; + //if (this->keys[mid] > _bstr) + if (Util::compare(this->keys[mid].getStr(), this->keys[mid].getLen(), _str, _len) > 0) + { + if (low == mid) + break; + high = mid; + } + else + { + low = mid + 1; + } + } + + return low; +} + +int +SINode::searchKey_equal(const char* _str, unsigned _len) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + // if(bstr == *(p->getKey(i))) + // { + + int ret = this->searchKey_less(_str, _len); + //if (ret > 0 && this->keys[ret - 1] == _bstr) + if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0) + return ret - 1; + else + return num; +} + +int +SINode::searchKey_lessEqual(const char* _str, unsigned _len) const +{ + int ret = this->searchKey_less(_str, _len); + //if (ret > 0 && this->keys[ret - 1] == _bstr) + if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0) + return ret - 1; + else + return ret; +} + diff --git a/KVstore/SITree/node/SINode.h b/KVstore/SITree/node/SINode.h index 83c1eb9..297e489 100644 --- a/KVstore/SITree/node/SINode.h +++ b/KVstore/SITree/node/SINode.h @@ -64,9 +64,11 @@ public: void setStore(unsigned _store); unsigned getFlag() const; void setFlag(unsigned _flag); + const Bstr* getKey(int _index) const; //need to check the index bool setKey(const Bstr* _key, int 
_index, bool ifcopy = false); bool addKey(const Bstr* _key, int _index, bool ifcopy = false); + bool addKey(char* _str, unsigned _len, int _index, bool ifcopy = false); bool subKey(int _index, bool ifdel = false); //several binary key search utilities @@ -74,7 +76,12 @@ public: int searchKey_equal(const Bstr& _bstr) const; int searchKey_lessEqual(const Bstr& _bstr) const; + int searchKey_less(const char* _str, unsigned _len) const; + int searchKey_equal(const char* _str, unsigned _len) const; + int searchKey_lessEqual(const char* _str, unsigned _len) const; + //virtual functions: polymorphic + //NOTICE: not pure-virtual, not required to be implemented again, can be used now virtual SINode* getChild(int _index) const { return NULL; }; virtual bool setChild(SINode* _child, int _index) { return true; }; virtual bool addChild(SINode* _child, int _index) { return true; }; @@ -87,6 +94,8 @@ public: virtual bool subValue(int _index) { return true; }; virtual void setPrev(SINode* _prev) {}; virtual void setNext(SINode* _next) {}; + + //NOTICE: pure-virtual, must to be implemented again in the sub-class virtual void Virtual() = 0; virtual void Normal() = 0; virtual unsigned getSize() const = 0; //return all memory owned @@ -111,4 +120,4 @@ public: *to release the whole(pointer is invalid and rebuild problem) */ -#endif \ No newline at end of file +#endif diff --git a/NOTES.md b/NOTES.md index cf80444..deef0fb 100644 --- a/NOTES.md +++ b/NOTES.md @@ -88,7 +88,7 @@ http://blog.csdn.net/infoworld/article/details/8670951 要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned) 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧 -在type分支中,sub2id_pre2id_obj2id函数中,每次double增长可能无法充分利用unsigned空间,只能利用到2560000000,超过后最好直接设置为最大 +type分支中query过程可能还有问题,需要修改Query/里面的类型 去掉tree里面的复制,另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现,避免每次都重新new,但这会影响多线程程序 而且在kvstore中往往需要对原始list做一些额外处理 --- diff --git a/Util/Bstr.h b/Util/Bstr.h index 63d8cd9..fc2cd9f 100644 --- 
a/Util/Bstr.h +++ b/Util/Bstr.h @@ -18,6 +18,7 @@ class Bstr { private: char* str; //pointers consume 8 byte in 64-bit system + //TODO: the length maybe not needed unsigned length; public: diff --git a/KVstore/IVTree/vlist/VList.cpp b/Util/VList.cpp similarity index 56% rename from KVstore/IVTree/vlist/VList.cpp rename to Util/VList.cpp index 798b553..7b772f3 100644 --- a/KVstore/IVTree/vlist/VList.cpp +++ b/Util/VList.cpp @@ -10,55 +10,64 @@ using namespace std; +bool +VList::isLongList(unsigned _len) +{ + return _len > VList::LENGTH_BORDER; +} + VList::VList() { //not use ../logs/, notice the location of program cur_block_num = SET_BLOCK_NUM; filepath = ""; freelist = NULL; - treefp = NULL; - minheap = NULL; max_buffer_size = Util::MAX_BUFFER_SIZE; - heap_size = max_buffer_size / IVNode::INTL_SIZE; freemem = max_buffer_size; } -VList::VList(string& _filepath, unsigned long long _buffer_size) +VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size) { cur_block_num = SET_BLOCK_NUM; //initialize this->filepath = _filepath; + if (_mode == string("build")) - treefp = fopen(_filepath.c_str(), "w+b"); + valfp = fopen(_filepath.c_str(), "w+b"); else if (_mode == string("open")) - treefp = fopen(_filepath.c_str(), "r+b"); + valfp = fopen(_filepath.c_str(), "r+b"); else { - print(string("error in IVStorage: Invalid mode ") + _mode); + cout<treeheight = _height; //originally set to 0 + this->max_buffer_size = _buffer_size; - this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE; this->freemem = this->max_buffer_size; this->freelist = new BlockInfo; //null-head + + //TODO: read/write by char is too slow, how about read all and deal , then clear? + // + //BETTER: hwo about assign IDs in a dynamic way? + //limitID freelist + //QUETY: can free id list consume very large memory?? 
+ unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE BlockInfo* bp; if (_mode == "build") { //write basic information i = 0; - fwrite(&i, sizeof(unsigned), 1, this->treefp); //height - fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum - fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num - fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + fwrite(&cur_block_num, sizeof(unsigned), 1, this->valfp); //current block num + //NOTICE: use a 1M block for a unsigned?? not ok! + fseek(this->valfp, BLOCK_SIZE, SEEK_SET); bp = this->freelist; j = cur_block_num / 8; for (i = 0; i < j; ++i) { - fputc(0, this->treefp); + fputc(0, this->valfp); for (k = 0; k < 8; ++k) { bp->next = new BlockInfo(i * 8 + k + 1, NULL); @@ -69,17 +78,14 @@ VList::VList(string& _filepath, unsigned long long _buffer_size) else //_mode == "open" { //read basic information - int rootnum; char c; - fread(this->treeheight, sizeof(unsigned), 1, this->treefp); - fread(&rootnum, sizeof(unsigned), 1, this->treefp); - fread(&cur_block_num, sizeof(unsigned), 1, this->treefp); - fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + fread(&cur_block_num, sizeof(unsigned), 1, this->valfp); + fseek(this->valfp, BLOCK_SIZE, SEEK_SET); bp = this->freelist; j = cur_block_num / 8; for (i = 0; i < j; ++i) { - c = fgetc(treefp); + c = fgetc(valfp); for (k = 0; k < 8; ++k) { if ((c & (1 << k)) == 0) @@ -89,14 +95,13 @@ VList::VList(string& _filepath, unsigned long long _buffer_size) } } } - fseek(treefp, Address(rootnum), SEEK_SET); - //treefp is now ahead of root-block } - this->minheap = new IVHeap(this->heap_size); + + //NOTICE: we do not need to alloc the blocks for free block id list, AllocBlock is only for later blocks } long //8-byte in 64-bit machine -IVStorage::Address(unsigned _blocknum) const //BETTER: inline function +VList::Address(unsigned _blocknum) const //BETTER: inline function { if (_blocknum == 0) return 0; @@ -110,13 +115,13 @@ IVStorage::Address(unsigned _blocknum) const //BETTER: 
inline function } unsigned -IVStorage::Blocknum(long address) const +VList::Blocknum(long address) const { return (address / BLOCK_SIZE) + 1 - this->SuperNum; } unsigned -IVStorage::AllocBlock() +VList::AllocBlock() { BlockInfo* p = this->freelist->next; if (p == NULL) @@ -131,11 +136,12 @@ IVStorage::AllocBlock() unsigned t = p->num; this->freelist->next = p->next; delete p; + return t; } void -IVStorage::FreeBlock(unsigned _blocknum) +VList::FreeBlock(unsigned _blocknum) { //QUERY: head-sub and tail-add will be better? BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next); this->freelist->next = bp; @@ -145,93 +151,96 @@ IVStorage::FreeBlock(unsigned _blocknum) //a string may acrossseveral blocks void -IVStorage::ReadAlign(unsigned* _next) +VList::ReadAlign(unsigned* _next) { - if (ftell(treefp) % BLOCK_SIZE == 0) + if (ftell(valfp) % BLOCK_SIZE == 0) { - fseek(treefp, Address(*_next), SEEK_SET); - fread(_next, sizeof(unsigned), 1, treefp); + fseek(valfp, Address(*_next), SEEK_SET); + fread(_next, sizeof(unsigned), 1, valfp); } } void -IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +VList::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) { - if (ftell(treefp) % BLOCK_SIZE == 0) + if (ftell(valfp) % BLOCK_SIZE == 0) { unsigned blocknum = this->AllocBlock(); - fseek(treefp, Address(*_curnum), SEEK_SET); + fseek(valfp, Address(*_curnum), SEEK_SET); if (_SpecialBlock) { - fseek(treefp, 4, SEEK_CUR); + fseek(valfp, 4, SEEK_CUR); _SpecialBlock = false; } - fwrite(&blocknum, sizeof(unsigned), 1, treefp); - fseek(treefp, Address(blocknum) + 4, SEEK_SET); + fwrite(&blocknum, sizeof(unsigned), 1, valfp); + fseek(valfp, Address(blocknum) + 4, SEEK_SET); *_curnum = blocknum; } } +//TODO: check , read/write a long list, across several blocks +//not use buffer, read/write on need, update at once, so no need to write back at last + +//TODO: still use Bstr?? how can we get the next pointer?? 
use NULL to init +//NOTICE: the next is placed at the begin of a block bool -IVStorage::readBstr(Bstr* _bp, unsigned* _next) +VList::readBstr(Bstr* _bp, unsigned* _next) { //long address; unsigned len, i, j; - fread(&len, sizeof(unsigned), 1, this->treefp); + fread(&len, sizeof(unsigned), 1, this->valfp); this->ReadAlign(_next); //this->request(len); char* s = (char*)malloc(len); _bp->setLen(len); for (i = 0; i + 4 < len; i += 4) { - fread(s + i, sizeof(char), 4, treefp); + fread(s + i, sizeof(char), 4, valfp); this->ReadAlign(_next); } while (i < len) { - fread(s + i, sizeof(char), 1, treefp); //BETTER + fread(s + i, sizeof(char), 1, valfp); //BETTER i++; } j = len % 4; if (j > 0) j = 4 - j; - fseek(treefp, j, SEEK_CUR); + fseek(valfp, j, SEEK_CUR); this->ReadAlign(_next); _bp->setStr(s); + return true; } bool -IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +VList::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) { unsigned i, j, len = _bp->getLen(); - fwrite(&len, sizeof(unsigned), 1, treefp); + fwrite(&len, sizeof(unsigned), 1, valfp); this->WriteAlign(_curnum, _SpecialBlock); char* s = _bp->getStr(); for (i = 0; i + 4 < len; i += 4) { - fwrite(s + i, sizeof(char), 4, treefp); + fwrite(s + i, sizeof(char), 4, valfp); this->WriteAlign(_curnum, _SpecialBlock); } while (i < len) { - fwrite(s + i, sizeof(char), 1, treefp); + fwrite(s + i, sizeof(char), 1, valfp); i++; } j = len % 4; if (j > 0) j = 4 - j; - fseek(treefp, j, SEEK_CUR); + fseek(valfp, j, SEEK_CUR); this->WriteAlign(_curnum, _SpecialBlock); + return true; } VList::~VList() { - //release heap and freelist... 
-#ifdef DEBUG_KVSTORE - printf("now to release the kvstore!\n"); -#endif BlockInfo* bp = this->freelist; BlockInfo* next; while (bp != NULL) @@ -240,18 +249,6 @@ VList::~VList() delete bp; bp = next; } -#ifdef DEBUG_KVSTORE - printf("already empty the freelist!\n"); -#endif - delete this->minheap; -#ifdef DEBUG_KVSTORE - printf("already empty the buffer heap!\n"); -#endif - fclose(this->treefp); - //#ifdef DEBUG_KVSTORE - //NOTICE:there is more than one tree - //fclose(Util::debug_kvstore); //NULL is ok! - //Util::debug_kvstore = NULL; - //#endif + fclose(this->valfp); } diff --git a/KVstore/IVTree/vlist/VList.h b/Util/VList.h similarity index 76% rename from KVstore/IVTree/vlist/VList.h rename to Util/VList.h index 61911f1..a328b83 100644 --- a/KVstore/IVTree/vlist/VList.h +++ b/Util/VList.h @@ -6,11 +6,11 @@ # Description: =============================================================================*/ -#ifndef _KVSTORE_IVTREE_STORAGE_VLIST_H -#define _KVSTORE_IVTREE_STORAGE_VLIST_H +#ifndef _UTIL_VLIST_H +#define _UTIL_VLIST_H -#include "../../../Util/Util.h" -#include "../../../Util/Bstr.h" +#include "Util.h" +#include "Bstr.h" //TODO: not keep long list in memory, read each time //but when can you free the long list(kvstore should release it after parsing) @@ -22,15 +22,21 @@ //BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts) +//STRUCT: a long list real-address is the block ID in file2(only for long value lists, a list across several 1M blocks) +//tree-value Bstr: unsigned=the real address char*=NULL +//in disk: +//file1 is tree file, the long list is represented as: 0 real-address +//NOTICE: long list is not kept in mmeory for cache, it is read/update each time on need! 
+ class VList { public: //NOTICE:the border is 10^6, but the block is larger, 1M static const unsigned LENGTH_BORDER = 1000000; static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block - static const unsigned MAX_BLOCK_NUM = 1 << 16; //max block-num + static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num //below two constants: must can be exactly divided by 8 - static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num + static const unsigned SET_BLOCK_NUM = 1 << 2; //initial blocks num static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; @@ -59,12 +65,14 @@ private: public: VList(); - VList(std::string& _filepath, unsigned long long _buffer_size);//create a fixed-size file or open an existence + VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence bool readBstr(Bstr* _bp, unsigned* _next); bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); bool readValue(unsigned _block_num); bool writeValue(const Bstr* _bp); ~VList(); + + static bool isLongList(unsigned _len); }; #endif From ceff3544aecd1b903825e866d658a17e40ae8123 Mon Sep 17 00:00:00 2001 From: bookug Date: Fri, 31 Mar 2017 00:23:16 +0800 Subject: [PATCH 4/6] refactor: add value list for IVTree; lower the copy cost of string not done, waiting to be debugged by zengli, long list must be cleared after got, no changes besides KVstore --- KVstore/IVTree/IVTree.cpp | 52 ++++++------ KVstore/IVTree/IVTree.h | 16 ++-- KVstore/IVTree/node/IVLeafNode.cpp | 118 ++++++++++++++++++++++----- KVstore/IVTree/node/IVLeafNode.h | 2 + KVstore/IVTree/node/IVNode.h | 1 + KVstore/IVTree/storage/IVStorage.cpp | 10 ++- KVstore/KVstore.cpp | 86 ++++++++++++------- KVstore/KVstore.h | 11 ++- KVstore/Tree.h | 2 +- NOTES.md | 5 +- Util/Bstr.cpp | 9 +- Util/Bstr.h | 3 + Util/VList.cpp | 90 +++++++++++++++----- 
Util/VList.h | 16 ++-- makefile | 9 +- 15 files changed, 306 insertions(+), 124 deletions(-) diff --git a/KVstore/IVTree/IVTree.cpp b/KVstore/IVTree/IVTree.cpp index eac36a1..e1eb236 100644 --- a/KVstore/IVTree/IVTree.cpp +++ b/KVstore/IVTree/IVTree.cpp @@ -115,13 +115,13 @@ IVTree::prepare(IVNode* _np) } bool -IVTree::search(int _key, char*& _str, int& _len) +IVTree::search(unsigned _key, char*& _str, unsigned& _len) { - if (_key < 0) - { - printf("error in IVTree-search: empty string\n"); - return false; - } + //if (_key < 0) + //{ + //printf("error in IVTree-search: empty string\n"); + //return false; + //} this->request = 0; int store; @@ -142,13 +142,13 @@ IVTree::search(int _key, char*& _str, int& _len) } bool -IVTree::insert(int _key, char* _str, unsigned _len) +IVTree::insert(unsigned _key, char* _str, unsigned _len) { - if (_key < 0) - { - printf("error in IVTree-insert: empty string\n"); - return false; - } + //if (_key < 0) + //{ + //printf("error in IVTree-insert: empty string\n"); + //return false; + //} //this->CopyToTransfer(_str, _len, 2); //const Bstr* val = &(this->transfer[2]); @@ -250,13 +250,13 @@ IVTree::insert(int _key, char* _str, unsigned _len) } bool -IVTree::modify(int _key, char* _str, unsigned _len) +IVTree::modify(unsigned _key, char* _str, unsigned _len) { - if (_key < 0) - { - printf("error in IVTree-modify: empty string\n"); - return false; - } + //if (_key < 0) + //{ + //printf("error in IVTree-modify: empty string\n"); + //return false; + //} //this->CopyToTransfer(_str, _len, 2); //not check value //const Bstr* val = &(this->transfer[2]); @@ -291,7 +291,7 @@ IVTree::modify(int _key, char* _str, unsigned _len) //this function is useful for search and modify, and range-query IVNode* //return the first key's position that >= *_key -IVTree::find(int _key, int* _store, bool ifmodify) +IVTree::find(unsigned _key, int* _store, bool ifmodify) { //to assign value for this->bstr, function shouldn't be const! 
if (this->root == NULL) return NULL; //IVTree Is Empty @@ -334,13 +334,13 @@ IVTree::find(unsigned _len, const char* _str, int* store) const */ bool -IVTree::remove(int _key) +IVTree::remove(unsigned _key) { - if (_key < 0) - { - printf("error in IVTree-remove: empty string\n"); - return false; - } + //if (_key < 0) + //{ + //printf("error in IVTree-remove: empty string\n"); + //return false; + //} this->request = 0; IVNode* ret; @@ -468,7 +468,7 @@ IVTree::resetStream() //TODO: change to using value list, getValue() maybe not get real long list bool //special case: not exist, one-edge-case -IVTree::range_query(int _key1, int _key2) +IVTree::range_query(unsigned _key1, unsigned _key2) { //the range is: *_key1 <= x < *_key2 //if(_key1 <0 && _key2 <0) //return false; diff --git a/KVstore/IVTree/IVTree.h b/KVstore/IVTree/IVTree.h index 3ae3897..11184fe 100644 --- a/KVstore/IVTree/IVTree.h +++ b/KVstore/IVTree/IVTree.h @@ -29,7 +29,7 @@ class IVTree { protected: - unsigned int height; //0 indicates an empty tree + unsigned height; //0 indicates an empty tree IVNode* root; IVNode* leaves_head; //the head of LeafNode-list IVNode* leaves_tail; //the tail of LeafNode-list @@ -74,19 +74,19 @@ protected: public: IVTree(); //always need to initial transfer IVTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size); - unsigned int getHeight() const; + unsigned getHeight() const; void setHeight(unsigned _h); IVNode* getRoot() const; //void setRoot(Node* _root); //insert, search, remove, set - bool search(int _key, char*& _str, int& _len); - bool insert(int _key, char* _str, unsigned _len); - bool modify(int _key, char* _str, unsigned _len); - IVNode* find(int _key, int* store, bool ifmodify); - bool remove(int _key); + bool search(unsigned _key, char*& _str, unsigned& _len); + bool insert(unsigned _key, char* _str, unsigned _len); + bool modify(unsigned _key, char* _str, unsigned _len); + IVNode* find(unsigned _key, int* store, 
bool ifmodify); + bool remove(unsigned _key); const Bstr* getRangeValue(); void resetStream(); - bool range_query(int _key1, int _key2); + bool range_query(unsigned _key1, unsigned _key2); bool save(); ~IVTree(); void print(std::string s); //DEBUG(print the tree) diff --git a/KVstore/IVTree/node/IVLeafNode.cpp b/KVstore/IVTree/node/IVLeafNode.cpp index bba580b..eb8de5c 100644 --- a/KVstore/IVTree/node/IVLeafNode.cpp +++ b/KVstore/IVTree/node/IVLeafNode.cpp @@ -89,25 +89,74 @@ IVLeafNode::getValue(int _index) const return this->values + _index; } -//TODO!!! bool -IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const +IVLeafNode::setValue(const Bstr* _value, int _index, bool _ifcopy) { - //TODO: read long list - return true; -} - -bool -IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) -{ - //TODO: consider the long list, how to cancel and reset int num = this->getNum(); if (_index < 0 || _index >= num) { print(string("error in setValue: Invalid index ") + Util::int2string(_index)); return false; } + this->values[_index].release(); //NOTICE: only used in modify + + if(_ifcopy) + { + this->values[_index].copy(_value); + } + else + { + this->values[_index] = *_value; + } + + return true; +} + +bool +IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getValue: Invalid index ") + Util::int2string(_index)); + return NULL; + } + + //read long list + if(this->values[_index].isBstrLongList()) + { + unsigned block_num = this->values[_index].getLen(); + _vlist->readValue(block_num, _str, _len); + } + else + { + _str = this->values[_index].getStr(); + _len = this->values[_index].getLen(); + } + + return true; +} + +bool +IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + 
print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + if(this->values[_index].isBstrLongList()) + { + unsigned block_num = this->values[_index].getLen(); + _vlist->removeValue(block_num); + } + else + { + this->values[_index].release(); //NOTICE: only used in modify + } //DEBUG: we do not need to copy here //we just need to ensure that the pointer's memory is not released @@ -119,8 +168,17 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool //else //{ //this->values[_index] = *_value; + if(VList::isLongList(_len)) + { + unsigned block_num = _vlist->writeValue(_str, _len); + this->values[_index].setStr(NULL); + this->values[_index].setLen(block_num); + } + else + { this->values[_index].setStr(_str); this->values[_index].setLen(_len); + } //} return true; } @@ -128,23 +186,34 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool bool IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) { - //TODO:if the list is too large int num = this->getNum(); if (_index < 0 || _index > num) { print(string("error in addValue: Invalid index ") + Util::int2string(_index)); return false; } - int i; - for (i = num - 1; i >= _index; --i) + + for (int i = num - 1; i >= _index; --i) this->values[i + 1] = this->values[i]; //if (ifcopy) //this->values[_index].copy(_value); //else //this->values[_index] = *_value; - this->values[_index].setStr(_str); - this->values[_index].setLen(_len); + + if(VList::isLongList(_len)) + { + unsigned block_num = _vlist->writeValue(_str, _len); + this->values[_index].setStr(NULL); + this->values[_index].setLen(block_num); + } + else + { + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + } + //this->values[_index].setStr(_str); + //this->values[_index].setLen(_len); return true; } @@ -152,7 +221,6 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool bool 
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) { - //TODO: if is to sub long list int num = this->getNum(); if (_index < 0 || _index >= num) { @@ -160,10 +228,20 @@ IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) return false; } - int i; - if (ifdel) - values[_index].release(); - for (i = _index; i < num - 1; ++i) + if(this->values[_index].isBstrLongList()) + { + unsigned block_num = this->values[_index].getLen(); + _vlist->removeValue(block_num); + } + else + { + if (ifdel) + { + values[_index].release(); + } + } + + for (int i = _index; i < num - 1; ++i) this->values[i] = this->values[i + 1]; return true; diff --git a/KVstore/IVTree/node/IVLeafNode.h b/KVstore/IVTree/node/IVLeafNode.h index 56638bd..6b2439a 100644 --- a/KVstore/IVTree/node/IVLeafNode.h +++ b/KVstore/IVTree/node/IVLeafNode.h @@ -28,6 +28,8 @@ public: IVNode* getPrev() const; IVNode* getNext() const; const Bstr* getValue(int _index) const; + bool setValue(const Bstr* _value, int _index, bool _ifcopy=false); + bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const; bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false); diff --git a/KVstore/IVTree/node/IVNode.h b/KVstore/IVTree/node/IVNode.h index a7d6b59..a942042 100644 --- a/KVstore/IVTree/node/IVNode.h +++ b/KVstore/IVTree/node/IVNode.h @@ -83,6 +83,7 @@ public: virtual IVNode* getNext() const { return NULL; }; virtual const Bstr* getValue(int _index) const { return NULL; }; + virtual bool setValue(const Bstr* _value, int _index, bool _ifcopy=false) { return true; }; virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; }; virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; diff --git a/KVstore/IVTree/storage/IVStorage.cpp b/KVstore/IVTree/storage/IVStorage.cpp index d203d81..2c5b72a 100644 --- a/KVstore/IVTree/storage/IVStorage.cpp +++ 
b/KVstore/IVTree/storage/IVStorage.cpp @@ -348,6 +348,11 @@ IVStorage::createNode(IVNode*& _np) //cretae virtual nodes, not in-mem return true; } +//BETTER: Does SpecialBlock really needed? why can't we place next before flag?? +// +//NOTICE: root num begins from 1, if root num is 0, then it is invalid, i.e. the tree is NULL +//(and ftell(root address) will be 0 either) + bool IVStorage::writeNode(IVNode* _np) { @@ -446,7 +451,10 @@ IVStorage::readBstr(Bstr* _bp, unsigned* _next) } //this->request(len); - char* s = (char*)malloc(len); + + //NOTICE: we use new for all, consistent with Bstr and KVstore + //char* s = (char*)malloc(len); + char* s = new char[len]; _bp->setLen(len); for (i = 0; i + 4 < len; i += 4) { diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index 9b57917..e9c4e2a 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -99,8 +99,8 @@ int KVstore::getEntityDegree(int _entity_id) const { int KVstore::getEntityInDegree(int _entity_id) const { //cout << "In getEntityInDegree " << _entity_id << endl; - int* _tmp = NULL; - int _len = 0; + unsigned* _tmp = NULL; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len); if (!_get) { return 0; @@ -110,8 +110,8 @@ int KVstore::getEntityInDegree(int _entity_id) const { int KVstore::getEntityOutDegree(int _entity_id) const { //cout << "In getEntityOutDegree " << _entity_id << endl; - int* _tmp = NULL; - int _len = 0; + unsigned* _tmp = NULL; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len); if (!_get) { return 0; @@ -121,8 +121,8 @@ int KVstore::getEntityOutDegree(int _entity_id) const { int KVstore::getLiteralDegree(int _literal_id) const { //cout << "In getLiteralDegree " << _literal_id << endl; - int* _tmp = NULL; - int _len = 0; + unsigned* _tmp = NULL; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len); if (!_get) { return 0; @@ -132,8 
+132,8 @@ int KVstore::getLiteralDegree(int _literal_id) const { int KVstore::getPredicateDegree(int _predicate_id) const { //cout << "In getPredicate Degree " << _predicate_id << endl; - int* _tmp = NULL; - int _len = 0; + unsigned* _tmp = NULL; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len); if (!_get) { return 0; @@ -143,8 +143,10 @@ int KVstore::getPredicateDegree(int _predicate_id) const { int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const { //cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl; + + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) { return 0; @@ -166,8 +168,10 @@ int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const { int KVstore::getObjectPredicateDegree(int _objid, int _preid) const { //cout << "In getObjectPredicateDegree " << _objid << _preid << endl; + + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { return 0; @@ -352,8 +356,9 @@ bool KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id) { } bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); bool _is_entity = Util::is_entity_ele(_obj_id); @@ -453,8 +458,9 @@ bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) { } bool KVstore::updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); bool _is_entity = Util::is_entity_ele(_obj_id); @@ -564,8 +570,9 @@ 
bool KVstore::updateRemove_s2values(int _subid, const std::vector& _pidoidl } bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); //objID doesn't exist @@ -659,8 +666,9 @@ bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) { } bool KVstore::updateRemove_o2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); if (!_get) { @@ -763,8 +771,9 @@ bool KVstore::updateRemove_o2values(int _objid, const std::vector& _pidsidl } bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); //preid doesn't exist @@ -804,8 +813,9 @@ bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) { } bool KVstore::updateRemove_p2values(int _sub_id, int _pre_id, int _obj_id) { + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); if (!_get) { @@ -1405,8 +1415,9 @@ KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _list_len = 0; return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) @@ -1442,8 +1453,9 @@ KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _list_len = 0; return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) { 
@@ -1481,8 +1493,9 @@ KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) { _objidlist = NULL; @@ -1531,8 +1544,9 @@ KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); if (!_get) { _preid_objidlist = NULL; @@ -1682,8 +1696,10 @@ bool KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getpreIDlistByobjID " << _objid << endl; + + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { _preidlist = NULL; @@ -1711,8 +1727,10 @@ bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getsubIDlistByobjID " << _objid << endl; + + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { _subidlist = NULL; @@ -1743,8 +1761,9 @@ bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getsubIDlistByobjIDpreID " << _objid << ' ' << _preid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { _subidlist = NULL; @@ -1786,8 +1805,9 @@ bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getpreIDsubIDlistByobjID " << _objid << endl; 
+ //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); if (!_get) { _preid_subidlist = NULL; @@ -1915,8 +1935,9 @@ bool KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getsubIDlistBypreID " << _preid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); if (!_get) { _subidlist = NULL; @@ -1946,8 +1967,9 @@ bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getobjIDlistBypreID " << _preid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); if (!_get) { _objidlist = NULL; @@ -1978,8 +2000,9 @@ bool KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate) const { //cout << "In getsubIDobjIDlistBypreID " << _preid << endl; + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); if (!_get) { _subid_objidlist = NULL; @@ -2032,8 +2055,9 @@ KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& return false; } + //TODO: use unsigned int* _tmp = NULL; - int _len = 0; + unsigned _len = 0; this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); _list_len = len; int _result = 0; @@ -2199,7 +2223,7 @@ KVstore::addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen) } bool -KVstore::addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen) +KVstore::addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen) { return _p_btree->insert(_key, _val, _vlen); } @@ -2217,7 +2241,7 @@ 
KVstore::setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen) } bool -KVstore::setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen) +KVstore::setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen) { return _p_btree->modify(_key, _val, _vlen); } @@ -2235,7 +2259,7 @@ KVstore::getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) cons } bool -KVstore::getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const +KVstore::getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const { return _p_btree->search(_key, _val, _vlen); } diff --git a/KVstore/KVstore.h b/KVstore/KVstore.h index 5372e63..abea790 100644 --- a/KVstore/KVstore.h +++ b/KVstore/KVstore.h @@ -19,6 +19,11 @@ //QUERY: but to count the length each time maybe very costly? //No, because triple num is stored in char* now!!!! we do not need to save it again //TODO: entity_border in s2values list is not needed!!! not waste memory here +// +//QUERY: but to implement vlist, we need a unsigned flag +//What is more, we need to store the string in disk, how can we store it if without the length? +//unsigned type stored as chars, maybe will have '\0' +//In memory, we do not know when the oidlist ends if without the original length (butthe triple num will answer this!) 
class KVstore { @@ -197,15 +202,15 @@ private: bool addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val); bool addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen); - bool addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen); + bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); bool setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val); bool setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen); - bool setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen); + bool setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const; bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const; - bool getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const; + bool getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const; int getIDByStr(SITree* _p_btree, const char* _key, int _klen) const; diff --git a/KVstore/Tree.h b/KVstore/Tree.h index b73612b..2574b2f 100644 --- a/KVstore/Tree.h +++ b/KVstore/Tree.h @@ -1,5 +1,5 @@ //headers wrapper for all kinds of BPlusTree -#include "IVTree/IVTree.h" #include "ISTree/ISTree.h" #include "SITree/SITree.h" +#include "IVTree/IVTree.h" diff --git a/NOTES.md b/NOTES.md index deef0fb..d797830 100644 --- a/NOTES.md +++ b/NOTES.md @@ -88,9 +88,8 @@ http://blog.csdn.net/infoworld/article/details/8670951 要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned) 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧 -type分支中query过程可能还有问题,需要修改Query/里面的类型 -去掉tree里面的复制,另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现,避免每次都重新new,但这会影响多线程程序 -而且在kvstore中往往需要对原始list做一些额外处理 +type分支中query过程可能还有问题,需要修改Query/里面的类型,另外stringindex中也要修改,分界线已经是20亿且非法不再是-1 +vstree在build和query时可以用不同大小的缓存,来加速build过程 --- UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍 
解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用unsigned long long*和unsigned来表示,这样最高可支持到40亿triple diff --git a/Util/Bstr.cpp b/Util/Bstr.cpp index 8157700..a3282f2 100644 --- a/Util/Bstr.cpp +++ b/Util/Bstr.cpp @@ -122,7 +122,8 @@ unsigned Bstr::getLen() const { //NOTICE: this is for VList - if(this->str == NULL) + if(this->isBstrLongList()) + //if(this->str == NULL) { return 0; } @@ -214,3 +215,9 @@ Bstr::print(string s) const //#endif } +bool +Bstr::isBstrLongList() const +{ + return this->str == NULL; +} + diff --git a/Util/Bstr.h b/Util/Bstr.h index fc2cd9f..aaaf84a 100644 --- a/Util/Bstr.h +++ b/Util/Bstr.h @@ -48,6 +48,9 @@ public: //int write(FILE* _fp); ~Bstr(); void print(std::string s) const; //DEBUG + + //judge if this Bstr represents a long list value, waiting to be read each time on need + bool isBstrLongList() const; }; #endif // _UTIL_BSTR_H diff --git a/Util/VList.cpp b/Util/VList.cpp index 7b772f3..261d887 100644 --- a/Util/VList.cpp +++ b/Util/VList.cpp @@ -149,6 +149,10 @@ VList::FreeBlock(unsigned _blocknum) //NOTICE: all reads are aligned to 4 bytes(including a string) //a string may acrossseveral blocks +// +//NOTICE: not use buffer, read/write on need, update at once, so no need to write back at last +//NOTICE: the next is placed at the begin of a block + void VList::ReadAlign(unsigned* _next) @@ -161,38 +165,68 @@ VList::ReadAlign(unsigned* _next) } void -VList::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +VList::WriteAlign(unsigned* _curnum) { if (ftell(valfp) % BLOCK_SIZE == 0) { unsigned blocknum = this->AllocBlock(); fseek(valfp, Address(*_curnum), SEEK_SET); - if (_SpecialBlock) - { - fseek(valfp, 4, SEEK_CUR); - _SpecialBlock = false; - } fwrite(&blocknum, sizeof(unsigned), 1, valfp); fseek(valfp, Address(blocknum) + 4, SEEK_SET); *_curnum = blocknum; } } -//TODO: check , read/write a long list, across several blocks -//not use buffer, read/write on need, update at once, so no need to write back at last +bool 
+VList::readValue(unsigned _block_num, char*& _str, unsigned& _len) +{ + fseek(valfp, Address(_block_num), SEEK_SET); + unsigned next; + fread(&next, sizeof(unsigned), 1, valfp); + this->readBstr(_str, _len, &next); + + return true; +} + +unsigned +VList::writeValue(const char* _str, unsigned _len) +{ + unsigned blocknum = this->AllocBlock(); + unsigned curnum = blocknum; + this->writeBstr(_str, _len, &curnum); + + return blocknum; +} + +bool +VList::removeValue(unsigned _block_num) +{ + unsigned store = _block_num, next; + fseek(this->valfp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, valfp); + + while (store != 0) + { + this->FreeBlock(store); + store = next; + fseek(valfp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, valfp); + } + + return true; +} -//TODO: still use Bstr?? how can we get the next pointer?? use NULL to init -//NOTICE: the next is placed at the begin of a block bool -VList::readBstr(Bstr* _bp, unsigned* _next) +VList::readBstr(char*& _str, unsigned& _len, unsigned* _next) { //long address; unsigned len, i, j; fread(&len, sizeof(unsigned), 1, this->valfp); this->ReadAlign(_next); - //this->request(len); + char* s = (char*)malloc(len); - _bp->setLen(len); + _len = len; + for (i = 0; i + 4 < len; i += 4) { fread(s + i, sizeof(char), 4, valfp); @@ -203,38 +237,52 @@ VList::readBstr(Bstr* _bp, unsigned* _next) fread(s + i, sizeof(char), 1, valfp); //BETTER i++; } + j = len % 4; if (j > 0) j = 4 - j; fseek(valfp, j, SEEK_CUR); - this->ReadAlign(_next); - _bp->setStr(s); + //NOTICE+DEBUG: I think no need to align here, later no data to read + //(if need to read, then fseek again to find a new value) + //this->ReadAlign(_next); + + _str = s; return true; } bool -VList::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum) { - unsigned i, j, len = _bp->getLen(); + unsigned i, j, len = _len; fwrite(&len, sizeof(unsigned), 1, valfp); - 
this->WriteAlign(_curnum, _SpecialBlock); - char* s = _bp->getStr(); + this->WriteAlign(_curnum); + + //BETTER: compute this need how many blocks first, then write a block a time + + const char* s = _str; for (i = 0; i + 4 < len; i += 4) { fwrite(s + i, sizeof(char), 4, valfp); - this->WriteAlign(_curnum, _SpecialBlock); + this->WriteAlign(_curnum); } while (i < len) { fwrite(s + i, sizeof(char), 1, valfp); i++; } + j = len % 4; if (j > 0) j = 4 - j; fseek(valfp, j, SEEK_CUR); - this->WriteAlign(_curnum, _SpecialBlock); + + //NOTICE+DEBUG: I think no need to align here, later no data to write + //(if need to write, then fseek again to write a new value) + //this->WriteAlign(_curnum); + fseek(valfp, Address(*_curnum), SEEK_SET); + unsigned t = 0; + fwrite(&t, sizeof(unsigned), 1, valfp); return true; } diff --git a/Util/VList.h b/Util/VList.h index a328b83..2719bf5 100644 --- a/Util/VList.h +++ b/Util/VList.h @@ -12,7 +12,10 @@ #include "Util.h" #include "Bstr.h" -//TODO: not keep long list in memory, read each time +//TODO: all use new/delete for Bstr, KVstore and trees, including Stream +//then give a full test, including valgrind + +//NOTICE: not keep long list in memory, read each time //but when can you free the long list(kvstore should release it after parsing) // //CONSIDER: if to keep long list in memory, should adjust the bstr in memory: @@ -61,15 +64,16 @@ private: unsigned AllocBlock(); void FreeBlock(unsigned _blocknum); void ReadAlign(unsigned* _next); - void WriteAlign(unsigned* _next, bool& _SpecialBlock); + void WriteAlign(unsigned* _next); + bool readBstr(char*& _bp, unsigned& _len, unsigned* _next); + bool writeBstr(const char* _str, unsigned _len, unsigned* _curnum); public: VList(); VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence - bool readBstr(Bstr* _bp, unsigned* _next); - bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); - bool 
readValue(unsigned _block_num); - bool writeValue(const Bstr* _bp); + bool readValue(unsigned _block_num, char*& _str, unsigned& _len); + unsigned writeValue(const char* _str, unsigned _len); + bool removeValue(unsigned _block_num); ~VList(); static bool isLongList(unsigned _len); diff --git a/makefile b/makefile index da62f17..7fb6524 100644 --- a/makefile +++ b/makefile @@ -72,9 +72,9 @@ sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SI istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o -kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj) +kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstreeobj) -utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o +utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \ $(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o @@ -219,7 +219,7 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $ #objects in istree/ end #objects in ivtree/ begin -$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o +$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o $(objdir)VList.o $(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o $(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o @@ -323,6 +323,9 @@ $(objdir)Triple.o: Util/Triple.cpp Util/Triple.h $(objdir)Util.o $(objdir)BloomFilter.o: Util/BloomFilter.cpp Util/BloomFilter.h $(objdir)Util.o $(CC) 
$(CFLAGS) Util/BloomFilter.cpp -o $(objdir)BloomFilter.o +$(objdir)VList.o: Util/VList.cpp Util/VList.h + $(CC) $(CFLAGS) Util/VList.cpp -o $(objdir)VList.o + #objects in util/ end From 80080d1bca86231f5c04fbebe08cc0385e4f992b Mon Sep 17 00:00:00 2001 From: bookug Date: Sat, 1 Apr 2017 16:03:05 +0800 Subject: [PATCH 5/6] refactor: add VList for IVTree also, lower the copy cost in KVstore by zengli, all changes closed in KVstore, using new/delete for all instead of malloc/free --- Database/Database.cpp | 1 + KVstore/ISTree/storage/ISStorage.cpp | 3 +- KVstore/IVTree/IVTree.cpp | 22 ++- KVstore/IVTree/node/IVLeafNode.cpp | 16 +++ KVstore/IVTree/storage/IVStorage.cpp | 6 + KVstore/KVstore.cpp | 202 ++++++++++++++++++++------- KVstore/SITree/storage/SIStorage.cpp | 3 +- NOTES.md | 3 + Util/Bstr.cpp | 28 ++-- Util/Stream.cpp | 22 ++- Util/Util.cpp | 9 +- Util/Util.h | 7 + Util/VList.cpp | 50 ++++++- Util/VList.h | 11 +- data/bbug0.sql | 5 + data/bbug0d.sql | 1 + data/bbug1.sql | 1 + data/bbug2.sql | 1 + data/bbug3.sql | 1 + data/bbug4.sql | 5 + data/bbug5.sql | 1 + data/bbug6.sql | 1 + 22 files changed, 317 insertions(+), 82 deletions(-) create mode 100644 data/bbug0.sql create mode 100644 data/bbug0d.sql create mode 100644 data/bbug1.sql create mode 100644 data/bbug2.sql create mode 100644 data/bbug3.sql create mode 100644 data/bbug4.sql create mode 100644 data/bbug5.sql create mode 100644 data/bbug6.sql diff --git a/Database/Database.cpp b/Database/Database.cpp index d088fa9..c2a985c 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -800,6 +800,7 @@ Database::build(const string& _rdf_file) //sync(); //cout << "sync vstree" << endl; + //TODO: use fopen w+ to remove signature.binary file //string cmd = "rm -rf " + _entry_file; //system(cmd.c_str()); //cout << "signature file removed" << endl; diff --git a/KVstore/ISTree/storage/ISStorage.cpp b/KVstore/ISTree/storage/ISStorage.cpp index 4f6e223..c205d1e 100644 --- a/KVstore/ISTree/storage/ISStorage.cpp 
+++ b/KVstore/ISTree/storage/ISStorage.cpp @@ -419,7 +419,8 @@ ISStorage::readBstr(Bstr* _bp, unsigned* _next) fread(&len, sizeof(unsigned), 1, this->treefp); this->ReadAlign(_next); //this->request(len); - char* s = (char*)malloc(len); + //char* s = (char*)malloc(len); + char* s = new char[len]; _bp->setLen(len); for (i = 0; i + 4 < len; i += 4) { diff --git a/KVstore/IVTree/IVTree.cpp b/KVstore/IVTree/IVTree.cpp index e1eb236..97f3c1c 100644 --- a/KVstore/IVTree/IVTree.cpp +++ b/KVstore/IVTree/IVTree.cpp @@ -237,7 +237,11 @@ IVTree::insert(unsigned _key, char* _str, unsigned _len) p->addKey(_key, i); p->addValue(this->value_list, i, _str, _len, true); p->addNum(); - request += _len; + //NOTICE: if this is a vlist, then it will be freed, and should not be included in the request memory + if(!VList::isLongList(_len)) + { + request += _len; + } //request += val->getLen(); p->setDirty(); this->TSM->updateHeap(p, p->getRank(), true); @@ -272,13 +276,20 @@ IVTree::modify(unsigned _key, char* _str, unsigned _len) //NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr unsigned len = ret->getValue(store)->getLen(); + if(ret->getValue(store)->isBstrLongList()) + { + len = 0; + } ret->setValue(this->value_list, store, _str, _len, true); //ret->setValue(val, store, true); //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); - this->request = _len; + if(!VList::isLongList(_len)) + { + this->request += _len; + } //this->request = val->getLen(); this->request -= len; ret->setDirty(); @@ -417,7 +428,10 @@ IVTree::remove(unsigned _key) //WARN+NOTICE:here must check, because the key to remove maybe not exist if (i != (int)p->getNum()) { - request -= p->getValue(i)->getLen(); + if(!p->getValue(i)->isBstrLongList()) + { + request -= p->getValue(i)->getLen(); + } p->subKey(i); //to release p->subValue(this->value_list, i, true); //to release p->subNum(); @@ -605,6 +619,8 @@ IVTree::release(IVNode* _np) const 
IVTree::~IVTree() { + delete this->value_list; + delete this->stream; //maybe NULL delete TSM; #ifdef DEBUG_KVSTORE diff --git a/KVstore/IVTree/node/IVLeafNode.cpp b/KVstore/IVTree/node/IVLeafNode.cpp index eb8de5c..f2ac757 100644 --- a/KVstore/IVTree/node/IVLeafNode.cpp +++ b/KVstore/IVTree/node/IVLeafNode.cpp @@ -126,6 +126,9 @@ IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) con //read long list if(this->values[_index].isBstrLongList()) { +#ifdef DEBUG_VLIST + cout<<"this is a vlist in get()"<values[_index].getLen(); _vlist->readValue(block_num, _str, _len); } @@ -150,6 +153,9 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool if(this->values[_index].isBstrLongList()) { +#ifdef DEBUG_VLIST + cout<<"this is a vlist in set()"<values[_index].getLen(); _vlist->removeValue(block_num); } @@ -173,6 +179,8 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool unsigned block_num = _vlist->writeValue(_str, _len); this->values[_index].setStr(NULL); this->values[_index].setLen(block_num); + //NOTICE: we need to free the long list value + delete[] _str; } else { @@ -203,9 +211,17 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool if(VList::isLongList(_len)) { +#ifdef DEBUG_VLIST + cout<<"this is a vlist in add()"<writeValue(_str, _len); this->values[_index].setStr(NULL); this->values[_index].setLen(block_num); + //NOTICE: we need to free the long list value + delete[] _str; +#ifdef DEBUG_VLIST + //cout<<"to check vlist: "<values[_index].getLen()<treefp); +#ifdef DEBUG_VLIST + cout<<"read a vlist in IVStorage - addr: "<setLen(addr); _bp->setStr(NULL); this->ReadAlign(_next); @@ -489,6 +492,9 @@ IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) this->WriteAlign(_curnum, _SpecialBlock); //then this is the real block num fwrite(&len, sizeof(unsigned), 1, treefp); +#ifdef DEBUG_VLIST + cout<<"to write a vlist in 
IVStorage::writeBstr() - blocknum: "<WriteAlign(_curnum, _SpecialBlock); return true; } diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index e9c4e2a..9fb9f67 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -12,7 +12,8 @@ using namespace std; //sets store_path as the root dir of this KVstore //initial all Tree pointers as NULL -KVstore::KVstore(string _store_path) { +KVstore::KVstore(string _store_path) +{ this->store_path = _store_path; this->entity2id = NULL; @@ -30,14 +31,17 @@ KVstore::KVstore(string _store_path) { } //Release all the memory used in this KVstore before destruction -KVstore::~KVstore() { +KVstore::~KVstore() +{ this->flush(); this->release(); } //Flush all modified parts into the disk, which will not release any memory //Does nothing to null tree pointers or parts that has not been modified -void KVstore::flush() { +void +KVstore::flush() +{ this->flush(this->entity2id); this->flush(this->id2entity); @@ -52,7 +56,9 @@ void KVstore::flush() { this->flush(this->objID2values); } -void KVstore::release() { +void +KVstore::release() +{ delete this->entity2id; this->entity2id = NULL; delete this->id2entity; @@ -76,7 +82,9 @@ void KVstore::release() { this->objID2values = NULL; } -void KVstore::open() { +void +KVstore::open() +{ cout << "open KVstore" << endl; this->open_entity2id(KVstore::READ_WRITE_MODE); @@ -93,102 +101,192 @@ void KVstore::open() { this->open_preID2values(KVstore::READ_WRITE_MODE); } -int KVstore::getEntityDegree(int _entity_id) const { +int +KVstore::getEntityDegree(int _entity_id) const +{ return this->getEntityInDegree(_entity_id) + this->getEntityOutDegree(_entity_id); } -int KVstore::getEntityInDegree(int _entity_id) const { +int +KVstore::getEntityInDegree(int _entity_id) const +{ //cout << "In getEntityInDegree " << _entity_id << endl; unsigned* _tmp = NULL; unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len); - if (!_get) { - return 0; + + int ret = 0; 
+ if (_get) + { + ret = _tmp[0]; } - return _tmp[0]; + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; } -int KVstore::getEntityOutDegree(int _entity_id) const { +int +KVstore::getEntityOutDegree(int _entity_id) const +{ //cout << "In getEntityOutDegree " << _entity_id << endl; unsigned* _tmp = NULL; unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len); - if (!_get) { - return 0; + + int ret = 0; + if (_get) + { + ret = _tmp[0]; } - return _tmp[0]; + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; } -int KVstore::getLiteralDegree(int _literal_id) const { +int +KVstore::getLiteralDegree(int _literal_id) const +{ //cout << "In getLiteralDegree " << _literal_id << endl; unsigned* _tmp = NULL; unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len); - if (!_get) { - return 0; + + int ret = 0; + if (_get) + { + ret = _tmp[0]; } - return _tmp[0]; + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; } -int KVstore::getPredicateDegree(int _predicate_id) const { +int +KVstore::getPredicateDegree(int _predicate_id) const +{ //cout << "In getPredicate Degree " << _predicate_id << endl; unsigned* _tmp = NULL; unsigned _len = 0; bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len); - if (!_get) { - return 0; + + int ret = 0; + if (_get) + { + ret = _tmp[0]; } - return _tmp[0]; + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not 
free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; } -int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const { +int +KVstore::getSubjectPredicateDegree(int _subid, int _preid) const +{ //cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl; //TODO: use unsigned int* _tmp = NULL; unsigned _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { - return 0; + + int ret = 0; + if(_get) + { + int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2); + if (_result != -1) + { + int _offset = _tmp[4 + 2 * _result]; + int _offset_next; + if (_result == _tmp[1] - 1) + { + _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; + } + else + { + _offset_next = _tmp[6 + 2 * _result]; + } + ret = _offset_next - _offset; + } } - int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2); - if (_result == -1) { - return 0; + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; } - int _offset = _tmp[4 + 2 * _result]; - int _offset_next; - if (_result == _tmp[1] - 1) { - _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_next = _tmp[6 + 2 * _result]; - } - return _offset_next - _offset; + + return ret; } -int KVstore::getObjectPredicateDegree(int _objid, int _preid) const { +int +KVstore::getObjectPredicateDegree(int _objid, int _preid) const +{ //cout << "In getObjectPredicateDegree " << _objid << _preid << endl; //TODO: use unsigned int* _tmp = NULL; unsigned _len = 0; bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); - if (!_get) { - return 0; + + int ret = 0; + if (_get) + { + int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2); + if (_result != -1) + { + int _offset = _tmp[3 + 2 * _result]; + int _offset_next; + if (_result == _tmp[1] - 1) + 
{ + _offset_next = 2 + 2 * _tmp[1] + _tmp[0]; + } + else + { + _offset_next = _tmp[5 + 2 * _result]; + } + ret = _offset_next - _offset; + } } - int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2); - if (_result == -1) { - return 0; + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; } - int _offset = _tmp[3 + 2 * _result]; - int _offset_next; - if (_result == _tmp[1] - 1) { - _offset_next = 2 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_next = _tmp[5 + 2 * _result]; - } - return _offset_next - _offset; + + return ret; } bool KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id) { diff --git a/KVstore/SITree/storage/SIStorage.cpp b/KVstore/SITree/storage/SIStorage.cpp index 39022c0..2c610ed 100644 --- a/KVstore/SITree/storage/SIStorage.cpp +++ b/KVstore/SITree/storage/SIStorage.cpp @@ -419,7 +419,8 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next) fread(&len, sizeof(unsigned), 1, this->treefp); this->ReadAlign(_next); //this->request(len); - char* s = (char*)malloc(len); + //char* s = (char*)malloc(len); + char* s = new char[len]; _bp->setLen(len); for (i = 0; i + 4 < len; i += 4) { diff --git a/NOTES.md b/NOTES.md index d797830..1c57430 100644 --- a/NOTES.md +++ b/NOTES.md @@ -89,6 +89,7 @@ http://blog.csdn.net/infoworld/article/details/8670951 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧 type分支中query过程可能还有问题,需要修改Query/里面的类型,另外stringindex中也要修改,分界线已经是20亿且非法不再是-1 +remove signature.binary, 合并两个分支type value vstree在build和query时可以用不同大小的缓存,来加速build过程 --- UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍 @@ -469,6 +470,8 @@ build db error if triple num > 500M # BETTER +#### 添加数据访问层,数据范式和生成数据访问的源码 + #### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询,返回空值! #### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?) 
diff --git a/Util/Bstr.cpp b/Util/Bstr.cpp index a3282f2..c81ab29 100644 --- a/Util/Bstr.cpp +++ b/Util/Bstr.cpp @@ -28,7 +28,9 @@ Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy) //return; //} - this->str = (char*)malloc(_len); + //NOTICE: we decide to use new/delete in global area + //this->str = (char*)malloc(_len); + this->str = new char[_len]; memcpy(this->str, _str, sizeof(char) * _len); //this->str[_len]='\0'; } @@ -121,12 +123,14 @@ Bstr::operator != (const Bstr& _bstr) unsigned Bstr::getLen() const { +//WARN: we should not include too complicate logic here!!!! + //NOTICE: this is for VList - if(this->isBstrLongList()) - //if(this->str == NULL) - { - return 0; - } + //if(this->isBstrLongList()) + ////if(this->str == NULL) + //{ + //return 0; + //} return length; } @@ -158,15 +162,18 @@ Bstr::copy(const Bstr* _bp) this->length = _bp->getLen(); //DEBUG!!! //cerr<<"bstr length: "<length<str = (char*)malloc(this->length); - memcpy(this->str, _bp->getStr(), this->length); + + //this->str = (char*)malloc(this->length); + this->str = new char[this->length]; + memcpy(this->str, _bp->getStr(), sizeof(char) * this->length); } void Bstr::copy(const char* _str, unsigned _len) { this->length = _len; - this->str = (char*)malloc(this->length); + //this->str = (char*)malloc(this->length); + this->str = new char[this->length]; memcpy(this->str, _str, this->length); } @@ -180,7 +187,8 @@ Bstr::clear() void Bstr::release() { - free(this->str); //ok to be null, do nothing + //free(this->str); //ok to be null, do nothing + delete[] this->str; clear(); } diff --git a/Util/Stream.cpp b/Util/Stream.cpp index d5191ba..9394472 100644 --- a/Util/Stream.cpp +++ b/Util/Stream.cpp @@ -58,7 +58,8 @@ Stream::Stream(std::vector& _keys, std::vector& _desc, unsigned _rown this->record_size = new unsigned[this->colnum]; for(unsigned i = 0; i < this->colnum; ++i) { - this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE)); + char* tmptr = new char[Util::TRANSFER_SIZE]; + 
this->record[i].setStr(tmptr); this->record_size[i] = Util::TRANSFER_SIZE; } @@ -148,7 +149,8 @@ Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx) if(length + 1 > this->record_size[_idx]) { this->record[_idx].release(); - this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char))); + char* tmptr = new char[length+1]; + this->record[_idx].setStr(tmptr); this->record_size[_idx] = length + 1; //one more byte: convenient to add \0 } @@ -187,7 +189,8 @@ Stream::outputCache() { unsigned len; fread(&len, sizeof(unsigned), 1, this->tempfp); - char* p = (char*)malloc(len * sizeof(char)); + //char* p = (char*)malloc(len * sizeof(char)); + char* p = new char[len]; fread(p, sizeof(char), len, this->tempfp); bp[i].setLen(len); bp[i].setStr(p); @@ -320,13 +323,16 @@ Stream::read() //FILE* fp = (FILE*)(this->ans); for(unsigned i = 0; i < this->colnum; ++i) { - //BETTER:alloca and reuse the space in Bstr? + //BETTER:alloc and reuse the space in Bstr? unsigned len; fread(&len, sizeof(unsigned), 1, this->ansDisk); - char* s = (char*)calloc(len + 1, sizeof(char)); + //char* s = (char*)calloc(len + 1, sizeof(char)); + char* s = new char[len+1]; fread(s, sizeof(char), len, this->ansDisk); + s[len] = '\0'; this->copyToRecord(s, len, i); - free(s); + //free(s); + delete[] s; } } this->xpos++; @@ -420,7 +426,9 @@ Stream::mergeSort() #endif break; } - s = (char*)malloc(sizeof(char) * len); + + //s = (char*)malloc(sizeof(char) * len); + s = new char[len]; fread(s, sizeof(char), len, tp); bp[i].setLen(len); bp[i].setStr(s); diff --git a/Util/Util.cpp b/Util/Util.cpp index bbf6077..d3af37f 100644 --- a/Util/Util.cpp +++ b/Util/Util.cpp @@ -651,7 +651,14 @@ Util::result_id_str(vector& _v, int _var_num) bool Util::dir_exist(const string _dir) { - return (opendir(_dir.c_str()) != NULL); + DIR* dirptr = opendir(_dir.c_str()); + if(dirptr != NULL) + { + closedir(dirptr); + return true; + } + + return false; } bool diff --git a/Util/Util.h b/Util/Util.h index 
1098da1..1f37fc7 100644 --- a/Util/Util.h +++ b/Util/Util.h @@ -91,6 +91,7 @@ in the sparql query can point to the same node in data graph) //#define DEBUG_VSTREE 1 //in Database //#define DEBUG_LRUCACHE 1 //#define DEBUG_DATABASE 1 //in Database +//#define DEBUG_VLIST 1 // // @@ -124,6 +125,12 @@ in the sparql query can point to the same node in data graph) #endif #endif +#ifdef DEBUG_VLIST +#ifndef DEBUG +#define DEBUG +#endif +#endif + #ifndef DEBUG //#define DEBUG #endif diff --git a/Util/VList.cpp b/Util/VList.cpp index 261d887..6407d86 100644 --- a/Util/VList.cpp +++ b/Util/VList.cpp @@ -180,6 +180,9 @@ VList::WriteAlign(unsigned* _curnum) bool VList::readValue(unsigned _block_num, char*& _str, unsigned& _len) { +#ifdef DEBUG_VLIST + cout<<"to get value of block num: "<<_block_num<AllocBlock(); unsigned curnum = blocknum; + + //NOTICE: here we must skip the next position first + fseek(valfp, Address(curnum) + 4, SEEK_SET); this->writeBstr(_str, _len, &curnum); +#ifdef DEBUG_VLIST + cout<<"to write value - block num: "<valfp); +#ifdef DEBUG_VLIST + cout<<"the length of value: "<ReadAlign(_next); - char* s = (char*)malloc(len); + //char* s = (char*)malloc(len); + char* s = new char[len]; _len = len; for (i = 0; i + 4 < len; i += 4) @@ -257,6 +270,7 @@ VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum) unsigned i, j, len = _len; fwrite(&len, sizeof(unsigned), 1, valfp); this->WriteAlign(_curnum); + //cout<<"to write bstr, length: "<freelist; + //write the info back + fseek(this->valfp, 0, SEEK_SET); + fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num + fseek(valfp, BLOCK_SIZE, SEEK_SET); + int i, j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE; + for (i = 0; i < j; ++i) + { + //reset to 1 first + fputc(0xff, valfp); + } + char c; + BlockInfo* bp = this->freelist->next; + while (bp != NULL) + { + //if not-use then set 0, aligned to byte! 
+#ifdef DEBUG_KVSTORE + if (bp->num > cur_block_num) + { + printf("blocks num exceed, cur_block_num: %u\n", cur_block_num); + exit(1); + } +#endif + j = bp->num - 1; + i = j / 8; + j = 7 - j % 8; + fseek(valfp, BLOCK_SIZE + i, SEEK_SET); + c = fgetc(valfp); + fseek(valfp, -1, SEEK_CUR); + fputc(c & ~(1 << j), valfp); + bp = bp->next; + } + + bp = this->freelist; BlockInfo* next; while (bp != NULL) { diff --git a/Util/VList.h b/Util/VList.h index 2719bf5..45fdcfc 100644 --- a/Util/VList.h +++ b/Util/VList.h @@ -12,9 +12,6 @@ #include "Util.h" #include "Bstr.h" -//TODO: all use new/delete for Bstr, KVstore and trees, including Stream -//then give a full test, including valgrind - //NOTICE: not keep long list in memory, read each time //but when can you free the long list(kvstore should release it after parsing) // @@ -31,15 +28,19 @@ //file1 is tree file, the long list is represented as: 0 real-address //NOTICE: long list is not kept in mmeory for cache, it is read/update each time on need! +//TODO: use fread/fwrite here instead of fgetc/fputc +//including other trees + class VList { public: //NOTICE:the border is 10^6, but the block is larger, 1M - static const unsigned LENGTH_BORDER = 1000000; + //static const unsigned LENGTH_BORDER = 1000000; + static const unsigned LENGTH_BORDER = 1000; static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num //below two constants: must can be exactly divided by 8 - static const unsigned SET_BLOCK_NUM = 1 << 2; //initial blocks num + static const unsigned SET_BLOCK_NUM = 1 << 3; //initial blocks num static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; diff --git a/data/bbug0.sql b/data/bbug0.sql new file mode 100644 index 0000000..d89b0e3 --- /dev/null +++ b/data/bbug0.sql @@ -0,0 +1,5 @@ +INSERT DATA +{ + . + . 
+} diff --git a/data/bbug0d.sql b/data/bbug0d.sql new file mode 100644 index 0000000..b7dc5d5 --- /dev/null +++ b/data/bbug0d.sql @@ -0,0 +1 @@ +DELETE DATA { . } diff --git a/data/bbug1.sql b/data/bbug1.sql new file mode 100644 index 0000000..bee32b2 --- /dev/null +++ b/data/bbug1.sql @@ -0,0 +1 @@ +select ?subject ?predict ?object WHERE { ?subject ?object; ?predict ?object . } diff --git a/data/bbug2.sql b/data/bbug2.sql new file mode 100644 index 0000000..0201729 --- /dev/null +++ b/data/bbug2.sql @@ -0,0 +1 @@ +DELETE WHERE { ?predict ?object . } diff --git a/data/bbug3.sql b/data/bbug3.sql new file mode 100644 index 0000000..89665fa --- /dev/null +++ b/data/bbug3.sql @@ -0,0 +1 @@ +select ?predict where { ?predict .} diff --git a/data/bbug4.sql b/data/bbug4.sql new file mode 100644 index 0000000..1138aea --- /dev/null +++ b/data/bbug4.sql @@ -0,0 +1,5 @@ +select ?subject ?predict ?object where +{ + ?object. + ?subject ?predict ?object. +} diff --git a/data/bbug5.sql b/data/bbug5.sql new file mode 100644 index 0000000..03312cd --- /dev/null +++ b/data/bbug5.sql @@ -0,0 +1 @@ +select ?subject ?predict ?object where {?subject ; ?predict ?object . } diff --git a/data/bbug6.sql b/data/bbug6.sql new file mode 100644 index 0000000..192c12c --- /dev/null +++ b/data/bbug6.sql @@ -0,0 +1 @@ +DELETE WHERE { ?subject ?objcet. 
} From 58501a97fe00055590682f964e3dbefbd0392c03 Mon Sep 17 00:00:00 2001 From: bookug Date: Sat, 1 Apr 2017 16:08:52 +0800 Subject: [PATCH 6/6] refactor: change VList border from 1000 to 1000000 1000 is just for test with lubm or bbug, 1000000 is for real case by zengli, no changes to other modules --- Util/VList.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Util/VList.h b/Util/VList.h index 45fdcfc..2281391 100644 --- a/Util/VList.h +++ b/Util/VList.h @@ -35,8 +35,8 @@ class VList { public: //NOTICE:the border is 10^6, but the block is larger, 1M - //static const unsigned LENGTH_BORDER = 1000000; - static const unsigned LENGTH_BORDER = 1000; + static const unsigned LENGTH_BORDER = 1000000; + //static const unsigned LENGTH_BORDER = 1000; static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num //below two constants: must can be exactly divided by 8