diff --git a/.gitignore b/.gitignore index ceb77fe..70c39fd 100644 --- a/.gitignore +++ b/.gitignore @@ -91,3 +91,6 @@ tags *.out *.bak~ +# modules +node_modules + diff --git a/Database/Database.cpp b/Database/Database.cpp index 70754e9..d088fa9 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -1317,7 +1317,6 @@ Database::build_p2xx(int** _p_id_tuples) bool Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max) { - //TODO:unsigned double not to max?? set to max directly int _id_tuples_size; { //initial @@ -1477,6 +1476,10 @@ Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _ } } + //NOTICE: we assume that there is no duplicates in the dataset + //if not, this->triple_num will be not right, and _p_id_tuples will save useless triples + //However, we can not use exist_triple to detect duplicates here, because it is too time-costly + // For id_tuples _p_id_tuples[_id_tuples_size] = new int[3]; _p_id_tuples[_id_tuples_size][0] = _sub_id; diff --git a/Database/Database.h b/Database/Database.h index 6d23d72..b992de5 100644 --- a/Database/Database.h +++ b/Database/Database.h @@ -188,7 +188,7 @@ private: int remove(const TripleWithObjType* _triples, int _triple_num); //bool remove(const vector& _triples, vector& _vertices, vector& _predicates); - bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, unsigned**& _p_id_tuples, unsigned & _id_tuples_max); + bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max); bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max); bool objIDIsEntityID(int _id); diff --git a/Database/Join.cpp b/Database/Join.cpp index aa0e675..9e9a6a5 100644 --- a/Database/Join.cpp +++ b/Database/Join.cpp @@ -951,6 +951,11 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_lis } } +//TODO: multiple lists intersect, 
how about sort and intersect from small to big? +//but this need to generate all first, I think sort by pre2num if better! +// +//TODO: set the entity_literal border in kvstore, and intersect entity part and literal part respectively + //NOTICE: consider two directions according to table1 size and table2 size //1. -> add ID mapping record for the first linking column, whole(offset, size) zengli //2. <- join using inverted index for each column, offset and size for each column, hulin diff --git a/KVstore/ISTree/ISTree.h b/KVstore/ISTree/ISTree.h index 9620bd8..e0499dd 100644 --- a/KVstore/ISTree/ISTree.h +++ b/KVstore/ISTree/ISTree.h @@ -3,7 +3,7 @@ # Author: syzz # Mail: 1181955272@qq.com # Last Modified: 2015-04-26 16:44 -# Description: struct and interface of the B+ tree +# Description: ID2string, including id2entity, id2literal and id2predicate =============================================================================*/ #ifndef _KVSTORE_ISTREE_ISTREE_H @@ -76,4 +76,4 @@ public: //(problem range between two extremes: not-modified, totally-modified) //After saved, it's ok to continue operations on tree! 
-#endif
\ No newline at end of file
+#endif
diff --git a/KVstore/IVTree/IVTree.cpp b/KVstore/IVTree/IVTree.cpp
new file mode 100644
index 0000000..1dee1cf
--- /dev/null
+++ b/KVstore/IVTree/IVTree.cpp
@@ -0,0 +1,677 @@
+/*=============================================================================
+# Filename: IVTree.cpp
+# Author: syzz
+# Mail: 1181955272@qq.com
+# Last Modified: 2015-04-26 16:45
+# Description: achieve functions in IVTree.h
+=============================================================================*/
+
+#include "IVTree.h"
+
+using namespace std;
+
+//Default constructor: builds an empty tree that is not bound to any file.
+//Every pointer member is set to NULL and the counters to 0, so the object is
+//safe to destroy even if it is never used.
+IVTree::IVTree()
+{
+    height = 0;
+    mode = "";
+    root = NULL;
+    leaves_head = NULL;
+    leaves_tail = NULL;
+    TSM = NULL;
+    storepath = "";
+    filename = "";
+    //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
+    //transfer_size = 0;
+    this->stream = NULL;
+    this->request = 0;
+    this->value_list = NULL;
+}
+
+//Build or open the tree stored at _storepath/_filename.
+//  _storepath, _filename: joined by getFilePath() into the tree file path;
+//                         the value list uses the same path plus "_vlist"
+//  _mode:        passed on to IVStorage; when it equals "open" the existing
+//                tree is pre-read from storage, otherwise the tree starts empty
+//  _buffer_size: passed on to IVStorage
+IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long long _buffer_size)
+{
+    storepath = _storepath;
+    filename = _filename;
+    this->height = 0;
+    this->mode = string(_mode);
+    //FIX: null all node pointers before anything else. Previously leaves_head
+    //and leaves_tail were never initialized in this constructor (and root only
+    //in the non-"open" branch), so range_query()/print() on a freshly built,
+    //still-empty tree read indeterminate pointers.
+    this->root = NULL;
+    this->leaves_head = NULL;
+    this->leaves_tail = NULL;
+    string filepath = this->getFilePath();
+
+    string vlist_file = filepath + "_vlist";
+    this->value_list = new VList(vlist_file, 1<<30);
+
+    TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list);
+    //in "open" mode the storage layer fills in root and both ends of the leaf list
+    if (this->mode == "open")
+        this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
+
+    //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
+    //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
+    //this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
+    //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
+    //this->transfer.setStr((char*)malloc(Util::TRANSFER_SIZE));
+
+    this->stream = NULL;
+    this->request = 0;
+}
+
+//Return "<storepath>/<filename>", the path handed to the storage layer.
+string
+IVTree::getFilePath()
+{
+    return storepath + "/" + filename;
+}
+
+//void //WARN: not check _str
and _len +//IVTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + //transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} + +unsigned +IVTree::getHeight() const +{ + return this->height; +} + +void +IVTree::setHeight(unsigned _h) +{ + this->height = _h; +} + +IVNode* +IVTree::getRoot() const +{ + return this->root; +} + +void +IVTree::prepare(IVNode* _np) +{ + //this->request = 0; + bool flag = _np->inMem(); + if (!flag) + { + this->TSM->readNode(_np, &request); //readNode deal with request + } +} + +bool +IVTree::search(int _key, char*& _str, int& _len) +{ + if (_key < 0) + { + printf("error in IVTree-search: empty string\n"); + return false; + } + + this->request = 0; + int store; + IVNode* ret = this->find(_key, &store, false); + if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found + { + return false; + } + + ret->getValue(this->value_list, store, _str, _len); + //const Bstr* val = ret->getValue(store); + //this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request + //_str = this->transfer[0].getStr(); + //_len = this->transfer[0].getLen(); + + this->TSM->request(request); + return true; +} + +bool +IVTree::insert(int _key, const char* _str, unsigned _len) +{ + if (_key < 0) + { + printf("error in IVTree-insert: empty string\n"); + return false; + } + + //this->CopyToTransfer(_str, _len, 2); + 
//const Bstr* val = &(this->transfer[2]); + this->request = 0; + IVNode* ret; + if (this->root == NULL) //tree is empty + { + leaves_tail = leaves_head = root = new IVLeafNode; + request += IVNode::LEAF_SIZE; + this->height = 1; + root->setHeight(1); //add to heap later + } + + //this->prepare(this->root); //root must be in-mem + if (root->getNum() == IVNode::MAX_KEY_NUM) + { + IVNode* father = new IVIntlNode; + request += IVNode::INTL_SIZE; + father->addChild(root, 0); + ret = root->split(father, 0); + if (ret->isLeaf() && ret->getNext() == NULL) + this->leaves_tail = ret; + if (ret->isLeaf()) + request += IVNode::LEAF_SIZE; + else + request += IVNode::INTL_SIZE; + this->height++; //height rises only when root splits + //WARN: height area in Node: 4 bit! + father->setHeight(this->height); //add to heap later + this->TSM->updateHeap(ret, ret->getRank(), false); + this->root = father; + } + + IVNode* p = this->root; + IVNode* q; + int i; + while (!p->isLeaf()) + { + //j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + //NOTICE: using binary search is better here + i = p->searchKey_less(_key); + + q = p->getChild(i); + this->prepare(q); + if (q->getNum() == IVNode::MAX_KEY_NUM) + { + ret = q->split(p, i); + if (ret->isLeaf() && ret->getNext() == NULL) + this->leaves_tail = ret; + if (ret->isLeaf()) + request += IVNode::LEAF_SIZE; + else + request += IVNode::INTL_SIZE; + //BETTER: in loop may update multiple times + this->TSM->updateHeap(ret, ret->getRank(), false); + this->TSM->updateHeap(q, q->getRank(), true); + this->TSM->updateHeap(p, p->getRank(), true); + if (_key < p->getKey(i)) + p = q; + else + p = ret; + } + else + { + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + p = q; + } + } + //j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + //insert existing key is ok, but not inserted in + //however, the tree-shape may change due to 
possible split in former code + bool ifexist = false; + if (i > 0 && _key == p->getKey(i - 1)) + ifexist = true; + else + { + p->addKey(_key, i); + p->addValue(this->value_list, i, _str, _len, true); + p->addNum(); + request += _len; + //request += val->getLen(); + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + //_key->clear(); + //_value->clear(); + } + this->TSM->request(request); + return !ifexist; //QUERY(which case:return false) +} + +bool +IVTree::modify(int _key, const char* _str, unsigned _len) +{ + if (_key < 0) + { + printf("error in IVTree-modify: empty string\n"); + return false; + } + + //this->CopyToTransfer(_str, _len, 2); //not check value + //const Bstr* val = &(this->transfer[2]); + this->request = 0; + int store; + IVNode* ret = this->find(_key, &store, true); + if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found + { + cerr << "tree is empty or not found" << endl; + return false; + } + //cout<<"IVTree::modify() - key is found, now to remove"<getValue(store)->getLen(); + ret->setValue(this->value_list, store, _str, _len, true); + //ret->setValue(val, store, true); + //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); + this->request = _len; + //this->request = val->getLen(); + this->request -= len; + ret->setDirty(); + //cout<<"to request"<TSM->request(request); + //cout<<"memory requested"<= *_key +IVTree::find(int _key, int* _store, bool ifmodify) +{ //to assign value for this->bstr, function shouldn't be const! 
+ if (this->root == NULL) + return NULL; //IVTree Is Empty + + IVNode* p = root; + int i, j; + while (!p->isLeaf()) + { + if (ifmodify) + p->setDirty(); + //j = p->getNum(); + //for(i = 0; i < j; ++i) //BETTER(Binary-Search) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + p = p->getChild(i); + this->prepare(p); + } + + j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr <= *(p->getKey(i))) + //break; + i = p->searchKey_lessEqual(_key); + + if (i == j) + *_store = -1; //Not Found + else + *_store = i; + + return p; +} + +/* +Node* +IVTree::find(unsigned _len, const char* _str, int* store) const +{ +} +*/ + +bool +IVTree::remove(int _key) +{ + if (_key < 0) + { + printf("error in IVTree-remove: empty string\n"); + return false; + } + + this->request = 0; + IVNode* ret; + if (this->root == NULL) //tree is empty + return false; + + IVNode* p = this->root; + IVNode* q; + int i, j; + while (!p->isLeaf()) + { + j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + q = p->getChild(i); + this->prepare(q); + if (q->getNum() < IVNode::MIN_CHILD_NUM) //==MIN_KEY_NUM + { + if (i > 0) + this->prepare(p->getChild(i - 1)); + if (i < j) + this->prepare(p->getChild(i + 1)); + ret = q->coalesce(p, i); + if (ret != NULL) + this->TSM->updateHeap(ret, 0, true);//non-sense node + this->TSM->updateHeap(q, q->getRank(), true); + if (q->isLeaf()) + { + if (q->getPrev() == NULL) + this->leaves_head = q; + if (q->getNext() == NULL) + this->leaves_tail = q; + } + if (p->getNum() == 0) //root shrinks + { + //this->leaves_head = q; + this->root = q; + this->TSM->updateHeap(p, 0, true); //instead of delete p + this->height--; + } + } + else + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + p = q; + } + bool flag = false; + //j = p->getNum(); //LeafNode(maybe root) + //for(i = 0; i < j; ++i) + // if(bstr == *(p->getKey(i))) + // { + // request -= p->getKey(i)->getLen(); + // request 
-= p->getValue(i)->getLen(); + // p->subKey(i, true); //to release + // p->subValue(i, true); //to release + // p->subNum(); + // if(p->getNum() == 0) //root leaf 0 key + // { + // this->root = NULL; + // this->leaves_head = NULL; + // this->leaves_tail = NULL; + // this->height = 0; + // this->TSM->updateHeap(p, 0, true); //instead of delete p + // } + // p->setDirty(); + // flag = true; + // break; + // } + i = p->searchKey_equal(_key); + //WARN+NOTICE:here must check, because the key to remove maybe not exist + if (i != (int)p->getNum()) + { + request -= p->getValue(i)->getLen(); + p->subKey(i); //to release + p->subValue(i, true); //to release + p->subNum(); + if (p->getNum() == 0) //root leaf 0 key + { + this->root = NULL; + this->leaves_head = NULL; + this->leaves_tail = NULL; + this->height = 0; + this->TSM->updateHeap(p, 0, true); //instead of delete p + } + p->setDirty(); + flag = true; + } + + this->TSM->request(request); + return flag; //i == j, not found +} + +const Bstr* +IVTree::getRangeValue() +{ + if (this->stream == NULL) + { + fprintf(stderr, "IVTree::getRangeValue(): no results now!\n"); + return NULL; + } + if (this->stream->isEnd()) + { + fprintf(stderr, "IVTree::getRangeValue(): read till end now!\n"); + return NULL; + } + //NOTICE:this is one record, and donot free the memory! 
+    //NOTICE:Bstr[] but only one element, used as Bstr*
+    return this->stream->read();
+}
+
+//Mark the current result stream as finished; complains on stderr when no
+//range query has produced a stream yet.
+void
+IVTree::resetStream()
+{
+    if (this->stream == NULL)
+    {
+        fprintf(stderr, "no results now!\n");
+        return;
+    }
+    this->stream->setEnd();
+}
+
+//Collect the values of all keys x with _key1 <= x < _key2 into this->stream,
+//to be fetched afterwards one by one with getRangeValue().
+//A negative _key1 means "from the first leaf", a negative _key2 means "up to
+//the end of the last leaf".
+//Returns false when there is nothing to report (empty tree, empty or inverted
+//range); in that case the previous stream, if any, is left untouched.
+bool //special case: not exist, one-edge-case
+IVTree::range_query(int _key1, int _key2)
+{ //the range is: *_key1 <= x < *_key2
+    //if(_key1 <0 && _key2 <0)
+    //return false;
+    //ok to search one-edge, requiring only one be negative
+    //find and write value
+    int store1, store2;
+    IVNode *p1, *p2;
+    if (_key1 >= 0)
+    {
+        request = 0;
+        p1 = this->find(_key1, &store1, false);
+        if (p1 == NULL || store1 == -1)
+            return false; //no element
+        this->TSM->request(request);
+    }
+    else
+    {
+        p1 = this->leaves_head;
+        //FIX: an empty tree has no leaf list; the old code went on to call
+        //p1->getNum() on a NULL pointer
+        if (p1 == NULL)
+            return false;
+        store1 = 0;
+    }
+    if (_key2 >= 0)
+    { //QUERY: another strategy is to getnext and compare every time to tell end
+        request = 0;
+        p2 = this->find(_key2, &store2, false);
+        if (p2 == NULL)
+            return false;
+        else if (store2 == -1)
+            store2 = p2->getNum();
+        else if (store2 == 0)
+        {
+            //the bound is the first key of its leaf: the range ends with the
+            //whole previous leaf
+            p2 = p2->getPrev();
+            if (p2 == NULL)
+                return false; //no element
+            store2 = p2->getNum();
+        }
+        this->TSM->request(request);
+    }
+    else
+    {
+        p2 = this->leaves_tail;
+        //FIX: same empty-tree guard as for the left edge
+        if (p2 == NULL)
+            return false;
+        store2 = p2->getNum();
+    }
+
+    IVNode* p = p1;
+    unsigned i, l, r;
+    //get the num of answers first, not need to prepare the node
+    unsigned ansNum = 0;
+    while (true)
+    {
+        //request = 0;
+        //this->prepare(p);
+        if (p == p1)
+            l = store1;
+        else
+            l = 0;
+        if (p == p2)
+            r = store2;
+        else
+            r = p->getNum();
+        //FIX: with _key1 > _key2 inside one leaf, r < l and the unsigned
+        //subtraction below used to wrap around to a huge answer count
+        if (r < l)
+            return false;
+        ansNum += (r - l);
+        //this->TSM->request(request);
+        if (p == p2)
+            break;
+        p = p->getNext();
+        //FIX: when p2 lies before p1 (empty range) the walk never meets p2;
+        //stop at the end of the leaf list instead of dereferencing NULL
+        if (p == NULL)
+            return false;
+    }
+
+    if (this->stream != NULL)
+    {
+        delete this->stream;
+        this->stream = NULL;
+    }
+    //NOTE(review): the template arguments of these two vectors were missing in
+    //the text under review; <int>/<bool> are assumed here - confirm against the
+    //Stream constructor declared in Util/Stream.h
+    vector<int> keys;
+    vector<bool> desc;
+    this->stream = new Stream(keys, desc, ansNum, 1, false);
+
+    //second pass over the same leaves (already validated above): load each
+    //leaf and copy its values into the stream
+    p = p1;
+    while (1)
+    {
+        request = 0;
+        this->prepare(p);
+        if (p == p1)
+            l = store1;
+        else
+            l = 0;
+        if (p == p2)
+            r = store2;
+        else
+            r = p->getNum();
+        for (i = l; i < r; ++i)
+        {
+            //NOTICE:Bstr* in an array, used as Bstr[]
+            this->stream->write(p->getValue(i));
+        }
+        this->TSM->request(request);
+        if (p != p2)
+            p = p->getNext();
+        else
+            break;
+    }
+    this->stream->setEnd();
+    return true;
+}
+
+//Write the whole tree to disk through the storage manager; returns whatever
+//IVStorage::writeTree reports.
+bool
+IVTree::save() //save the whole tree to disk
+{
+#ifdef DEBUG_KVSTORE
+    printf("now to save tree!\n");
+#endif
+    if (TSM->writeTree(this->root))
+        return true;
+    else
+        return false;
+}
+
+//Recursively delete the subtree rooted at _np (NULL is accepted).
+//An internal node with num keys has num+1 children, hence the loop runs
+//from index num down to 0.
+void
+IVTree::release(IVNode* _np) const
+{
+    if (_np == NULL) return;
+    if (_np->isLeaf())
+    {
+        delete _np;
+        return;
+    }
+    int cnt = _np->getNum();
+    for (; cnt >= 0; --cnt)
+        release(_np->getChild(cnt));
+    delete _np;
+}
+
+//Destroy the stream, the storage manager and then every node of the tree.
+//NOTE(review): value_list is allocated with new in the constructor and is not
+//deleted here; unless IVStorage takes ownership of it this is a leak - confirm
+//before adding a delete.
+IVTree::~IVTree()
+{
+    delete this->stream; //maybe NULL
+    delete TSM;
+#ifdef DEBUG_KVSTORE
+    printf("already empty the buffer, now to delete all nodes in tree!\n");
+#endif
+    //recursively delete each Node
+    release(root);
+}
+
+//Debug dump, only active when DEBUG_KVSTORE is defined.
+//s selects what is printed: "tree"/"TREE" walks all nodes, "leaves"/"LEAVES"
+//walks the leaf list; the upper-case spelling asks each node for its full
+//information.
+void
+IVTree::print(string s)
+{
+#ifdef DEBUG_KVSTORE
+    fputs(Util::showtime().c_str(), Util::debug_kvstore);
+    fputs("Class IVTree\n", Util::debug_kvstore);
+    fputs("Message: ", Util::debug_kvstore);
+    fputs(s.c_str(), Util::debug_kvstore);
+    fputs("\n", Util::debug_kvstore);
+    fprintf(Util::debug_kvstore, "Height: %d\n", this->height);
+    if (s == "tree" || s == "TREE")
+    {
+        if (this->root == NULL)
+        {
+            fputs("Null IVTree\n", Util::debug_kvstore);
+            return;
+        }
+        //explicit stack for a depth-first walk: ns[] holds the nodes on the
+        //current path, ni[] the next child index still to visit for each
+        IVNode** ns = new IVNode*[this->height];
+        int* ni = new int[this->height];
+        IVNode* np;
+        int i, pos = 0;
+        ns[pos] = this->root;
+        ni[pos] = this->root->getNum();
+        pos++;
+        while (pos > 0)
+        {
+            np = ns[pos - 1];
+            i = ni[pos - 1];
+            this->prepare(np);
+            if (np->isLeaf() || i < 0) //LeafNode or ready IntlNode
+            { //child-num ranges: 0~num
+                if (s == "tree")
+                    np->print("node");
+                else
+                    np->print("NODE"); //print full node-information
+                pos--;
+                continue;
+            }
+            else
+            {
+                ns[pos] = np->getChild(i);
+                ni[pos - 1]--;
+                ni[pos] = ns[pos]->getNum();
+                pos++;
+            }
+        }
+        delete[] ns;
+        delete[] ni;
+    }
+    else if (s == "LEAVES" || s == "leaves")
+    {
+ IVNode* np; + for (np = this->leaves_head; np != NULL; np = np->getNext()) + { + this->prepare(np); + if (s == "leaves") + np->print("node"); + else + np->print("NODE"); + } + } + else if (s == "check tree") + { + //check the tree, if satisfy B+ definition + //TODO + } + else; +#endif +} + diff --git a/KVstore/IVTree/IVTree.h b/KVstore/IVTree/IVTree.h new file mode 100644 index 0000000..86fbf27 --- /dev/null +++ b/KVstore/IVTree/IVTree.h @@ -0,0 +1,98 @@ +/*============================================================================= +# Filename: IVTree.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:44 +# Description: ID2valueList, including s2po, p2so and o2ps +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_IVTREE_H +#define _KVSTORE_IVTREE_IVTREE_H + +#include "../../Util/Util.h" +#include "../../Util/Stream.h" +#include "node/IVNode.h" +#include "node/IVIntlNode.h" +#include "node/IVLeafNode.h" +#include "storage/IVStorage.h" +#include "./vlist/VList.h" + +//TODO: for long list, do not read in time, just on need +//the memory is kept with the node, updat ewith node +//NOTICE: to release the node, maybe the value list is NULL +//value bstr: unsigned=address, NULL +//BETTER?: build a new block store for long list?? 
+
+//NOTICE: we do not need to use transfer bstr here, neither for two directions
+//when insert/query, we do not release the value in kvstore
+
+//B+ tree mapping an int key to a byte-string value (see the file header:
+//ID2valueList). Nodes are loaded and evicted through the IVStorage manager;
+//value bytes are read and written through the VList handed to the nodes.
+class IVTree
+{
+protected:
+    unsigned int height; //0 indicates an empty tree
+    IVNode* root;
+    IVNode* leaves_head; //the head of LeafNode-list
+    IVNode* leaves_tail; //the tail of LeafNode-list
+    std::string mode; //BETTER(to use enum)
+    IVStorage* TSM; //Tree-Storage-Manage
+    //BETTER:multiple stream maybe needed:)
+    //result buffer of the latest range_query(), read back via getRangeValue()
+    Stream* stream;
+
+    //always alloc one more byte than length, then user can add a '\0'
+    //to get a real string, instead of new and copy
+    //other operations will be harmful to search, so store value in
+    //transfer temporally, while length adjusted.
+    //TODO: in multi-user case, multiple-search will cause problem,
+    //so lock is a must. Add lock to transfer is better than to add
+    //lock to every key/value. However, modify requires a lock for a
+    //key/value, and multiple search for different keys are ok!!!
+    //Bstr transfer;
+    //unsigned transfer_size;
+    //Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
+    //unsigned transfer_size[3];
+
+    //tree's operations should be atom(if read nodes)
+    //sum the request and send to IVStorage at last
+    //ensure that all nodes operated are in memory
+    long long request;
+    //load _np through the storage manager if it is not in memory yet
+    void prepare(IVNode* _np);
+
+    std::string storepath;
+    std::string filename; //ok for user to change
+    /* some private functions */
+    std::string getFilePath(); //in UNIX system
+    //void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
+    //void CopyToTransfer(const char* _str, unsigned _len);
+    //recursively delete the subtree rooted at _np
+    void release(IVNode* _np) const;
+
+    //very long value list are stored in a separate file(with large block)
+    //
+    //NOTICE: according to the summary result, 90% value lists are just below 100 bytes
+    //<10%: 5000000~100M bytes
+    VList* value_list;
+
+public:
+    IVTree(); //always need to initial transfer
+    //_mode "open" pre-reads an existing tree, any other mode starts empty
+    IVTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
+    unsigned int getHeight() const;
+    void setHeight(unsigned _h);
+    IVNode* getRoot() const;
+    //void setRoot(Node* _root);
+    //insert, search, remove, set
+    //all of these reject a negative _key and return false for it
+    bool search(int _key, char*& _str, int& _len);
+    //returns false (and stores nothing) when _key already exists
+    bool insert(int _key, const char* _str, unsigned _len);
+    bool modify(int _key, const char* _str, unsigned _len);
+    //returns the leaf responsible for _key and sets *store to the slot index,
+    //or to -1 when no slot in that leaf qualifies; NULL on an empty tree
+    IVNode* find(int _key, int* store, bool ifmodify);
+    bool remove(int _key);
+    //next value of the last range_query(), NULL when none is left
+    const Bstr* getRangeValue();
+    void resetStream();
+    //range is _key1 <= x < _key2; a negative bound leaves that side open
+    bool range_query(int _key1, int _key2);
+    bool save();
+    ~IVTree();
+    void print(std::string s); //DEBUG(print the tree)
+};
+//NOTICE: need to save tree manually before delete, otherwise will cause problem.
+//(problem range between two extremes: not-modified, totally-modified)
+//After saved, it's ok to continue operations on tree!
+
+#endif
diff --git a/KVstore/IVTree/heap/IVHeap.cpp b/KVstore/IVTree/heap/IVHeap.cpp
new file mode 100644
index 0000000..5cc291f
--- /dev/null
+++ b/KVstore/IVTree/heap/IVHeap.cpp
@@ -0,0 +1,186 @@
+/*=============================================================================
+# Filename: IVHeap.cpp
+# Author: syzz
+# Mail: 1181955272@qq.com
+# Last Modified: 2015-04-26 16:37
+# Description: achieve functions in IVHeap.h
+=============================================================================*/
+
+#include "IVHeap.h"
+
+using namespace std;
+
+//Empty heap without any backing array.
+IVHeap::IVHeap()
+{
+    this->length = this->size = 0;
+    this->heap = NULL;
+}
+
+//Empty heap with room for _size node pointers.
+IVHeap::IVHeap(unsigned _size)
+{
+    this->length = 0;
+    this->size = _size;
+    //this->heap = (Node**)malloc(this->size * sizeof(Node*)); //not use 4 or 8
+    this->heap = new IVNode*[this->size];
+    //NOTE(review): plain new reports failure by throwing, so this NULL branch
+    //looks unreachable - confirm before relying on it
+    if (this->heap == NULL)
+    {
+        this->print("error in IVHeap: Allocation fail!");
+        exit(1);
+    }
+    /*
+    this->npmap = (Map*)malloc(this->size * sizeof(struct Map));
+    if(this->npmap == NULL)
+    {
+    this->print("error in IVHeap: Allocation fail!");
+    exit(1);
+    }
+    */
+}
+
+IVNode*
+IVHeap::getTop() const
+{
+    //min-heap on getRank(): slot 0 holds the node with the smallest rank;
+    //NULL when the heap is empty
+    if (this->length > 0)
+        return this->heap[0];
+    else
+        return NULL;
+}
+
+//Number of nodes currently stored.
+unsigned
+IVHeap::getLen() const
+{
+    return this->length;
+}
+
+//Capacity of the backing array.
+unsigned
+IVHeap::getSize() const
+{
+    return this->size;
+}
+
+bool
+IVHeap::isEmpty() const
+{
+    return this->length == 0;
+}
+
+//Add _np and sift it up to its place. The backing array is doubled when it is
+//full. Returns false only when that growth fails.
+bool
+IVHeap::insert(IVNode* _np)
+{
+    if (this->length == this->size) //when full, reallocate
+    {
+        //FIX: the array comes from new[] and is released with delete[], so it
+        //must not go through realloc(). The old code also overwrote heap with
+        //NULL when realloc failed (losing every stored node) and could not grow
+        //a default-constructed heap, because 2 * 0 is still 0.
+        unsigned new_size = (this->size == 0) ? 1 : 2 * this->size;
+        IVNode** new_heap = NULL;
+        try
+        {
+            new_heap = new IVNode*[new_size];
+        }
+        catch (...)
+        {
+            //keep the "return false" contract on allocation failure; the old
+            //array is still intact
+            print("error in isert: Reallocation fail!");
+            return false;
+        }
+        for (unsigned k = 0; k < this->length; ++k)
+            new_heap[k] = this->heap[k];
+        delete[] this->heap; //deleting NULL is fine for an empty heap
+        this->heap = new_heap;
+        /*
+        this->npmap = (struct Map*)realloc(this->npmap, 2 * this->size * sizeof(struct Map));
+        if(this->npmap == NULL)
+        {
+        print("error in insert: Reallocation fail!");
+        return false;
+        }
+        */
+        this->size = new_size;
+    }
+    //sift up: pull larger-ranked ancestors down until _np fits
+    unsigned i = this->length, j;
+    while (i != 0)
+    {
+        j = (i - 1) / 2;
+        if (_np->getRank() >= this->heap[j]->getRank())
+            break;
+        heap[i] = heap[j];
+        //this->npmap[k].pos = i; //adjust the position
+        i = j;
+    }
+    this->heap[i] = _np;
+    this->length++;
+    return true;
+}
+
+//Drop the top (smallest-rank) node and restore the heap order.
+//Returns false when the heap is already empty.
+bool
+IVHeap::remove()
+{
+    if (this->length == 0)
+    {
+        print("error in remove: remove from empty heap!");
+        return false;
+    }
+    //Node* tp = this->heap[0];
+    this->length--;
+    if (this->length == 0)
+        return true;
+    //take the last node and sift it down from the root
+    IVNode* xp = this->heap[this->length];
+    unsigned i = 0, j = 1;
+    while (j < this->length)
+    {
+        if (j < this->length - 1 && this->heap[j]->getRank() > this->heap[j + 1]->getRank())
+            j++;
+        if (xp->getRank() <= this->heap[j]->getRank())
+            break;
+        this->heap[i] = this->heap[j];
+        i = j;
+        j = 2 * i + 1;
+    }
+    this->heap[i] = xp;
+    return true;
+}
+
+//Re-position _np after its rank changed.
+//  _flag == true : the rank may have become smaller, sift up
+//  _flag == false: the rank may have become larger, sift down
+//Returns false when _np is not stored in the heap.
+bool
+IVHeap::modify(IVNode* _np, bool _flag) //control direction
+{
+    //search and adjust
+    unsigned i, j;
+    for (i = 0; i < this->length; ++i)
+        if (this->heap[i] == _np)
+            break;
+    //FIX: an unknown node used to fall through with i == length and the
+    //sift-up below then wrote past the valid part of the array
+    if (i == this->length)
+        return false;
+    if (_flag == true) //move up
+    {
+        while (i != 0)
+        {
+            j = (i - 1) / 2;
+            if (_np->getRank() < heap[j]->getRank())
+            {
+                heap[i] = heap[j];
+                heap[j] = _np;
+                i = j;
+            }
+            else
+                break;
+        }
+    }
+    else //move down
+    {
+        j = 2 * i + 1;
+        while (j < this->length)
+        {
+            if (j < this->length - 1 && heap[j]->getRank() > heap[j + 1]->getRank())
+                j++;
+            if (heap[j]->getRank() < _np->getRank())
+            {
+                heap[i] = heap[j];
+                heap[j] = _np;
+                i = j;
+                //FIX: continue with the children of the new position; without
+                //this the loop compared _np with itself (or with its array
+                //neighbour) and stopped after a single level, as remove()
+                //above already does correctly
+                j = 2 * i + 1;
+            }
+            else
+                break;
+        }
+    }
+    return true;
+}
+
+//Release the backing array; the nodes themselves are not deleted here.
+IVHeap::~IVHeap()
+{
+    delete[] this->heap;
+    this->heap = NULL;
+    this->length = this->size = 0;
+}
+
+//Debug hook; currently prints nothing even when DEBUG_KVSTORE is defined.
+void
+IVHeap::print(string s)
+{
+#ifdef DEBUG_KVSTORE
+#endif
+}
diff --git a/KVstore/IVTree/heap/IVHeap.h b/KVstore/IVTree/heap/IVHeap.h
new file mode 100644
index 0000000..0e418fd
--- /dev/null
+++ b/KVstore/IVTree/heap/IVHeap.h
@@ -0,0 +1,41 @@
+/*=============================================================================
+# Filename: IVHeap.h
+# Author: syzz
+# Mail: 1181955272@qq.com
+# Last Modified: 2015-04-26 16:37
+# Description: set and deal of IVNode*s in memory
+=============================================================================*/
+
+#ifndef _KVSTORE_IVTREE_HEAP_IVHEAP_H
+#define _KVSTORE_IVTREE_HEAP_IVHEAP_H
+
+#include "../../../Util/Util.h"
+#include "../node/IVNode.h"
+
+/* add, sub, modify: all can be done within O(logn) using adjust-function */
+//QUERY: when modified, finding right position consumes O(n). How about keeping smallest?
+//(add O(1), sub O(2n), modify O(n)
+//TODO: to solve this problem, use another hash: (pointer, pos), to find the right position of
+//given p in O(lgn) time
+
+//Binary min-heap of IVNode pointers ordered by IVNode::getRank().
+//The heap stores the pointers only; the destructor does not delete the nodes.
+class IVHeap
+{
+private:
+    IVNode** heap; //dynamic array
+    unsigned length; //valid elements num
+    unsigned size; //max-size of heap
+public:
+    IVHeap();
+    IVHeap(unsigned _size);
+    IVNode* getTop() const; //return the top element
+    unsigned getLen() const;
+    unsigned getSize() const;
+    bool isEmpty() const;
+    bool insert(IVNode* _np); //insert and adjust
+    bool remove(); //remove top and adjust
+    bool modify(IVNode* _np, bool _flag); //search modified element and adjust
+    ~IVHeap();
+    void print(std::string s); //DEBUG
+};
+
+#endif
diff --git a/KVstore/IVTree/node/IVIntlNode.cpp b/KVstore/IVTree/node/IVIntlNode.cpp
new file mode 100644
index 0000000..3f338b3
--- /dev/null
+++ b/KVstore/IVTree/node/IVIntlNode.cpp
@@ -0,0 +1,293 @@
+/*=============================================================================
+# Filename: IVIntlNode.cpp
+# Author: syzz
+# Mail: 1181955272@qq.com
+# Last Modified: 2015-04-26 16:40
+# Description: achieve functions in IVIntlNode.h
+=============================================================================*/
+
+#include "IVIntlNode.h"
+
+using namespace std;
+
+/*
+void
+IVIntlNode::AllocChilds()
+{
+childs = (Node**)malloc(sizeof(Node*) * MAX_CHILD_NUM);
+}
+*/
+
+//Internal node with every child slot cleared.
+IVIntlNode::IVIntlNode()
+{
+    //FIX: childs is declared with MAX_CHILD_NUM + 1 slots (one spare slot that
+    //addChild() shifts into); clearing only MAX_CHILD_NUM of them left the last
+    //pointer uninitialized
+    memset(childs, 0, sizeof(childs));
+    //this->AllocChilds();
+}
+
+//Same as the default constructor; isVirtual is not used by this class itself.
+IVIntlNode::IVIntlNode(bool isVirtual) //call father-class's constructor automatically
+{
+    //FIX: clear the whole array, see the default constructor
+    memset(childs, 0, sizeof(childs));
+    //this->AllocChilds();
+}
+
+/*
+IVIntlNode::IntlNode(Storage* TSM) //QUERY
+{
+TSM->readNode(this, Storage::OVER);
+}
+*/
+
+//Turn the node into its not-in-memory form: free the key array and clear
+//the in-memory flag.
+void
+IVIntlNode::Virtual()
+{
+    //this->FreeKeys();
+    this->release();
+    this->delMem();
+}
+
+//Opposite of Virtual(): allocate the key array again and set the in-memory flag.
+void
+IVIntlNode::Normal()
+{
+    this->AllocKeys();
+    this->setMem();
+}
+
+//Child pointer at _index, or NULL when _index is outside 0..num.
+IVNode*
+IVIntlNode::getChild(int _index) const
+{
+    int num = this->getNum();
+    if (_index < 0 || _index > num) //num keys, num+1 childs
+    {
+        //print(string("error in getChild: Invalid index ") + Util::int2string(_index));
+        return NULL;
+    }
+    else
+        return childs[_index];
+}
+
+//Overwrite the child pointer at _index (0..num); false on a bad index.
+bool
+IVIntlNode::setChild(IVNode* _child, int _index)
+{
+    int num = this->getNum();
+    if (_index < 0 || _index > num)
+    {
+        print(string("error in setChild: Invalid index ") + Util::int2string(_index));
+        return false;
+    }
+    this->childs[_index] = _child;
+    return true;
+}
+
+//Insert _child at _index (0..num+1), shifting the later children one slot to
+//the right. The key count is not touched here; callers adjust it themselves.
+bool
+IVIntlNode::addChild(IVNode* _child, int _index)
+{
+    int num = this->getNum();
+    if (_index < 0 || _index > num + 1)
+    {
+        print(string("error in addChild: Invalid index ") + Util::int2string(_index));
+        return false;
+    }
+    int i;
+    for (i = num; i >= _index; --i) //DEBUG: right bounder!!!
+        childs[i + 1] = childs[i];
+    childs[_index] = _child;
+    return true;
+}
+
+//Remove the child at _index (0..num) by shifting the later children one slot
+//to the left. The key count is not touched here.
+bool
+IVIntlNode::subChild(int _index)
+{
+    int num = this->getNum();
+    if (_index < 0 || _index > num)
+    {
+        print(string("error in subchild: Invalid index ") + Util::int2string(_index));
+        return false;
+    }
+    int i;
+    for (i = _index; i < num; ++i) //DEBUG: right bounder!!!
+        childs[i] = childs[i + 1];
+    return true;
+}
+
+//Size accounted for an internal node: always the fixed INTL_SIZE.
+unsigned
+IVIntlNode::getSize() const
+{
+    //unsigned sum = INTL_SIZE, num = this->getNum(), i;
+    //return sum;
+    return INTL_SIZE;
+}
+
+//Split this node, which is the child at _index of _father.
+//The keys and children from position MIN_CHILD_NUM on move into a new right
+//sibling, keys[MIN_KEY_NUM] moves up into _father at _index and the sibling
+//is linked in at _index + 1. All three nodes are marked dirty.
+//Returns the new right sibling (allocated with new).
+IVNode*
+IVIntlNode::split(IVNode* _father, int _index)
+{
+    int num = this->getNum();
+    IVNode* p = new IVIntlNode; //right child
+    p->setHeight(this->getHeight());
+    int i, k;
+    for (i = MIN_CHILD_NUM, k = 0; i < num; ++i, ++k)
+    {
+        p->addKey(this->keys[i], k);
+        p->addChild(this->childs[i], k);
+        p->addNum();
+    }
+    //one more child than keys
+    p->addChild(this->childs[i], k);
+    int tp = this->keys[MIN_KEY_NUM];
+    this->setNum(MIN_KEY_NUM);
+    _father->addKey(tp, _index);
+    _father->addChild(p, _index + 1); //DEBUG(check the index)
+    _father->addNum();
+    _father->setDirty();
+    p->setDirty();
+    this->setDirty();
+    return p;
+}
+
+//Refill this node, which is the child at _index of _father, from a neighbour.
+//Cases, chosen from the neighbours' key counts:
+//  1: merge the right neighbour into this node
+//  2: move one key/child over from the right neighbour
+//  3: merge the left neighbour into this node
+//  4: move one key/child over from the left neighbour
+//Returns the neighbour that was emptied by a merge (cases 1 and 3) so the
+//caller can dispose of it, otherwise NULL.
+IVNode*
+IVIntlNode::coalesce(IVNode* _father, int _index)
+{
+    //int num = this->getNum();
+    //FIX: p and k start out defined; they used to be uninitialized whenever
+    //the node had no neighbour at all (see the default case below)
+    int i, j = _father->getNum(), k = 0; //BETTER: unsigned?
+    IVNode* p = NULL;
+    int ccase = 0;
+    //const Bstr* bstr;
+    if (_index < j) //the right neighbor
+    {
+        p = _father->getChild(_index + 1);
+        k = p->getNum();
+        if ((unsigned)k > MIN_KEY_NUM)
+            ccase = 2;
+        else //==MIN_KEY_NUM
+            ccase = 1;
+    }
+    if (_index > 0) //the left neighbor
+    {
+        IVNode* tp = _father->getChild(_index - 1);
+        unsigned tk = tp->getNum();
+        //borrowing from the right (case 2) wins; otherwise prefer borrowing
+        //from the left (4) over merging
+        if (ccase < 2)
+        {
+            if (ccase == 0)
+                ccase = 3;
+            if (tk > MIN_KEY_NUM)
+                ccase = 4;
+        }
+        if (ccase > 2)
+        {
+            p = tp;
+            k = tk;
+        }
+    }
+
+    int tmp = 0;
+    switch (ccase)
+    {
+    case 1: //union right to this
+        this->addKey(_father->getKey(_index), this->getNum());
+        this->addNum();
+        for (i = 0; i < k; ++i)
+        {
+            this->addKey(p->getKey(i), this->getNum());
+            this->addChild(p->getChild(i), this->getNum());
+            this->addNum();
+        }
+        this->setChild(p->getChild(i), this->getNum());
+        _father->subKey(_index);
+        _father->subChild(_index + 1);
+        _father->subNum();
+        p->setNum(0);
+        //delete p;
+        break;
+    case 2: //move one from right
+        this->addKey(_father->getKey(_index), this->getNum());
+        _father->setKey(p->getKey(0), _index);
+        p->subKey(0);
+        this->addChild(p->getChild(0), this->getNum() + 1);
+        p->subChild(0);
+        this->addNum();
+        p->subNum();
+        break;
+    case 3: //union left to this
+        this->addKey(_father->getKey(_index - 1), 0);
+        this->addNum();
+        for (i = k; i > 0; --i)
+        {
+            int t = i - 1;
+            this->addKey(p->getKey(t), 0);
+            this->addChild(p->getChild(i), 0);
+            this->addNum();
+        }
+        this->addChild(p->getChild(0), 0);
+        _father->subKey(_index - 1);
+        _father->subChild(_index - 1);
+        _father->subNum();
+        p->setNum(0);
+        //delete p;
+        break;
+    case 4: //move one from left
+        tmp = p->getKey(k - 1);
+        p->subKey(k - 1);
+        this->addKey(_father->getKey(_index - 1), 0);
+        _father->setKey(tmp, _index - 1);
+        this->addChild(p->getChild(k), 0);
+        p->subChild(k);
+        this->addNum();
+        p->subNum();
+        break;
+    default:
+        //reached only when the node has neither a left nor a right neighbour
+        print("error in coalesce: Invalid case!");
+        //printf("error in coalesce: Invalid case!");
+    }
+    _father->setDirty();
+    //FIX: in the invalid case there is no neighbour to mark; the old code
+    //called setDirty() through an uninitialized pointer here
+    if (p != NULL)
+        p->setDirty();
+    this->setDirty();
+    if (ccase == 1 || ccase == 3)
+        return p;
+    else
+        return NULL;
+}
+
+//Free the key array of a node that is currently in memory.
+void
+IVIntlNode::release()
+{
+    if (!this->inMem())
+        return;
+    //unsigned num = this->getNum();
+    delete[] keys; //this will release all!!!
+} + +IVIntlNode::~IVIntlNode() +{ + release(); + //free(childs); +} + +void +IVIntlNode::print(string s) +{ +#ifdef DEBUG_KVSTORE + int num = this->getNum(); + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVIntlNode\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); + if (s == "node" || s == "NODE") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + /* + int i; + for (i = 0; i < num; ++i) + { + if (s == "node") + this->keys[i].print("bstr"); + else + this->keys[i].print("BSTR"); + } + */ + } + else if (s == "check node") + { + //TODO(check node, if satisfy B+ definition) + } + else; +#endif +} diff --git a/KVstore/IVTree/node/IVIntlNode.h b/KVstore/IVTree/node/IVIntlNode.h new file mode 100644 index 0000000..5d0932f --- /dev/null +++ b/KVstore/IVTree/node/IVIntlNode.h @@ -0,0 +1,48 @@ +/*============================================================================= +# Filename: IVIntlNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:40 +# Description: the internal-node of a B+ tree +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVINTLNODE_H +#define _KVSTORE_IVTREE_NODE_IVINTLNODE_H + +#include "IVNode.h" + +class IVIntlNode : public IVNode +{ +protected: + IVNode* childs[MAX_CHILD_NUM + 1]; + //Node** childs; + //void AllocChilds(); +public: + IVIntlNode(); + IVIntlNode(bool isVirtual); + //IntlNode(Storage* TSM); + void Virtual(); + void Normal(); + IVNode* getChild(int _index) const; + bool setChild(IVNode* _child, int _index); + bool addChild(IVNode* _child, int _index); + bool subChild(int _index); + unsigned getSize() const; + IVNode* split(IVNode* _father, int _index); + IVNode* coalesce(IVNode* _father, int _index); + void release(); + ~IVIntlNode(); + void print(std::string s); //DEBUG + 
/*non-sense functions: polymorphic + Node* getPrev() const; + Node* getNext() const; + const Bstr* getValue(int _index) const; + bool setValue(const Bstr* _value, int _index); + bool addValue(const Bstr* _value, int _index); + bool subValue(int _index); + void setPrev(Node* _prev); + void setNext(Node* _next); + */ +}; + +#endif diff --git a/KVstore/IVTree/node/IVLeafNode.cpp b/KVstore/IVTree/node/IVLeafNode.cpp new file mode 100644 index 0000000..a35bd6f --- /dev/null +++ b/KVstore/IVTree/node/IVLeafNode.cpp @@ -0,0 +1,443 @@ +/*============================================================================= +# Filename: IVLeafNode.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:40 +# Description: ahieve functions in IVLeafNode.h +=============================================================================*/ + +#include "IVLeafNode.h" + +using namespace std; + +void +IVLeafNode::AllocValues() +{ + values = new Bstr[MAX_KEY_NUM]; +} + +/* +void +IVLeafNode::FreeValues() +{ +delete[] values; +} +*/ + +IVLeafNode::IVLeafNode() +{ + flag |= NF_IL; //leaf flag + prev = next = NULL; + AllocValues(); +} + +IVLeafNode::IVLeafNode(bool isVirtual) +{ + flag |= NF_IL; + prev = next = NULL; + if (!isVirtual) + AllocValues(); +} + +/* +IVLeafNode::LeafNode(Storage* TSM) +{ +AllocValues(); +TSM->readNode(this, Storage::OVER); +} +*/ + +void +IVLeafNode::Virtual() +{ + //this->FreeKeys(); + //this->FreeValues(); + this->release(); + this->delMem(); +} + +void +IVLeafNode::Normal() +{ + this->AllocKeys(); + this->AllocValues(); + this->setMem(); +} + +IVNode* +IVLeafNode::getPrev() const +{ + return prev; +} + +IVNode* +IVLeafNode::getNext() const +{ + return next; +} + +const Bstr* +IVLeafNode::getValue(int _index) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getValue: Invalid index ") + Util::int2string(_index)); + return NULL; + } + else + return this->values + _index; +} + +bool 
+IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const +{ + //TODO: read long list + return true; +} + +bool +IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) +{ + //TODO: consider the long list, how to cancel and reset + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + this->values[_index].release(); //NOTICE: only used in modify + + //DEBUG: we do not need to copy here + //we just need to ensure that the pointer's memory is not released + + //if (ifcopy) + //{ + //this->values[_index].copy(_value); + //} + //else + //{ + //this->values[_index] = *_value; + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + //} + return true; +} + +bool +IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) +{ + //TODO:if the list is too large + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + //if (ifcopy) + //this->values[_index].copy(_value); + //else + //this->values[_index] = *_value; + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + + return true; +} + +bool +IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + if (ifcopy) + this->values[_index].copy(_value); + else + this->values[_index] = *_value; + + return true; +} + +bool +IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) +{ + //TODO: if is to sub long list + int num = 
this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + int i; + if (ifdel) + values[_index].release(); + for (i = _index; i < num - 1; ++i) + this->values[i] = this->values[i + 1]; + + return true; +} + +bool +IVLeafNode::subValue(int _index, bool ifdel) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + int i; + if (ifdel) + values[_index].release(); + for (i = _index; i < num - 1; ++i) + this->values[i] = this->values[i + 1]; + + return true; +} + +void +IVLeafNode::setPrev(IVNode* _prev) +{ + this->prev = _prev; +} + +void +IVLeafNode::setNext(IVNode* _next) +{ + this->next = _next; +} + +unsigned +IVLeafNode::getSize() const +{ + unsigned sum = LEAF_SIZE, num = this->getNum(), i; + for (i = 0; i < num; ++i) + { + sum += values[i].getLen(); + } + return sum; +} + +IVNode* +IVLeafNode::split(IVNode* _father, int _index) +{ + int num = this->getNum(); + IVNode* p = new IVLeafNode; //right child + p->setHeight(this->getHeight()); //NOTICE: assign height for new node + p->setNext(this->next); + this->setNext(p); + p->setPrev(this); + int i, k; + for (i = MIN_KEY_NUM, k = 0; i < num; ++i, ++k) + { + p->addKey(this->keys[i], k); + p->addValue(this->values + i, k); + p->addNum(); + } + int tp = this->keys[MIN_KEY_NUM]; + this->setNum(MIN_KEY_NUM); + _father->addKey(tp, _index); + _father->addChild(p, _index + 1); //DEBUG(check the index) + _father->addNum(); + _father->setDirty(); + p->setDirty(); + this->setDirty(); + return p; +} + +IVNode* +IVLeafNode::coalesce(IVNode* _father, int _index) +{ //add a key or coalesce a neighbor to this + int i, j = _father->getNum(), k; //BETTER: unsigned? 
+ IVNode* p = NULL; + int ccase = 0; + //const Bstr* bstr; + if (_index < j) //the right neighbor + { + p = _father->getChild(_index + 1); + k = p->getNum(); + if ((unsigned)k > MIN_KEY_NUM) + ccase = 2; + else //==MIN_KEY_NUM + ccase = 1; + } + if (_index > 0) //the left neighbor + { + IVNode* tp = _father->getChild(_index - 1); + unsigned tk = tp->getNum(); + if (ccase < 2) + { + if (ccase == 0) + ccase = 3; + if (tk > MIN_KEY_NUM) + ccase = 4; + } + if (ccase > 2) + { + p = tp; + k = tk; + } + } + + int tmp = 0; + switch (ccase) + { + case 1: //union right to this + for (i = 0; i < k; ++i) + { + this->addKey(p->getKey(i), this->getNum()); + this->addValue(p->getValue(i), this->getNum()); + this->addNum(); + } + _father->subKey(_index); + _father->subChild(_index + 1); + _father->subNum(); + this->next = p->getNext(); + if (this->next != NULL) + this->next->setPrev(this); + p->setNum(0); //NOTICE: adjust num before delete! + //delete p; + break; + case 2: //move one from right + this->addKey(p->getKey(0), this->getNum()); + _father->setKey(p->getKey(1), _index); + p->subKey(0); + this->addValue(p->getValue(0), this->getNum()); + p->subValue(0); + this->addNum(); + p->subNum(); + break; + case 3: //union left to this + //BETTER: move all keys/etc one time + for (i = k; i > 0; --i) + { + int t = i - 1; + this->addKey(p->getKey(t), 0); + this->addValue(p->getValue(t), 0); + this->addNum(); + } + _father->subKey(_index - 1); + _father->subChild(_index - 1); + _father->subNum(); + this->prev = p->getPrev(); + if (this->prev != NULL) //else: leaves-list + this->prev->setNext(this); + p->setNum(0); + //delete p; + break; + case 4: //move one from left + tmp = p->getKey(k - 1); + p->subKey(k - 1); + this->addKey(tmp, 0); + _father->setKey(tmp, _index - 1); + this->addValue(p->getValue(k - 1), 0); + p->subValue(k - 1); + this->addNum(); + p->subNum(); + break; + default: + print("error in coalesce: Invalid case!"); + //printf("error in coalesce: Invalid case!"); + } + 
_father->setDirty(); + p->setDirty(); + this->setDirty(); + if (ccase == 1 || ccase == 3) + return p; + else + return NULL; +} + +void +IVLeafNode::release() +{ + if (!this->inMem()) + return; + unsigned num = this->getNum(); + /* + for(int i = 0; i < num; ++i) + { + keys[i].release(); + values[i].release(); + } + */ + for (unsigned i = num; i < MAX_KEY_NUM; ++i) + { + values[i].clear(); + } + delete[] keys; + delete[] values; +} + +IVLeafNode::~IVLeafNode() +{ + release(); +} + +void +IVLeafNode::print(string s) +{ +#ifdef DEBUG_KVSTORE + unsigned num = this->getNum(); + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVLeafNode\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); + unsigned i; + if (s == "NODE") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next); + for (i = 0; i < num; ++i) + { + //this->keys[i].print("BSTR"); + this->values[i].print("BSTR"); + } + } + else if (s == "node") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next); + } + else if (s == "check node") + { + //check the node, if satisfy B+ definition + bool flag = true; + if (num < MIN_KEY_NUM || num > MAX_KEY_NUM) + flag = false; + if (flag) + { + for (i = 1; i < num; ++i) + { + if (keys[i] > keys[i - 1]) + continue; + else + break; + } + if (i < num) + flag = false; + } + this->print("node"); + if (flag) + fprintf(Util::debug_kvstore, "This node is good\n"); + else + fprintf(Util::debug_kvstore, "This node is bad\n"); + } + else; +#endif +} + diff --git a/KVstore/IVTree/node/IVLeafNode.h b/KVstore/IVTree/node/IVLeafNode.h new file mode 100644 index 0000000..56638bd --- /dev/null +++ b/KVstore/IVTree/node/IVLeafNode.h @@ -0,0 
+1,56 @@ +/*============================================================================= +# Filename: IVLeafNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:39 +# Description: the leaf-node of a B+ tree +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVLEAFNODE_H +#define _KVSTORE_IVTREE_NODE_IVLEAFNODE_H + +#include "IVNode.h" + +class IVLeafNode : public IVNode +{ +protected: + IVNode* prev; //LeafNode + IVNode* next; + Bstr* values; + void AllocValues(); + //void FreeValues(); +public: + IVLeafNode(); + IVLeafNode(bool isVirtual); + //LeafNode(Storage* TSM); + void Virtual(); + void Normal(); + IVNode* getPrev() const; + IVNode* getNext() const; + const Bstr* getValue(int _index) const; + bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const; + bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false); + + bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false); + bool subValue(VList* _vlist, int _index, bool ifdel = false); + bool addValue(const Bstr* _val, int _index, bool ifcopy = false); + bool subValue(int _index, bool ifdel = false); + + void setPrev(IVNode* _prev); + void setNext(IVNode* _next); + unsigned getSize() const; + IVNode* split(IVNode* _father, int _index); + IVNode* coalesce(IVNode* _father, int _index); + void release(); + ~IVLeafNode(); + void print(std::string s); //DEBUG + /*non-sense virtual function + Node* getChild(int _index) const; + bool addChild(Node* _child, int _index); + bool subChild(int _index); + */ +}; +//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next + +#endif + diff --git a/KVstore/IVTree/node/IVNode.cpp b/KVstore/IVTree/node/IVNode.cpp new file mode 100644 index 0000000..f688303 --- /dev/null +++ b/KVstore/IVTree/node/IVNode.cpp @@ -0,0 +1,320 @@ 
+/*============================================================================= +# Filename: IVNode.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:39 +# Description: achieve functions in IVNode.h +=============================================================================*/ + +#include "IVNode.h" + +using namespace std; + +void +IVNode::AllocKeys() +{ + keys = new int[MAX_KEY_NUM]; +} + +/* +void +IVNode::FreeKeys() +{ +delete[] keys; +} +*/ + +IVNode::IVNode() +{ + store = flag = 0; + flag |= NF_IM; + AllocKeys(); +} + +IVNode::IVNode(bool isVirtual) +{ + store = flag = 0; + if (!isVirtual) + { + flag |= NF_IM; + AllocKeys(); + } +} + +/* +IVNode::Node(Storage* TSM) +{ +AllocKeys(); +TSM->readIVNode(this, Storage::OVER); +} +*/ +bool +IVNode::isLeaf() const +{ + return this->flag & NF_IL; +} + +bool +IVNode::isDirty() const +{ + return this->flag & NF_ID; +} + +void +IVNode::setDirty() +{ + this->flag |= NF_ID; +} + +void +IVNode::delDirty() +{ + this->flag &= ~NF_ID; +} + +bool +IVNode::inMem() const +{ + return this->flag & NF_IM; +} + +void +IVNode::setMem() +{ + this->flag |= NF_IM; +} + +void +IVNode::delMem() +{ + this->flag &= ~NF_IM; +} + +/* +bool +IVNode::isVirtual() const +{ +return this->flag & NF_IV; +} + +void +IVNode::setVirtual() +{ +this->flag |= NF_IV; +} + +void +IVNode::delVirtual() +{ +this->flag &= ~NF_IV; +} +*/ + +unsigned +IVNode::getRank() const +{ + return this->flag & NF_RK; +} + +void +IVNode::setRank(unsigned _rank) +{ + this->flag &= ~NF_RK; + this->flag |= _rank; +} + +unsigned +IVNode::getHeight() const +{ + return (this->flag & NF_HT) >> 20; +} + +void +IVNode::setHeight(unsigned _h) +{ + this->flag &= ~NF_HT; + this->flag |= (_h << 20); +} + +unsigned +IVNode::getNum() const +{ + return (this->flag & NF_KN) >> 12; +} + +bool +IVNode::setNum(int _num) +{ + if (_num < 0 || (unsigned)_num > MAX_KEY_NUM) + { + print(string("error in setNum: Invalid num ") + Util::int2string(_num)); + return false; + 
} + this->flag &= ~NF_KN; + this->flag |= (_num << 12); + return true; +} + +bool +IVNode::addNum() +{ + if (this->getNum() + 1 > MAX_KEY_NUM) + { + print("error in addNum: Invalid!"); + return false; + } + this->flag += (1 << 12); + return true; +} + +bool +IVNode::subNum() +{ + if (this->getNum() < 1) + { + print("error in subNum: Invalid!"); + return false; + } + this->flag -= (1 << 12); + return true; +} + +unsigned +IVNode::getStore() const +{ + return this->store; +} + +void +IVNode::setStore(unsigned _store) +{ + this->store = _store; +} + +unsigned +IVNode::getFlag() const +{ + return flag; +} + +void +IVNode::setFlag(unsigned _flag) +{ + this->flag = _flag; +} + +int +IVNode::getKey(int _index) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getKey: Invalid index ") + Util::int2string(_index)); + printf("error in getKey: Invalid index\n"); + return -1; + } + else + return this->keys[_index]; +} + +bool +IVNode::setKey(int _key, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setKey: Invalid index ") + Util::int2string(_index)); + return false; + } + keys[_index] = _key; + return true; +} + +bool +IVNode::addKey(int _key, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + //NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!! + //however. tree operations ensure that: when node is full, not add but split first! 
+ for (i = num - 1; i >= _index; --i) + keys[i + 1] = keys[i]; + keys[_index] = _key; + return true; +} + +bool +IVNode::subKey(int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = _index; i < num - 1; ++i) + keys[i] = keys[i + 1]; + return true; +} + +int +IVNode::searchKey_less(int _key) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + //if(bstr < *(p->getKey(i))) + //break; + + int low = 0, high = num - 1, mid = -1; + while (low <= high) + { + mid = (low + high) / 2; + if (this->keys[mid] > _key) + { + if (low == mid) + break; + high = mid; + } + else + { + low = mid + 1; + } + } + return low; +} + +int +IVNode::searchKey_equal(int _key) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + // if(bstr == *(p->getKey(i))) + // { + + int ret = this->searchKey_less(_key); + if (ret > 0 && this->keys[ret - 1] == _key) + return ret - 1; + else + return num; +} + +int +IVNode::searchKey_lessEqual(int _key) const +{ + //int num = this->getNum(); + //for(i = 0; i < num; ++i) + //if(bstr <= *(p->getKey(i))) + //break; + + int ret = this->searchKey_less(_key); + if (ret > 0 && this->keys[ret - 1] == _key) + return ret - 1; + else + return ret; +} diff --git a/KVstore/IVTree/node/IVNode.h b/KVstore/IVTree/node/IVNode.h new file mode 100644 index 0000000..20d6cfe --- /dev/null +++ b/KVstore/IVTree/node/IVNode.h @@ -0,0 +1,119 @@ +/*============================================================================= +# Filename: IVNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:38 +# Description: basic Node class, father of IVIntlNode and IVLeafNode +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVNODE_H +#define _KVSTORE_IVTREE_NODE_IVNODE_H + +#include "../../../Util/Util.h" +#include 
"../../../Util/Bstr.h" +#include "../vlist/VList.h" + +class IVNode //abstract basic class +{ +public: + static const unsigned DEGREE = 2 * 63; //the degree of B+ tree + static const unsigned MAX_CHILD_NUM = DEGREE; + static const unsigned MIN_CHILD_NUM = DEGREE >> 1; + static const unsigned MAX_KEY_NUM = MAX_CHILD_NUM - 1; //max key-num + static const unsigned MIN_KEY_NUM = MIN_CHILD_NUM - 1; //min key-num + /* diffrent flags for tree-nodes, 32-bit put rank in low-bits means no need to move*/ + static const unsigned NF_IL = 0x80000000; //is leaf + static const unsigned NF_ID = 0x00080000; //is dirty, in rank-area + static const unsigned NF_IM = 0x20000000; //in memory, not virtual + //static const unsigned NF_IV = 0x10000000; //is virtual + static const unsigned NF_RK = 0x00ffffff; //select-rank, in Storage + static const unsigned NF_HT = 0xf00000; //height area in rank + static const unsigned NF_KN = 0x07f000; //NOTICE: decided by DEGREE + static const unsigned INTL_SIZE = sizeof(int) * MAX_KEY_NUM; + static const unsigned LEAF_SIZE = INTL_SIZE + sizeof(Bstr) * MAX_KEY_NUM; +protected: + unsigned store; //store address, the BLock index + unsigned flag; //NF_RK, NF_IL,NF_ID, NF_IV, propety + //int num; //totle keys num + //Node* father; //point to father-node, which must be IntlNode + int* keys; + void AllocKeys(); + //void FreeKeys(); +public: + IVNode(); + IVNode(bool isVirtual); + bool isLeaf() const; + bool isDirty() const; + void setDirty(); + void delDirty(); + bool inMem() const; + void setMem(); + void delMem(); + //bool isVirtual() const; + //void setVirtual(); + //void delVirtual(); + unsigned getRank() const; + void setRank(unsigned _rank); + unsigned getHeight() const; + void setHeight(unsigned _h); + unsigned getNum() const; + bool setNum(int _num); + bool addNum(); + bool subNum(); + unsigned getStore() const; + void setStore(unsigned _store); + unsigned getFlag() const; + void setFlag(unsigned _flag); + int getKey(int _index) const; //need to check 
the index + bool setKey(int _key, int _index); + bool addKey(int _key, int _index); + bool subKey(int _index); + + //several binary key search utilities + int searchKey_less(int _key) const; + int searchKey_equal(int _key) const; + int searchKey_lessEqual(int _key) const; + + //virtual functions: polymorphic + virtual IVNode* getChild(int _index) const { return NULL; }; + virtual bool setChild(IVNode* _child, int _index) { return true; }; + virtual bool addChild(IVNode* _child, int _index) { return true; }; + virtual bool subChild(int _index) { return true; }; + virtual IVNode* getPrev() const { return NULL; }; + virtual IVNode* getNext() const { return NULL; }; + virtual const Bstr* getValue(int _index) const { return NULL; }; + virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; }; + virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; + + virtual bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; + virtual bool subValue(VList* _vlist, int _index, bool ifdel = false) { return true; }; + virtual bool addValue(const Bstr* _val, int _index, bool ifcopy = false) { return true; }; + virtual bool subValue(int _index, bool ifdel = false) { return true; }; + + virtual void setPrev(IVNode* _prev) {}; + virtual void setNext(IVNode* _next) {}; + virtual void Virtual() = 0; + virtual void Normal() = 0; + virtual unsigned getSize() const = 0; //return all memory owned + virtual IVNode* split(IVNode* _father, int _index) = 0; + virtual IVNode* coalesce(IVNode* _father, int _index) = 0; + virtual void release() = 0; //release the node, only remain necessary information + virtual ~IVNode() {}; + virtual void print(std::string s) = 0; //DEBUG(print the Node) +}; + +/*NOTICE(operations in release()) +*To save memory, we can only remain store and flag(childs added for Leaf). 
+*However, in this way childs'pointers is ok to change, use Node** or Node*& is also nonsense +*because the pointer variable may die. +*Another way is only to release dynamic memory, and store thw whole, read the Bstr only to +*build. In this way nodes'pointer doesn't change, and operation is simplified, while memory +*is consumed a bit more. Because Bstrs consume the most memory, and memory-disk swapping is +*the most time-consuming thing, it seems to be a better way. +*WARN:when a node is in memory and not deleted, its basic content is always being! If nodes are +*really too many, this will cause disaster because we can't swap them out until tree is closed! +*To solve this problem, there should be two types of release-function: one to release Bstr, one +*to release the whole(pointer is invalid and rebuild problem) +*/ + +#endif diff --git a/KVstore/IVTree/storage/IVStorage.cpp b/KVstore/IVTree/storage/IVStorage.cpp new file mode 100644 index 0000000..d203d81 --- /dev/null +++ b/KVstore/IVTree/storage/IVStorage.cpp @@ -0,0 +1,722 @@ +/*============================================================================= +# Filename: IVStorage.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:43 +# Description: achieve functions in IVStorage.h +=============================================================================*/ + +#include "IVStorage.h" + +using namespace std; + +IVStorage::IVStorage() +{ //not use ../logs/, notice the location of program + cur_block_num = SET_BLOCK_NUM; + filepath = ""; + freelist = NULL; + treefp = NULL; + max_buffer_size = Util::MAX_BUFFER_SIZE; + heap_size = max_buffer_size / IVNode::INTL_SIZE; + freemem = max_buffer_size; + minheap = NULL; + this->value_list = NULL; +} + +IVStorage::IVStorage(string& _filepath, string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist) +{ + cur_block_num = SET_BLOCK_NUM; //initialize + this->filepath = _filepath; + if (_mode == string("build")) + treefp = 
fopen(_filepath.c_str(), "w+b"); + else if (_mode == string("open")) + treefp = fopen(_filepath.c_str(), "r+b"); + else + { + print(string("error in IVStorage: Invalid mode ") + _mode); + return; + } + if (treefp == NULL) + { + print(string("error in IVStorage: Open error ") + _filepath); + return; + } + this->treeheight = _height; //originally set to 0 + this->max_buffer_size = _buffer_size; + this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE; + this->freemem = this->max_buffer_size; + this->freelist = new BlockInfo; //null-head + unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE + BlockInfo* bp; + if (_mode == "build") + { //write basic information + i = 0; + fwrite(&i, sizeof(unsigned), 1, this->treefp); //height + fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum + fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + fputc(0, this->treefp); + for (k = 0; k < 8; ++k) + { + bp->next = new BlockInfo(i * 8 + k + 1, NULL); + bp = bp->next; + } + } + } + else //_mode == "open" + { + //read basic information + int rootnum; + char c; + fread(this->treeheight, sizeof(unsigned), 1, this->treefp); + fread(&rootnum, sizeof(unsigned), 1, this->treefp); + fread(&cur_block_num, sizeof(unsigned), 1, this->treefp); + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + c = fgetc(treefp); + for (k = 0; k < 8; ++k) + { + if ((c & (1 << k)) == 0) + { + bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL); + bp = bp->next; + } + } + } + fseek(treefp, Address(rootnum), SEEK_SET); + //treefp is now ahead of root-block + } + + this->minheap = new IVHeap(this->heap_size); + this->value_list = _vlist; +} + +bool +IVStorage::preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail) //pre-read and build whole tree +{ //set root(in memory) and 
leaves_head + //TODO: false when exceed memory + _leaves_tail = _leaves_head = _root = NULL; + if (ftell(this->treefp) == 0) //root is null + { + return true; + } + unsigned next, store, j, pos = 0; + unsigned h = *this->treeheight; + IVNode* p; + //read root node + this->createNode(p); + _root = p; + fread(&next, sizeof(unsigned), 1, treefp); + //use stack to achieve + long address[h]; //current address + unsigned used[h]; //used child num + unsigned total[h]; //total child num + unsigned block[h]; //next block num + IVNode* nodes[h]; + address[pos] = ftell(treefp); + used[pos] = 0; + total[pos] = p->getNum() + 1; + block[pos] = next; + nodes[pos] = p; + pos++; + IVNode* prev = NULL; + while (pos > 0) + { + j = pos - 1; + if (nodes[j]->isLeaf() || used[j] == total[j]) //LeafNode or ready IntlNode + { + if (nodes[j]->isLeaf()) + { + if (prev != NULL) + { + prev->setNext(nodes[j]); + nodes[j]->setPrev(prev); + } + prev = nodes[j]; + } + pos--; + continue; + } + fseek(this->treefp, address[j], SEEK_SET); + fread(&store, sizeof(unsigned), 1, treefp); + this->ReadAlign(block + j); + address[j] = ftell(treefp); + fseek(treefp, Address(store), SEEK_SET); + this->createNode(p); + nodes[j]->setChild(p, used[j]); + used[j]++; + fread(&next, sizeof(unsigned), 1, treefp); + address[pos] = ftell(treefp); + used[pos] = 0; + total[pos] = p->getNum() + 1; + block[pos] = next; + nodes[pos] = p; + pos++; + } + //set leaves and read root, which is always keeped in-mem + p = _root; + while (!p->isLeaf()) + { + p = p->getChild(0); + } + _leaves_head = p; + p = _root; + while (!p->isLeaf()) + { + p = p->getChild(p->getNum()); + } + _leaves_tail = p; + long long memory = 0; + this->readNode(_root, &memory); + this->request(memory); + return true; +} + +long //8-byte in 64-bit machine +IVStorage::Address(unsigned _blocknum) const //BETTER: inline function +{ + if (_blocknum == 0) + return 0; + else if (_blocknum > cur_block_num) + { + //print(string("error in Address: Invalid blocknum ") 
+ Util::int2string(_blocknum)); + return -1; //address should be non-negative + } + //NOTICE: here should explictly use long + return (long)(this->SuperNum + _blocknum - 1) * (long)BLOCK_SIZE; +} + +unsigned +IVStorage::Blocknum(long address) const +{ + return (address / BLOCK_SIZE) + 1 - this->SuperNum; +} + +unsigned +IVStorage::AllocBlock() +{ + BlockInfo* p = this->freelist->next; + if (p == NULL) + { + for (unsigned i = 0; i < SET_BLOCK_INC; ++i) + { + cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM + this->FreeBlock(cur_block_num); + } + p = this->freelist->next; + } + unsigned t = p->num; + this->freelist->next = p->next; + delete p; + return t; +} + +void +IVStorage::FreeBlock(unsigned _blocknum) +{ //QUERY: head-sub and tail-add will be better? + BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next); + this->freelist->next = bp; +} + +//NOTICE: all reads are aligned to 4 bytes(including a string) +//a string may acrossseveral blocks + +void +IVStorage::ReadAlign(unsigned* _next) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + fseek(treefp, Address(*_next), SEEK_SET); + fread(_next, sizeof(unsigned), 1, treefp); + } +} + +void +IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + unsigned blocknum = this->AllocBlock(); + fseek(treefp, Address(*_curnum), SEEK_SET); + if (_SpecialBlock) + { + fseek(treefp, 4, SEEK_CUR); + _SpecialBlock = false; + } + fwrite(&blocknum, sizeof(unsigned), 1, treefp); + fseek(treefp, Address(blocknum) + 4, SEEK_SET); + *_curnum = blocknum; + } +} + +bool +IVStorage::readNode(IVNode* _np, long long* _request) +{ + if (_np == NULL || _np->inMem()) + return false; //can't read or needn't + + fseek(treefp, Address(_np->getStore()), SEEK_SET); + bool flag = _np->isLeaf(); + unsigned next; + unsigned i, num = _np->getNum(); + Bstr bstr; + fseek(treefp, 4, SEEK_CUR); + fread(&next, sizeof(unsigned), 1, treefp); + + //read data, use readBstr... 
+ //fread(treefp, "%u", &num); + //_np->setNum(num); + if (flag) + *_request += IVNode::LEAF_SIZE; + else + *_request += IVNode::INTL_SIZE; + _np->Normal(); + if (!flag) + fseek(treefp, 4 * (num + 1), SEEK_CUR); + + //to read all keys + int tmp = -1; + for (i = 0; i < num; ++i) + { + fread(&tmp, sizeof(int), 1, treefp); + this->ReadAlign(&next); + _np->setKey(tmp, i); + } + + if (flag) + { + //to read all values + for (i = 0; i < num; ++i) + { + this->readBstr(&bstr, &next); + //if not long list value + if(bstr.getStr() != NULL) + { + *_request += bstr.getLen(); + } + _np->setValue(&bstr, i); + } + } + //_np->setFlag((_np->getFlag() & ~Node::NF_IV & ~Node::NF_ID) | Node::NF_IM); + //_np->delVirtual(); + _np->delDirty(); + //_np->setMem(); + this->updateHeap(_np, _np->getRank(), false); + bstr.clear(); + return true; +} + +bool +IVStorage::createNode(IVNode*& _np) //cretae virtual nodes, not in-mem +{ + /* + if(ftell(this->treefp)== 0) //null root + { + _np = NULL; + return false; + } + */ + unsigned t; //QUERY: maybe next-flag... will be better-storage? 
+ bool flag = false; //IntlNode + fread(&t, sizeof(unsigned), 1, treefp); + if ((t & IVNode::NF_IL) > 0) //WARN: according to setting + flag = true; //LeafNode + if (flag) + { + //this->request(sizeof(LeafNode)); + _np = new IVLeafNode(true); + } + else + { + //this->request(sizeof(IntlNode)); + _np = new IVIntlNode(true); + } + //fseek(treefp, -4, SEEK_CUR); + //_np->setFlag(_np->getFlag() | (t & Node::NF_RK)); + //_np->setRank(t); + _np->setFlag(t); + _np->delDirty(); + _np->delMem(); + _np->setStore(Blocknum(ftell(treefp) - 4)); + return true; +} + +bool +IVStorage::writeNode(IVNode* _np) +{ + if (_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty())) + return false; //not need to write back + + unsigned num = _np->getNum(), i; + bool flag = _np->isLeaf(), SpecialBlock = true; + /* + if(!flag) + { + for(i = 0; i <= num; ++i) + if(_np->getChild(i)->isDirty()) + return false; //NOTICE: all childs must be clean! + } + */ + //to release original blocks + unsigned store = _np->getStore(), next; + //if first store is 0, meaning a new node + fseek(this->treefp, Address(store) + 4, SEEK_SET); + fread(&next, sizeof(unsigned), 1, treefp); + while (store != 0) + { + this->FreeBlock(store); + store = next; + fseek(treefp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, treefp); + } + if (num == 0) + return true; //node is empty! 
+ unsigned t; + //write Node information + unsigned blocknum = this->AllocBlock(); + _np->setStore(blocknum); + long address = this->Address(blocknum); + fseek(this->treefp, address, SEEK_SET); + t = _np->getFlag(); + fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG + fseek(treefp, 4, SEEK_CUR); + if (!flag) + { + for (i = 0; i <= num; ++i) + { + t = _np->getChild(i)->getStore(); + fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG + this->WriteAlign(&blocknum, SpecialBlock); + } + } + + int tmp = 0; + //to write all keys + for (i = 0; i < num; ++i) + { + tmp = _np->getKey(i); + fwrite(&tmp, sizeof(int), 1, treefp); + this->WriteAlign(&blocknum, SpecialBlock); + } + + if (flag) + { + //to write all values + for (i = 0; i < num; ++i) + { + this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock); + } + } + fseek(treefp, Address(blocknum), SEEK_SET); + if (SpecialBlock) + fseek(treefp, 4, SEEK_CUR); + t = 0; + fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block + //_np->setFlag(_np->getFlag() & ~Node::NF_ID); + //NOTICE:we may store the dirty bit into the tree file, but that is ok + //Each time we read the tree file to construct a node, we always set the drity bit to 0 + _np->delDirty(); + return true; +} + +bool +IVStorage::readBstr(Bstr* _bp, unsigned* _next) +{ + //long address; + unsigned len, i, j; + fread(&len, sizeof(unsigned), 1, this->treefp); + this->ReadAlign(_next); + + //NOTICE: if this is a long list as value + if(len == 0) + { + unsigned addr = 0; + fread(&addr, sizeof(unsigned), 1, this->treefp); + _bp->setLen(addr); + _bp->setStr(NULL); + this->ReadAlign(_next); + return true; + } + + //this->request(len); + char* s = (char*)malloc(len); + _bp->setLen(len); + for (i = 0; i + 4 < len; i += 4) + { + fread(s + i, sizeof(char), 4, treefp); + this->ReadAlign(_next); + } + while (i < len) + { + fread(s + i, sizeof(char), 1, treefp); //BETTER + i++; + } + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->ReadAlign(_next); + 
_bp->setStr(s); + + return true; +} + +bool +IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +{ + unsigned i, j, len = _bp->getLen(); + + //NOTICE: to write long list value + if(_bp->getStr() == NULL) + { + unsigned flag = 0; + fwrite(&flag, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + //then this is the real block num + fwrite(&len, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + return true; + } + + fwrite(&len, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + + char* s = _bp->getStr(); + for (i = 0; i + 4 < len; i += 4) + { + fwrite(s + i, sizeof(char), 4, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + } + while (i < len) + { + fwrite(s + i, sizeof(char), 1, treefp); + i++; + } + + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->WriteAlign(_curnum, _SpecialBlock); + + return true; +} + +bool +IVStorage::writeTree(IVNode* _root) //write the whole tree back and close treefp +{ + fseek(this->treefp, 0, SEEK_SET); + fwrite(this->treeheight, sizeof(unsigned), 1, treefp); + //delete all nonsense-node in heap, otherwise will waste storage permanently + IVNode* p; + while (1) + { //all non-sense nodes will be in-head-area, due to minimal rank + p = minheap->getTop(); + if (p == NULL) //heap is empty, only when root==NULL + break; + if (p->getRank() == 0) //indicate non-sense node + { + this->minheap->remove(); + this->writeNode(p); + delete p; + } + else + break; + } + + unsigned i, j, t; + //QUERY: another way to write all nodes back is to print out all nodes in heap + //but this method will cause no node in heap any more, while operations may be + //afetr tree-saving. Which method is better? 
+ //write nodes recursively using stack, including root-num + if (_root != NULL) + { + IVNode* p = _root; + unsigned h = *this->treeheight, pos = 0; + IVNode* ns[h]; + int ni[h]; + ns[pos] = p; + ni[pos] = p->getNum(); + pos++; + while (pos > 0) + { + j = pos - 1; + p = ns[j]; + if (p->isLeaf() || ni[j] < 0) //leaf or all childs are ready + { + this->writeNode(p); + pos--; + continue; + } + ns[pos] = p->getChild(ni[j]); + ni[pos] = ns[pos]->getNum(); + pos++; + ni[j]--; + } + t = _root->getStore(); + } + else + t = 0; + + fseek(this->treefp, 4, SEEK_SET); + fwrite(&t, sizeof(unsigned), 1, treefp); //write the root num + fwrite(&cur_block_num, sizeof(unsigned), 1, treefp);//write current blocks num + fseek(treefp, BLOCK_SIZE, SEEK_SET); + j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE; + //reset to 1 first + for (i = 0; i < j; ++i) + { + fputc(0xff, treefp); + } + char c; + BlockInfo* bp = this->freelist->next; + while (bp != NULL) + { + //if not-use then set 0, aligned to byte! +#ifdef DEBUG_KVSTORE + if (bp->num > cur_block_num) + { + printf("blocks num exceed, cur_block_num: %u\n", cur_block_num); + exit(1); + } +#endif + j = bp->num - 1; + i = j / 8; + j = 7 - j % 8; + fseek(treefp, BLOCK_SIZE + i, SEEK_SET); + c = fgetc(treefp); + fseek(treefp, -1, SEEK_CUR); + fputc(c & ~(1 << j), treefp); + bp = bp->next; + } + //fclose(this->treefp); + return true; +} + +void +IVStorage::updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const +{ + if (_inheap) //already in heap, to modify + { + unsigned t = _np->getRank(); + _np->setRank(_rank); + if (t < _rank) + this->minheap->modify(_np, false); + else if (t > _rank) + this->minheap->modify(_np, true); + else; + } + else //not in heap, to add + { + _np->setRank(_rank); + this->minheap->insert(_np); + } +} + +bool +IVStorage::request(long long _needmem) //aligned to byte +{ //NOTICE: <0 means release + //cout<<"freemem: "<freemem<<" needmem: "<<_needmem< 0 && this->freemem < (unsigned long long)_needmem) + if 
(!this->handler(_needmem - freemem)) //disaster in buffer memory + { + print(string("error in request: out of buffer-mem, now to exit")); + //exit(1); + return false; + } + this->freemem -= _needmem; + return true; +} + +bool +IVStorage::handler(unsigned long long _needmem) //>0 +{ + //cout<<"swap happen"<minheap->getTop(); + //cout<<"get heap top"<minheap->remove(); + //cout<<"node removed in heap"<getSize(); + this->freemem += size; + this->writeNode(p); + //cout<<"node write back"<getNum() > 0) + p->Virtual(); + else + delete p; //non-sense node + //cout<<"node memory released"< size) + { + //cout<<"reduce the request"<freelist; + BlockInfo* next; + while (bp != NULL) + { + next = bp->next; + delete bp; + bp = next; + } +#ifdef DEBUG_KVSTORE + printf("already empty the freelist!\n"); +#endif + delete this->minheap; +#ifdef DEBUG_KVSTORE + printf("already empty the buffer heap!\n"); +#endif + fclose(this->treefp); + //#ifdef DEBUG_KVSTORE + //NOTICE:there is more than one tree + //fclose(Util::debug_kvstore); //NULL is ok! 
+ //Util::debug_kvstore = NULL; + //#endif +} + +void +IVStorage::print(string s) +{ +#ifdef DEBUG_KVSTORE + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVStorage\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); +#endif +} diff --git a/KVstore/IVTree/storage/IVStorage.h b/KVstore/IVTree/storage/IVStorage.h new file mode 100644 index 0000000..88525e2 --- /dev/null +++ b/KVstore/IVTree/storage/IVStorage.h @@ -0,0 +1,84 @@ +/*============================================================================= +# Filename: IVStorage.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:43 +# Description: swap between memory and disk, achieving system-like method +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H +#define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H + +#include "../node/IVIntlNode.h" +#include "../node/IVLeafNode.h" +#include "../heap/IVHeap.h" +#include "../vlist/VList.h" + +//It controls read, write, swap +class IVStorage +{ +public: + static const unsigned BLOCK_SIZE = Util::STORAGE_BLOCK_SIZE; //fixed size of disk-block + //there are 18 B+Tree indexes and one vstree index, so set 3G buffer size + //static const unsigned long long MAX_BUFFER_SIZE = Util::MAX_BUFFER_SIZE; //max buffer size + //static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size + //static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE / IVNode::INTL_SIZE; + + //DEBUG: maybe need to set larger, now the file size is 64G at most + static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num + //below two constants: must can be exactly divided by 8 + static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num + static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc + static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; + 
//static const unsigned TRANSFER_CAPACITY = BLOCK_SIZE; + //enum ReadType { OVER = 0, EXPAND, NORMAL }; + +private: + unsigned long long max_buffer_size; + unsigned heap_size; + unsigned cur_block_num; + std::string filepath; + unsigned* treeheight; + BlockInfo* freelist; + FILE* treefp; //file: tree nodes + IVHeap* minheap; //heap of Nodes's pointer, sorted in NF_RK + + //very long value list are stored in a separate file(with large block) + // + //NOTICE: according to the summary result, 90% value lists are just below 100 bytes + //<10%: 5000000~100M bytes + VList* value_list; + + //NOTICE: freemem's type is long long here, due to large memory in server. + //However, needmem in handler() and request() is ok to be int/unsigned. + //Because the bstr' size is controlled, so is the node. + unsigned long long freemem; //free memory to use, non-negative + //unsigned long long time; //QUERY(achieving an old-swap startegy?) + long Address(unsigned _blocknum) const; + unsigned Blocknum(long address) const; + unsigned AllocBlock(); + void FreeBlock(unsigned _blocknum); + void ReadAlign(unsigned* _next); + void WriteAlign(unsigned* _next, bool& _SpecialBlock); + +public: + IVStorage(); + IVStorage(std::string& _filepath, std::string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist); //create a fixed-size file or open an existence + bool preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail); //read and build all nodes, only root in memory + bool readNode(IVNode* _np, long long* _request); //read, if virtual + bool createNode(IVNode*& _np); //use fp to create a new node + //NOTICE(if children and child not exist, build children's Nodes) + bool writeNode(IVNode* _np); + bool readBstr(Bstr* _bp, unsigned* _next); + bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); + bool writeTree(IVNode* _np); + void updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const; + bool request(long long _needmem); //deal with memory 
request + bool handler(unsigned long long _needmem); //swap some nodes out + //bool update(); //update InMem Node's rank, with clock + ~IVStorage(); + void print(std::string s); //DEBUG +}; + +#endif + diff --git a/KVstore/IVTree/vlist/VList.cpp b/KVstore/IVTree/vlist/VList.cpp new file mode 100644 index 0000000..798b553 --- /dev/null +++ b/KVstore/IVTree/vlist/VList.cpp @@ -0,0 +1,257 @@ +/*============================================================================= +# Filename: VList.cpp +# Author: Bookug Lobert +# Mail: zengli-bookug@pku.edu.cn +# Last Modified: 2017-03-27 15:47 +# Description: +=============================================================================*/ + +#include "VList.h" + +using namespace std; + +VList::VList() +{ //not use ../logs/, notice the location of program + cur_block_num = SET_BLOCK_NUM; + filepath = ""; + freelist = NULL; + treefp = NULL; + minheap = NULL; + max_buffer_size = Util::MAX_BUFFER_SIZE; + heap_size = max_buffer_size / IVNode::INTL_SIZE; + freemem = max_buffer_size; +} + +VList::VList(string& _filepath, unsigned long long _buffer_size) +{ + cur_block_num = SET_BLOCK_NUM; //initialize + this->filepath = _filepath; + if (_mode == string("build")) + treefp = fopen(_filepath.c_str(), "w+b"); + else if (_mode == string("open")) + treefp = fopen(_filepath.c_str(), "r+b"); + else + { + print(string("error in IVStorage: Invalid mode ") + _mode); + return; + } + if (treefp == NULL) + { + print(string("error in IVStorage: Open error ") + _filepath); + return; + } + this->treeheight = _height; //originally set to 0 + this->max_buffer_size = _buffer_size; + this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE; + this->freemem = this->max_buffer_size; + this->freelist = new BlockInfo; //null-head + unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE + BlockInfo* bp; + if (_mode == "build") + { //write basic information + i = 0; + fwrite(&i, sizeof(unsigned), 1, this->treefp); //height + fwrite(&i, sizeof(unsigned), 1, 
this->treefp); //rootnum + fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + fputc(0, this->treefp); + for (k = 0; k < 8; ++k) + { + bp->next = new BlockInfo(i * 8 + k + 1, NULL); + bp = bp->next; + } + } + } + else //_mode == "open" + { + //read basic information + int rootnum; + char c; + fread(this->treeheight, sizeof(unsigned), 1, this->treefp); + fread(&rootnum, sizeof(unsigned), 1, this->treefp); + fread(&cur_block_num, sizeof(unsigned), 1, this->treefp); + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + c = fgetc(treefp); + for (k = 0; k < 8; ++k) + { + if ((c & (1 << k)) == 0) + { + bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL); + bp = bp->next; + } + } + } + fseek(treefp, Address(rootnum), SEEK_SET); + //treefp is now ahead of root-block + } + this->minheap = new IVHeap(this->heap_size); +} + +long //8-byte in 64-bit machine +IVStorage::Address(unsigned _blocknum) const //BETTER: inline function +{ + if (_blocknum == 0) + return 0; + else if (_blocknum > cur_block_num) + { + //print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum)); + return -1; //address should be non-negative + } + //NOTICE: here should explictly use long + return (long)(this->SuperNum + _blocknum - 1) * (long)BLOCK_SIZE; +} + +unsigned +IVStorage::Blocknum(long address) const +{ + return (address / BLOCK_SIZE) + 1 - this->SuperNum; +} + +unsigned +IVStorage::AllocBlock() +{ + BlockInfo* p = this->freelist->next; + if (p == NULL) + { + for (unsigned i = 0; i < SET_BLOCK_INC; ++i) + { + cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM + this->FreeBlock(cur_block_num); + } + p = this->freelist->next; + } + unsigned t = p->num; + this->freelist->next = p->next; + delete p; + return t; +} + +void +IVStorage::FreeBlock(unsigned 
_blocknum) +{ //QUERY: head-sub and tail-add will be better? + BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next); + this->freelist->next = bp; +} + +//NOTICE: all reads are aligned to 4 bytes(including a string) +//a string may acrossseveral blocks + +void +IVStorage::ReadAlign(unsigned* _next) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + fseek(treefp, Address(*_next), SEEK_SET); + fread(_next, sizeof(unsigned), 1, treefp); + } +} + +void +IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + unsigned blocknum = this->AllocBlock(); + fseek(treefp, Address(*_curnum), SEEK_SET); + if (_SpecialBlock) + { + fseek(treefp, 4, SEEK_CUR); + _SpecialBlock = false; + } + fwrite(&blocknum, sizeof(unsigned), 1, treefp); + fseek(treefp, Address(blocknum) + 4, SEEK_SET); + *_curnum = blocknum; + } +} + +bool +IVStorage::readBstr(Bstr* _bp, unsigned* _next) +{ + //long address; + unsigned len, i, j; + fread(&len, sizeof(unsigned), 1, this->treefp); + this->ReadAlign(_next); + //this->request(len); + char* s = (char*)malloc(len); + _bp->setLen(len); + for (i = 0; i + 4 < len; i += 4) + { + fread(s + i, sizeof(char), 4, treefp); + this->ReadAlign(_next); + } + while (i < len) + { + fread(s + i, sizeof(char), 1, treefp); //BETTER + i++; + } + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->ReadAlign(_next); + _bp->setStr(s); + return true; +} + +bool +IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +{ + unsigned i, j, len = _bp->getLen(); + fwrite(&len, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + char* s = _bp->getStr(); + for (i = 0; i + 4 < len; i += 4) + { + fwrite(s + i, sizeof(char), 4, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + } + while (i < len) + { + fwrite(s + i, sizeof(char), 1, treefp); + i++; + } + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->WriteAlign(_curnum, 
_SpecialBlock); + return true; +} + +VList::~VList() +{ + //release heap and freelist... +#ifdef DEBUG_KVSTORE + printf("now to release the kvstore!\n"); +#endif + BlockInfo* bp = this->freelist; + BlockInfo* next; + while (bp != NULL) + { + next = bp->next; + delete bp; + bp = next; + } +#ifdef DEBUG_KVSTORE + printf("already empty the freelist!\n"); +#endif + delete this->minheap; +#ifdef DEBUG_KVSTORE + printf("already empty the buffer heap!\n"); +#endif + fclose(this->treefp); + //#ifdef DEBUG_KVSTORE + //NOTICE:there is more than one tree + //fclose(Util::debug_kvstore); //NULL is ok! + //Util::debug_kvstore = NULL; + //#endif +} + diff --git a/KVstore/IVTree/vlist/VList.h b/KVstore/IVTree/vlist/VList.h new file mode 100644 index 0000000..61911f1 --- /dev/null +++ b/KVstore/IVTree/vlist/VList.h @@ -0,0 +1,71 @@ +/*============================================================================= +# Filename: VList.h +# Author: Bookug Lobert +# Mail: zengli-bookug@pku.edu.cn +# Last Modified: 2017-03-27 15:40 +# Description: +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_STORAGE_VLIST_H +#define _KVSTORE_IVTREE_STORAGE_VLIST_H + +#include "../../../Util/Util.h" +#include "../../../Util/Bstr.h" + +//TODO: not keep long list in memory, read each time +//but when can you free the long list(kvstore should release it after parsing) +// +//CONSIDER: if to keep long list in memory, should adjust the bstr in memory: +//unsigned: 0 char*: an object (if in memory, if modified, length, content, block num) +//when reading a long list in a node, generate the object first, and the object will tell you whether +//the list is in mmeory or not + +//BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts) + +class VList +{ +public: + //NOTICE:the border is 10^6, but the block is larger, 1M + static const unsigned LENGTH_BORDER = 1000000; + static const unsigned BLOCK_SIZE = 1 << 20; 
//fixed size of disk-block + static const unsigned MAX_BLOCK_NUM = 1 << 16; //max block-num + //below two constants: must can be exactly divided by 8 + static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num + static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc + static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; + +private: + unsigned long long max_buffer_size; + unsigned cur_block_num; + std::string filepath; + BlockInfo* freelist; + //very long value list are stored in a separate file(with large block) + // + //NOTICE: according to the summary result, 90% value lists are just below 100 bytes + //<10%: 5000000~100M bytes + FILE* valfp; + + //NOTICE: freemem's type is long long here, due to large memory in server. + //However, needmem in handler() and request() is ok to be int/unsigned. + //Because the bstr' size is controlled, so is the node. + unsigned long long freemem; //free memory to use, non-negative + //unsigned long long time; //QUERY(achieving an old-swap startegy?) 
+ long Address(unsigned _blocknum) const; + unsigned Blocknum(long address) const; + unsigned AllocBlock(); + void FreeBlock(unsigned _blocknum); + void ReadAlign(unsigned* _next); + void WriteAlign(unsigned* _next, bool& _SpecialBlock); + +public: + VList(); + VList(std::string& _filepath, unsigned long long _buffer_size);//create a fixed-size file or open an existence + bool readBstr(Bstr* _bp, unsigned* _next); + bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); + bool readValue(unsigned _block_num); + bool writeValue(const Bstr* _bp); + ~VList(); +}; + +#endif + diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index 1928799..bd9cd93 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -355,17 +355,19 @@ bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) { int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - bool _is_entity = KVstore::isEntity(_obj_id); + bool _is_entity = Util::is_entity_ele(_obj_id); //subID doesn't exist if (!_get) { - int _values[6]; + //int _values[6]; + int* _values = new int[6]; _values[0] = 1; _values[1] = 1; _values[2] = _is_entity ? 
1 : 0; _values[3] = _pre_id; _values[4] = 5; _values[5] = _obj_id; + //NOTICE: not use array in stack here, otherwise it will be freed, and data in B+Tree, too this->addValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(int) * 6); } @@ -444,7 +446,7 @@ bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) { } this->setValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -454,7 +456,7 @@ bool KVstore::updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id) { int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - bool _is_entity = KVstore::isEntity(_obj_id); + bool _is_entity = Util::is_entity_ele(_obj_id); if (!_get) { return false; @@ -529,7 +531,7 @@ bool KVstore::updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id) { } this->setValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -568,7 +570,8 @@ bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) { //objID doesn't exist if (!_get) { - int _values[5]; + //int _values[5]; + int* _values = new int[5]; _values[0] = 1; _values[1] = 1; _values[2] = _pre_id; @@ -649,7 +652,7 @@ bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) { } this->setValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -727,7 +730,7 @@ bool KVstore::updateRemove_o2values(int _sub_id, int _pre_id, int _obj_id) { } this->setValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -766,7 +769,8 @@ bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) { //preid doesn't exist if (!_get) { - int _values[3]; + //int _values[3]; 
+ int* _values = new int[3]; _values[0] = 1; _values[1] = _sub_id; _values[2] = _obj_id; @@ -793,7 +797,7 @@ bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) { _values[j + _tmp[0] + 1] = _tmp[i + _tmp[0]]; } this->setValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -831,7 +835,7 @@ bool KVstore::updateRemove_p2values(int _sub_id, int _pre_id, int _obj_id) { _values[j + _tmp[0] - 1] = _tmp[i + _tmp[0]]; } this->setValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(int) * _values_len); - delete[] _values; + //delete[] _values; } return true; @@ -865,290 +869,452 @@ bool KVstore::updateRemove_p2values(int _preid, const std::vector& _sidoidl //for entity2id //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_entity2id(int _mode) { +bool +KVstore::open_entity2id(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_entity2id_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_entity2id_query; } - else { + else + { cerr << "Invalid open mode in open_entity2id, mode = " << _mode << endl; return false; } + return this->open(this->entity2id, KVstore::s_entity2id, _mode, buffer_size); } -bool KVstore::close_entity2id() { - if (this->entity2id == NULL) { +bool +KVstore::close_entity2id() +{ + if (this->entity2id == NULL) + { return true; } + this->entity2id->save(); delete this->entity2id; this->entity2id = NULL; + return true; } -bool KVstore::subIDByEntity(string _entity) { +bool +KVstore::subIDByEntity(string _entity) +{ + //NOTICE: no need to copy _entity to a char* buffer + //_entity will not be released befor ethis function ends + //so _entity.c_str() is a valid const char* return 
this->entity2id->remove(_entity.c_str(), _entity.length()); } -int KVstore::getIDByEntity(string _entity) const { +int +KVstore::getIDByEntity(string _entity) const +{ return this->getIDByStr(this->entity2id, _entity.c_str(), _entity.length()); } -bool KVstore::setIDByEntity(string _entity, int _id) { - return this->addValueByKey(this->entity2id, _entity.c_str(), _entity.length(), _id); +bool +KVstore::setIDByEntity(string _entity, int _id) +{ + //int len = _entity.length() + 1; + int len = _entity.length(); + char* str = new char[len]; + memcpy(str, _entity.c_str(), len); + return this->addValueByKey(this->entity2id, str, len, _id); } //for id2entity //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2entity(int _mode) { +bool +KVstore::open_id2entity(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2entity_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2entity_query; } - else { + else + { cerr << "Invalid open mode in open_id2entity, mode = " << _mode << endl; return false; } + return this->open(this->id2entity, KVstore::s_id2entity, _mode, buffer_size); } -bool KVstore::close_id2entity() { - if (this->id2entity == NULL) { +bool +KVstore::close_id2entity() +{ + if (this->id2entity == NULL) + { return true; } + this->id2entity->save(); delete this->id2entity; this->id2entity = NULL; + return true; } -bool KVstore::subEntityByID(int _id) { +bool +KVstore::subEntityByID(int _id) +{ return this->id2entity->remove(_id); } -string KVstore::getEntityByID(int _id) const { +string +KVstore::getEntityByID(int _id) const +{ char* _tmp = NULL; int _len = 0; + bool _get = this->getValueByKey(this->id2entity, _id, _tmp, _len); - if (!_get) { + if (!_get) + { return ""; } - string _ret = string(_tmp); + + 
//NOTICE: no need to add \0 at last if we indicate the length + string _ret = string(_tmp, _len); + return _ret; } -bool KVstore::setEntityByID(int _id, string _entity) { - return this->addValueByKey(this->id2entity, _id, _entity.c_str(), _entity.length()); +bool +KVstore::setEntityByID(int _id, string _entity) +{ + //int len = _entity.length() + 1; + int len = _entity.length(); + char* str = new char[len]; + memcpy(str, _entity.c_str(), len); + + return this->addValueByKey(this->id2entity, _id, str, len); } //for predicate2id //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_predicate2id(int _mode) { +bool +KVstore::open_predicate2id(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_predicate2id_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_predicate2id_query; } - else { + else + { cerr << "Invalid open mode in open_predicate2id, mode = " << _mode << endl; return false; } + return this->open(this->predicate2id, KVstore::s_predicate2id, _mode, buffer_size); } -bool KVstore::close_predicate2id() { - if (this->predicate2id == NULL) { +bool +KVstore::close_predicate2id() +{ + if (this->predicate2id == NULL) + { return true; } + this->predicate2id->save(); delete this->predicate2id; this->predicate2id = NULL; + return true; } -bool KVstore::subIDByPredicate(string _predicate) { +bool +KVstore::subIDByPredicate(string _predicate) +{ return this->predicate2id->remove(_predicate.c_str(), _predicate.length()); } -int KVstore::getIDByPredicate(string _predicate) const { +int +KVstore::getIDByPredicate(string _predicate) const +{ return this->getIDByStr(this->predicate2id, _predicate.c_str(), _predicate.length()); } -bool KVstore::setIDByPredicate(string _predicate, int _id) { - return 
this->addValueByKey(this->predicate2id, _predicate.c_str(), _predicate.length(), _id); +bool +KVstore::setIDByPredicate(string _predicate, int _id) +{ + //int len = _predicate.length() + 1; + int len = _predicate.length(); + char* str = new char[len]; + memcpy(str, _predicate.c_str(), len); + return this->addValueByKey(this->predicate2id, str, len, _id); } //for id2predicate //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2predicate(int _mode) { +bool +KVstore::open_id2predicate(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2predicate_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2predicate_query; } - else { + else + { cerr << "Invalid open mode in open_id2predicate, mode = " << _mode << endl; return false; } + return this->open(this->id2predicate, KVstore::s_id2predicate, _mode, buffer_size); } -bool KVstore::close_id2predicate() { - if (this->id2predicate == NULL) { +bool +KVstore::close_id2predicate() +{ + if (this->id2predicate == NULL) + { return true; } + this->id2predicate->save(); delete this->id2predicate; this->id2predicate = NULL; + return true; } -bool KVstore::subPredicateByID(int _id) { +bool +KVstore::subPredicateByID(int _id) +{ return this->id2predicate->remove(_id); } -string KVstore::getPredicateByID(int _id) const { +string +KVstore::getPredicateByID(int _id) const +{ char* _tmp = NULL; int _len = 0; + bool _get = this->getValueByKey(this->id2predicate, _id, _tmp, _len); - if (!_get) { + if (!_get) + { return ""; } - string _ret = string(_tmp); + string _ret = string(_tmp, _len); + return _ret; } -bool KVstore::setPredicateByID(int _id, string _predicate) { - return this->addValueByKey(this->id2predicate, _id, _predicate.c_str(), _predicate.length()); +bool 
+KVstore::setPredicateByID(int _id, string _predicate) +{ + //int len = _predicate.length() + 1; + int len = _predicate.length(); + char* str = new char[len]; + memcpy(str, _predicate.c_str(), len); + + return this->addValueByKey(this->id2predicate, _id, str, len); } //for literal2id //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_literal2id(int _mode) { +bool +KVstore::open_literal2id(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_literal2id_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_literal2id_query; } - else { + else + { cerr << "Invalid open mode in open_literal2id, mode = " << _mode << endl; return false; } + return this->open(this->literal2id, KVstore::s_literal2id, _mode, buffer_size); } -bool KVstore::close_literal2id() { - if (this->literal2id == NULL) { +bool +KVstore::close_literal2id() +{ + if (this->literal2id == NULL) + { return true; } + this->literal2id->save(); delete this->literal2id; this->literal2id = NULL; + return true; } -bool KVstore::subIDByLiteral(string _literal) { +bool +KVstore::subIDByLiteral(string _literal) +{ return this->literal2id->remove(_literal.c_str(), _literal.length()); } -int KVstore::getIDByLiteral(string _literal) const { +int +KVstore::getIDByLiteral(string _literal) const +{ return this->getIDByStr(this->literal2id, _literal.c_str(), _literal.length()); } -bool KVstore::setIDByLiteral(string _literal, int _id) { - return this->addValueByKey(this->literal2id, _literal.c_str(), _literal.length(), _id); +bool +KVstore::setIDByLiteral(string _literal, int _id) +{ + //int len = _literal.length() + 1; + int len = _literal.length(); + char* str = new char[len]; + memcpy(str, _literal.c_str(), len); + + return this->addValueByKey(this->literal2id, str, len, 
_id); } //for id2literal //_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2literal(int _mode) { +bool +KVstore::open_id2literal(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2literal_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2literal_query; } - else { + else + { cerr << "Invalid open mode in open_id2literal, mode = " << _mode << endl; return false; } + return this->open(this->id2literal, KVstore::s_id2literal, _mode, buffer_size); } -bool KVstore::close_id2literal() { - if (this->id2literal == NULL) { +bool +KVstore::close_id2literal() +{ + if (this->id2literal == NULL) + { return true; } + this->id2literal->save(); delete this->id2literal; this->id2literal = NULL; + return true; } -bool KVstore::subLiteralByID(int _id) { +bool +KVstore::subLiteralByID(int _id) +{ return this->id2literal->remove(_id); } -string KVstore::getLiteralByID(int _id) const { +string +KVstore::getLiteralByID(int _id) const +{ char* _tmp = NULL; int _len = 0; + bool _get = this->getValueByKey(this->id2literal, _id, _tmp, _len); - if (!_get) { + if (!_get) + { //NOTICE:here assumes that all literals cannot be empty: "" return ""; } - string _ret = string(_tmp); + string _ret = string(_tmp, _len); + return _ret; } -bool KVstore::setLiteralByID(int _id, string _literal) { - return this->addValueByKey(this->id2literal, _id, _literal.c_str(), _literal.length()); +bool +KVstore::setLiteralByID(int _id, string _literal) +{ + //int len = _literal.length() + 1; + int len = _literal.length(); + char* str = new char[len]; + memcpy(str, _literal.c_str(), len); + + return this->addValueByKey(this->id2literal, _id, str, len); } -bool KVstore::open_subID2values(int _mode) { +bool +KVstore::open_subID2values(int _mode) +{ unsigned 
long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_sID2values_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_sID2values_query; } - else { + else + { cerr << "Invalid open mode in open_subID2values, mode = " << _mode << endl; return false; } + return this->open(this->subID2values, KVstore::s_sID2values, _mode, buffer_size); } -bool KVstore::close_subID2values() { - if (this->subID2values == NULL) { +bool +KVstore::close_subID2values() +{ + if (this->subID2values == NULL) + { return true; } + this->subID2values->save(); delete this->subID2values; this->subID2values = NULL; + return true; } -bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { +//STRUCT of s2xx: triple_number pre_num entity_border p1 offset1 p2 offset2 ... pn offsetn +//p1-list(in offset1) p2-list(in offset2) ... pn-list(in offsetn) +//(the final whole list is a unsorted olist) +bool +KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) +{ cout << "Begin building subID2values..." 
<< endl; //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_spo_cmp); vector _oidlist_s; vector _pidoffsetlist_s; + //NOTICE: this is used for entity-literal border, but not used now + //it is only set for the whole olist in s2po, not for sp2o int _entity_num = 0; //true means the next sub is a different one from the current one @@ -1160,10 +1326,12 @@ bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { this->open_subID2values(KVstore::CREATE_MODE); - for (int i = 0; i < _triples_num; i++) { - if (i + 1 == _triples_num || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0] - || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2]) { - if (_sub_change) { + for (int i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0] || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2]) + { + if (_sub_change) + { _pidoffsetlist_s.clear(); _oidlist_s.clear(); _entity_num = 0; @@ -1173,13 +1341,15 @@ bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { int _pre_id = _p_id_tuples[i][1]; int _obj_id = _p_id_tuples[i][2]; - if (_sub_pre_change) { + if (_sub_pre_change) + { _pidoffsetlist_s.push_back(_pre_id); _pidoffsetlist_s.push_back(_oidlist_s.size()); } _oidlist_s.push_back(_obj_id); - if (KVstore::isEntity(_obj_id)) { + if (Util::is_entity_ele(_obj_id)) + { _entity_num++; } @@ -1187,8 +1357,10 @@ bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { _pre_change = (i + 1 == _triples_num) || (_p_id_tuples[i][1] != _p_id_tuples[i + 1][1]); _sub_pre_change = _sub_change || _pre_change; - if (_sub_change) { - for (unsigned j = 1; j < _pidoffsetlist_s.size(); j += 2) { + if (_sub_change) + { + for (unsigned j = 1; j < _pidoffsetlist_s.size(); j += 2) + { _pidoffsetlist_s[j] += 3 + _pidoffsetlist_s.size(); } int* _entrylist_s = new int[3 + _pidoffsetlist_s.size() + _oidlist_s.size()]; @@ 
-1200,28 +1372,35 @@ bool KVstore::build_subID2values(int** _p_id_tuples, int _triples_num) { _entrylist_s[2] = _entity_num; unsigned j, k; //pidoffsetlist - for (j = 3, k = 0; k < _pidoffsetlist_s.size(); j++, k++) { + for (j = 3, k = 0; k < _pidoffsetlist_s.size(); j++, k++) + { _entrylist_s[j] = _pidoffsetlist_s[k]; } //unsorted oidlist - for (k = 0; k < _oidlist_s.size(); j++, k++) { + for (k = 0; k < _oidlist_s.size(); j++, k++) + { _entrylist_s[j] = _oidlist_s[k]; } this->addValueByKey(this->subID2values, _sub_id, (char*)_entrylist_s, sizeof(int) * j); - delete[] _entrylist_s; + //delete[] _entrylist_s; } } } this->close_subID2values(); cout << "Finished building subID2values" << endl; + return true; } -bool KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _no_duplicate) const { +//TODO: for long list in all get functions, should free the long list +//the 0th element can be used to identify if is the long list +bool +KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { + if (!Util::is_entity_ele(_subid)) { _preidlist = NULL; _list_len = 0; return false; @@ -1229,22 +1408,28 @@ bool KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { + + if (!_get) + { _preidlist = NULL; _list_len = 0; return false; } + _list_len = _tmp[1]; _preidlist = new int[_list_len]; for (int i = 0; i < _list_len; i++) { _preidlist[i] = _tmp[2 * i + 3]; } + return true; } -bool KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getobjIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { + if 
(!Util::is_entity_ele(_subid)) { _objidlist = NULL; _list_len = 0; return false; @@ -1252,11 +1437,13 @@ bool KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { + if (!_get) + { _objidlist = NULL; _list_len = 0; return false; } + _list_len = _tmp[0]; _objidlist = new int[_list_len]; memcpy(_objidlist, _tmp + 3 + 2 * _tmp[1], sizeof(int) * _list_len); @@ -1264,16 +1451,20 @@ bool KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, if (_no_duplicate) { _list_len = Util::removeDuplicate(_objidlist, _list_len); } + return true; } -bool KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getobjIDlistBysubIDpreID " << _subid << ' ' << _preid << endl; - if (!isEntity(_subid)) { + if (!Util::is_entity_ele(_subid)) { _objidlist = NULL; _list_len = 0; return false; } + int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); @@ -1282,12 +1473,14 @@ bool KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, _list_len = 0; return false; } + int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2); if (_result == -1) { _objidlist = NULL; _list_len = 0; return false; } + int _offset = _tmp[4 + 2 * _result]; int _offset_next; if (_result == _tmp[1] - 1) { @@ -1299,16 +1492,21 @@ bool KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, _list_len = _offset_next - _offset; _objidlist = new int[_list_len]; memcpy(_objidlist, _tmp + _offset, sizeof(int) * _list_len); + return true; } -bool KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len, bool _no_duplicate) 
const { +bool +KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDobjIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { + if (!Util::is_entity_ele(_subid)) + { _preid_objidlist = NULL; _list_len = 0; return false; } + int* _tmp = NULL; int _len = 0; bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); @@ -1317,6 +1515,7 @@ bool KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len = 0; return false; } + _list_len = 2 * _tmp[0]; _preid_objidlist = new int[_list_len]; int _offset_next; @@ -1333,35 +1532,53 @@ bool KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _preid_objidlist[2 * j + 1] = _tmp[3 + 2 * _tmp[1] + j]; } } + return true; } -bool KVstore::open_objID2values(int _mode) { +bool +KVstore::open_objID2values(int _mode) +{ unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { + if (_mode == KVstore::CREATE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_oID2values_build; } - else if (_mode == KVstore::READ_WRITE_MODE) { + else if (_mode == KVstore::READ_WRITE_MODE) + { buffer_size = Util::MAX_BUFFER_SIZE * buffer_oID2values_query; } - else { + else + { cerr << "Invalid open mode in open_objID2values, mode = " << _mode << endl; return false; } + return this->open(this->objID2values, KVstore::s_oID2values, _mode, buffer_size); } -bool KVstore::close_objID2values() { - if (this->objID2values == NULL) { +bool +KVstore::close_objID2values() +{ + if (this->objID2values == NULL) + { return true; } + this->objID2values->save(); delete this->objID2values; this->objID2values = NULL; + return true; } -bool KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) { +//NOTICE: do not need entity border here, because no literal in o2pslist +//STRUCT of o2xx: triple_number pre_num p1 offset1 p2 offset2 ... pn offsetn +//p1-list(in offset1) p2-list(in offset2) ... 
pn-list(in offsetn) +//(the final whole list is a unsorted slist) +bool +KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) +{ cout << "Begin building objID2values..." << endl; //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_ops_cmp); vector _sidlist_o; @@ -1376,9 +1593,10 @@ bool KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) { this->open_objID2values(KVstore::CREATE_MODE); - for (int i = 0; i < _triples_num; i++) { - if (i + 1 == _triples_num || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2] - || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0]) { + for (int i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2] || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0]) + { if (_obj_change) { _pidoffsetlist_o.clear(); _sidlist_o.clear(); @@ -1418,7 +1636,7 @@ bool KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) { _entrylist_o[j] = _sidlist_o[k]; } this->addValueByKey(this->objID2values, _obj_id, (char*)_entrylist_o, sizeof(int) * j); - delete[] _entrylist_o; + //delete[] _entrylist_o; } } } @@ -1428,7 +1646,9 @@ bool KVstore::build_objID2values(int** _p_id_tuples, int _triples_num) { return true; } -bool KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDlistByobjID " << _objid << endl; int* _tmp = NULL; int _len = 0; @@ -1443,10 +1663,13 @@ bool KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, for (int i = 0; i < _list_len; i++) { _preidlist[i] = _tmp[2 * i + 2]; } + return true; } -bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& 
_list_len, bool _no_duplicate) const +{ //cout << "In getsubIDlistByobjID " << _objid << endl; int* _tmp = NULL; int _len = 0; @@ -1456,6 +1679,7 @@ bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, _list_len = 0; return false; } + _list_len = _tmp[0]; _subidlist = new int[_list_len]; memcpy(_subidlist, _tmp + 2 + 2 * _tmp[1], sizeof(int) * _list_len); @@ -1463,10 +1687,13 @@ bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, if (_no_duplicate) { _list_len = Util::removeDuplicate(_subidlist, _list_len); } + return true; } -bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getsubIDlistByobjIDpreID " << _objid << ' ' << _preid << endl; int* _tmp = NULL; int _len = 0; @@ -1476,12 +1703,14 @@ bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, _list_len = 0; return false; } + int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2); if (_result == -1) { _subidlist = NULL; _list_len = 0; return false; } + int _offset = _tmp[3 + 2 * _result]; int _offset_next; if (_result == _tmp[1] - 1) { @@ -1493,10 +1722,13 @@ bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, _list_len = _offset_next - _offset; _subidlist = new int[_list_len]; memcpy(_subidlist, _tmp + _offset, sizeof(int) * _list_len); + return true; } -bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDsubIDlistByobjID " << _objid << endl; int* _tmp = NULL; int _len = 0; @@ -1506,6 +1738,7 @@ bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, 
int& _list_len = 0; return false; } + _list_len = 2 * _tmp[0]; _preid_subidlist = new int[_list_len]; int _offset_next; @@ -1522,10 +1755,13 @@ bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _preid_subidlist[2 * j + 1] = _tmp[2 + 2 * _tmp[1] + j]; } } + return true; } -bool KVstore::open_preID2values(int _mode) { +bool +KVstore::open_preID2values(int _mode) +{ unsigned long long buffer_size; if (_mode == KVstore::CREATE_MODE) { buffer_size = Util::MAX_BUFFER_SIZE * buffer_pID2values_build; @@ -1540,17 +1776,25 @@ bool KVstore::open_preID2values(int _mode) { return this->open(this->preID2values, KVstore::s_pID2values, _mode, buffer_size); } -bool KVstore::close_preID2values() { +bool +KVstore::close_preID2values() +{ if (this->preID2values == NULL) { return true; } + this->preID2values->save(); delete this->preID2values; this->preID2values = NULL; + return true; } -bool KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) { +//NOTICE: if we sort sidlist, then oidlist is not sorted; otherwise if we sort oidlist, then sidlist is not sorted +//STRUCT of p2xx: triple_number sidlist oidlist(not sorted, linked with sidlist one by one) +bool +KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) +{ cout << "Begin building preID2values..." 
<< endl; //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_pso_cmp); vector _sidlist_p; @@ -1561,9 +1805,10 @@ bool KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) { this->open_preID2values(KVstore::CREATE_MODE); - for (int i = 0; i < _triples_num; i++) { - if (i + 1 == _triples_num || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0] - || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2]) { + for (int i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i][0] != _p_id_tuples[i + 1][0] || _p_id_tuples[i][1] != _p_id_tuples[i + 1][1] || _p_id_tuples[i][2] != _p_id_tuples[i + 1][2]) + { if (_pre_change) { _sidlist_p.clear(); _oidlist_p.clear(); @@ -1592,7 +1837,7 @@ bool KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) { _entrylist_p[j] = _oidlist_p[k]; } this->addValueByKey(this->preID2values, _pre_id, (char*)_entrylist_p, sizeof(int) * j); - delete[] _entrylist_p; + //delete[] _entrylist_p; } } } @@ -1602,7 +1847,9 @@ bool KVstore::build_preID2values(int** _p_id_tuples, int _triples_num) { return true; } -bool KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getsubIDlistBypreID " << _preid << endl; int* _tmp = NULL; int _len = 0; @@ -1612,16 +1859,20 @@ bool KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, _list_len = 0; return false; } + _list_len = _tmp[0]; _subidlist = new int[_list_len]; memcpy(_subidlist, _tmp + 1, sizeof(int) * _list_len); if (_no_duplicate) { _list_len = Util::removeDuplicate(_subidlist, _list_len); } + return true; } -bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const +{ 
//cout << "In getobjIDlistBypreID " << _preid << endl; int* _tmp = NULL; int _len = 0; @@ -1631,6 +1882,7 @@ bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, _list_len = 0; return false; } + _list_len = _tmp[0]; _objidlist = new int[_list_len]; memcpy(_objidlist, _tmp + 1 + _tmp[0], sizeof(int) * _list_len); @@ -1638,10 +1890,13 @@ bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, if (_no_duplicate) { _list_len = Util::removeDuplicate(_objidlist, _list_len); } + return true; } -bool KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getsubIDobjIDlistBypreID " << _preid << endl; int* _tmp = NULL; int _len = 0; @@ -1657,10 +1912,13 @@ bool KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _subid_objidlist[2 * i] = _tmp[1 + i]; _subid_objidlist[2 * i + 1] = _tmp[1 + _tmp[0] + i]; } + return true; } -bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const { +bool +KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const +{ //cout << "In getpreIDlistBysubIDobjID " << _subid << ' ' << _objid << endl; int *list1 = NULL, *list2 = NULL; int len1 = 0, len2 = 0; @@ -1669,11 +1927,13 @@ bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, _list_len = 0; return false; } + if (!this->getpreIDlistByobjID(_objid, list2, len2, true)) { _preidlist = NULL; _list_len = 0; return false; } + vector list = KVstore::intersect(list1, list2, len1, len2); delete[] list1; delete[] list2; @@ -1683,6 +1943,7 @@ bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, _list_len = 0; return false; } + int* _tmp = NULL; int _len = 0; 
this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); @@ -1715,10 +1976,12 @@ bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, _list_len--; } } + if (_list_len == 0) { _preidlist = NULL; return false; } + _preidlist = new int[_list_len]; int i = 0, j = 0; while (i < len) { @@ -1731,11 +1994,14 @@ bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, i++; } } + return true; } -bool KVstore::open(SITree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) { +bool +KVstore::open(SITree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) +{ if (_p_btree != NULL) { return false; } @@ -1754,7 +2020,9 @@ bool KVstore::open(SITree*& _p_btree, string _tree_name, int _mode, unsigned lon return true; } -bool KVstore::open(ISTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) { +bool +KVstore::open(ISTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) +{ if (_p_btree != NULL) { return false; } @@ -1773,43 +2041,112 @@ bool KVstore::open(ISTree*& _p_btree, string _tree_name, int _mode, unsigned lon return true; } -void KVstore::flush(SITree* _p_btree) { +bool +KVstore::open(IVTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) +{ if (_p_btree != NULL) { + return false; + } + string smode; + if (_mode == KVstore::CREATE_MODE) { + smode = "build"; + } + else if (_mode == KVstore::READ_WRITE_MODE) { + smode = "open"; + } + else { + cerr << "Invalid open mode of: " << _tree_name << " mode = " << _mode << endl; + return false; + } + _p_btree = new IVTree(this->store_path, _tree_name, smode, _buffer_size); + + return true; +} + +void +KVstore::flush(SITree* _p_btree) +{ + if (_p_btree != NULL) + { _p_btree->save(); } } -void KVstore::flush(ISTree* _p_btree) { - if (_p_btree != NULL) { +void +KVstore::flush(ISTree* _p_btree) +{ + if (_p_btree != NULL) + { _p_btree->save(); } } -bool 
KVstore::addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) { +void +KVstore::flush(IVTree* _p_btree) +{ + if (_p_btree != NULL) + { + _p_btree->save(); + } +} + +bool +KVstore::addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) +{ return _p_btree->insert(_key, _klen, _val); } -bool KVstore::addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) { +bool +KVstore::addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) +{ return _p_btree->insert(_key, _val, _vlen); } -bool KVstore::setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) { +bool +KVstore::addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen) +{ + return _p_btree->insert(_key, _val, _vlen); +} + +bool +KVstore::setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val) +{ return _p_btree->modify(_key, _klen, _val); } -bool KVstore::setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) { +bool +KVstore::setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen) +{ return _p_btree->modify(_key, _val, _vlen); } -bool KVstore::getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const { +bool +KVstore::setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen) +{ + return _p_btree->modify(_key, _val, _vlen); +} + +bool +KVstore::getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const +{ return _p_btree->search(_key, _klen, _val); } -bool KVstore::getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const { +bool +KVstore::getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const +{ return _p_btree->search(_key, _val, _vlen); } -int KVstore::getIDByStr(SITree* _p_btree, const char* _key, int _klen) const { +bool +KVstore::getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const +{ + return _p_btree->search(_key, _val, _vlen); +} + +int +KVstore::getIDByStr(SITree* 
_p_btree, const char* _key, int _klen) const +{ int val = 0; bool ret = _p_btree->search(_key, _klen, &val); if (!ret) @@ -1819,15 +2156,27 @@ int KVstore::getIDByStr(SITree* _p_btree, const char* _key, int _klen) const { return val; } -bool KVstore::removeKey(SITree* _p_btree, const char* _key, int _klen) { +bool +KVstore::removeKey(SITree* _p_btree, const char* _key, int _klen) +{ return _p_btree->remove(_key, _klen); } -bool KVstore::removeKey(ISTree* _p_btree, int _key) { +bool +KVstore::removeKey(ISTree* _p_btree, int _key) +{ return _p_btree->remove(_key); } -vector KVstore::intersect(const int* _list1, const int* _list2, int _len1, int _len2) { +bool +KVstore::removeKey(IVTree* _p_btree, int _key) +{ + return _p_btree->remove(_key); +} + +vector +KVstore::intersect(const int* _list1, const int* _list2, int _len1, int _len2) +{ int i = 0, j = 0; vector ret; while (i < _len1 && j < _len2) { @@ -1846,7 +2195,9 @@ vector KVstore::intersect(const int* _list1, const int* _list2, int _len1, return ret; } -int KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step) { +int +KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step) +{ int _left = 0; int _right = _list_len - 1; int _mid; @@ -1862,13 +2213,11 @@ int KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step) _left = _mid + 1; } } + return -1; } -bool KVstore::isEntity(int id) { - return id < Util::LITERAL_FIRST_ID; -} - +//TODO: better to adjust these parameters according to memory usage and entity num string KVstore::s_entity2id = "s_entity2id"; string KVstore::s_id2entity = "s_id2entity"; unsigned short KVstore::buffer_entity2id_build = 8; @@ -1899,3 +2248,4 @@ unsigned short KVstore::buffer_pID2values_build = 16; unsigned short KVstore::buffer_sID2values_query = 16; unsigned short KVstore::buffer_oID2values_query = 16; unsigned short KVstore::buffer_pID2values_query = 8; + diff --git a/KVstore/KVstore.h b/KVstore/KVstore.h index dfdfd77..c74940b 
100644 --- a/KVstore/KVstore.h +++ b/KVstore/KVstore.h @@ -164,9 +164,9 @@ private: static unsigned short buffer_literal2id_query; static unsigned short buffer_id2literal_query; - ISTree* subID2values; - ISTree* objID2values; - ISTree* preID2values; + IVTree* subID2values; + IVTree* objID2values; + IVTree* preID2values; static std::string s_sID2values; static std::string s_oID2values; static std::string s_pID2values; @@ -181,23 +181,29 @@ private: bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); + bool open(IVTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); void flush(SITree* _p_btree); void flush(ISTree* _p_btree); + void flush(IVTree* _p_btree); bool addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val); bool addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen); + bool addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen); bool setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val); bool setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen); + bool setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen); bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const; bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const; + bool getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const; int getIDByStr(SITree* _p_btree, const char* _key, int _klen) const; bool removeKey(SITree* _p_btree, const char* _key, int _klen); bool removeKey(ISTree* _p_btree, int _key); + bool removeKey(IVTree* _p_btree, int _key); static std::vector intersect(const int* _list1, const int* _list2, int _len1, int _len2); static int binarySearch(int key, const int* _list, int _list_len, int step = 1); diff --git a/KVstore/SITree/SITree.cpp 
b/KVstore/SITree/SITree.cpp index 70245cf..1dfef4d 100644 --- a/KVstore/SITree/SITree.cpp +++ b/KVstore/SITree/SITree.cpp @@ -20,7 +20,7 @@ SITree::SITree() TSM = NULL; storepath = ""; filename = ""; - transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; + //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; this->request = 0; } @@ -36,10 +36,10 @@ SITree::SITree(string _storepath, string _filename, string _mode, unsigned long this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail); else this->root = NULL; - this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M + //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M this->request = 0; } @@ -49,30 +49,30 @@ SITree::getFilePath() return storepath + "/" + filename; } -void //WARN: not check _str and _len -SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) -{ - if (_index > 2) - return; - /* - if(_str == NULL || _len == 0) - { - printf("error in CopyToTransfer: empty string\n"); - return; - } - */ - //unsigned length = _bstr->getLen(); - unsigned length = _len; - if (length + 1 > this->transfer_size[_index]) - { - transfer[_index].release(); - transfer[_index].setStr((char*)malloc(length + 1)); - this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 - } - memcpy(this->transfer[_index].getStr(), _str, length); - this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore - this->transfer[_index].setLen(length); -} +//void 
//WARN: not check _str and _len +//SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + //transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} unsigned SITree::getHeight() const @@ -110,20 +110,26 @@ SITree::search(const char* _str, unsigned _len, int* _val) *_val = -1; return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); request = 0; - Bstr bstr = this->transfer[1]; //not to modify its memory + //Bstr bstr = this->transfer[1]; //not to modify its memory + //Bstr bstr(_str, _len, true); int store; - SINode* ret = this->find(&transfer[1], &store, false); - if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found + SINode* ret = this->find(_str, _len, &store, false); + if (ret == NULL || store == -1) //tree is empty or not found + { + //bstr.clear(); + return false; + } + const Bstr* tmp = ret->getKey(store); + if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found { - bstr.clear(); return false; } *_val = ret->getValue(store); this->TSM->request(request); - bstr.clear(); + //bstr.clear(); return true; } @@ -135,7 +141,7 @@ SITree::insert(const char* _str, unsigned _len, int _val) printf("error in SITree-insert: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); this->request = 
0; SINode* ret; @@ -170,8 +176,8 @@ SITree::insert(const char* _str, unsigned _len, int _val) SINode* p = this->root; SINode* q; int i; - const Bstr* _key = &transfer[1]; - Bstr bstr = *_key; + //const Bstr* _key = &transfer[1]; + //Bstr bstr = *_key; while (!p->isLeaf()) { //j = p->getNum(); @@ -179,7 +185,7 @@ SITree::insert(const char* _str, unsigned _len, int _val) //if(bstr < *(p->getKey(i))) //break; //NOTICE: using binary search is better here - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); q = p->getChild(i); this->prepare(q); @@ -196,7 +202,10 @@ SITree::insert(const char* _str, unsigned _len, int _val) this->TSM->updateHeap(ret, ret->getRank(), false); this->TSM->updateHeap(q, q->getRank(), true); this->TSM->updateHeap(p, p->getRank(), true); - if (bstr < *(p->getKey(i))) + //if (bstr < *(p->getKey(i))) + const Bstr* tmp = p->getKey(i); + int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen()); + if (cmp_res < 0) p = q; else p = ret; @@ -212,24 +221,35 @@ SITree::insert(const char* _str, unsigned _len, int _val) //for(i = 0; i < j; ++i) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); //insert existing key is ok, but not inserted in //however, the tree-shape may change due to possible split in former code bool ifexist = false; - if (i > 0 && bstr == *(p->getKey(i - 1))) - ifexist = true; - else + //if (i > 0 && bstr == *(p->getKey(i - 1))) + if (i > 0) { - p->addKey(_key, i, true); + const Bstr* tmp = p->getKey(i-1); + int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen()); + if(cmp_res == 0) + { + ifexist = true; + } + } + + if(!ifexist) + { + p->addKey(_str, _len, i, true); p->addValue(_val, i); p->addNum(); - request += _key->getLen(); + request += _len; p->setDirty(); this->TSM->updateHeap(p, p->getRank(), true); } + this->TSM->request(request); - bstr.clear(); //NOTICE: must be cleared! + //bstr.clear(); //NOTICE: must be cleared! 
+ return !ifexist; //QUERY(which case:return false) } @@ -241,34 +261,42 @@ SITree::modify(const char* _str, unsigned _len, int _val) printf("error in SITree-modify: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); this->request = 0; - const Bstr* _key = &transfer[1]; - Bstr bstr = *_key; + //const Bstr* _key = &transfer[1]; + //Bstr bstr = *_key; int store; - SINode* ret = this->find(_key, &store, true); - if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found + SINode* ret = this->find(_str, _len, &store, true); + if (ret == NULL || store == -1) //tree is empty or not found { - bstr.clear(); + //bstr.clear(); return false; } + const Bstr* tmp = ret->getKey(store); + if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found + { + return false; + } + ret->setValue(_val, store); ret->setDirty(); this->TSM->request(request); - bstr.clear(); + //bstr.clear(); + return true; } //this function is useful for search and modify, and range-query SINode* //return the first key's position that >= *_key -SITree::find(const Bstr* _key, int* _store, bool ifmodify) +SITree::find(const char* _str, unsigned _len, int* _store, bool ifmodify) { //to assign value for this->bstr, function shouldn't be const! 
if (this->root == NULL) return NULL; //SITree Is Empty + SINode* p = root; int i, j; - Bstr bstr = *_key; //local Bstr: multiple delete + //Bstr bstr = *_key; //local Bstr: multiple delete while (!p->isLeaf()) { if (ifmodify) @@ -277,7 +305,7 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify) //for(i = 0; i < j; ++i) //BETTER(Binary-Search) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); p = p->getChild(i); this->prepare(p); @@ -287,13 +315,15 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify) //for(i = 0; i < j; ++i) //if(bstr <= *(p->getKey(i))) //break; - i = p->searchKey_lessEqual(bstr); + i = p->searchKey_lessEqual(_str, _len); if (i == j) *_store = -1; //Not Found else *_store = i; - bstr.clear(); + + //bstr.clear(); + return p; } @@ -312,24 +342,25 @@ SITree::remove(const char* _str, unsigned _len) printf("error in SITree-remove: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); request = 0; - const Bstr* _key = &transfer[1]; + //const Bstr* _key = &transfer[1]; SINode* ret; if (this->root == NULL) //tree is empty return false; + SINode* p = this->root; SINode* q; int i, j; - Bstr bstr = *_key; + //Bstr bstr = *_key; while (!p->isLeaf()) { j = p->getNum(); //for(i = 0; i < j; ++i) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); q = p->getChild(i); this->prepare(q); @@ -343,6 +374,7 @@ SITree::remove(const char* _str, unsigned _len) if (ret != NULL) this->TSM->updateHeap(ret, 0, true);//non-sense node this->TSM->updateHeap(q, q->getRank(), true); + if (q->isLeaf()) { if (q->getPrev() == NULL) @@ -350,6 +382,7 @@ SITree::remove(const char* _str, unsigned _len) if (q->getNext() == NULL) this->leaves_tail = q; } + if (p->getNum() == 0) //root shrinks { //this->leaves_head = q; @@ -365,7 +398,7 @@ SITree::remove(const char* _str, unsigned _len) } bool flag = false; 
- i = p->searchKey_equal(bstr); + i = p->searchKey_equal(_str, _len); //WARN+NOTICE:here must check, because the key to remove maybe not exist if (i != (int)p->getNum()) { @@ -386,7 +419,8 @@ SITree::remove(const char* _str, unsigned _len) } this->TSM->request(request); - bstr.clear(); + //bstr.clear(); + return flag; //i == j, not found } @@ -495,4 +529,5 @@ SITree::print(string s) } else; #endif -} \ No newline at end of file +} + diff --git a/KVstore/SITree/SITree.h b/KVstore/SITree/SITree.h index 49aa75f..52c27ee 100644 --- a/KVstore/SITree/SITree.h +++ b/KVstore/SITree/SITree.h @@ -3,7 +3,7 @@ # Author: syzz # Mail: 1181955272@qq.com # Last Modified: 2015-04-26 16:44 -# Description: struct and interface of the B+ tree +# Description: string2ID, including entity2id, literal2id, predicate2id =============================================================================*/ #ifndef _KVSTORE_SITREE_SITREE_H @@ -36,13 +36,19 @@ private: //so lock is a must. Add lock to transfer is better than to add //lock to every key/value. However, modify requires a lock for a //key/value, and multiple search for different keys are ok!!! - Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char* - unsigned transfer_size[3]; + //Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char* + //unsigned transfer_size[3]; + + //TODO: in all B+ trees, updat eoperation should lock the whole tree, while search operations not + //However, the transfer bstr maybe cause the parallism error!!!! + //Why we need the transfer? 
It is ok to pass the original string pointer to return + //A problem is that before the caller ends, the tree can not be modified(so a read-writ elock is required) + std::string storepath; std::string filename; //ok for user to change /* some private functions */ std::string getFilePath(); //in UNIX system - void CopyToTransfer(const char* _str, unsigned _len, unsigned _index); + //void CopyToTransfer(const char* _str, unsigned _len, unsigned _index); void release(SINode* _np) const; //tree's operations should be atom(if read nodes) @@ -61,7 +67,7 @@ public: bool search(const char* _str, unsigned _len, int* _val); bool insert(const char* _str, unsigned _len, int _val); bool modify(const char* _str, unsigned _len, int _val); - SINode* find(const Bstr* _key, int* store, bool ifmodify); + SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify); bool remove(const char* _str, unsigned _len); bool save(); ~SITree(); @@ -71,4 +77,5 @@ public: //(problem range between two extremes: not-modified, totally-modified) //After saved, it's ok to continue operations on tree! -#endif \ No newline at end of file +#endif + diff --git a/KVstore/SITree/storage/SIStorage.h b/KVstore/SITree/storage/SIStorage.h index 3b454ba..4e9e0eb 100644 --- a/KVstore/SITree/storage/SIStorage.h +++ b/KVstore/SITree/storage/SIStorage.h @@ -13,6 +13,14 @@ #include "../node/SILeafNode.h" #include "../heap/SIHeap.h" +//TODO: whether to use heap or not, is a big question +//For single-query application, it seems that LRU list like VSTree is a better choice(no much cost in the buffer itself) +//But in multiple-queries case, things maybe different +//BETTER: +//add a heap position in node, to speed up the node-pointer searching +//lower the update times of heap, if the size is 128M, then each update is 27 at most +//if not update in time, then the heap maybe not be a heap, then why do we use heap? why not a simple array? 
+ //It controls read, write, swap class SIStorage { diff --git a/KVstore/Tree.h b/KVstore/Tree.h index 528d2eb..b73612b 100644 --- a/KVstore/Tree.h +++ b/KVstore/Tree.h @@ -1,4 +1,5 @@ //headers wrapper for all kinds of BPlusTree +#include "IVTree/IVTree.h" #include "ISTree/ISTree.h" -#include "SITree/SITree.h" \ No newline at end of file +#include "SITree/SITree.h" diff --git a/Main/gadd.cpp b/Main/gadd.cpp index 498f86d..c7e8cdc 100644 --- a/Main/gadd.cpp +++ b/Main/gadd.cpp @@ -7,9 +7,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif cout << "argc: " << argc << "\t"; cout << "DB_store:" << argv[1] << "\t"; diff --git a/Main/gbuild.cpp b/Main/gbuild.cpp index 23791c2..bca5c16 100644 --- a/Main/gbuild.cpp +++ b/Main/gbuild.cpp @@ -17,9 +17,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif if(argc < 3) //./gbuild { //output help info here diff --git a/Main/gclient.cpp b/Main/gclient.cpp index c42de4d..9946ebd 100644 --- a/Main/gclient.cpp +++ b/Main/gclient.cpp @@ -12,9 +12,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif std::string ip = Socket::DEFAULT_SERVER_IP; unsigned short port = Socket::DEFAULT_CONNECT_PORT; @@ -38,4 +38,4 @@ int main(int argc, char * argv[]) client.run(); return 0; -} \ No newline at end of file +} diff --git a/Main/gconsole.cpp b/Main/gconsole.cpp index f5252bb..2be9e35 100644 --- a/Main/gconsole.cpp +++ b/Main/gconsole.cpp @@ -122,9 +122,9 @@ main(int argc, char **argv) //NOTICE:this is needed to ensure the file path is the work path //chdir(dirname(argv[0])); //NOTICE:this is needed to set several debug files -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif db_home = Util::global_config["db_home"]; diff --git a/Main/gquery.cpp b/Main/gquery.cpp index 7121b40..70c91c1 100644 --- a/Main/gquery.cpp +++ 
b/Main/gquery.cpp @@ -38,9 +38,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif if (argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0) { diff --git a/Main/gserver.cpp b/Main/gserver.cpp index f04948b..d6ae3d2 100644 --- a/Main/gserver.cpp +++ b/Main/gserver.cpp @@ -11,9 +11,9 @@ using namespace std; -#define GSERVER_PORT_FILE "bin/.gserver_port" -#define GSERVER_PORT_SWAP "bin/.gserver_port.swap" -#define GSERVER_LOG "logs/gserver.log" +//#define GSERVER_PORT_FILE "bin/.gserver_port" +//#define GSERVER_PORT_SWAP "bin/.gserver_port.swap" +//#define GSERVER_LOG "logs/gserver.log" bool isOnlyProcess(const char* argv0); void checkSwap(); @@ -22,9 +22,9 @@ bool stopServer(); int main(int argc, char* argv[]) { -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif string mode; if (argc == 1) { @@ -61,7 +61,7 @@ int main(int argc, char* argv[]) unsigned short port = Socket::DEFAULT_CONNECT_PORT; if (argc == 3) { if (!Util::isValidPort(string(argv[2]))) { - cout << "Invalid port: " << argv[2] << endl; + cerr << "Invalid port: " << argv[2] << endl; return -1; } else { @@ -70,9 +70,9 @@ int main(int argc, char* argv[]) } } if (!isOnlyProcess(argv[0])) { - ofstream out(GSERVER_PORT_SWAP, ios::out); + ofstream out(Util::gserver_port_swap.c_str()); if (!out) { - cout << "Failed to change port!" << endl; + cerr << "Failed to change port!" << endl; return -1; } out << port; @@ -80,9 +80,9 @@ int main(int argc, char* argv[]) cout << "Port will be changed to " << port << " after the current server stops or restarts." << endl; return 0; } - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (!out) { - cout << "Failed to change port!" << endl; + cerr << "Failed to change port!" 
<< endl; return -1; } out << port; @@ -93,10 +93,15 @@ int main(int argc, char* argv[]) if (mode == "-s" || mode == "--start") { if (!isOnlyProcess(argv[0])) { - cout << "gServer already running!" << endl; + cerr << "gServer already running!" << endl; return -1; } if (startServer()) { + sleep(1); + if (isOnlyProcess(argv[0])) { + cerr << "Server stopped unexpectedly. Check for port conflicts!" << endl; + return -1; + } return 0; } else { @@ -106,7 +111,7 @@ int main(int argc, char* argv[]) if (mode == "-t" || mode == "--stop") { if (isOnlyProcess(argv[0])) { - cout << "gServer not running!" << endl; + cerr << "gServer not running!" << endl; return -1; } if (stopServer()) { @@ -119,7 +124,7 @@ int main(int argc, char* argv[]) if (mode == "-r" || mode == "--restart") { if (isOnlyProcess(argv[0])) { - cout << "gServer not running!" << endl; + cerr << "gServer not running!" << endl; return -1; } if (!stopServer()) { @@ -133,14 +138,14 @@ int main(int argc, char* argv[]) if (mode == "-P" || mode == "--printport") { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE); + ifstream in(Util::gserver_port_file.c_str()); if (in) { in >> port; in.close(); } cout << "Current connection port is " << port << '.' << endl; unsigned short portSwap = 0; - ifstream inSwap(GSERVER_PORT_SWAP); + ifstream inSwap(Util::gserver_port_swap.c_str()); if (inSwap) { inSwap >> portSwap; inSwap.close(); @@ -153,14 +158,14 @@ int main(int argc, char* argv[]) if (mode == "-k" || mode == "--kill") { if (isOnlyProcess(argv[0])) { - cout << "No process to kill!" << endl; + cerr << "No process to kill!" << endl; return -1; } execl("/usr/bin/killall", "killall", Util::getExactPath(argv[0]).c_str(), NULL); return 0; } - cout << "Invalid arguments! Input \"bin/gserver -h\" for help." << endl; + cerr << "Invalid arguments! Type \"bin/gserver -h\" for help." 
<< endl; return -1; } @@ -169,38 +174,38 @@ bool isOnlyProcess(const char* argv0) { } void checkSwap() { - if (access(GSERVER_PORT_SWAP, 00) != 0) { + if (access(Util::gserver_port_swap.c_str(), 00) != 0) { return; } - ifstream in(GSERVER_PORT_SWAP, ios::in); + ifstream in(Util::gserver_port_swap.c_str()); if (!in) { - cout << "Failed in checkSwap(), port may not be changed." << endl; + cerr << "Failed in checkSwap(), port may not be changed." << endl; return; } unsigned short port; in >> port; in.close(); - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (!out) { - cout << "Failed in checkSwap(), port may not be changed." << endl; + cerr << "Failed in checkSwap(), port may not be changed." << endl; return; } out << port; out.close(); - chmod(GSERVER_PORT_FILE, 0644); - string cmd = string("rm ") + GSERVER_PORT_SWAP; + chmod(Util::gserver_port_file.c_str(), 0644); + string cmd = string("rm ") + Util::gserver_port_swap; system(cmd.c_str()); } bool startServer() { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE, ios::in); + ifstream in(Util::gserver_port_file.c_str()); if (!in) { - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (out) { out << port; out.close(); - chmod(GSERVER_PORT_FILE, 0644); + chmod(Util::gserver_port_file.c_str(), 0644); } } else { @@ -215,47 +220,75 @@ bool startServer() { if (!Util::dir_exist("logs")) { Util::create_dir("logs"); } - freopen(GSERVER_LOG, "a", stdout); - freopen(GSERVER_LOG, "a", stderr); - Server server(port); - if (!server.createConnection()) { - cout << Util::getTimeString() << "Failed to create connection at port " << port << '.' 
<< endl; - return false; + freopen(Util::gserver_log.c_str(), "a", stdout); + freopen(Util::gserver_log.c_str(), "a", stderr); + + int status; + + while (true) { + fpid = fork(); + + // child, main process + if (fpid == 0) { + Server server(port); + if (!server.createConnection()) { + cerr << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl; + return false; + } + cout << Util::getTimeString() << "Server started at port " << port << '.' << endl; + server.listen(); + exit(0); + return true; + } + + // parent, deamon process + else if (fpid > 0) { + waitpid(fpid, &status, 0); + if (WIFEXITED(status)) { + exit(0); + return true; + } + cerr << Util::getTimeString() << "Server stopped abnormally, restarting server..." << endl; + } + + // fork failure + else { + cerr << Util::getTimeString() << "Failed to start server: deamon fork failure." << endl; + return false; + } } - cout << Util::getTimeString() << "Server started at port " << port << '.' << endl; - server.listen(); - exit(0); - return true; } + // parent else if (fpid > 0) { cout << "Server started at port " << port << '.' << endl; return true; } + // fork failure else { - cout << "Failed to start server at port " << port << '.' << endl; + cerr << "Failed to start server at port " << port << '.' << endl; return false; } } bool stopServer() { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE, ios::in); + ifstream in(Util::gserver_port_file.c_str()); if (in) { in >> port; in.close(); } Socket socket; if (!socket.create() || !socket.connect("127.0.0.1", port) || !socket.send("stop")) { - cout << "Failed to stop server at port " << port << '.' << endl; + cerr << "Failed to stop server at port " << port << '.' << endl; return false; } string recv_msg; socket.recv(recv_msg); socket.close(); if (recv_msg != "server stopped.") { - cout << "Failed to stop server at port " << port << '.' << endl; + cerr << "Failed to stop server at port " << port << '.' 
<< endl; return false; } cout << "Server stopped at port " << port << '.' << endl; diff --git a/Main/gsub.cpp b/Main/gsub.cpp index a7b1b5a..14a4938 100644 --- a/Main/gsub.cpp +++ b/Main/gsub.cpp @@ -12,9 +12,9 @@ int main(int argc, char * argv[]) { //chdir(dirname(argv[0])); -#ifdef DEBUG +//#ifdef DEBUG Util util; -#endif +//#endif cout << "argc: " << argc << "\t"; cout << "DB_store:" << argv[1] << "\t"; diff --git a/NOTES.md b/NOTES.md index 8d1c549..cf80444 100644 --- a/NOTES.md +++ b/NOTES.md @@ -7,6 +7,8 @@ 在使用gserver时,不能在数据库没有unload时再用gbuild或其他命令修改数据库,仅限于C/S模式 将IRC聊天放到gstore文档上,freenode #gStore +storage中大量使用long类型,文件大小也可能达到64G,最好在64位机器上运行。 + # 推广 必须建立一个官方网站,可以展示下团队、demo,需要建立社区/论坛并维护 @@ -86,13 +88,13 @@ http://blog.csdn.net/infoworld/article/details/8670951 要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned) 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧 -同时将ID的编码改为unsigned,无效标志-1改为最大值的宏, triple数目的类型也要改为unsigned -注意pre的ID还可以为-2,或者对于pre仍然用int,或者改函数的返回值为long long (还有一些没有用-1而是>=0) +在type分支中,sub2id_pre2id_obj2id函数中,每次double增长可能无法充分利用unsigned空间,只能利用到2560000000,超过后最好直接设置为最大 +去掉tree里面的复制,另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现,避免每次都重新new,但这会影响多线程程序 +而且在kvstore中往往需要对原始list做一些额外处理 --- -将B+tree中叶节点的大的value分离出来,新建一套缓存,使用block机制,标记length为0表示未读取 -类型bstr的length问题也需要解决 -如果把类型直接改成long long,空间开销一下子就上升了一倍 +UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍 解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用unsigned long long*和unsigned来表示,这样最高可支持到40亿triple +(其实这个不是特别必要,很少会有这种情况,我们处理的triple数目一般限制在20亿,就算是type这种边,po对数也就是跟entity数目持平,很难达到5亿) --- 那么是否可以调整entity与literal的分界线,如果entity数目一般都比literal数目多的话 直接把literal从大到小编号,可在ID模块中指定顺序,这样每个Datbase模块应该有自己独特的分界线,其他模块用时也需要注意 @@ -518,6 +520,8 @@ http://www.oschina.net/question/188977_58777 # ADVICE +#### 考虑利用hdfs或者hbase,这样就可以利用各公司已有的数据库系统,但这是否会和已有的内外存交换冲突? + #### 数值型查询 实数域 [-bound, bound] 类型很难匹配,有必要单独编码么? 
数据集中不应有范围 Query中编码过滤后还需验证 x>a, x=, <=, a时不直接取字符串,而是转换为数值并编码 @@ -602,3 +606,23 @@ Consider the use of Bloom Filter and FM-sketches http://www.hprd.org/download/ + + +## GIT + +#### how to commit a message + +package.json +http://www.json.cn/ +https://www.oschina.net/news/69705/git-commit-message-and-changelog-guide +https://sanwen8.cn/p/44eCof7.html + +1. commit one by one, a commit just do one thing + +2. place a empty line between head and body, body and footer + +3. the first letter of header should be in uppercase, and the header should not be too long, just a wonderful summary +FIX: ... ADD:... REF:... 代码重构 SUB:... + +4. each line should not be too long, add your real name and the influence in footer(maybe cause the code struct to change) + diff --git a/Query/BasicQuery.h b/Query/BasicQuery.h index 94128b3..642b0c8 100644 --- a/Query/BasicQuery.h +++ b/Query/BasicQuery.h @@ -175,8 +175,8 @@ private: map selected_var_position; public: - static const int MAX_VAR_NUM = 10; - static const int MAX_PRE_VAR_NUM = 10; + static const int MAX_VAR_NUM = 20; + static const int MAX_PRE_VAR_NUM = 20; static const char NOT_JUST_SELECT = 'a'; static const char SELECT_VAR = 's'; diff --git a/Util/Bstr.cpp b/Util/Bstr.cpp index 2ddc7ae..8157700 100644 --- a/Util/Bstr.cpp +++ b/Util/Bstr.cpp @@ -17,12 +17,17 @@ Bstr::Bstr() this->str = NULL; } -Bstr::Bstr(const char* _str, unsigned _len) +Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy) { //WARN: if need a string .please add '\0' in your own! this->length = _len; - //DEBUG:if copy memory? 
- //this->str = _str; //not valid:const char* -> char* + + //if(_nocopy) + //{ + //this->str = _str; //not valid:const char* -> char* + //return; + //} + this->str = (char*)malloc(_len); memcpy(this->str, _str, sizeof(char) * _len); //this->str[_len]='\0'; @@ -116,6 +121,12 @@ Bstr::operator != (const Bstr& _bstr) unsigned Bstr::getLen() const { + //NOTICE: this is for VList + if(this->str == NULL) + { + return 0; + } + return length; } diff --git a/Util/Bstr.h b/Util/Bstr.h index 1d93bc6..63d8cd9 100644 --- a/Util/Bstr.h +++ b/Util/Bstr.h @@ -24,7 +24,7 @@ public: Bstr(); //if copy memory, then use const char*, but slow //else, can not use const char* -> char* - Bstr(const char* _str, unsigned _len); + Bstr(const char* _str, unsigned _len, bool _nocopy = false); //Bstr(char* _str, unsigned _len); Bstr(const Bstr& _bstr); //Bstr& operate = (const Bstr& _bstr); diff --git a/Util/Util.cpp b/Util/Util.cpp index 9a0a883..bbf6077 100644 --- a/Util/Util.cpp +++ b/Util/Util.cpp @@ -48,6 +48,10 @@ map Util::global_config; //================================================================================================================== +string Util::gserver_port_file = "bin/.gserver_port"; +string Util::gserver_port_swap = "bin/.gserver_port.swap"; +string Util::gserver_log = "logs/gserver.log"; + //NOTICE:used in Database, Join and Strategy //int Util::triple_num = 0; //int Util::pre_num = 0; @@ -441,11 +445,18 @@ Util::memoryLeft() } bool -Util::is_literal_ele(int _id) +Util::is_literal_ele(unsigned _id) { return _id >= Util::LITERAL_FIRST_ID; } +bool +Util::is_entity_ele(unsigned id) +{ + return id < Util::LITERAL_FIRST_ID; +} + + //NOTICE: require that the list is ordered int Util::removeDuplicate(int* _list, int _len) diff --git a/Util/Util.h b/Util/Util.h index b5c02af..1098da1 100644 --- a/Util/Util.h +++ b/Util/Util.h @@ -37,6 +37,7 @@ in the sparql query can point to the same node in data graph) #include #include #include +#include #include #include @@ -87,7 
+88,7 @@ in the sparql query can point to the same node in data graph) //#define DEBUG_STREAM //#define DEBUG_PRECISE 1 all information //#define DEBUG_KVSTORE 1 //in KVstore -#define DEBUG_VSTREE 1 //in Database +//#define DEBUG_VSTREE 1 //in Database //#define DEBUG_LRUCACHE 1 //#define DEBUG_DATABASE 1 //in Database // @@ -218,7 +219,9 @@ public: static std::string getTimeString(); static std::string node2string(const char* _raw_str); - static bool is_literal_ele(int); + static bool is_literal_ele(unsigned id); + static bool is_entity_ele(unsigned id); + static int removeDuplicate(int*, int); static std::string getQueryFromFile(const char* _file_path); static std::string getSystemOutput(std::string cmd); @@ -279,6 +282,10 @@ public: static FILE* debug_database; static FILE* debug_vstree; + static std::string gserver_port_file; + static std::string gserver_port_swap; + static std::string gserver_log; + private: static bool isValidIPV4(std::string); diff --git a/logs/.gitignore b/logs/.gitignore new file mode 100644 index 0000000..397b4a7 --- /dev/null +++ b/logs/.gitignore @@ -0,0 +1 @@ +*.log diff --git a/makefile b/makefile index 2d27bda..da62f17 100644 --- a/makefile +++ b/makefile @@ -70,6 +70,7 @@ api_java = api/java/lib/GstoreJavaAPI.jar #sstreeobj = $(objdir)Tree.o $(objdir)Storage.o $(objdir)Node.o $(objdir)IntlNode.o $(objdir)LeafNode.o $(objdir)Heap.o sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SIIntlNode.o $(objdir)SILeafNode.o $(objdir)SIHeap.o istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o +ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj) @@ -217,6 +218,26 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $ $(CC) $(CFLAGS) 
KVstore/ISTree/heap/ISHeap.cpp -o $(objdir)ISHeap.o #objects in istree/ end +#objects in ivtree/ begin +$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o + $(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o + +$(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/storage/IVStorage.cpp -o $(objdir)IVStorage.o $(def64IO) + +$(objdir)IVNode.o: KVstore/IVTree/node/IVNode.cpp KVstore/IVTree/node/IVNode.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/node/IVNode.cpp -o $(objdir)IVNode.o + +$(objdir)IVIntlNode.o: KVstore/IVTree/node/IVIntlNode.cpp KVstore/IVTree/node/IVIntlNode.h + $(CC) $(CFLAGS) KVstore/IVTree/node/IVIntlNode.cpp -o $(objdir)IVIntlNode.o + +$(objdir)IVLeafNode.o: KVstore/IVTree/node/IVLeafNode.cpp KVstore/IVTree/node/IVLeafNode.h + $(CC) $(CFLAGS) KVstore/IVTree/node/IVLeafNode.cpp -o $(objdir)IVLeafNode.o + +$(objdir)IVHeap.o: KVstore/IVTree/heap/IVHeap.cpp KVstore/IVTree/heap/IVHeap.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/heap/IVHeap.cpp -o $(objdir)IVHeap.o +#objects in ivtree/ end + $(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h KVstore/Tree.h $(CC) $(CFLAGS) KVstore/KVstore.cpp $(inc) -o $(objdir)KVstore.o diff --git a/package.json b/package.json new file mode 100644 index 0000000..fe9e70c --- /dev/null +++ b/package.json @@ -0,0 +1,12 @@ +{ + "config": { + "ghooks": { + "commit-msg": "validate-commit-msg" + } + }, + + "scripts": { + "changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0", + "changelog": "conventional-changelog -p angular -i CHANGELOG.md -w" + } +} diff --git a/test/package.json b/test/package.json new file mode 100644 index 0000000..7ba7f79 --- /dev/null +++ b/test/package.json @@ -0,0 +1,15 @@ +{ + "config": { + "ghooks": { + //"pre-commit": "gulp lint", + "commit-msg": "validate-commit-msg", + //"pre-push": "make test", + //"post-merge": "npm 
install", + //"post-rewrite": "npm install", + } + } + "scripts": { + "changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0", + "changelog": "conventional-changelog -p angular -i CHANGELOG.md -w", + } +}