diff --git a/.gitignore b/.gitignore index 70c39fd..7f3dbbb 100644 --- a/.gitignore +++ b/.gitignore @@ -91,6 +91,10 @@ tags *.out *.bak~ +# queries +*.sql +*.sh + # modules node_modules diff --git a/Database/Database.cpp b/Database/Database.cpp index 875d140..b48ccbe 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -743,6 +743,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) #ifdef DEBUG cout<<"query success_num: "<kvstore->getEntityByID(0)<triple_num will be not right, and _p_id_tuples will save useless triples + //However, we can not use exist_triple to detect duplicates here, because it is too time-costly + // For id_tuples //_p_id_tuples[_id_tuples_size] = new TYPE_ENTITY_LITERAL_ID[3]; //_p_id_tuples[_id_tuples_size][0] = _sub_id; diff --git a/Database/Join.cpp b/Database/Join.cpp index 31471fb..67d8786 100644 --- a/Database/Join.cpp +++ b/Database/Join.cpp @@ -133,7 +133,8 @@ Join::judge(unsigned _smallest, unsigned _biggest) //BETTER:how to guess the size of can_lists double size = (_smallest + _biggest) / 2.0; double ans = Join::PARAM_DENSE * dense - size / Join::PARAM_SIZE; - if (ans > Join::JUDGE_LIMIT) + double limit = 1.0 / (double)Join::JUDGE_LIMIT; + if (ans > limit) return 0; //multi_join method else return 1; //index_join method @@ -984,6 +985,11 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* i } } +//TODO: multiple lists intersect, how about sort and intersect from small to big? +//but this need to generate all first, I think sort by pre2num if better! +// +//TODO: set the entity_literal border in kvstore, and intersect entity part and literal part respectively + //NOTICE: consider two directions according to table1 size and table2 size //1. -> add ID mapping record for the first linking column, whole(offset, size) zengli //2. 
<- join using inverted index for each column, offset and size for each column, hulin diff --git a/Database/Join.h b/Database/Join.h index 0779bc3..13deb5e 100644 --- a/Database/Join.h +++ b/Database/Join.h @@ -55,7 +55,12 @@ private: static const unsigned PARAM_SIZE = 1000000; static const unsigned PARAM_PRE = 10000; static const unsigned PARAM_DENSE = 1; - static const double JUDGE_LIMIT = 0.5; + + static const unsigned JUDGE_LIMIT = 2; + //NOTICE+DEBUG: please use constexpr below instead of the phase above(constexpr is supported in C++11) + //http://www.cnblogs.com/wanyuanchun/p/4041080.html + //constexpr static const double JUDGE_LIMIT = 0.5; + static const unsigned LIMIT_CANDIDATE_LIST_SIZE = 1000; //BETTER?:predefine size to avoid copy cost TableType current_table; diff --git a/KVstore/ISTree/ISTree.cpp b/KVstore/ISTree/ISTree.cpp index 86cb472..7ddff0d 100644 --- a/KVstore/ISTree/ISTree.cpp +++ b/KVstore/ISTree/ISTree.cpp @@ -20,7 +20,7 @@ ISTree::ISTree() TSM = NULL; storepath = ""; filename = ""; - transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; + //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; this->stream = NULL; this->request = 0; } @@ -37,10 +37,10 @@ ISTree::ISTree(string _storepath, string _filename, string _mode, unsigned long this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail); else this->root = NULL; - this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M + //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; 
//initialied to 1M this->stream = NULL; this->request = 0; } @@ -51,30 +51,30 @@ ISTree::getFilePath() return storepath + "/" + filename; } -void //WARN: not check _str and _len -ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) -{ - if (_index > 2) - return; - /* - if(_str == NULL || _len == 0) - { - printf("error in CopyToTransfer: empty string\n"); - return; - } - */ - //unsigned length = _bstr->getLen(); - unsigned length = _len; - if (length + 1 > this->transfer_size[_index]) - { - transfer[_index].release(); - transfer[_index].setStr((char*)malloc(length + 1)); - this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 - } - memcpy(this->transfer[_index].getStr(), _str, length); - this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore - this->transfer[_index].setLen(length); -} +//void //WARN: not check _str and _len +//ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + //transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} unsigned ISTree::getHeight() const @@ -116,21 +116,25 @@ ISTree::search(unsigned _key, char*& _str, unsigned& _len) this->request = 0; int store; ISNode* ret = this->find(_key, &store, false); + //cout<<"to find the position: "<getKey(store)) //tree is empty or not found { return false; } const Bstr* val = ret->getValue(store); - 
this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request - _str = this->transfer[0].getStr(); - _len = this->transfer[0].getLen(); + //this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request + //_str = this->transfer[0].getStr(); + //_len = this->transfer[0].getLen(); + _str = val->getStr(); + _len = val->getLen(); + this->TSM->request(request); return true; } bool -ISTree::insert(unsigned _key, const char* _str, unsigned _len) +ISTree::insert(unsigned _key, char* _str, unsigned _len) { //if (_key < 0) //{ @@ -138,8 +142,8 @@ ISTree::insert(unsigned _key, const char* _str, unsigned _len) //return false; //} - this->CopyToTransfer(_str, _len, 2); - const Bstr* val = &(this->transfer[2]); + //this->CopyToTransfer(_str, _len, 2); + //const Bstr* val = &(this->transfer[2]); this->request = 0; ISNode* ret; if (this->root == NULL) //tree is empty @@ -223,20 +227,24 @@ ISTree::insert(unsigned _key, const char* _str, unsigned _len) else { p->addKey(_key, i); - p->addValue(val, i, true); + p->addValue(_str, _len, i, true); p->addNum(); - request += val->getLen(); + request += _len; p->setDirty(); this->TSM->updateHeap(p, p->getRank(), true); //_key->clear(); //_value->clear(); } this->TSM->request(request); + //if(_key == 0) + //{ + //cout<<"the 0th element is: "<<_str[0]<CopyToTransfer(_str, _len, 2); //not check value - const Bstr* val = &(this->transfer[2]); + //this->CopyToTransfer(_str, _len, 2); //not check value + //const Bstr* val = &(this->transfer[2]); this->request = 0; int store; ISNode* ret = this->find(_key, &store, true); @@ -256,16 +264,17 @@ ISTree::modify(unsigned _key, const char* _str, unsigned _len) } //cout<<"ISTree::modify() - key is found, now to remove"<getValue(store)->getLen(); - ret->setValue(val, store, true); + ret->setValue(_str, _len, store, true); //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); - this->request = val->getLen(); + this->request = _len; this->request -= len; ret->setDirty(); 
//cout<<"to request"<TSM->request(request); //cout<<"memory requested"<getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + this->values[_index].release(); //NOTICE: only used in modify + + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + + return true; +} + +bool +ISLeafNode::addValue(char* _str, unsigned _len, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + + return true; +} + bool ISLeafNode::subValue(int _index, bool ifdel) { diff --git a/KVstore/ISTree/node/ISLeafNode.h b/KVstore/ISTree/node/ISLeafNode.h index e28d569..b83c471 100644 --- a/KVstore/ISTree/node/ISLeafNode.h +++ b/KVstore/ISTree/node/ISLeafNode.h @@ -27,6 +27,7 @@ public: void Normal(); ISNode* getPrev() const; ISNode* getNext() const; + const Bstr* getValue(int _index) const; bool setValue(const Bstr* _value, int _index, bool ifcopy = false); bool addValue(const Bstr* _value, int _index, bool ifcopy = false); @@ -34,6 +35,10 @@ public: void setPrev(ISNode* _prev); void setNext(ISNode* _next); unsigned getSize() const; + + bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false); + bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false); + ISNode* split(ISNode* _father, int _index); ISNode* coalesce(ISNode* _father, int _index); void release(); diff --git a/KVstore/ISTree/node/ISNode.h b/KVstore/ISTree/node/ISNode.h index c332377..a5f792d 100644 --- a/KVstore/ISTree/node/ISNode.h +++ b/KVstore/ISTree/node/ISNode.h @@ -80,12 +80,18 @@ public: virtual bool subChild(int _index) { return true; }; virtual ISNode* getPrev() const { return 
NULL; }; virtual ISNode* getNext() const { return NULL; }; + virtual const Bstr* getValue(int _index) const { return NULL; }; virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; }; virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; }; virtual bool subValue(int _index, bool ifdel = false) { return true; }; virtual void setPrev(ISNode* _prev) {}; virtual void setNext(ISNode* _next) {}; + + virtual bool setValue(const char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; }; + virtual bool addValue(const char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; }; + + //pure virtual function virtual void Virtual() = 0; virtual void Normal() = 0; virtual unsigned getSize() const = 0; //return all memory owned diff --git a/KVstore/ISTree/storage/ISStorage.cpp b/KVstore/ISTree/storage/ISStorage.cpp index b58a3e5..46c1eee 100644 --- a/KVstore/ISTree/storage/ISStorage.cpp +++ b/KVstore/ISTree/storage/ISStorage.cpp @@ -399,7 +399,13 @@ ISStorage::writeNode(ISNode* _np) { //to write all values for (i = 0; i < num; ++i) + { this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock); + if(_np->getKey(0) == 0) + { + cout<<"the 0th value: "<<_np->getValue(i)->getStr()[0]<treefp); this->ReadAlign(_next); //this->request(len); - char* s = (char*)malloc(len); + //char* s = (char*)malloc(len); + char* s = new char[len]; _bp->setLen(len); for (i = 0; i + 4 < len; i += 4) { diff --git a/KVstore/IVTree/IVTree.cpp b/KVstore/IVTree/IVTree.cpp new file mode 100644 index 0000000..63aebba --- /dev/null +++ b/KVstore/IVTree/IVTree.cpp @@ -0,0 +1,702 @@ +/*============================================================================= +# Filename: IVTree.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:45 +# Description: achieve functions in IVTree.h +=============================================================================*/ + +#include "IVTree.h" 
+ +using namespace std; + +IVTree::IVTree() +{ + height = 0; + mode = ""; + root = NULL; + leaves_head = NULL; + leaves_tail = NULL; + TSM = NULL; + storepath = ""; + filename = ""; + //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; + //transfer_size = 0; + this->stream = NULL; + this->request = 0; + this->value_list = NULL; +} + +IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long long _buffer_size) +{ + storepath = _storepath; + filename = _filename; + this->height = 0; + this->mode = string(_mode); + string filepath = this->getFilePath(); + + string vlist_file = filepath + "_vlist"; + this->value_list = new VList(vlist_file, this->mode, 1<<30); + + TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list); + if (this->mode == "open") + this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail); + else + this->root = NULL; + + //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M + //this->transfer.setStr((char*)malloc(Util::TRANSFER_SIZE)); + + this->stream = NULL; + this->request = 0; +} + +string +IVTree::getFilePath() +{ + return storepath + "/" + filename; +} + +//void //WARN: not check _str and _len +//IVTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + //transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to 
add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} + +unsigned +IVTree::getHeight() const +{ + return this->height; +} + +void +IVTree::setHeight(unsigned _h) +{ + this->height = _h; +} + +IVNode* +IVTree::getRoot() const +{ + return this->root; +} + +void +IVTree::prepare(IVNode* _np) +{ + //this->request = 0; + bool flag = _np->inMem(); + if (!flag) + { + this->TSM->readNode(_np, &request); //readNode deal with request + } +} + +bool +IVTree::search(unsigned _key, char*& _str, unsigned& _len) +{ + //if (_key < 0) + //{ + //printf("error in IVTree-search: empty string\n"); + //return false; + //} + + this->request = 0; + int store; + IVNode* ret = this->find(_key, &store, false); + if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found + { + return false; + } + + ret->getValue(this->value_list, store, _str, _len); + //const Bstr* val = ret->getValue(store); + //this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request + //_str = this->transfer[0].getStr(); + //_len = this->transfer[0].getLen(); + + this->TSM->request(request); + return true; +} + +bool +IVTree::insert(unsigned _key, char* _str, unsigned _len) +{ + //if (_key < 0) + //{ + //printf("error in IVTree-insert: empty string\n"); + //return false; + //} + + //this->CopyToTransfer(_str, _len, 2); + //const Bstr* val = &(this->transfer[2]); + this->request = 0; + IVNode* ret; + if (this->root == NULL) //tree is empty + { + leaves_tail = leaves_head = root = new IVLeafNode; + request += IVNode::LEAF_SIZE; + this->height = 1; + root->setHeight(1); //add to heap later + } + + //this->prepare(this->root); //root must be in-mem + if (root->getNum() == IVNode::MAX_KEY_NUM) + { + IVNode* father = new IVIntlNode; + request += IVNode::INTL_SIZE; + father->addChild(root, 0); + ret = root->split(father, 0); + if 
(ret->isLeaf() && ret->getNext() == NULL) + this->leaves_tail = ret; + if (ret->isLeaf()) + request += IVNode::LEAF_SIZE; + else + request += IVNode::INTL_SIZE; + this->height++; //height rises only when root splits + //WARN: height area in Node: 4 bit! + father->setHeight(this->height); //add to heap later + this->TSM->updateHeap(ret, ret->getRank(), false); + this->root = father; + } + + IVNode* p = this->root; + IVNode* q; + int i; + while (!p->isLeaf()) + { + //j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + //NOTICE: using binary search is better here + i = p->searchKey_less(_key); + + q = p->getChild(i); + this->prepare(q); + if (q->getNum() == IVNode::MAX_KEY_NUM) + { + ret = q->split(p, i); + if (ret->isLeaf() && ret->getNext() == NULL) + this->leaves_tail = ret; + if (ret->isLeaf()) + request += IVNode::LEAF_SIZE; + else + request += IVNode::INTL_SIZE; + //BETTER: in loop may update multiple times + this->TSM->updateHeap(ret, ret->getRank(), false); + this->TSM->updateHeap(q, q->getRank(), true); + this->TSM->updateHeap(p, p->getRank(), true); + if (_key < p->getKey(i)) + p = q; + else + p = ret; + } + else + { + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + p = q; + } + } + //j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + //insert existing key is ok, but not inserted in + //however, the tree-shape may change due to possible split in former code + bool ifexist = false; + if (i > 0 && _key == p->getKey(i - 1)) + ifexist = true; + else + { + p->addKey(_key, i); + p->addValue(this->value_list, i, _str, _len, true); + p->addNum(); + //NOTICE: is this is a vlist, then it will be freed, and should not be included in the request memory + if(!VList::isLongList(_len)) + { + request += _len; + } + //request += val->getLen(); + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + //_key->clear(); + //_value->clear(); + } + + 
this->TSM->request(request); + return !ifexist; //QUERY(which case:return false) +} + +bool +IVTree::modify(unsigned _key, char* _str, unsigned _len) +{ + //if (_key < 0) + //{ + //printf("error in IVTree-modify: empty string\n"); + //return false; + //} + + //this->CopyToTransfer(_str, _len, 2); //not check value + //const Bstr* val = &(this->transfer[2]); + this->request = 0; + int store; + IVNode* ret = this->find(_key, &store, true); + if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found + { + cerr << "tree is empty or not found" << endl; + return false; + } + //cout<<"IVTree::modify() - key is found, now to remove"<getValue(store)->getLen(); + if(ret->getValue(store)->isBstrLongList()) + { + len = 0; + } + ret->setValue(this->value_list, store, _str, _len, true); + //ret->setValue(val, store, true); + //cout<<"value reset"<getLen()<<" oldlen: "<getLen() - len); + if(!VList::isLongList(_len)) + { + this->request += _len; + } + //this->request = val->getLen(); + this->request -= len; + ret->setDirty(); + //cout<<"to request"<TSM->request(request); + //cout<<"memory requested"<= *_key +IVTree::find(unsigned _key, int* _store, bool ifmodify) +{ //to assign value for this->bstr, function shouldn't be const! 
+ if (this->root == NULL) + return NULL; //IVTree Is Empty + + IVNode* p = root; + int i, j; + while (!p->isLeaf()) + { + if (ifmodify) + p->setDirty(); + //j = p->getNum(); + //for(i = 0; i < j; ++i) //BETTER(Binary-Search) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + p = p->getChild(i); + this->prepare(p); + } + + j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr <= *(p->getKey(i))) + //break; + i = p->searchKey_lessEqual(_key); + + if (i == j) + *_store = -1; //Not Found + else + *_store = i; + + return p; +} + +/* +Node* +IVTree::find(unsigned _len, const char* _str, int* store) const +{ +} +*/ + +bool +IVTree::remove(unsigned _key) +{ + //if (_key < 0) + //{ + //printf("error in IVTree-remove: empty string\n"); + //return false; + //} + + this->request = 0; + IVNode* ret; + if (this->root == NULL) //tree is empty + return false; + + IVNode* p = this->root; + IVNode* q; + int i, j; + while (!p->isLeaf()) + { + j = p->getNum(); + //for(i = 0; i < j; ++i) + //if(bstr < *(p->getKey(i))) + //break; + i = p->searchKey_less(_key); + + q = p->getChild(i); + this->prepare(q); + if (q->getNum() < IVNode::MIN_CHILD_NUM) //==MIN_KEY_NUM + { + if (i > 0) + this->prepare(p->getChild(i - 1)); + if (i < j) + this->prepare(p->getChild(i + 1)); + ret = q->coalesce(p, i); + if (ret != NULL) + this->TSM->updateHeap(ret, 0, true);//non-sense node + this->TSM->updateHeap(q, q->getRank(), true); + if (q->isLeaf()) + { + if (q->getPrev() == NULL) + this->leaves_head = q; + if (q->getNext() == NULL) + this->leaves_tail = q; + } + if (p->getNum() == 0) //root shrinks + { + //this->leaves_head = q; + this->root = q; + this->TSM->updateHeap(p, 0, true); //instead of delete p + this->height--; + } + } + else + p->setDirty(); + this->TSM->updateHeap(p, p->getRank(), true); + p = q; + } + + bool flag = false; + //j = p->getNum(); //LeafNode(maybe root) + //for(i = 0; i < j; ++i) + // if(bstr == *(p->getKey(i))) + // { + // request -= 
p->getKey(i)->getLen(); + // request -= p->getValue(i)->getLen(); + // p->subKey(i, true); //to release + // p->subValue(i, true); //to release + // p->subNum(); + // if(p->getNum() == 0) //root leaf 0 key + // { + // this->root = NULL; + // this->leaves_head = NULL; + // this->leaves_tail = NULL; + // this->height = 0; + // this->TSM->updateHeap(p, 0, true); //instead of delete p + // } + // p->setDirty(); + // flag = true; + // break; + // } + i = p->searchKey_equal(_key); + //WARN+NOTICE:here must check, because the key to remove maybe not exist + if (i != (int)p->getNum()) + { + if(!p->getValue(i)->isBstrLongList()) + { + request -= p->getValue(i)->getLen(); + } + p->subKey(i); //to release + p->subValue(this->value_list, i, true); //to release + p->subNum(); + if (p->getNum() == 0) //root leaf 0 key + { + this->root = NULL; + this->leaves_head = NULL; + this->leaves_tail = NULL; + this->height = 0; + this->TSM->updateHeap(p, 0, true); //instead of delete p + } + p->setDirty(); + flag = true; + } + + this->TSM->request(request); + return flag; //i == j, not found +} + +const Bstr* +IVTree::getRangeValue() +{ + if (this->stream == NULL) + { + fprintf(stderr, "IVTree::getRangeValue(): no results now!\n"); + return NULL; + } + if (this->stream->isEnd()) + { + fprintf(stderr, "IVTree::getRangeValue(): read till end now!\n"); + return NULL; + } + //NOTICE:this is one record, and donot free the memory! 
+ //NOTICE:Bstr[] but only one element, used as Bstr* + return this->stream->read(); +} + +void +IVTree::resetStream() +{ + if (this->stream == NULL) + { + fprintf(stderr, "no results now!\n"); + return; + } + this->stream->setEnd(); +} + +//TODO: change to using value list, getValue() maybe not get real long list +bool //special case: not exist, one-edge-case +IVTree::range_query(unsigned _key1, unsigned _key2) +{ //the range is: *_key1 <= x < *_key2 + //if(_key1 <0 && _key2 <0) + //return false; + //ok to search one-edge, requiring only one be negative + //find and write value + int store1, store2; + IVNode *p1, *p2; + if (_key1 >= 0) + { + request = 0; + p1 = this->find(_key1, &store1, false); + if (p1 == NULL || store1 == -1) + return false; //no element + this->TSM->request(request); + } + else + { + p1 = this->leaves_head; + store1 = 0; + } + if (_key2 >= 0) + { //QUERY: another strategy is to getnext and compare every time to tell end + request = 0; + p2 = this->find(_key2, &store2, false); + if (p2 == NULL) + return false; + else if (store2 == -1) + store2 = p2->getNum(); + else if (store2 == 0) + { + p2 = p2->getPrev(); + if (p2 == NULL) + return false; //no element + store2 = p2->getNum(); + } + this->TSM->request(request); + } + else + { + p2 = this->leaves_tail; + store2 = p2->getNum(); + } + + IVNode* p = p1; + unsigned i, l, r; + //get the num of answers first, not need to prepare the node + unsigned ansNum = 0; + while (true) + { + //request = 0; + //this->prepare(p); + if (p == p1) + l = store1; + else + l = 0; + if (p == p2) + r = store2; + else + r = p->getNum(); + ansNum += (r - l); + //this->TSM->request(request); + if (p != p2) + p = p->getNext(); + else + break; + } + + if (this->stream != NULL) + { + delete this->stream; + this->stream = NULL; + } + vector keys; + vector desc; + this->stream = new Stream(keys, desc, ansNum, 1, false); + + p = p1; + while (1) + { + request = 0; + this->prepare(p); + if (p == p1) + l = store1; + else + l = 0; + 
if (p == p2) + r = store2; + else + r = p->getNum(); + for (i = l; i < r; ++i) + { + //NOTICE:Bstr* in an array, used as Bstr[] + //DEBUG+TODO: if long list?? clean + this->stream->write(p->getValue(i)); + } + this->TSM->request(request); + if (p != p2) + p = p->getNext(); + else + break; + } + + this->stream->setEnd(); + + return true; +} + +bool +IVTree::save() //save the whole tree to disk +{ +#ifdef DEBUG_KVSTORE + printf("now to save tree!\n"); +#endif + if (TSM->writeTree(this->root)) + return true; + else + return false; +} + +void +IVTree::release(IVNode* _np) const +{ + if (_np == NULL) return; + if (_np->isLeaf()) + { + delete _np; + return; + } + int cnt = _np->getNum(); + for (; cnt >= 0; --cnt) + release(_np->getChild(cnt)); + delete _np; +} + +IVTree::~IVTree() +{ + delete this->value_list; + + delete this->stream; //maybe NULL + delete TSM; +#ifdef DEBUG_KVSTORE + printf("already empty the buffer, now to delete all nodes in tree!\n"); +#endif + //recursively delete each Node + release(root); +} + +void +IVTree::print(string s) +{ +#ifdef DEBUG_KVSTORE + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVTree\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); + fprintf(Util::debug_kvstore, "Height: %d\n", this->height); + if (s == "tree" || s == "TREE") + { + if (this->root == NULL) + { + fputs("Null IVTree\n", Util::debug_kvstore); + return; + } + IVNode** ns = new IVNode*[this->height]; + int* ni = new int[this->height]; + IVNode* np; + int i, pos = 0; + ns[pos] = this->root; + ni[pos] = this->root->getNum(); + pos++; + while (pos > 0) + { + np = ns[pos - 1]; + i = ni[pos - 1]; + this->prepare(np); + if (np->isLeaf() || i < 0) //LeafNode or ready IntlNode + { //child-num ranges: 0~num + if (s == "tree") + np->print("node"); + else + np->print("NODE"); //print full node-information + pos--; + continue; + } + else + { + ns[pos] = 
np->getChild(i); + ni[pos - 1]--; + ni[pos] = ns[pos]->getNum(); + pos++; + } + } + delete[] ns; + delete[] ni; + } + else if (s == "LEAVES" || s == "leaves") + { + IVNode* np; + for (np = this->leaves_head; np != NULL; np = np->getNext()) + { + this->prepare(np); + if (s == "leaves") + np->print("node"); + else + np->print("NODE"); + } + } + else if (s == "check tree") + { + //check the tree, if satisfy B+ definition + //TODO + } + else; +#endif +} + diff --git a/KVstore/IVTree/IVTree.h b/KVstore/IVTree/IVTree.h new file mode 100644 index 0000000..11184fe --- /dev/null +++ b/KVstore/IVTree/IVTree.h @@ -0,0 +1,98 @@ +/*============================================================================= +# Filename: IVTree.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:44 +# Description: ID2valueList, including s2po, p2so and o2ps +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_IVTREE_H +#define _KVSTORE_IVTREE_IVTREE_H + +#include "../../Util/Util.h" +#include "../../Util/Stream.h" +#include "../../Util/VList.h" +#include "node/IVNode.h" +#include "node/IVIntlNode.h" +#include "node/IVLeafNode.h" +#include "storage/IVStorage.h" + +//TODO: for long list, do not read in time, just on need +//the memory is kept with the node, updat ewith node +//NOTICE: to release the node, maybe the value list is NULL +//value bstr: unsigned=address, NULL +//BETTER?: build a new block store for long list?? 
+ +//NOTICE: we do not need to use transfer bstr here, neithor for two directions +//when insert/query, we do not release the value in kvstore + +class IVTree +{ +protected: + unsigned height; //0 indicates an empty tree + IVNode* root; + IVNode* leaves_head; //the head of LeafNode-list + IVNode* leaves_tail; //the tail of LeafNode-list + std::string mode; //BETTER(to use enum) + IVStorage* TSM; //Tree-Storage-Manage + //BETTER:multiple stream maybe needed:) + Stream* stream; + + //always alloc one more byte than length, then user can add a '\0' + //to get a real string, instead of new and copy + //other operations will be harmful to search, so store value in + //transfer temporally, while length adjusted. + //TODO: in multi-user case, multiple-search will cause problem, + //so lock is a must. Add lock to transfer is better than to add + //lock to every key/value. However, modify requires a lock for a + //key/value, and multiple search for different keys are ok!!! + //Bstr transfer; + //unsigned transfer_size; + //Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char* + //unsigned transfer_size[3]; + + //tree's operations should be atom(if read nodes) + //sum the request and send to IVStorage at last + //ensure that all nodes operated are in memory + long long request; + void prepare(IVNode* _np); + + std::string storepath; + std::string filename; //ok for user to change + /* some private functions */ + std::string getFilePath(); //in UNIX system + //void CopyToTransfer(const char* _str, unsigned _len, unsigned _index); + //void CopyToTransfer(const char* _str, unsigned _len); + void release(IVNode* _np) const; + + //very long value list are stored in a separate file(with large block) + // + //NOTICE: according to the summary result, 90% value lists are just below 100 bytes + //<10%: 5000000~100M bytes + VList* value_list; + +public: + IVTree(); //always need to initial transfer + IVTree(std::string 
_storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size); + unsigned getHeight() const; + void setHeight(unsigned _h); + IVNode* getRoot() const; + //void setRoot(Node* _root); + //insert, search, remove, set + bool search(unsigned _key, char*& _str, unsigned& _len); + bool insert(unsigned _key, char* _str, unsigned _len); + bool modify(unsigned _key, char* _str, unsigned _len); + IVNode* find(unsigned _key, int* store, bool ifmodify); + bool remove(unsigned _key); + const Bstr* getRangeValue(); + void resetStream(); + bool range_query(unsigned _key1, unsigned _key2); + bool save(); + ~IVTree(); + void print(std::string s); //DEBUG(print the tree) +}; +//NOTICE: need to save tree manually before delete, otherwise will cause problem. +//(problem range between two extremes: not-modified, totally-modified) +//After saved, it's ok to continue operations on tree! + +#endif diff --git a/KVstore/IVTree/heap/IVHeap.cpp b/KVstore/IVTree/heap/IVHeap.cpp new file mode 100644 index 0000000..5cc291f --- /dev/null +++ b/KVstore/IVTree/heap/IVHeap.cpp @@ -0,0 +1,186 @@ +/*============================================================================= +# Filename: IVHeap.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:37 +# Description: achieve functions in IVHeap.h +=============================================================================*/ + +#include "IVHeap.h" + +using namespace std; + +IVHeap::IVHeap() +{ + this->length = this->size = 0; + this->heap = NULL; +} + +IVHeap::IVHeap(unsigned _size) +{ + this->length = 0; + this->size = _size; + //this->heap = (Node**)malloc(this->size * sizeof(Node*)); //not use 4 or 8 + this->heap = new IVNode*[this->size]; + if (this->heap == NULL) + { + this->print("error in IVHeap: Allocation fail!"); + exit(1); + } + /* + this->npmap = (Map*)malloc(this->size * sizeof(struct Map)); + if(this->npmap == NULL) + { + this->print("error in IVHeap: Allocation fail!"); + exit(1); + } + 
*/ +} + +IVNode* +IVHeap::getTop() const +{ + if (this->length > 0) + return this->heap[0]; + else + return NULL; +} + +unsigned +IVHeap::getLen() const +{ + return this->length; +} + +unsigned +IVHeap::getSize() const +{ + return this->size; +} + +bool +IVHeap::isEmpty() const +{ + return this->length == 0; +} + +bool +IVHeap::insert(IVNode* _np) +{ + if (this->length == this->size) //when full, reallocate + { + this->heap = (IVNode**)realloc(this->heap, 2 * this->size * sizeof(IVNode*)); + if (this->heap == NULL) + { + print("error in isert: Reallocation fail!"); + return false; + } + /* + this->npmap = (struct Map*)realloc(this->npmap, 2 * this->size * sizeof(struct Map)); + if(this->npmap == NULL) + { + print("error in insert: Reallocation fail!"); + return false; + } + */ + this->size = 2 * this->size; + } + unsigned i = this->length, j; + while (i != 0) + { + j = (i - 1) / 2; + if (_np->getRank() >= this->heap[j]->getRank()) + break; + heap[i] = heap[j]; + //this->npmap[k].pos = i; //adjust the position + i = j; + } + this->heap[i] = _np; + this->length++; + return true; +} + +bool +IVHeap::remove() +{ + if (this->length == 0) + { + print("error in remove: remove from empty heap!"); + return false; + } + //Node* tp = this->heap[0]; + this->length--; + if (this->length == 0) + return true; + IVNode* xp = this->heap[this->length]; + unsigned i = 0, j = 1; + while (j < this->length) + { + if (j < this->length - 1 && this->heap[j]->getRank() > this->heap[j + 1]->getRank()) + j++; + if (xp->getRank() <= this->heap[j]->getRank()) + break; + this->heap[i] = this->heap[j]; + i = j; + j = 2 * i + 1; + } + this->heap[i] = xp; + return true; +} + +bool +IVHeap::modify(IVNode* _np, bool _flag) //control direction +{ + //search and adjust + unsigned i, j; + for (i = 0; i < this->length; ++i) + if (this->heap[i] == _np) + break; + if (_flag == true) //move up + { + while (i != 0) + { + j = (i - 1) / 2; + if (_np->getRank() < heap[j]->getRank()) + { + heap[i] = heap[j]; + 
heap[j] = _np; + i = j; + } + else + break; + } + } + else //move down + { + j = 2 * i + 1; + while (j < this->length) + { + //choose the child with the smaller rank + if (j < this->length - 1 && heap[j]->getRank() > heap[j + 1]->getRank()) + j++; + if (heap[j]->getRank() < _np->getRank()) + { + heap[i] = heap[j]; + heap[j] = _np; + i = j; + //NOTICE: continue with the children of the new position(same step as in remove()), + //otherwise the node only sinks one level and the heap order is broken + j = 2 * i + 1; + } + else + break; + } + } + return true; +} + +IVHeap::~IVHeap() +{ + delete[] this->heap; + this->heap = NULL; + this->length = this->size = 0; +} + +void +IVHeap::print(string s) +{ +#ifdef DEBUG_KVSTORE +#endif +} diff --git a/KVstore/IVTree/heap/IVHeap.h b/KVstore/IVTree/heap/IVHeap.h new file mode 100644 index 0000000..0e418fd --- /dev/null +++ b/KVstore/IVTree/heap/IVHeap.h @@ -0,0 +1,41 @@ +/*============================================================================= +# Filename: IVHeap.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:37 +# Description: set and deal of IVNode*s in memory +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_HEAP_IVHEAP_H +#define _KVSTORE_IVTREE_HEAP_IVHEAP_H + +#include "../../../Util/Util.h" +#include "../node/IVNode.h" + +/* add, sub, modify: all can be done within O(logn) using adjust-function */ +//QUERY: when modified, finding right position consumes O(n). How about keeping smallest?
+//(add O(1), sub O(2n), modify O(n) +//TODO: to solve this problem, use another hash: (pointer, pos), to find the right position of +//given p in O(lgn) time + +class IVHeap +{ +private: + IVNode** heap; //dynamic array + unsigned length; //valid elements num + unsigned size; //max-size of heap +public: + IVHeap(); + IVHeap(unsigned _size); + IVNode* getTop() const; //return the top element + unsigned getLen() const; + unsigned getSize() const; + bool isEmpty() const; + bool insert(IVNode* _np); //insert and adjust + bool remove(); //remove top and adjust + bool modify(IVNode* _np, bool _flag); //search modified element and adjust + ~IVHeap(); + void print(std::string s); //DEBUG +}; + +#endif diff --git a/KVstore/IVTree/node/IVIntlNode.cpp b/KVstore/IVTree/node/IVIntlNode.cpp new file mode 100644 index 0000000..f5741a9 --- /dev/null +++ b/KVstore/IVTree/node/IVIntlNode.cpp @@ -0,0 +1,293 @@ +/*============================================================================= +# Filename: IVIntlNode.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:40 +# Description: achieve functions in IVIntlNode.h +=============================================================================*/ + +#include "IVIntlNode.h" + +using namespace std; + +/* +void +IVIntlNode::AllocChilds() +{ +childs = (Node**)malloc(sizeof(Node*) * MAX_CHILD_NUM); +} +*/ + +IVIntlNode::IVIntlNode() +{ + //NOTICE: childs is declared with MAX_CHILD_NUM + 1 slots, sizeof(childs) clears all of them + memset(childs, 0, sizeof(childs)); + //this->AllocChilds(); +} + +IVIntlNode::IVIntlNode(bool isVirtual) //call father-class's constructor automatically +{ + //NOTICE: childs is declared with MAX_CHILD_NUM + 1 slots, sizeof(childs) clears all of them + memset(childs, 0, sizeof(childs)); + //this->AllocChilds(); +} + +/* +IVIntlNode::IntlNode(Storage* TSM) //QUERY +{ +TSM->readNode(this, Storage::OVER); +} +*/ + +void +IVIntlNode::Virtual() +{ + //this->FreeKeys(); + this->release(); + this->delMem(); +} + +void +IVIntlNode::Normal() +{ + this->AllocKeys(); + this->setMem(); +} + +IVNode* +IVIntlNode::getChild(int _index) const +{ + int num
= this->getNum(); + if (_index < 0 || _index > num) //num keys, num+1 childs + { + //print(string("error in getChild: Invalid index ") + Util::int2string(_index)); + return NULL; + } + else + return childs[_index]; +} + +bool +IVIntlNode::setChild(IVNode* _child, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in setChild: Invalid index ") + Util::int2string(_index)); + return false; + } + this->childs[_index] = _child; + return true; +} + +bool +IVIntlNode::addChild(IVNode* _child, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num + 1) + { + print(string("error in addChild: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num; i >= _index; --i) //DEBUG: right bounder!!! + childs[i + 1] = childs[i]; + childs[_index] = _child; + return true; +} + +bool +IVIntlNode::subChild(int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in subchild: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = _index; i < num; ++i) //DEBUG: right bounder!!! 
+ childs[i] = childs[i + 1]; + return true; +} + +unsigned +IVIntlNode::getSize() const +{ + //unsigned sum = INTL_SIZE, num = this->getNum(), i; + //return sum; + return INTL_SIZE; +} + +IVNode* +IVIntlNode::split(IVNode* _father, int _index) +{ + int num = this->getNum(); + IVNode* p = new IVIntlNode; //right child + p->setHeight(this->getHeight()); + int i, k; + for (i = MIN_CHILD_NUM, k = 0; i < num; ++i, ++k) + { + p->addKey(this->keys[i], k); + p->addChild(this->childs[i], k); + p->addNum(); + } + p->addChild(this->childs[i], k); + int tp = this->keys[MIN_KEY_NUM]; + this->setNum(MIN_KEY_NUM); + _father->addKey(tp, _index); + _father->addChild(p, _index + 1); //DEBUG(check the index) + _father->addNum(); + _father->setDirty(); + p->setDirty(); + this->setDirty(); + return p; +} + +IVNode* +IVIntlNode::coalesce(IVNode* _father, int _index) +{ + //int num = this->getNum(); + int i, j = _father->getNum(), k; //BETTER: unsigned? + IVNode* p; + int ccase = 0; + //const Bstr* bstr; + if (_index < j) //the right neighbor + { + p = _father->getChild(_index + 1); + k = p->getNum(); + if ((unsigned)k > MIN_KEY_NUM) + ccase = 2; + else //==MIN_KEY_NUM + ccase = 1; + } + if (_index > 0) //the left neighbor + { + IVNode* tp = _father->getChild(_index - 1); + unsigned tk = tp->getNum(); + if (ccase < 2) + { + if (ccase == 0) + ccase = 3; + if (tk > MIN_KEY_NUM) + ccase = 4; + } + if (ccase > 2) + { + p = tp; + k = tk; + } + } + + unsigned tmp = 0; + switch (ccase) + { + case 1: //union right to this + this->addKey(_father->getKey(_index), this->getNum()); + this->addNum(); + for (i = 0; i < k; ++i) + { + this->addKey(p->getKey(i), this->getNum()); + this->addChild(p->getChild(i), this->getNum()); + this->addNum(); + } + this->setChild(p->getChild(i), this->getNum()); + _father->subKey(_index); + _father->subChild(_index + 1); + _father->subNum(); + p->setNum(0); + //delete p; + break; + case 2: //move one form right + this->addKey(_father->getKey(_index), this->getNum()); 
+ _father->setKey(p->getKey(0), _index); + p->subKey(0); + this->addChild(p->getChild(0), this->getNum() + 1); + p->subChild(0); + this->addNum(); + p->subNum(); + break; + case 3: //union left to this + this->addKey(_father->getKey(_index - 1), 0); + this->addNum(); + for (i = k; i > 0; --i) + { + int t = i - 1; + this->addKey(p->getKey(t), 0); + this->addChild(p->getChild(i), 0); + this->addNum(); + } + this->addChild(p->getChild(0), 0); + _father->subKey(_index - 1); + _father->subChild(_index - 1); + _father->subNum(); + p->setNum(0); + //delete p; + break; + case 4: //move one from left + tmp = p->getKey(k - 1); + p->subKey(k - 1); + this->addKey(_father->getKey(_index - 1), 0); + _father->setKey(tmp, _index - 1); + this->addChild(p->getChild(k), 0); + p->subChild(k); + this->addNum(); + p->subNum(); + break; + default: + print("error in coalesce: Invalid case!"); + //printf("error in coalesce: Invalid case!"); + } + _father->setDirty(); + p->setDirty(); + this->setDirty(); + if (ccase == 1 || ccase == 3) + return p; + else + return NULL; +} + +void +IVIntlNode::release() +{ + if (!this->inMem()) + return; + //unsigned num = this->getNum(); + delete[] keys; //this will release all!!! 
+} + +IVIntlNode::~IVIntlNode() +{ + release(); + //free(childs); +} + +void +IVIntlNode::print(string s) +{ +#ifdef DEBUG_KVSTORE + int num = this->getNum(); + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVIntlNode\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); + if (s == "node" || s == "NODE") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + /* + int i; + for (i = 0; i < num; ++i) + { + if (s == "node") + this->keys[i].print("bstr"); + else + this->keys[i].print("BSTR"); + } + */ + } + else if (s == "check node") + { + //TODO(check node, if satisfy B+ definition) + } + else; +#endif +} diff --git a/KVstore/IVTree/node/IVIntlNode.h b/KVstore/IVTree/node/IVIntlNode.h new file mode 100644 index 0000000..5d0932f --- /dev/null +++ b/KVstore/IVTree/node/IVIntlNode.h @@ -0,0 +1,48 @@ +/*============================================================================= +# Filename: IVIntlNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:40 +# Description: the internal-node of a B+ tree +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVINTLNODE_H +#define _KVSTORE_IVTREE_NODE_IVINTLNODE_H + +#include "IVNode.h" + +class IVIntlNode : public IVNode +{ +protected: + IVNode* childs[MAX_CHILD_NUM + 1]; + //Node** childs; + //void AllocChilds(); +public: + IVIntlNode(); + IVIntlNode(bool isVirtual); + //IntlNode(Storage* TSM); + void Virtual(); + void Normal(); + IVNode* getChild(int _index) const; + bool setChild(IVNode* _child, int _index); + bool addChild(IVNode* _child, int _index); + bool subChild(int _index); + unsigned getSize() const; + IVNode* split(IVNode* _father, int _index); + IVNode* coalesce(IVNode* _father, int _index); + void release(); + ~IVIntlNode(); + void print(std::string s); //DEBUG + 
/*non-sense functions: polymorphic + Node* getPrev() const; + Node* getNext() const; + const Bstr* getValue(int _index) const; + bool setValue(const Bstr* _value, int _index); + bool addValue(const Bstr* _value, int _index); + bool subValue(int _index); + void setPrev(Node* _prev); + void setNext(Node* _next); + */ +}; + +#endif diff --git a/KVstore/IVTree/node/IVLeafNode.cpp b/KVstore/IVTree/node/IVLeafNode.cpp new file mode 100644 index 0000000..f2ac757 --- /dev/null +++ b/KVstore/IVTree/node/IVLeafNode.cpp @@ -0,0 +1,538 @@ +/*============================================================================= +# Filename: IVLeafNode.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:40 +# Description: ahieve functions in IVLeafNode.h +=============================================================================*/ + +#include "IVLeafNode.h" + +using namespace std; + +void +IVLeafNode::AllocValues() +{ + values = new Bstr[MAX_KEY_NUM]; +} + +/* +void +IVLeafNode::FreeValues() +{ +delete[] values; +} +*/ + +IVLeafNode::IVLeafNode() +{ + flag |= NF_IL; //leaf flag + prev = next = NULL; + AllocValues(); +} + +IVLeafNode::IVLeafNode(bool isVirtual) +{ + flag |= NF_IL; + prev = next = NULL; + if (!isVirtual) + AllocValues(); +} + +/* +IVLeafNode::LeafNode(Storage* TSM) +{ +AllocValues(); +TSM->readNode(this, Storage::OVER); +} +*/ + +void +IVLeafNode::Virtual() +{ + //this->FreeKeys(); + //this->FreeValues(); + this->release(); + this->delMem(); +} + +void +IVLeafNode::Normal() +{ + this->AllocKeys(); + this->AllocValues(); + this->setMem(); +} + +IVNode* +IVLeafNode::getPrev() const +{ + return prev; +} + +IVNode* +IVLeafNode::getNext() const +{ + return next; +} + +const Bstr* +IVLeafNode::getValue(int _index) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getValue: Invalid index ") + Util::int2string(_index)); + return NULL; + } + else + return this->values + _index; +} + +bool 
+IVLeafNode::setValue(const Bstr* _value, int _index, bool _ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + this->values[_index].release(); //NOTICE: only used in modify + + if(_ifcopy) + { + this->values[_index].copy(_value); + } + else + { + this->values[_index] = *_value; + } + + return true; +} + +//Fetch the value at _index into _str/_len, returns false on an invalid index. +//For a long-list entry the Bstr length field holds the block number, and the value is read through _vlist. +bool +IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + //read long list + if(this->values[_index].isBstrLongList()) + { +#ifdef DEBUG_VLIST + cout<<"this is a vlist in get()"<<endl; +#endif + unsigned block_num = this->values[_index].getLen(); + _vlist->readValue(block_num, _str, _len); + } + else + { + _str = this->values[_index].getStr(); + _len = this->values[_index].getLen(); + } + + return true; +} + +//Replace the value at _index with (_str, _len): the old value is removed from _vlist(long list) +//or released(short one) first, then the new one is written to _vlist or stored in the Bstr directly. +bool +IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + if(this->values[_index].isBstrLongList()) + { +#ifdef DEBUG_VLIST + cout<<"this is a vlist in set()"<<endl; +#endif + unsigned block_num = this->values[_index].getLen(); + _vlist->removeValue(block_num); + } + else + { + this->values[_index].release(); //NOTICE: only used in modify + } + + //DEBUG: we do not need to copy here + //we just need to ensure that the pointer's memory is not released + + //if (ifcopy) + //{ + //this->values[_index].copy(_value); + //} + //else + //{ + //this->values[_index] = *_value; + if(VList::isLongList(_len)) + { + unsigned block_num = _vlist->writeValue(_str, _len); + this->values[_index].setStr(NULL); + this->values[_index].setLen(block_num); + //NOTICE: we need to free the long list value + delete[] _str; + } + else
+ { + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + } + //} + return true; +} + +//Insert (_str, _len) at _index, shifting later values right by one. +//A long list is written to _vlist (the Bstr keeps only the block number) and _str is freed here. +bool +IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + for (int i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + //if (ifcopy) + //this->values[_index].copy(_value); + //else + //this->values[_index] = *_value; + + if(VList::isLongList(_len)) + { +#ifdef DEBUG_VLIST + cout<<"this is a vlist in add()"<<endl; +#endif + unsigned block_num = _vlist->writeValue(_str, _len); + this->values[_index].setStr(NULL); + this->values[_index].setLen(block_num); + //NOTICE: we need to free the long list value + delete[] _str; +#ifdef DEBUG_VLIST + //cout<<"to check vlist: "<<this->values[_index].getLen()<<endl; +#endif + } + else + { + this->values[_index].setStr(_str); + this->values[_index].setLen(_len); + } + //this->values[_index].setStr(_str); + //this->values[_index].setLen(_len); + + return true; +} + +bool +IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + if(this->values[_index].isBstrLongList()) + { + unsigned block_num = this->values[_index].getLen(); + _vlist->removeValue(block_num); + } + else + { + if (ifdel) + { + values[_index].release(); + } + } + + for (int i = _index; i < num - 1; ++i) + this->values[i] = this->values[i + 1]; + + return true; +} + +bool +IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addValue: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = num - 1; i >= _index; --i) + this->values[i + 1] = this->values[i]; + + if (ifcopy) + this->values[_index].copy(_value); +
else + this->values[_index] = *_value; + + return true; +} + +bool +IVLeafNode::subValue(int _index, bool ifdel) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subValue: Invalid index ") + Util::int2string(_index)); + return false; + } + + int i; + if (ifdel) + values[_index].release(); + for (i = _index; i < num - 1; ++i) + this->values[i] = this->values[i + 1]; + + return true; +} + +void +IVLeafNode::setPrev(IVNode* _prev) +{ + this->prev = _prev; +} + +void +IVLeafNode::setNext(IVNode* _next) +{ + this->next = _next; +} + +unsigned +IVLeafNode::getSize() const +{ + unsigned sum = LEAF_SIZE, num = this->getNum(), i; + for (i = 0; i < num; ++i) + { + sum += values[i].getLen(); + } + return sum; +} + +IVNode* +IVLeafNode::split(IVNode* _father, int _index) +{ + int num = this->getNum(); + IVNode* p = new IVLeafNode; //right child + p->setHeight(this->getHeight()); //NOTICE: assign height for new node + p->setNext(this->next); + this->setNext(p); + p->setPrev(this); + int i, k; + for (i = MIN_KEY_NUM, k = 0; i < num; ++i, ++k) + { + p->addKey(this->keys[i], k); + p->addValue(this->values + i, k); + p->addNum(); + } + int tp = this->keys[MIN_KEY_NUM]; + this->setNum(MIN_KEY_NUM); + _father->addKey(tp, _index); + _father->addChild(p, _index + 1); //DEBUG(check the index) + _father->addNum(); + _father->setDirty(); + p->setDirty(); + this->setDirty(); + return p; +} + +IVNode* +IVLeafNode::coalesce(IVNode* _father, int _index) +{ //add a key or coalesce a neighbor to this + int i, j = _father->getNum(), k; //BETTER: unsigned? 
+ IVNode* p = NULL; + int ccase = 0; + //const Bstr* bstr; + if (_index < j) //the right neighbor + { + p = _father->getChild(_index + 1); + k = p->getNum(); + if ((unsigned)k > MIN_KEY_NUM) + ccase = 2; + else //==MIN_KEY_NUM + ccase = 1; + } + if (_index > 0) //the left neighbor + { + IVNode* tp = _father->getChild(_index - 1); + unsigned tk = tp->getNum(); + if (ccase < 2) + { + if (ccase == 0) + ccase = 3; + if (tk > MIN_KEY_NUM) + ccase = 4; + } + if (ccase > 2) + { + p = tp; + k = tk; + } + } + + int tmp = 0; + switch (ccase) + { + case 1: //union right to this + for (i = 0; i < k; ++i) + { + this->addKey(p->getKey(i), this->getNum()); + this->addValue(p->getValue(i), this->getNum()); + this->addNum(); + } + _father->subKey(_index); + _father->subChild(_index + 1); + _father->subNum(); + this->next = p->getNext(); + if (this->next != NULL) + this->next->setPrev(this); + p->setNum(0); //NOTICE: adjust num before delete! + //delete p; + break; + case 2: //move one from right + this->addKey(p->getKey(0), this->getNum()); + _father->setKey(p->getKey(1), _index); + p->subKey(0); + this->addValue(p->getValue(0), this->getNum()); + p->subValue(0); + this->addNum(); + p->subNum(); + break; + case 3: //union left to this + //BETTER: move all keys/etc one time + for (i = k; i > 0; --i) + { + int t = i - 1; + this->addKey(p->getKey(t), 0); + this->addValue(p->getValue(t), 0); + this->addNum(); + } + _father->subKey(_index - 1); + _father->subChild(_index - 1); + _father->subNum(); + this->prev = p->getPrev(); + if (this->prev != NULL) //else: leaves-list + this->prev->setNext(this); + p->setNum(0); + //delete p; + break; + case 4: //move one from left + tmp = p->getKey(k - 1); + p->subKey(k - 1); + this->addKey(tmp, 0); + _father->setKey(tmp, _index - 1); + this->addValue(p->getValue(k - 1), 0); + p->subValue(k - 1); + this->addNum(); + p->subNum(); + break; + default: + print("error in coalesce: Invalid case!"); + //printf("error in coalesce: Invalid case!"); + } + 
_father->setDirty(); + p->setDirty(); + this->setDirty(); + if (ccase == 1 || ccase == 3) + return p; + else + return NULL; +} + +void +IVLeafNode::release() +{ + if (!this->inMem()) + return; + unsigned num = this->getNum(); + /* + for(int i = 0; i < num; ++i) + { + keys[i].release(); + values[i].release(); + } + */ + for (unsigned i = num; i < MAX_KEY_NUM; ++i) + { + values[i].clear(); + } + delete[] keys; + delete[] values; +} + +IVLeafNode::~IVLeafNode() +{ + release(); +} + +void +IVLeafNode::print(string s) +{ +#ifdef DEBUG_KVSTORE + unsigned num = this->getNum(); + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVLeafNode\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); + unsigned i; + if (s == "NODE") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next); + for (i = 0; i < num; ++i) + { + //this->keys[i].print("BSTR"); + this->values[i].print("BSTR"); + } + } + else if (s == "node") + { + fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag); + fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next); + } + else if (s == "check node") + { + //check the node, if satisfy B+ definition + bool flag = true; + if (num < MIN_KEY_NUM || num > MAX_KEY_NUM) + flag = false; + if (flag) + { + for (i = 1; i < num; ++i) + { + if (keys[i] > keys[i - 1]) + continue; + else + break; + } + if (i < num) + flag = false; + } + this->print("node"); + if (flag) + fprintf(Util::debug_kvstore, "This node is good\n"); + else + fprintf(Util::debug_kvstore, "This node is bad\n"); + } + else; +#endif +} + diff --git a/KVstore/IVTree/node/IVLeafNode.h b/KVstore/IVTree/node/IVLeafNode.h new file mode 100644 index 0000000..6b2439a --- /dev/null +++ b/KVstore/IVTree/node/IVLeafNode.h @@ -0,0 
+1,58 @@ +/*============================================================================= +# Filename: IVLeafNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:39 +# Description: the leaf-node of a B+ tree +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVLEAFNODE_H +#define _KVSTORE_IVTREE_NODE_IVLEAFNODE_H + +#include "IVNode.h" + +class IVLeafNode : public IVNode +{ +protected: + IVNode* prev; //LeafNode + IVNode* next; + Bstr* values; + void AllocValues(); + //void FreeValues(); +public: + IVLeafNode(); + IVLeafNode(bool isVirtual); + //LeafNode(Storage* TSM); + void Virtual(); + void Normal(); + IVNode* getPrev() const; + IVNode* getNext() const; + const Bstr* getValue(int _index) const; + bool setValue(const Bstr* _value, int _index, bool _ifcopy=false); + + bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const; + bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false); + + bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false); + bool subValue(VList* _vlist, int _index, bool ifdel = false); + bool addValue(const Bstr* _val, int _index, bool ifcopy = false); + bool subValue(int _index, bool ifdel = false); + + void setPrev(IVNode* _prev); + void setNext(IVNode* _next); + unsigned getSize() const; + IVNode* split(IVNode* _father, int _index); + IVNode* coalesce(IVNode* _father, int _index); + void release(); + ~IVLeafNode(); + void print(std::string s); //DEBUG + /*non-sense virtual function + Node* getChild(int _index) const; + bool addChild(Node* _child, int _index); + bool subChild(int _index); + */ +}; +//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next + +#endif + diff --git a/KVstore/IVTree/node/IVNode.cpp b/KVstore/IVTree/node/IVNode.cpp new file mode 100644 index 0000000..5a5aa1b --- /dev/null +++ b/KVstore/IVTree/node/IVNode.cpp @@ -0,0 
+1,320 @@ +/*============================================================================= +# Filename: IVNode.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:39 +# Description: achieve functions in IVNode.h +=============================================================================*/ + +#include "IVNode.h" + +using namespace std; + +void +IVNode::AllocKeys() +{ + keys = new unsigned[MAX_KEY_NUM]; +} + +/* +void +IVNode::FreeKeys() +{ +delete[] keys; +} +*/ + +IVNode::IVNode() +{ + store = flag = 0; + flag |= NF_IM; + AllocKeys(); +} + +IVNode::IVNode(bool isVirtual) +{ + store = flag = 0; + if (!isVirtual) + { + flag |= NF_IM; + AllocKeys(); + } +} + +/* +IVNode::Node(Storage* TSM) +{ +AllocKeys(); +TSM->readIVNode(this, Storage::OVER); +} +*/ +bool +IVNode::isLeaf() const +{ + return this->flag & NF_IL; +} + +bool +IVNode::isDirty() const +{ + return this->flag & NF_ID; +} + +void +IVNode::setDirty() +{ + this->flag |= NF_ID; +} + +void +IVNode::delDirty() +{ + this->flag &= ~NF_ID; +} + +bool +IVNode::inMem() const +{ + return this->flag & NF_IM; +} + +void +IVNode::setMem() +{ + this->flag |= NF_IM; +} + +void +IVNode::delMem() +{ + this->flag &= ~NF_IM; +} + +/* +bool +IVNode::isVirtual() const +{ +return this->flag & NF_IV; +} + +void +IVNode::setVirtual() +{ +this->flag |= NF_IV; +} + +void +IVNode::delVirtual() +{ +this->flag &= ~NF_IV; +} +*/ + +unsigned +IVNode::getRank() const +{ + return this->flag & NF_RK; +} + +void +IVNode::setRank(unsigned _rank) +{ + this->flag &= ~NF_RK; + this->flag |= _rank; +} + +unsigned +IVNode::getHeight() const +{ + return (this->flag & NF_HT) >> 20; +} + +void +IVNode::setHeight(unsigned _h) +{ + this->flag &= ~NF_HT; + this->flag |= (_h << 20); +} + +unsigned +IVNode::getNum() const +{ + return (this->flag & NF_KN) >> 12; +} + +bool +IVNode::setNum(int _num) +{ + if (_num < 0 || (unsigned)_num > MAX_KEY_NUM) + { + print(string("error in setNum: Invalid num ") + Util::int2string(_num)); + 
return false; + } + this->flag &= ~NF_KN; + this->flag |= (_num << 12); + return true; +} + +bool +IVNode::addNum() +{ + if (this->getNum() + 1 > MAX_KEY_NUM) + { + print("error in addNum: Invalid!"); + return false; + } + this->flag += (1 << 12); + return true; +} + +bool +IVNode::subNum() +{ + if (this->getNum() < 1) + { + print("error in subNum: Invalid!"); + return false; + } + this->flag -= (1 << 12); + return true; +} + +unsigned +IVNode::getStore() const +{ + return this->store; +} + +void +IVNode::setStore(unsigned _store) +{ + this->store = _store; +} + +unsigned +IVNode::getFlag() const +{ + return flag; +} + +void +IVNode::setFlag(unsigned _flag) +{ + this->flag = _flag; +} + +unsigned +IVNode::getKey(int _index) const +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + //print(string("error in getKey: Invalid index ") + Util::int2string(_index)); + printf("error in getKey: Invalid index\n"); + return -1; + } + else + return this->keys[_index]; +} + +bool +IVNode::setKey(unsigned _key, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in setKey: Invalid index ") + Util::int2string(_index)); + return false; + } + keys[_index] = _key; + return true; +} + +bool +IVNode::addKey(unsigned _key, int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + //NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!! + //however. tree operations ensure that: when node is full, not add but split first! 
+ for (i = num - 1; i >= _index; --i) + keys[i + 1] = keys[i]; + keys[_index] = _key; + return true; +} + +bool +IVNode::subKey(int _index) +{ + int num = this->getNum(); + if (_index < 0 || _index >= num) + { + print(string("error in subKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + for (i = _index; i < num - 1; ++i) + keys[i] = keys[i + 1]; + return true; +} + +int +IVNode::searchKey_less(unsigned _key) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + //if(bstr < *(p->getKey(i))) + //break; + + int low = 0, high = num - 1, mid = -1; + while (low <= high) + { + mid = (low + high) / 2; + if (this->keys[mid] > _key) + { + if (low == mid) + break; + high = mid; + } + else + { + low = mid + 1; + } + } + return low; +} + +int +IVNode::searchKey_equal(unsigned _key) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + // if(bstr == *(p->getKey(i))) + // { + + int ret = this->searchKey_less(_key); + if (ret > 0 && this->keys[ret - 1] == _key) + return ret - 1; + else + return num; +} + +int +IVNode::searchKey_lessEqual(unsigned _key) const +{ + //int num = this->getNum(); + //for(i = 0; i < num; ++i) + //if(bstr <= *(p->getKey(i))) + //break; + + int ret = this->searchKey_less(_key); + if (ret > 0 && this->keys[ret - 1] == _key) + return ret - 1; + else + return ret; +} diff --git a/KVstore/IVTree/node/IVNode.h b/KVstore/IVTree/node/IVNode.h new file mode 100644 index 0000000..125c43f --- /dev/null +++ b/KVstore/IVTree/node/IVNode.h @@ -0,0 +1,123 @@ +/*============================================================================= +# Filename: IVNode.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:38 +# Description: basic Node class, father of IVIntlNode and IVLeafNode +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_NODE_IVNODE_H +#define _KVSTORE_IVTREE_NODE_IVNODE_H + +#include "../../../Util/Util.h" +#include 
"../../../Util/Bstr.h" +#include "../../../Util/VList.h" + +class IVNode //abstract basic class +{ +public: + static const unsigned DEGREE = 2 * 63; //the degree of B+ tree + static const unsigned MAX_CHILD_NUM = DEGREE; + static const unsigned MIN_CHILD_NUM = DEGREE >> 1; + static const unsigned MAX_KEY_NUM = MAX_CHILD_NUM - 1; //max key-num + static const unsigned MIN_KEY_NUM = MIN_CHILD_NUM - 1; //min key-num + /* diffrent flags for tree-nodes, 32-bit put rank in low-bits means no need to move*/ + static const unsigned NF_IL = 0x80000000; //is leaf + static const unsigned NF_ID = 0x00080000; //is dirty, in rank-area + static const unsigned NF_IM = 0x20000000; //in memory, not virtual + //static const unsigned NF_IV = 0x10000000; //is virtual + static const unsigned NF_RK = 0x00ffffff; //select-rank, in Storage + static const unsigned NF_HT = 0xf00000; //height area in rank + static const unsigned NF_KN = 0x07f000; //NOTICE: decided by DEGREE + static const unsigned INTL_SIZE = sizeof(int) * MAX_KEY_NUM; + static const unsigned LEAF_SIZE = INTL_SIZE + sizeof(Bstr) * MAX_KEY_NUM; +protected: + unsigned store; //store address, the BLock index + unsigned flag; //NF_RK, NF_IL,NF_ID, NF_IV, propety + //int num; //totle keys num + //Node* father; //point to father-node, which must be IntlNode + unsigned* keys; + void AllocKeys(); + //void FreeKeys(); +public: + IVNode(); + IVNode(bool isVirtual); + bool isLeaf() const; + bool isDirty() const; + void setDirty(); + void delDirty(); + bool inMem() const; + void setMem(); + void delMem(); + //bool isVirtual() const; + //void setVirtual(); + //void delVirtual(); + unsigned getRank() const; + void setRank(unsigned _rank); + unsigned getHeight() const; + void setHeight(unsigned _h); + unsigned getNum() const; + bool setNum(int _num); + bool addNum(); + bool subNum(); + unsigned getStore() const; + void setStore(unsigned _store); + unsigned getFlag() const; + void setFlag(unsigned _flag); + unsigned getKey(int _index) const; 
//need to check the index + bool setKey(unsigned _key, int _index); + bool addKey(unsigned _key, int _index); + bool subKey(int _index); + + //several binary key search utilities + int searchKey_less(unsigned _key) const; + int searchKey_equal(unsigned _key) const; + int searchKey_lessEqual(unsigned _key) const; + + //virtual functions: polymorphic + virtual IVNode* getChild(int _index) const { return NULL; }; + virtual bool setChild(IVNode* _child, int _index) { return true; }; + virtual bool addChild(IVNode* _child, int _index) { return true; }; + virtual bool subChild(int _index) { return true; }; + virtual IVNode* getPrev() const { return NULL; }; + virtual IVNode* getNext() const { return NULL; }; + + virtual const Bstr* getValue(int _index) const { return NULL; }; + virtual bool setValue(const Bstr* _value, int _index, bool _ifcopy=false) { return true; }; + virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; }; + virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; + + virtual bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }; + virtual bool subValue(VList* _vlist, int _index, bool ifdel = false) { return true; }; + virtual bool addValue(const Bstr* _val, int _index, bool ifcopy = false) { return true; }; + virtual bool subValue(int _index, bool ifdel = false) { return true; }; + + virtual void setPrev(IVNode* _prev) {}; + virtual void setNext(IVNode* _next) {}; + + //pure virtual functions + virtual void Virtual() = 0; + virtual void Normal() = 0; + virtual unsigned getSize() const = 0; //return all memory owned + virtual IVNode* split(IVNode* _father, int _index) = 0; + virtual IVNode* coalesce(IVNode* _father, int _index) = 0; + virtual void release() = 0; //release the node, only remain necessary information + virtual ~IVNode() {}; + virtual void print(std::string s) = 0; //DEBUG(print the 
Node) +}; + +/*NOTICE(operations in release()) +*To save memory, we can only remain store and flag(childs added for Leaf). +*However, in this way childs'pointers is ok to change, use Node** or Node*& is also nonsense +*because the pointer variable may die. +*Another way is only to release dynamic memory, and store thw whole, read the Bstr only to +*build. In this way nodes'pointer doesn't change, and operation is simplified, while memory +*is consumed a bit more. Because Bstrs consume the most memory, and memory-disk swapping is +*the most time-consuming thing, it seems to be a better way. +*WARN:when a node is in memory and not deleted, its basic content is always being! If nodes are +*really too many, this will cause disaster because we can't swap them out until tree is closed! +*To solve this problem, there should be two types of release-function: one to release Bstr, one +*to release the whole(pointer is invalid and rebuild problem) +*/ + +#endif diff --git a/KVstore/IVTree/storage/IVStorage.cpp b/KVstore/IVTree/storage/IVStorage.cpp new file mode 100644 index 0000000..deadf0a --- /dev/null +++ b/KVstore/IVTree/storage/IVStorage.cpp @@ -0,0 +1,738 @@ +/*============================================================================= +# Filename: IVStorage.cpp +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:43 +# Description: achieve functions in IVStorage.h +=============================================================================*/ + +#include "IVStorage.h" + +using namespace std; + +IVStorage::IVStorage() +{ //not use ../logs/, notice the location of program + cur_block_num = SET_BLOCK_NUM; + filepath = ""; + freelist = NULL; + treefp = NULL; + max_buffer_size = Util::MAX_BUFFER_SIZE; + heap_size = max_buffer_size / IVNode::INTL_SIZE; + freemem = max_buffer_size; + minheap = NULL; + this->value_list = NULL; +} + +IVStorage::IVStorage(string& _filepath, string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* 
_vlist) +{ + cur_block_num = SET_BLOCK_NUM; //initialize + this->filepath = _filepath; + if (_mode == string("build")) + treefp = fopen(_filepath.c_str(), "w+b"); + else if (_mode == string("open")) + treefp = fopen(_filepath.c_str(), "r+b"); + else + { + print(string("error in IVStorage: Invalid mode ") + _mode); + return; + } + if (treefp == NULL) + { + print(string("error in IVStorage: Open error ") + _filepath); + return; + } + this->treeheight = _height; //originally set to 0 + this->max_buffer_size = _buffer_size; + this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE; + this->freemem = this->max_buffer_size; + this->freelist = new BlockInfo; //null-head + unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE + BlockInfo* bp; + if (_mode == "build") + { //write basic information + i = 0; + fwrite(&i, sizeof(unsigned), 1, this->treefp); //height + fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum + fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + fputc(0, this->treefp); + for (k = 0; k < 8; ++k) + { + bp->next = new BlockInfo(i * 8 + k + 1, NULL); + bp = bp->next; + } + } + } + else //_mode == "open" + { + //read basic information + unsigned rootnum; + char c; + fread(this->treeheight, sizeof(unsigned), 1, this->treefp); + fread(&rootnum, sizeof(unsigned), 1, this->treefp); + fread(&cur_block_num, sizeof(unsigned), 1, this->treefp); + fseek(this->treefp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + c = fgetc(treefp); + for (k = 0; k < 8; ++k) + { + if ((c & (1 << k)) == 0) + { + bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL); + bp = bp->next; + } + } + } + fseek(treefp, Address(rootnum), SEEK_SET); + //treefp is now ahead of root-block + } + + this->minheap = new IVHeap(this->heap_size); + this->value_list = _vlist; +} + +bool 
+IVStorage::preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail) //pre-read and build whole tree +{ //set root(in memory) and leaves_head + //TODO: false when exceed memory + _leaves_tail = _leaves_head = _root = NULL; + if (ftell(this->treefp) == 0) //root is null + { + return true; + } + unsigned next, store, j, pos = 0; + unsigned h = *this->treeheight; + IVNode* p; + //read root node + this->createNode(p); + _root = p; + fread(&next, sizeof(unsigned), 1, treefp); + //use stack to achieve + long address[h]; //current address + unsigned used[h]; //used child num + unsigned total[h]; //total child num + unsigned block[h]; //next block num + IVNode* nodes[h]; + address[pos] = ftell(treefp); + used[pos] = 0; + total[pos] = p->getNum() + 1; + block[pos] = next; + nodes[pos] = p; + pos++; + IVNode* prev = NULL; + while (pos > 0) + { + j = pos - 1; + if (nodes[j]->isLeaf() || used[j] == total[j]) //LeafNode or ready IntlNode + { + if (nodes[j]->isLeaf()) + { + if (prev != NULL) + { + prev->setNext(nodes[j]); + nodes[j]->setPrev(prev); + } + prev = nodes[j]; + } + pos--; + continue; + } + fseek(this->treefp, address[j], SEEK_SET); + fread(&store, sizeof(unsigned), 1, treefp); + this->ReadAlign(block + j); + address[j] = ftell(treefp); + fseek(treefp, Address(store), SEEK_SET); + this->createNode(p); + nodes[j]->setChild(p, used[j]); + used[j]++; + fread(&next, sizeof(unsigned), 1, treefp); + address[pos] = ftell(treefp); + used[pos] = 0; + total[pos] = p->getNum() + 1; + block[pos] = next; + nodes[pos] = p; + pos++; + } + //set leaves and read root, which is always keeped in-mem + p = _root; + while (!p->isLeaf()) + { + p = p->getChild(0); + } + _leaves_head = p; + p = _root; + while (!p->isLeaf()) + { + p = p->getChild(p->getNum()); + } + _leaves_tail = p; + long long memory = 0; + this->readNode(_root, &memory); + this->request(memory); + return true; +} + +long //8-byte in 64-bit machine +IVStorage::Address(unsigned _blocknum) const //BETTER: inline 
function +{ + if (_blocknum == 0) + return 0; + else if (_blocknum > cur_block_num) + { + //print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum)); + return -1; //address should be non-negative + } + //NOTICE: here should explictly use long + return (long)(this->SuperNum + _blocknum - 1) * (long)BLOCK_SIZE; +} + +unsigned +IVStorage::Blocknum(long address) const +{ + return (address / BLOCK_SIZE) + 1 - this->SuperNum; +} + +unsigned +IVStorage::AllocBlock() +{ + BlockInfo* p = this->freelist->next; + if (p == NULL) + { + for (unsigned i = 0; i < SET_BLOCK_INC; ++i) + { + cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM + this->FreeBlock(cur_block_num); + } + p = this->freelist->next; + } + unsigned t = p->num; + this->freelist->next = p->next; + delete p; + return t; +} + +void +IVStorage::FreeBlock(unsigned _blocknum) +{ //QUERY: head-sub and tail-add will be better? + BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next); + this->freelist->next = bp; +} + +//NOTICE: all reads are aligned to 4 bytes(including a string) +//a string may acrossseveral blocks + +void +IVStorage::ReadAlign(unsigned* _next) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + fseek(treefp, Address(*_next), SEEK_SET); + fread(_next, sizeof(unsigned), 1, treefp); + } +} + +void +IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock) +{ + if (ftell(treefp) % BLOCK_SIZE == 0) + { + unsigned blocknum = this->AllocBlock(); + fseek(treefp, Address(*_curnum), SEEK_SET); + if (_SpecialBlock) + { + fseek(treefp, 4, SEEK_CUR); + _SpecialBlock = false; + } + fwrite(&blocknum, sizeof(unsigned), 1, treefp); + fseek(treefp, Address(blocknum) + 4, SEEK_SET); + *_curnum = blocknum; + } +} + +bool +IVStorage::readNode(IVNode* _np, long long* _request) +{ + if (_np == NULL || _np->inMem()) + return false; //can't read or needn't + + fseek(treefp, Address(_np->getStore()), SEEK_SET); + bool flag = _np->isLeaf(); + unsigned next; + unsigned i, num = _np->getNum(); + 
Bstr bstr; + fseek(treefp, 4, SEEK_CUR); + fread(&next, sizeof(unsigned), 1, treefp); + + //read data, use readBstr... + //fread(treefp, "%u", &num); + //_np->setNum(num); + if (flag) + *_request += IVNode::LEAF_SIZE; + else + *_request += IVNode::INTL_SIZE; + _np->Normal(); + if (!flag) + fseek(treefp, 4 * (num + 1), SEEK_CUR); + + //to read all keys + //int tmp = -1; + unsigned tmp = INVALID; + for (i = 0; i < num; ++i) + { + fread(&tmp, sizeof(int), 1, treefp); + this->ReadAlign(&next); + _np->setKey(tmp, i); + } + + if (flag) + { + //to read all values + for (i = 0; i < num; ++i) + { + this->readBstr(&bstr, &next); + //if not long list value + if(bstr.getStr() != NULL) + { + *_request += bstr.getLen(); + } + _np->setValue(&bstr, i); + } + } + //_np->setFlag((_np->getFlag() & ~Node::NF_IV & ~Node::NF_ID) | Node::NF_IM); + //_np->delVirtual(); + _np->delDirty(); + //_np->setMem(); + this->updateHeap(_np, _np->getRank(), false); + bstr.clear(); + return true; +} + +bool +IVStorage::createNode(IVNode*& _np) //cretae virtual nodes, not in-mem +{ + /* + if(ftell(this->treefp)== 0) //null root + { + _np = NULL; + return false; + } + */ + unsigned t; //QUERY: maybe next-flag... will be better-storage? + bool flag = false; //IntlNode + fread(&t, sizeof(unsigned), 1, treefp); + if ((t & IVNode::NF_IL) > 0) //WARN: according to setting + flag = true; //LeafNode + if (flag) + { + //this->request(sizeof(LeafNode)); + _np = new IVLeafNode(true); + } + else + { + //this->request(sizeof(IntlNode)); + _np = new IVIntlNode(true); + } + //fseek(treefp, -4, SEEK_CUR); + //_np->setFlag(_np->getFlag() | (t & Node::NF_RK)); + //_np->setRank(t); + _np->setFlag(t); + _np->delDirty(); + _np->delMem(); + _np->setStore(Blocknum(ftell(treefp) - 4)); + return true; +} + +//BETTER: Does SpecialBlock really needed? why can't we place next before flag?? +// +//NOTICE: root num begins from 1, if root num is 0, then it is invalid, i.e. 
the tree is NULL +//(and ftell(root address) will be 0 either) + +bool +IVStorage::writeNode(IVNode* _np) +{ + if (_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty())) + return false; //not need to write back + + unsigned num = _np->getNum(), i; + bool flag = _np->isLeaf(), SpecialBlock = true; + /* + if(!flag) + { + for(i = 0; i <= num; ++i) + if(_np->getChild(i)->isDirty()) + return false; //NOTICE: all childs must be clean! + } + */ + //to release original blocks + unsigned store = _np->getStore(), next; + //if first store is 0, meaning a new node + fseek(this->treefp, Address(store) + 4, SEEK_SET); + fread(&next, sizeof(unsigned), 1, treefp); + while (store != 0) + { + this->FreeBlock(store); + store = next; + fseek(treefp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, treefp); + } + if (num == 0) + return true; //node is empty! + unsigned t; + //write Node information + unsigned blocknum = this->AllocBlock(); + _np->setStore(blocknum); + long address = this->Address(blocknum); + fseek(this->treefp, address, SEEK_SET); + t = _np->getFlag(); + fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG + fseek(treefp, 4, SEEK_CUR); + if (!flag) + { + for (i = 0; i <= num; ++i) + { + t = _np->getChild(i)->getStore(); + fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG + this->WriteAlign(&blocknum, SpecialBlock); + } + } + + //int tmp = 0; + unsigned tmp = INVALID; + //to write all keys + for (i = 0; i < num; ++i) + { + tmp = _np->getKey(i); + fwrite(&tmp, sizeof(int), 1, treefp); + this->WriteAlign(&blocknum, SpecialBlock); + } + + if (flag) + { + //to write all values + for (i = 0; i < num; ++i) + { + this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock); + } + } + fseek(treefp, Address(blocknum), SEEK_SET); + if (SpecialBlock) + fseek(treefp, 4, SEEK_CUR); + t = 0; + fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block + //_np->setFlag(_np->getFlag() & ~Node::NF_ID); + //NOTICE:we may store the dirty bit into the tree file, but 
that is ok + //Each time we read the tree file to construct a node, we always set the drity bit to 0 + _np->delDirty(); + return true; +} + +bool +IVStorage::readBstr(Bstr* _bp, unsigned* _next) +{ + //long address; + unsigned len, i, j; + fread(&len, sizeof(unsigned), 1, this->treefp); + this->ReadAlign(_next); + + //NOTICE: if this is a long list as value + if(len == 0) + { + unsigned addr = 0; + fread(&addr, sizeof(unsigned), 1, this->treefp); +#ifdef DEBUG_VLIST + cout<<"read a vlist in IVStorage - addr: "<setLen(addr); + _bp->setStr(NULL); + this->ReadAlign(_next); + return true; + } + + //this->request(len); + + //NOTICE: we use new for all, consistent with Bstr and KVstore + //char* s = (char*)malloc(len); + char* s = new char[len]; + _bp->setLen(len); + for (i = 0; i + 4 < len; i += 4) + { + fread(s + i, sizeof(char), 4, treefp); + this->ReadAlign(_next); + } + while (i < len) + { + fread(s + i, sizeof(char), 1, treefp); //BETTER + i++; + } + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(treefp, j, SEEK_CUR); + this->ReadAlign(_next); + _bp->setStr(s); + + return true; +} + +bool +IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock) +{ + unsigned i, j, len = _bp->getLen(); + + //NOTICE: to write long list value + if(_bp->getStr() == NULL) + { + unsigned flag = 0; + fwrite(&flag, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + //then this is the real block num + fwrite(&len, sizeof(unsigned), 1, treefp); +#ifdef DEBUG_VLIST + cout<<"to write a vlist in IVStorage::writeBstr() - blocknum: "<WriteAlign(_curnum, _SpecialBlock); + return true; + } + + fwrite(&len, sizeof(unsigned), 1, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + + char* s = _bp->getStr(); + for (i = 0; i + 4 < len; i += 4) + { + fwrite(s + i, sizeof(char), 4, treefp); + this->WriteAlign(_curnum, _SpecialBlock); + } + while (i < len) + { + fwrite(s + i, sizeof(char), 1, treefp); + i++; + } + + j = len % 4; + if (j > 0) + j = 4 - j; + 
fseek(treefp, j, SEEK_CUR); + this->WriteAlign(_curnum, _SpecialBlock); + + return true; +} + +bool +IVStorage::writeTree(IVNode* _root) //write the whole tree back and close treefp +{ + fseek(this->treefp, 0, SEEK_SET); + fwrite(this->treeheight, sizeof(unsigned), 1, treefp); + //delete all nonsense-node in heap, otherwise will waste storage permanently + IVNode* p; + while (1) + { //all non-sense nodes will be in-head-area, due to minimal rank + p = minheap->getTop(); + if (p == NULL) //heap is empty, only when root==NULL + break; + if (p->getRank() == 0) //indicate non-sense node + { + this->minheap->remove(); + this->writeNode(p); + delete p; + } + else + break; + } + + unsigned i, j, t; + //QUERY: another way to write all nodes back is to print out all nodes in heap + //but this method will cause no node in heap any more, while operations may be + //afetr tree-saving. Which method is better? + //write nodes recursively using stack, including root-num + if (_root != NULL) + { + IVNode* p = _root; + unsigned h = *this->treeheight, pos = 0; + IVNode* ns[h]; + int ni[h]; + ns[pos] = p; + ni[pos] = p->getNum(); + pos++; + while (pos > 0) + { + j = pos - 1; + p = ns[j]; + if (p->isLeaf() || ni[j] < 0) //leaf or all childs are ready + { + this->writeNode(p); + pos--; + continue; + } + ns[pos] = p->getChild(ni[j]); + ni[pos] = ns[pos]->getNum(); + pos++; + ni[j]--; + } + t = _root->getStore(); + } + else + t = 0; + + fseek(this->treefp, 4, SEEK_SET); + fwrite(&t, sizeof(unsigned), 1, treefp); //write the root num + fwrite(&cur_block_num, sizeof(unsigned), 1, treefp);//write current blocks num + fseek(treefp, BLOCK_SIZE, SEEK_SET); + j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE; + //reset to 1 first + for (i = 0; i < j; ++i) + { + fputc(0xff, treefp); + } + char c; + BlockInfo* bp = this->freelist->next; + while (bp != NULL) + { + //if not-use then set 0, aligned to byte! 
+#ifdef DEBUG_KVSTORE + if (bp->num > cur_block_num) + { + printf("blocks num exceed, cur_block_num: %u\n", cur_block_num); + exit(1); + } +#endif + j = bp->num - 1; + i = j / 8; + j = 7 - j % 8; + fseek(treefp, BLOCK_SIZE + i, SEEK_SET); + c = fgetc(treefp); + fseek(treefp, -1, SEEK_CUR); + fputc(c & ~(1 << j), treefp); + bp = bp->next; + } + //fclose(this->treefp); + return true; +} + +void +IVStorage::updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const +{ + if (_inheap) //already in heap, to modify + { + unsigned t = _np->getRank(); + _np->setRank(_rank); + if (t < _rank) + this->minheap->modify(_np, false); + else if (t > _rank) + this->minheap->modify(_np, true); + else; + } + else //not in heap, to add + { + _np->setRank(_rank); + this->minheap->insert(_np); + } +} + +bool +IVStorage::request(long long _needmem) //aligned to byte +{ //NOTICE: <0 means release + //cout<<"freemem: "<freemem<<" needmem: "<<_needmem< 0 && this->freemem < (unsigned long long)_needmem) + if (!this->handler(_needmem - freemem)) //disaster in buffer memory + { + print(string("error in request: out of buffer-mem, now to exit")); + //exit(1); + return false; + } + this->freemem -= _needmem; + return true; +} + +bool +IVStorage::handler(unsigned long long _needmem) //>0 +{ + //cout<<"swap happen"<minheap->getTop(); + //cout<<"get heap top"<minheap->remove(); + //cout<<"node removed in heap"<getSize(); + this->freemem += size; + this->writeNode(p); + //cout<<"node write back"<getNum() > 0) + p->Virtual(); + else + delete p; //non-sense node + //cout<<"node memory released"< size) + { + //cout<<"reduce the request"<freelist; + BlockInfo* next; + while (bp != NULL) + { + next = bp->next; + delete bp; + bp = next; + } +#ifdef DEBUG_KVSTORE + printf("already empty the freelist!\n"); +#endif + delete this->minheap; +#ifdef DEBUG_KVSTORE + printf("already empty the buffer heap!\n"); +#endif + fclose(this->treefp); + //#ifdef DEBUG_KVSTORE + //NOTICE:there is more than one tree + 
//fclose(Util::debug_kvstore); //NULL is ok! + //Util::debug_kvstore = NULL; + //#endif +} + +void +IVStorage::print(string s) +{ +#ifdef DEBUG_KVSTORE + fputs(Util::showtime().c_str(), Util::debug_kvstore); + fputs("Class IVStorage\n", Util::debug_kvstore); + fputs("Message: ", Util::debug_kvstore); + fputs(s.c_str(), Util::debug_kvstore); + fputs("\n", Util::debug_kvstore); +#endif +} diff --git a/KVstore/IVTree/storage/IVStorage.h b/KVstore/IVTree/storage/IVStorage.h new file mode 100644 index 0000000..37e13ae --- /dev/null +++ b/KVstore/IVTree/storage/IVStorage.h @@ -0,0 +1,84 @@ +/*============================================================================= +# Filename: IVStorage.h +# Author: syzz +# Mail: 1181955272@qq.com +# Last Modified: 2015-04-26 16:43 +# Description: swap between memory and disk, achieving system-like method +=============================================================================*/ + +#ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H +#define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H + +#include "../../../Util/VList.h" +#include "../node/IVIntlNode.h" +#include "../node/IVLeafNode.h" +#include "../heap/IVHeap.h" + +//It controls read, write, swap +class IVStorage +{ +public: + static const unsigned BLOCK_SIZE = Util::STORAGE_BLOCK_SIZE; //fixed size of disk-block + //there are 18 B+Tree indexes and one vstree index, so set 3G buffer size + //static const unsigned long long MAX_BUFFER_SIZE = Util::MAX_BUFFER_SIZE; //max buffer size + //static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size + //static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE / IVNode::INTL_SIZE; + + //DEBUG: maybe need to set larger, now the file size is 64G at most + static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num + //below two constants: must can be exactly divided by 8 + static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num + static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc + static const unsigned 
SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; + //static const unsigned TRANSFER_CAPACITY = BLOCK_SIZE; + //enum ReadType { OVER = 0, EXPAND, NORMAL }; + +private: + unsigned long long max_buffer_size; + unsigned heap_size; + unsigned cur_block_num; + std::string filepath; + unsigned* treeheight; + BlockInfo* freelist; + FILE* treefp; //file: tree nodes + IVHeap* minheap; //heap of Nodes's pointer, sorted in NF_RK + + //very long value list are stored in a separate file(with large block) + // + //NOTICE: according to the summary result, 90% value lists are just below 100 bytes + //<10%: 5000000~100M bytes + VList* value_list; + + //NOTICE: freemem's type is long long here, due to large memory in server. + //However, needmem in handler() and request() is ok to be int/unsigned. + //Because the bstr' size is controlled, so is the node. + unsigned long long freemem; //free memory to use, non-negative + //unsigned long long time; //QUERY(achieving an old-swap startegy?) + long Address(unsigned _blocknum) const; + unsigned Blocknum(long address) const; + unsigned AllocBlock(); + void FreeBlock(unsigned _blocknum); + void ReadAlign(unsigned* _next); + void WriteAlign(unsigned* _next, bool& _SpecialBlock); + +public: + IVStorage(); + IVStorage(std::string& _filepath, std::string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist); //create a fixed-size file or open an existence + bool preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail); //read and build all nodes, only root in memory + bool readNode(IVNode* _np, long long* _request); //read, if virtual + bool createNode(IVNode*& _np); //use fp to create a new node + //NOTICE(if children and child not exist, build children's Nodes) + bool writeNode(IVNode* _np); + bool readBstr(Bstr* _bp, unsigned* _next); + bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); + bool writeTree(IVNode* _np); + void updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const; + 
bool request(long long _needmem); //deal with memory request + bool handler(unsigned long long _needmem); //swap some nodes out + //bool update(); //update InMem Node's rank, with clock + ~IVStorage(); + void print(std::string s); //DEBUG +}; + +#endif + diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index 3df4dc4..302e9f7 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -1,2088 +1,2513 @@ -/*============================================================================= -# Filename: KVstore.cpp -# Author: Bookug Lobert -# Mail: 1181955272@qq.com -# Last Modified: 2015-10-23 14:25 -# Description: Modified By Wang Libo -=============================================================================*/ - -#include "KVstore.h" - -using namespace std; - -//sets store_path as the root dir of this KVstore -//initial all Tree pointers as NULL -KVstore::KVstore(string _store_path) { - this->store_path = _store_path; - - this->entity2id = NULL; - this->id2entity = NULL; - - this->predicate2id = NULL; - this->id2predicate = NULL; - - this->literal2id = NULL; - this->id2literal = NULL; - - this->subID2values = NULL; - this->preID2values = NULL; - this->objID2values = NULL; -} - -//Release all the memory used in this KVstore before destruction -KVstore::~KVstore() { - this->flush(); - this->release(); -} - -//Flush all modified parts into the disk, which will not release any memory -//Does nothing to null tree pointers or parts that has not been modified -void KVstore::flush() { - this->flush(this->entity2id); - this->flush(this->id2entity); - - this->flush(this->literal2id); - this->flush(this->id2literal); - - this->flush(this->predicate2id); - this->flush(this->id2predicate); - - this->flush(this->subID2values); - this->flush(this->preID2values); - this->flush(this->objID2values); -} - -void KVstore::release() { - delete this->entity2id; - this->entity2id = NULL; - delete this->id2entity; - this->id2entity = NULL; - - delete this->literal2id; - 
this->literal2id = NULL; - delete this->id2literal; - this->id2literal = NULL; - - delete this->predicate2id; - this->predicate2id = NULL; - delete this->id2predicate; - this->id2predicate = NULL; - - delete this->subID2values; - this->subID2values = NULL; - delete this->preID2values; - this->preID2values = NULL; - delete this->objID2values; - this->objID2values = NULL; -} - -void KVstore::open() { - cout << "open KVstore" << endl; - - this->open_entity2id(KVstore::READ_WRITE_MODE); - this->open_id2entity(KVstore::READ_WRITE_MODE); - - this->open_literal2id(KVstore::READ_WRITE_MODE); - this->open_id2literal(KVstore::READ_WRITE_MODE); - - this->open_predicate2id(KVstore::READ_WRITE_MODE); - this->open_id2predicate(KVstore::READ_WRITE_MODE); - - this->open_subID2values(KVstore::READ_WRITE_MODE); - this->open_objID2values(KVstore::READ_WRITE_MODE); - this->open_preID2values(KVstore::READ_WRITE_MODE); -} - -unsigned -KVstore::getEntityDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const -{ - return this->getEntityInDegree(_entity_id) + this->getEntityOutDegree(_entity_id); -} - -unsigned -KVstore::getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const -{ - //cout << "In getEntityInDegree " << _entity_id << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len); - if (!_get) - { - return 0; - } - return _tmp[0]; -} - -unsigned -KVstore::getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const -{ - //cout << "In getEntityOutDegree " << _entity_id << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len); - if (!_get) - { - return 0; - } - return _tmp[0]; -} - -unsigned -KVstore::getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const -{ - //cout << "In getLiteralDegree " << _literal_id << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, 
_literal_id, (char*&)_tmp, _len); - if (!_get) - { - return 0; - } - return _tmp[0]; -} - -unsigned -KVstore::getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const -{ - //cout << "In getPredicate Degree " << _predicate_id << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len); - if (!_get) - { - return 0; - } - return _tmp[0]; -} - -unsigned -KVstore::getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid) const -{ - //cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) - { - return 0; - } - unsigned _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2); - //if (_result == -1) - if (_result == INVALID) - { - return 0; - } - - unsigned _offset = _tmp[4 + 2 * _result]; - unsigned _offset_next; - if (_result == _tmp[1] - 1) - { - _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; - } - else - { - _offset_next = _tmp[6 + 2 * _result]; - } - - return _offset_next - _offset; -} - -unsigned -KVstore::getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid) const -{ - //cout << "In getObjectPredicateDegree " << _objid << _preid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); - if (!_get) - { - return 0; - } - - unsigned _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2); - //if (_result == -1) - if (_result == INVALID) - { - return 0; - } - unsigned _offset = _tmp[3 + 2 * _result]; - unsigned _offset_next; - if (_result == _tmp[1] - 1) - { - _offset_next = 2 + 2 * _tmp[1] + _tmp[0]; - } - else - { - _offset_next = _tmp[5 + 2 * _result]; - } - - return _offset_next - _offset; -} - -bool -KVstore::updateTupleslist_insert(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID 
_pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) -{ - //int* _tmp = NULL; - //int _len = 0; - //bool _get; - //cout << "Inserting:\t" << _sub_id << '\t' << _pre_id << '\t' << _obj_id << endl; - //cout << "Before insertion:" << endl; - //_get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "SubID " << _sub_id << ": doesn't exist." << endl; - //} - //else { - // cout << "SubID " << _sub_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //_get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "ObjID " << _obj_id << ": doesn't exist." << endl; - //} - //else { - // cout << "ObjID " << _obj_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //_get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "PreID " << _pre_id << ": doesn't exist." << endl; - //} - //else { - // cout << "PreID " << _pre_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //bool flag = this->updateInsert_s2values(_sub_id, _pre_id, _obj_id) - // && this->updateInsert_o2values(_sub_id, _pre_id, _obj_id) - // && this->updateInsert_p2values(_sub_id, _pre_id, _obj_id); - //cout << "After insertion:" << endl; - //_get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "SubID " << _sub_id << ": doesn't exist." << endl; - //} - //else { - // cout << "SubID " << _sub_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //_get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "ObjID " << _obj_id << ": doesn't exist." 
<< endl; - //} - //else { - // cout << "ObjID " << _obj_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //_get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "PreID " << _pre_id << ": doesn't exist." << endl; - //} - //else { - // cout << "PreID " << _pre_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //return flag; - return this->updateInsert_s2values(_sub_id, _pre_id, _obj_id) - && this->updateInsert_o2values(_sub_id, _pre_id, _obj_id) - && this->updateInsert_p2values(_sub_id, _pre_id, _obj_id); -} - -bool -KVstore::updateTupleslist_remove(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) -{ - //int* _tmp = NULL; - //int _len = 0; - //bool _get; - //cout << "Removing:\t" << _sub_id << '\t' << _pre_id << '\t' << _obj_id << endl; - //cout << "Before removal:" << endl; - //_get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "SubID " << _sub_id << ": doesn't exist." << endl; - //} - //else { - // cout << "SubID " << _sub_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //_get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "ObjID " << _obj_id << ": doesn't exist." << endl; - //} - //else { - // cout << "ObjID " << _obj_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //_get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "PreID " << _pre_id << ": doesn't exist." 
<< endl; - //} - //else { - // cout << "PreID " << _pre_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //bool flag = this->updateRemove_s2values(_sub_id, _pre_id, _obj_id) - // && this->updateRemove_o2values(_sub_id, _pre_id, _obj_id) - // && this->updateRemove_p2values(_sub_id, _pre_id, _obj_id); - //cout << "After removal:" << endl; - //_get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "SubID " << _sub_id << ": doesn't exist." << endl; - //} - //else { - // cout << "SubID " << _sub_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //_get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "ObjID " << _obj_id << ": doesn't exist." << endl; - //} - //else { - // cout << "ObjID " << _obj_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //_get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); - //if (!_get) { - // cout << "PreID " << _pre_id << ": doesn't exist." 
<< endl; - //} - //else { - // cout << "PreID " << _pre_id << ':'; - // for (unsigned i = 0; i < _len / sizeof(int); i++) { - // cout << '\t' << _tmp[i]; - // } - // cout << endl; - //} - //return flag; - return this->updateRemove_s2values(_sub_id, _pre_id, _obj_id) - && this->updateRemove_o2values(_sub_id, _pre_id, _obj_id) - && this->updateRemove_p2values(_sub_id, _pre_id, _obj_id); -} - -bool -KVstore::updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) -{ - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - bool _is_entity = KVstore::isEntity(_obj_id); - - //subID doesn't exist - if (!_get) - { - unsigned _values[6]; - _values[0] = 1; - _values[1] = 1; - _values[2] = _is_entity ? 1 : 0; - _values[3] = _pre_id; - _values[4] = 5; - _values[5] = _obj_id; - this->addValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(int) * 6); - } - - //subID exists - else - { - unsigned* _values; - unsigned _values_len; - unsigned _position = KVstore::binarySearch(_pre_id, _tmp + 3, _tmp[1], 2); - - //preID doesn't exist - if (_position == INVALID) - //if (_position == -1) - { - _values_len = _len / sizeof(unsigned) + 3; - _values = new unsigned[_values_len]; - _values[0] = _tmp[0] + 1; - _values[1] = _tmp[1] + 1; - _values[2] = _tmp[2] + (_is_entity ? 
1 : 0); - unsigned i, j; - for (i = 0, j = 3; i < _tmp[1] && _tmp[3 + 2 * i] < _pre_id; i++, j += 2) { - _values[j] = _tmp[3 + 2 * i]; - _values[j + 1] = _tmp[4 + 2 * i] + 2; - } - _values[j] = _pre_id; - unsigned _offset_old; - if (i == _tmp[1]) { - _offset_old = 3 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_old = _tmp[4 + 2 * i]; - } - _values[j + 1] = _offset_old + 2; - j += 2; - for (; i < _tmp[1]; i++, j += 2) { - _values[j] = _tmp[3 + 2 * i]; - _values[j + 1] = _tmp[4 + 2 * i] + 3; - } - for (i = 3 + 2 * _tmp[1]; i < 3 + 2 * _tmp[1] + _tmp[0]; i++, j++) { - if (i == _offset_old) { - _values[j] = _obj_id; - j++; - } - _values[j] = _tmp[i]; - } - if (i == _offset_old) { - _values[j] = _obj_id; - } - } - - //preID exists - else - { - _values_len = _len / sizeof(unsigned) + 1; - _values = new unsigned[_values_len]; - memcpy(_values, _tmp, sizeof(unsigned) * _tmp[4 + 2 * _position]); - _values[0]++; - if (_is_entity) { - _values[2]++; - } - for (unsigned i = _position + 1; i < _tmp[1]; i++) { - _values[4 + 2 * i]++; - } - unsigned i, j; - unsigned right; - if (_position == _tmp[1] - 1) { - right = 3 + 2 * _tmp[1] + _tmp[0]; - } - else { - right = _tmp[6 + 2 * _position]; - } - for (i = _tmp[4 + 2 * _position], j = _tmp[4 + 2 * _position]; i < right && _tmp[i] < _obj_id; i++, j++) { - _values[j] = _tmp[i]; - } - _values[j] = _obj_id; - j++; - for (; i < 3 + 2 * _tmp[1] + _tmp[0]; i++, j++) { - _values[j] = _tmp[i]; - } - } - - this->setValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(unsigned) * _values_len); - delete[] _values; - } - - return true; -} - -bool -KVstore::updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) -{ - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); - bool _is_entity = KVstore::isEntity(_obj_id); - - if (!_get) - { - return false; - } - - //subID doesn't exist after removal - if 
(_tmp[0] == 1) - { - this->removeKey(this->subID2values, _sub_id); - } - //subID still exists after removal - else - { - unsigned* _values; - unsigned _values_len; - unsigned _position = KVstore::binarySearch(_pre_id, _tmp + 3, _tmp[1], 2); - unsigned _oidlen_sp; - //DEBUG: if _tmp[1] -1 < 0?? - if (_position == _tmp[1] - 1) - { - _oidlen_sp = 3 + 2 * _tmp[1] + _tmp[0] - _tmp[4 + 2 * _position]; - } - else - { - _oidlen_sp = _tmp[6 + 2 * _position] - _tmp[4 + 2 * _position]; - } - - //preID doesn't exist after removal - if (_oidlen_sp == 1) { - _values_len = _len / sizeof(unsigned) - 3; - _values = new unsigned[_values_len]; - memcpy(_values, _tmp, sizeof(unsigned) * (3 + 2 * _position)); - _values[0]--; - _values[1]--; - if (_is_entity) { - _values[2]--; - } - for (unsigned i = 0; i < _position; i++) { - _values[4 + 2 * i] -= 2; - } - unsigned i, j; - for (i = 5 + 2 * _position, j = 3 + 2 * _position; i < 3 + 2 * _tmp[1]; i += 2, j += 2) { - _values[j] = _tmp[i]; - _values[j + 1] = _tmp[i + 1] - 3; - } - for (; i < 3 + 2 * _tmp[1] + _tmp[0]; i++, j++) { - if (i == _tmp[4 + 2 * _position]) { - j--; - continue; - } - _values[j] = _tmp[i]; - } - } - - //preID still exists after removal - else { - _values_len = _len / sizeof(unsigned) - 1; - _values = new unsigned[_values_len]; - memcpy(_values, _tmp, sizeof(unsigned) * _tmp[4 + 2 * _position]); - _values[0]--; - if (_is_entity) { - _values[2]--; - } - for (unsigned i = _position + 1; i < _tmp[1]; i++) { - _values[4 + 2 * i]--; - } - unsigned i, j; - for (i = _tmp[4 + 2 * _position], j = _tmp[4 + 2 * _position]; - i < 3 + 2 * _tmp[1] + _tmp[0] && _tmp[i] < _obj_id; i++, j++) { - _values[j] = _tmp[i]; - } - i++; - for (; i < 3 + 2 * _tmp[1] + _tmp[0]; i++, j++) { - _values[j] = _tmp[i]; - } - } - - this->setValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(unsigned) * _values_len); - delete[] _values; - } - - return true; -} - -//TODO: TO BE IMPROVED -bool 
-KVstore::updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist) -{ - vector::const_iterator iter = _pidoidlist.begin(); - while (iter < _pidoidlist.end()) { - TYPE_PREDICATE_ID _preid = *iter; - iter++; - TYPE_ENTITY_LITERAL_ID _objid = *iter; - iter++; - this->updateInsert_s2values(_subid, _preid, _objid); - } - return true; -} - -//TODO: TO BE IMPROVED -bool -KVstore::updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist) -{ - vector::const_iterator iter = _pidoidlist.begin(); - while (iter < _pidoidlist.end()) { - TYPE_PREDICATE_ID _preid = *iter; - iter++; - TYPE_ENTITY_LITERAL_ID _objid = *iter; - iter++; - this->updateRemove_s2values(_subid, _preid, _objid); - } - return true; -} - -bool -KVstore::updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) -{ - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); - - //objID doesn't exist - if (!_get) { - unsigned _values[5]; - _values[0] = 1; - _values[1] = 1; - _values[2] = _pre_id; - _values[3] = 4; - _values[4] = _sub_id; - this->addValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(unsigned) * 5); - } - - //objID exists - else { - unsigned* _values; - unsigned _values_len; - unsigned _position = KVstore::binarySearch(_pre_id, _tmp + 2, _tmp[1], 2); - - //preID doesn't exist - if (_position == -1) { - _values_len = _len / sizeof(unsigned) + 3; - _values = new unsigned[_values_len]; - _values[0] = _tmp[0] + 1; - _values[1] = _tmp[1] + 1; - unsigned i, j; - for (i = 0, j = 2; i < _tmp[1] && _tmp[2 + 2 * i] < _pre_id; i++, j += 2) { - _values[j] = _tmp[2 + 2 * i]; - _values[j + 1] = _tmp[3 + 2 * i] + 2; - } - _values[j] = _pre_id; - unsigned _offset_old; - if (i == _tmp[1]) { - _offset_old = 2 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_old = _tmp[3 + 2 * i]; - } - _values[j + 1] = _offset_old + 2; - j += 
2; - for (; i < _tmp[1]; i++, j += 2) { - _values[j] = _tmp[2 + 2 * i]; - _values[j + 1] = _tmp[3 + 2 * i] + 3; - } - for (i = 2 + 2 * _tmp[1]; i < 2 + 2 * _tmp[1] + _tmp[0]; i++, j++) { - if (i == _offset_old) { - _values[j] = _sub_id; - j++; - } - _values[j] = _tmp[i]; - } - if (i == _offset_old) { - _values[j] = _sub_id; - j++; - } - } - - //preID exists - else { - _values_len = _len / sizeof(unsigned) + 1; - _values = new unsigned[_values_len]; - memcpy(_values, _tmp, sizeof(unsigned) * _tmp[3 + 2 * _position]); - _values[0]++; - for (unsigned i = _position + 1; i < _tmp[1]; i++) { - _values[3 + 2 * i]++; - } - unsigned i, j; - unsigned right; - if (_position == _tmp[1] - 1) { - right = 2 + 2 * _tmp[1] + _tmp[0]; - } - else { - right = _tmp[5 + 2 * _position]; - } - for (i = _tmp[3 + 2 * _position], j = _tmp[3 + 2 * _position]; i < right && _tmp[i] < _sub_id; i++, j++) { - _values[j] = _tmp[i]; - } - _values[j] = _sub_id; - j++; - for (; i < 2 + 2 * _tmp[1] + _tmp[0]; i++, j++) { - _values[j] = _tmp[i]; - } - } - - this->setValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(unsigned) * _values_len); - delete[] _values; - } - - return true; -} - -bool -KVstore::updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) -{ - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); - - if (!_get) { - return false; - } - - //objID doesn't exist after removal - if (_tmp[0] == 1) { - this->removeKey(this->objID2values, _obj_id); - } - - //objID still exists after removal - else { - unsigned* _values; - unsigned _values_len; - unsigned _position = KVstore::binarySearch(_pre_id, _tmp + 2, _tmp[1], 2); - unsigned _sidlen_op; - if (_position == _tmp[1] - 1) { - _sidlen_op = 2 + 2 * _tmp[1] + _tmp[0] - _tmp[3 + 2 * _position]; - } - else { - _sidlen_op = _tmp[5 + 2 * _position] - _tmp[3 + 2 * _position]; - } - - //preID doesn't exist 
after removal - if (_sidlen_op == 1) { - _values_len = _len / sizeof(unsigned) - 3; - _values = new unsigned[_values_len]; - memcpy(_values, _tmp, sizeof(unsigned) * (2 + 2 * _position)); - _values[0]--; - _values[1]--; - for (unsigned i = 0; i < _position; i++) { - _values[3 + 2 * i] -= 2; - } - unsigned i, j; - for (i = 4 + 2 * _position, j = 2 + 2 * _position; i < 2 + 2 * _tmp[1]; i += 2, j += 2) { - _values[j] = _tmp[i]; - _values[j + 1] = _tmp[i + 1] - 3; - } - for (; i < 2 + 2 * _tmp[1] + _tmp[0]; i++, j++) { - if (i == _tmp[3 + 2 * _position]) { - j--; - continue; - } - _values[j] = _tmp[i]; - } - } - - //preID still exists after removal - else { - _values_len = _len / sizeof(unsigned) - 1; - _values = new unsigned[_values_len]; - memcpy(_values, _tmp, sizeof(unsigned) * _tmp[3 + 2 * _position]); - _values[0]--; - for (unsigned i = _position + 1; i < _tmp[1]; i++) { - _values[3 + 2 * i]--; - } - unsigned i, j; - for (i = _tmp[3 + 2 * _position], j = _tmp[3 + 2 * _position]; - i < 2 + 2 * _tmp[1] + _tmp[0] && _tmp[i] < _sub_id; i++, j++) { - _values[j] = _tmp[i]; - } - i++; - for (; i < 2 + 2 * _tmp[1] + _tmp[0]; i++, j++) { - _values[j] = _tmp[i]; - } - } - - this->setValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(unsigned) * _values_len); - delete[] _values; - } - - return true; -} - -//TODO: TO BE IMPROVED -bool -KVstore::updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist) -{ - vector::const_iterator iter = _pidsidlist.begin(); - while (iter < _pidsidlist.end()) { - TYPE_PREDICATE_ID _preid = *iter; - iter++; - TYPE_ENTITY_LITERAL_ID _subid = *iter; - iter++; - this->updateInsert_o2values(_subid, _preid, _objid); - } - return true; -} - -//TODO: TO BE IMPROVED -bool -KVstore::updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist) -{ - vector::const_iterator iter = _pidsidlist.begin(); - while (iter < _pidsidlist.end()) { - TYPE_PREDICATE_ID _preid = *iter; - iter++; - 
TYPE_ENTITY_LITERAL_ID _subid = *iter; - iter++; - this->updateRemove_o2values(_subid, _preid, _objid); - } - return true; -} - -bool -KVstore::updateInsert_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) -{ - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); - - //preid doesn't exist - if (!_get) { - unsigned _values[3]; - _values[0] = 1; - _values[1] = _sub_id; - _values[2] = _obj_id; - this->addValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(unsigned) * 3); - } - - //preid exists - else { - unsigned _values_len = _len / sizeof(unsigned) + 2; - unsigned* _values = new unsigned[_values_len]; - unsigned i, j; - _values[0] = _tmp[0] + 1; - for (i = 1, j = 1; - i < 1 + _tmp[0] && (_tmp[i] < _sub_id || (_tmp[i] == _sub_id && _tmp[i + _tmp[0]] < _obj_id)); - i++, j++) { - _values[j] = _tmp[i]; - _values[j + _tmp[0] + 1] = _tmp[i + _tmp[0]]; - } - _values[j] = _sub_id; - _values[j + _tmp[0] + 1] = _obj_id; - j++; - for (; i < 1 + _tmp[0]; i++, j++) { - _values[j] = _tmp[i]; - _values[j + _tmp[0] + 1] = _tmp[i + _tmp[0]]; - } - this->setValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(unsigned) * _values_len); - delete[] _values; - } - - return true; -} - -bool -KVstore::updateRemove_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) -{ - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); - - if (!_get) { - return false; - } - - //preid doesn't exist after removal - if (_tmp[0] == 1) { - this->removeKey(this->preID2values, _pre_id); - } - - //preid still exists after removal - else { - unsigned _values_len = _len / sizeof(unsigned) - 2; - unsigned* _values = new unsigned[_values_len]; - unsigned i, j; - _values[0] = _tmp[0] - 1; - for (i = 1, j = 1; - i < 1 + _tmp[0] && (_tmp[i] < _sub_id || 
(_tmp[i] == _sub_id && _tmp[i + _tmp[0]] < _obj_id)); - i++, j++) { - _values[j] = _tmp[i]; - _values[j + _tmp[0] - 1] = _tmp[i + _tmp[0]]; - } - i++; - for (; i < 1 + _tmp[0]; i++, j++) { - _values[j] = _tmp[i]; - _values[j + _tmp[0] - 1] = _tmp[i + _tmp[0]]; - } - this->setValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(unsigned) * _values_len); - delete[] _values; - } - - return true; -} - -//TODO: TO BE IMPROVED -bool -KVstore::updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist) -{ - vector::const_iterator iter = _sidoidlist.begin(); - while (iter < _sidoidlist.end()) { - TYPE_ENTITY_LITERAL_ID _subid = *iter; - iter++; - TYPE_ENTITY_LITERAL_ID _objid = *iter; - iter++; - this->updateInsert_p2values(_subid, _preid, _objid); - } - return true; -} - -//TODO: TO BE IMPROVED -bool -KVstore::updateRemove_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist) -{ - vector::const_iterator iter = _sidoidlist.begin(); - while (iter < _sidoidlist.end()) { - TYPE_ENTITY_LITERAL_ID _subid = *iter; - iter++; - TYPE_ENTITY_LITERAL_ID _objid = *iter; - iter++; - this->updateRemove_p2values(_subid, _preid, _objid); - } - return true; -} - -//for entity2id -//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_entity2id(int _mode) { - unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_entity2id_build; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_entity2id_query; - } - else { - cerr << "Invalid open mode in open_entity2id, mode = " << _mode << endl; - return false; - } - return this->open(this->entity2id, KVstore::s_entity2id, _mode, buffer_size); -} - -bool KVstore::close_entity2id() { - if (this->entity2id == NULL) { - return true; - } - this->entity2id->save(); - delete this->entity2id; - this->entity2id = NULL; - return true; -} - -bool KVstore::subIDByEntity(string 
_entity) { - return this->entity2id->remove(_entity.c_str(), _entity.length()); -} - -TYPE_ENTITY_LITERAL_ID -KVstore::getIDByEntity(string _entity) const -{ - return this->getIDByStr(this->entity2id, _entity.c_str(), _entity.length()); -} - -bool -KVstore::setIDByEntity(string _entity, TYPE_ENTITY_LITERAL_ID _id) -{ - return this->addValueByKey(this->entity2id, _entity.c_str(), _entity.length(), _id); -} - -//for id2entity -//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2entity(int _mode) { - unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2entity_build; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2entity_query; - } - else { - cerr << "Invalid open mode in open_id2entity, mode = " << _mode << endl; - return false; - } - return this->open(this->id2entity, KVstore::s_id2entity, _mode, buffer_size); -} - -bool KVstore::close_id2entity() { - if (this->id2entity == NULL) { - return true; - } - this->id2entity->save(); - delete this->id2entity; - this->id2entity = NULL; - return true; -} - -bool -KVstore::subEntityByID(TYPE_ENTITY_LITERAL_ID _id) -{ - return this->id2entity->remove(_id); -} - -string -KVstore::getEntityByID(TYPE_ENTITY_LITERAL_ID _id) const -{ - char* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->id2entity, _id, _tmp, _len); - if (!_get) { - return ""; - } - string _ret = string(_tmp); - return _ret; -} - -bool -KVstore::setEntityByID(TYPE_ENTITY_LITERAL_ID _id, string _entity) -{ - return this->addValueByKey(this->id2entity, _id, _entity.c_str(), _entity.length()); -} - -//for predicate2id -//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_predicate2id(int _mode) { - unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_predicate2id_build; - } - else if 
(_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_predicate2id_query; - } - else { - cerr << "Invalid open mode in open_predicate2id, mode = " << _mode << endl; - return false; - } - return this->open(this->predicate2id, KVstore::s_predicate2id, _mode, buffer_size); -} - -bool KVstore::close_predicate2id() { - if (this->predicate2id == NULL) { - return true; - } - this->predicate2id->save(); - delete this->predicate2id; - this->predicate2id = NULL; - return true; -} - -bool KVstore::subIDByPredicate(string _predicate) { - return this->predicate2id->remove(_predicate.c_str(), _predicate.length()); -} - -TYPE_PREDICATE_ID -KVstore::getIDByPredicate(string _predicate) const -{ - return this->getIDByStr(this->predicate2id, _predicate.c_str(), _predicate.length()); -} - -bool -KVstore::setIDByPredicate(string _predicate, TYPE_PREDICATE_ID _id) -{ - return this->addValueByKey(this->predicate2id, _predicate.c_str(), _predicate.length(), _id); -} - -//for id2predicate -//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2predicate(int _mode) { - unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2predicate_build; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2predicate_query; - } - else { - cerr << "Invalid open mode in open_id2predicate, mode = " << _mode << endl; - return false; - } - return this->open(this->id2predicate, KVstore::s_id2predicate, _mode, buffer_size); -} - -bool KVstore::close_id2predicate() { - if (this->id2predicate == NULL) { - return true; - } - this->id2predicate->save(); - delete this->id2predicate; - this->id2predicate = NULL; - return true; -} - -bool -KVstore::subPredicateByID(TYPE_PREDICATE_ID _id) -{ - return this->id2predicate->remove(_id); -} - -string -KVstore::getPredicateByID(TYPE_PREDICATE_ID _id) const -{ - char* _tmp = NULL; - unsigned _len = 
0; - bool _get = this->getValueByKey(this->id2predicate, _id, _tmp, _len); - if (!_get) { - return ""; - } - string _ret = string(_tmp); - return _ret; -} - -bool -KVstore::setPredicateByID(TYPE_PREDICATE_ID _id, string _predicate) -{ - return this->addValueByKey(this->id2predicate, _id, _predicate.c_str(), _predicate.length()); -} - -//for literal2id -//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_literal2id(int _mode) { - unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_literal2id_build; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_literal2id_query; - } - else { - cerr << "Invalid open mode in open_literal2id, mode = " << _mode << endl; - return false; - } - return this->open(this->literal2id, KVstore::s_literal2id, _mode, buffer_size); -} - -bool KVstore::close_literal2id() { - if (this->literal2id == NULL) { - return true; - } - this->literal2id->save(); - delete this->literal2id; - this->literal2id = NULL; - return true; -} - -bool KVstore::subIDByLiteral(string _literal) { - return this->literal2id->remove(_literal.c_str(), _literal.length()); -} - -TYPE_ENTITY_LITERAL_ID -KVstore::getIDByLiteral(string _literal) const -{ - return this->getIDByStr(this->literal2id, _literal.c_str(), _literal.length()); -} - -bool -KVstore::setIDByLiteral(string _literal, TYPE_ENTITY_LITERAL_ID _id) -{ - return this->addValueByKey(this->literal2id, _literal.c_str(), _literal.length(), _id); -} - -//for id2literal -//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE -bool KVstore::open_id2literal(int _mode) { - unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2literal_build; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2literal_query; - } - else { - cerr << "Invalid open mode in 
open_id2literal, mode = " << _mode << endl; - return false; - } - return this->open(this->id2literal, KVstore::s_id2literal, _mode, buffer_size); -} - -bool KVstore::close_id2literal() { - if (this->id2literal == NULL) { - return true; - } - this->id2literal->save(); - delete this->id2literal; - this->id2literal = NULL; - return true; -} - -bool -KVstore::subLiteralByID(TYPE_ENTITY_LITERAL_ID _id) -{ - return this->id2literal->remove(_id); -} - -string -KVstore::getLiteralByID(TYPE_ENTITY_LITERAL_ID _id) const -{ - char* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->id2literal, _id, _tmp, _len); - if (!_get) { - //NOTICE:here assumes that all literals cannot be empty: "" - return ""; - } - string _ret = string(_tmp); - return _ret; -} - -bool -KVstore::setLiteralByID(TYPE_ENTITY_LITERAL_ID _id, string _literal) -{ - return this->addValueByKey(this->id2literal, _id, _literal.c_str(), _literal.length()); -} - -bool -KVstore::open_subID2values(int _mode) -{ - unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_sID2values_build; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_sID2values_query; - } - else { - cerr << "Invalid open mode in open_subID2values, mode = " << _mode << endl; - return false; - } - return this->open(this->subID2values, KVstore::s_sID2values, _mode, buffer_size); -} - -bool KVstore::close_subID2values() { - if (this->subID2values == NULL) { - return true; - } - this->subID2values->save(); - delete this->subID2values; - this->subID2values = NULL; - return true; -} - -bool -KVstore::build_subID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num) -{ - cout << "Begin building subID2values..." 
<< endl; - //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_spo_cmp); - vector _oidlist_s; - vector _pidoffsetlist_s; - unsigned _entity_num = 0; - - //true means the next sub is a different one from the current one - bool _sub_change = true; - //true means the next is different from the current pair - bool _sub_pre_change = true; - //true means the next pre is different from the current one - bool _pre_change = true; - - this->open_subID2values(KVstore::CREATE_MODE); - - //NOTICE: i*3 + j maybe break the unsigned limit - //for (unsigned long i = 0; i < _triples_num; i++) - for (TYPE_TRIPLE_NUM i = 0; i < _triples_num; i++) - { - if (i + 1 == _triples_num || _p_id_tuples[i].subid != _p_id_tuples[i+1].subid - || _p_id_tuples[i].preid != _p_id_tuples[i+1].preid || _p_id_tuples[i].objid != _p_id_tuples[i+1].objid) - { - if (_sub_change) - { - _pidoffsetlist_s.clear(); - _oidlist_s.clear(); - _entity_num = 0; - } - - TYPE_ENTITY_LITERAL_ID _sub_id = _p_id_tuples[i].subid; - TYPE_PREDICATE_ID _pre_id = _p_id_tuples[i].preid; - TYPE_ENTITY_LITERAL_ID _obj_id = _p_id_tuples[i].objid; - - if (_sub_pre_change) - { - _pidoffsetlist_s.push_back(_pre_id); - _pidoffsetlist_s.push_back(_oidlist_s.size()); - } - - _oidlist_s.push_back(_obj_id); - if (KVstore::isEntity(_obj_id)) { - _entity_num++; - } - - _sub_change = (i + 1 == _triples_num) || (_p_id_tuples[i].subid != _p_id_tuples[i+1].subid); - _pre_change = (i + 1 == _triples_num) || (_p_id_tuples[i].preid != _p_id_tuples[i+1].preid); - _sub_pre_change = _sub_change || _pre_change; - - if (_sub_change) { - for (unsigned j = 1; j < _pidoffsetlist_s.size(); j += 2) { - _pidoffsetlist_s[j] += 3 + _pidoffsetlist_s.size(); - } - unsigned* _entrylist_s = new unsigned[3 + _pidoffsetlist_s.size() + _oidlist_s.size()]; - //triples number - _entrylist_s[0] = _oidlist_s.size(); - //pre number - _entrylist_s[1] = _pidoffsetlist_s.size() / 2; - //entity number - _entrylist_s[2] = _entity_num; - unsigned j, k; - //pidoffsetlist - 
for (j = 3, k = 0; k < _pidoffsetlist_s.size(); j++, k++) { - _entrylist_s[j] = _pidoffsetlist_s[k]; - } - //unsorted oidlist - for (k = 0; k < _oidlist_s.size(); j++, k++) { - _entrylist_s[j] = _oidlist_s[k]; - } - - this->addValueByKey(this->subID2values, _sub_id, (char*)_entrylist_s, sizeof(unsigned) * j); - delete[] _entrylist_s; - } - } - } - - this->close_subID2values(); - cout << "Finished building subID2values" << endl; - return true; -} - -bool -KVstore::getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getpreIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { - _preidlist = NULL; - _list_len = 0; - return false; - } - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { - _preidlist = NULL; - _list_len = 0; - return false; - } - _list_len = _tmp[1]; - _preidlist = new unsigned[_list_len]; - for (unsigned i = 0; i < _list_len; i++) { - _preidlist[i] = _tmp[2 * i + 3]; - } - return true; -} - -bool -KVstore::getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getobjIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { - _objidlist = NULL; - _list_len = 0; - return false; - } - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { - _objidlist = NULL; - _list_len = 0; - return false; - } - _list_len = _tmp[0]; - _objidlist = new unsigned[_list_len]; - memcpy(_objidlist, _tmp + 3 + 2 * _tmp[1], sizeof(unsigned) * _list_len); - Util::sort(_objidlist, _list_len); - if (_no_duplicate) { - _list_len = Util::removeDuplicate(_objidlist, _list_len); - } - return true; -} - -bool -KVstore::getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, 
unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getobjIDlistBysubIDpreID " << _subid << ' ' << _preid << endl; - if (!isEntity(_subid)) { - _objidlist = NULL; - _list_len = 0; - return false; - } - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { - _objidlist = NULL; - _list_len = 0; - return false; - } - unsigned _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2); - //if (_result == -1) - if (_result == INVALID) - { - _objidlist = NULL; - _list_len = 0; - return false; - } - unsigned _offset = _tmp[4 + 2 * _result]; - unsigned _offset_next; - if (_result == _tmp[1] - 1) { - _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_next = _tmp[6 + 2 * _result]; - } - _list_len = _offset_next - _offset; - _objidlist = new unsigned[_list_len]; - memcpy(_objidlist, _tmp + _offset, sizeof(unsigned) * _list_len); - - return true; -} - -bool -KVstore::getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preid_objidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getpreIDobjIDlistBysubID " << _subid << endl; - if (!isEntity(_subid)) { - _preid_objidlist = NULL; - _list_len = 0; - return false; - } - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - if (!_get) { - _preid_objidlist = NULL; - _list_len = 0; - return false; - } - _list_len = 2 * _tmp[0]; - _preid_objidlist = new unsigned[_list_len]; - unsigned _offset_next; - unsigned j = 0; - for (unsigned i = 0; i < _tmp[1]; i++) { - if (i == _tmp[1] - 1) { - _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_next = _tmp[6 + 2 * i]; - } - for (; 3 + 2 * _tmp[1] + j < _offset_next; j++) { - _preid_objidlist[2 * j] = _tmp[3 + 2 * i]; - _preid_objidlist[2 * j + 1] = _tmp[3 + 2 * _tmp[1] + j]; - } - } - - return true; -} - -bool KVstore::open_objID2values(int _mode) { - 
unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_oID2values_build; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_oID2values_query; - } - else { - cerr << "Invalid open mode in open_objID2values, mode = " << _mode << endl; - return false; - } - return this->open(this->objID2values, KVstore::s_oID2values, _mode, buffer_size); -} - -bool KVstore::close_objID2values() { - if (this->objID2values == NULL) { - return true; - } - this->objID2values->save(); - delete this->objID2values; - this->objID2values = NULL; - return true; -} - -bool -KVstore::build_objID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num) -{ - cout << "Begin building objID2values..." << endl; - //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_ops_cmp); - vector _sidlist_o; - vector _pidoffsetlist_o; - - //true means the next obj is a different one from the current one - bool _obj_change = true; - //true means the next is different from the current pair - bool _obj_pre_change = true; - //true means the next pre is different from the current one - bool _pre_change = true; - - this->open_objID2values(KVstore::CREATE_MODE); - - //for (unsigned long i = 0; i < _triples_num; i++) - for (TYPE_TRIPLE_NUM i = 0; i < _triples_num; i++) - { - if (i + 1 == _triples_num || _p_id_tuples[i].subid != _p_id_tuples[i+1].subid - || _p_id_tuples[i].preid != _p_id_tuples[i+1].preid || _p_id_tuples[i].objid != _p_id_tuples[i+1].objid) { - if (_obj_change) { - _pidoffsetlist_o.clear(); - _sidlist_o.clear(); - } - - TYPE_ENTITY_LITERAL_ID _sub_id = _p_id_tuples[i].subid; - TYPE_PREDICATE_ID _pre_id = _p_id_tuples[i].preid; - TYPE_ENTITY_LITERAL_ID _obj_id = _p_id_tuples[i].objid; - - if (_obj_pre_change) { - _pidoffsetlist_o.push_back(_pre_id); - _pidoffsetlist_o.push_back(_sidlist_o.size()); - } - - _sidlist_o.push_back(_sub_id); - - _obj_change = (i + 1 == _triples_num) || 
(_p_id_tuples[i].objid != _p_id_tuples[i+1].objid); - _pre_change = (i + 1 == _triples_num) || (_p_id_tuples[i].preid != _p_id_tuples[i+1].preid); - _obj_pre_change = _obj_change || _pre_change; - - if (_obj_change) { - for (unsigned j = 1; j < _pidoffsetlist_o.size(); j += 2) { - _pidoffsetlist_o[j] += 2 + _pidoffsetlist_o.size(); - } - unsigned* _entrylist_o = new unsigned[2 + _pidoffsetlist_o.size() + _sidlist_o.size()]; - //triples number - _entrylist_o[0] = _sidlist_o.size(); - //pre number - _entrylist_o[1] = _pidoffsetlist_o.size() / 2; - unsigned j, k; - //pidoffsetlist - for (j = 2, k = 0; k < _pidoffsetlist_o.size(); j++, k++) { - _entrylist_o[j] = _pidoffsetlist_o[k]; - } - //unsorted sidlist - for (k = 0; k < _sidlist_o.size(); j++, k++) { - _entrylist_o[j] = _sidlist_o[k]; - } - this->addValueByKey(this->objID2values, _obj_id, (char*)_entrylist_o, sizeof(unsigned) * j); - delete[] _entrylist_o; - } - } - } - - this->close_objID2values(); - cout << "Finished building objID2values" << endl; - return true; -} - -bool -KVstore::getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getpreIDlistByobjID " << _objid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); - if (!_get) { - _preidlist = NULL; - _list_len = 0; - return false; - } - _list_len = _tmp[1]; - _preidlist = new unsigned[_list_len]; - for (unsigned i = 0; i < _list_len; i++) { - _preidlist[i] = _tmp[2 * i + 2]; - } - - return true; -} - -bool -KVstore::getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getsubIDlistByobjID " << _objid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); - if (!_get) { - _subidlist = NULL; - _list_len = 0; - return false; 
- } - _list_len = _tmp[0]; - _subidlist = new unsigned[_list_len]; - memcpy(_subidlist, _tmp + 2 + 2 * _tmp[1], sizeof(unsigned) * _list_len); - Util::sort(_subidlist, _list_len); - if (_no_duplicate) { - _list_len = Util::removeDuplicate(_subidlist, _list_len); - } - - return true; -} - -bool -KVstore::getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getsubIDlistByobjIDpreID " << _objid << ' ' << _preid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); - if (!_get) { - _subidlist = NULL; - _list_len = 0; - return false; - } - unsigned _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2); - //if (_result == -1) - if (_result == INVALID) - { - _subidlist = NULL; - _list_len = 0; - return false; - } - unsigned _offset = _tmp[3 + 2 * _result]; - unsigned _offset_next; - if (_result == _tmp[1] - 1) { - _offset_next = 2 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_next = _tmp[5 + 2 * _result]; - } - _list_len = _offset_next - _offset; - _subidlist = new unsigned[_list_len]; - memcpy(_subidlist, _tmp + _offset, sizeof(unsigned) * _list_len); - - return true; -} - -bool -KVstore::getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preid_subidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getpreIDsubIDlistByobjID " << _objid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); - if (!_get) { - _preid_subidlist = NULL; - _list_len = 0; - return false; - } - _list_len = 2 * _tmp[0]; - _preid_subidlist = new unsigned[_list_len]; - unsigned _offset_next; - unsigned j = 0; - for (unsigned i = 0; i < _tmp[1]; i++) { - if (i == _tmp[1] - 1) { - _offset_next = 2 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_next = _tmp[5 + 2 * i]; - } - 
for (; 2 + 2 * _tmp[1] + j < _offset_next; j++) { - _preid_subidlist[2 * j] = _tmp[2 + 2 * i]; - _preid_subidlist[2 * j + 1] = _tmp[2 + 2 * _tmp[1] + j]; - } - } - - return true; -} - -bool KVstore::open_preID2values(int _mode) { - unsigned long long buffer_size; - if (_mode == KVstore::CREATE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_pID2values_build; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - buffer_size = Util::MAX_BUFFER_SIZE * buffer_pID2values_query; - } - else { - cerr << "Invalid open mode in open_preID2values, mode = " << _mode << endl; - return false; - } - return this->open(this->preID2values, KVstore::s_pID2values, _mode, buffer_size); -} - -bool KVstore::close_preID2values() { - if (this->preID2values == NULL) { - return true; - } - this->preID2values->save(); - delete this->preID2values; - this->preID2values = NULL; - return true; -} - -bool -KVstore::build_preID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num) -{ - cout << "Begin building preID2values..." 
<< endl; - //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_pso_cmp); - vector _sidlist_p; - vector _oidlist_p; - - //true means the next pre is different from the current one - bool _pre_change = true; - - this->open_preID2values(KVstore::CREATE_MODE); - - //for (unsigned long i = 0; i < _triples_num; i++) - for (TYPE_TRIPLE_NUM i = 0; i < _triples_num; i++) - { - if (i + 1 == _triples_num || _p_id_tuples[i].subid != _p_id_tuples[i+1].subid - || _p_id_tuples[i].preid != _p_id_tuples[i+1].preid || _p_id_tuples[i].objid != _p_id_tuples[i+1].objid) { - if (_pre_change) { - _sidlist_p.clear(); - _oidlist_p.clear(); - } - - TYPE_ENTITY_LITERAL_ID _sub_id = _p_id_tuples[i].subid; - TYPE_PREDICATE_ID _pre_id = _p_id_tuples[i].preid; - TYPE_ENTITY_LITERAL_ID _obj_id = _p_id_tuples[i].objid; - - _sidlist_p.push_back(_sub_id); - _oidlist_p.push_back(_obj_id); - - _pre_change = (i + 1 == _triples_num) || (_p_id_tuples[i].preid != _p_id_tuples[i+1].preid); - - if (_pre_change) { - unsigned* _entrylist_p = new unsigned[1 + _sidlist_p.size() * 2]; - //triples number - _entrylist_p[0] = _sidlist_p.size(); - unsigned j, k; - //sidlist - for (j = 1, k = 0; k < _sidlist_p.size(); j++, k++) { - _entrylist_p[j] = _sidlist_p[k]; - } - //unsorted oidlist - for (k = 0; k < _oidlist_p.size(); j++, k++) { - _entrylist_p[j] = _oidlist_p[k]; - } - this->addValueByKey(this->preID2values, _pre_id, (char*)_entrylist_p, sizeof(unsigned) * j); - delete[] _entrylist_p; - } - } - } - - this->close_preID2values(); - cout << "Finished building preID2values" << endl; - return true; -} - -bool -KVstore::getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getsubIDlistBypreID " << _preid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); - if (!_get) { - _subidlist = NULL; - _list_len = 0; - return false; - } - _list_len = 
_tmp[0]; - _subidlist = new unsigned[_list_len]; - memcpy(_subidlist, _tmp + 1, sizeof(unsigned) * _list_len); - if (_no_duplicate) { - _list_len = Util::removeDuplicate(_subidlist, _list_len); - } - - return true; -} - -bool -KVstore::getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate) const { - //cout << "In getobjIDlistBypreID " << _preid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); - if (!_get) { - _objidlist = NULL; - _list_len = 0; - return false; - } - _list_len = _tmp[0]; - _objidlist = new unsigned[_list_len]; - memcpy(_objidlist, _tmp + 1 + _tmp[0], sizeof(unsigned) * _list_len); - Util::sort(_objidlist, _list_len); - if (_no_duplicate) { - _list_len = Util::removeDuplicate(_objidlist, _list_len); - } - - return true; -} - -bool -KVstore::getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_objidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getsubIDobjIDlistBypreID " << _preid << endl; - unsigned* _tmp = NULL; - unsigned _len = 0; - bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); - if (!_get) { - _subid_objidlist = NULL; - _list_len = 0; - return false; - } - _list_len = _tmp[0] * 2; - _subid_objidlist = new unsigned[_list_len]; - for (unsigned i = 0; i < _tmp[0]; i++) { - _subid_objidlist[2 * i] = _tmp[1 + i]; - _subid_objidlist[2 * i + 1] = _tmp[1 + _tmp[0] + i]; - } - - return true; -} - -bool -KVstore::getpreIDlistBysubIDobjID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate) const -{ - //cout << "In getpreIDlistBysubIDobjID " << _subid << ' ' << _objid << endl; - unsigned *list1 = NULL, *list2 = NULL; - unsigned len1 = 0, len2 = 0; - if (!this->getpreIDlistBysubID(_subid, list1, len1, true)) { - _preidlist = NULL; - _list_len = 0; - return false; - } - 
if (!this->getpreIDlistByobjID(_objid, list2, len2, true)) { - _preidlist = NULL; - _list_len = 0; - return false; - } - vector list = KVstore::intersect(list1, list2, len1, len2); - delete[] list1; - delete[] list2; - unsigned len = list.size(); - if (len == 0) { - _preidlist = NULL; - _list_len = 0; - return false; - } - unsigned* _tmp = NULL; - unsigned _len = 0; - this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); - _list_len = len; - unsigned _result = 0; - for (unsigned i = 0; i < len; i++) { - TYPE_PREDICATE_ID _preid = list[i]; - for (; _result < _tmp[1]; _result++) { - if (_tmp[3 + 2 * _result] == _preid) { - break; - } - } - if (_result == _tmp[1]) { - for (unsigned j = i; j < len; j++) { - list[j] = -1; - _list_len--; - } - break; - } - unsigned _offset = _tmp[4 + 2 * _result]; - unsigned _offset_next; - if (_result == _tmp[1] - 1) { - _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; - } - else { - _offset_next = _tmp[6 + 2 * _result]; - } - if (KVstore::binarySearch(_objid, _tmp + _offset, _offset_next - _offset) == -1) { - list[i] = -1; - _list_len--; - } - } - if (_list_len == 0) { - _preidlist = NULL; - return false; - } - _preidlist = new unsigned[_list_len]; - unsigned i = 0, j = 0; - while (i < len) { - if (list[i] != -1) { - _preidlist[j] = list[i]; - i++; - j++; - } - else { - i++; - } - } - - return true; -} - - -bool -KVstore::open(SITree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) -{ - if (_p_btree != NULL) { - return false; - } - string smode; - if (_mode == KVstore::CREATE_MODE) { - smode = "build"; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - smode = "open"; - } - else { - cerr << "Invalid open mode of: " << _tree_name << " mode = " << _mode << endl; - return false; - } - _p_btree = new SITree(this->store_path, _tree_name, smode, _buffer_size); - return true; -} - -bool KVstore::open(ISTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) { - if (_p_btree != NULL) 
{ - return false; - } - string smode; - if (_mode == KVstore::CREATE_MODE) { - smode = "build"; - } - else if (_mode == KVstore::READ_WRITE_MODE) { - smode = "open"; - } - else { - cerr << "Invalid open mode of: " << _tree_name << " mode = " << _mode << endl; - return false; - } - _p_btree = new ISTree(this->store_path, _tree_name, smode, _buffer_size); - return true; -} - -void KVstore::flush(SITree* _p_btree) { - if (_p_btree != NULL) { - _p_btree->save(); - } -} - -void KVstore::flush(ISTree* _p_btree) { - if (_p_btree != NULL) { - _p_btree->save(); - } -} - -bool -KVstore::addValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned _val) -{ - return _p_btree->insert(_key, _klen, _val); -} - -bool -KVstore::addValueByKey(ISTree* _p_btree, unsigned _key, const char* _val, unsigned _vlen) -{ - return _p_btree->insert(_key, _val, _vlen); -} - -bool -KVstore::setValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned _val) -{ - return _p_btree->modify(_key, _klen, _val); -} - -bool -KVstore::setValueByKey(ISTree* _p_btree, unsigned _key, const char* _val, unsigned _vlen) -{ - return _p_btree->modify(_key, _val, _vlen); -} - -bool -KVstore::getValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned* _val) const -{ - return _p_btree->search(_key, _klen, _val); -} - -bool -KVstore::getValueByKey(ISTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const -{ - return _p_btree->search(_key, _val, _vlen); -} - -TYPE_ENTITY_LITERAL_ID -KVstore::getIDByStr(SITree* _p_btree, const char* _key, unsigned _klen) const -{ - unsigned val = 0; - bool ret = _p_btree->search(_key, _klen, &val); - if (!ret) - { - //return -1; - return INVALID; - } - - return val; -} - -bool -KVstore::removeKey(SITree* _p_btree, const char* _key, unsigned _klen) -{ - return _p_btree->remove(_key, _klen); -} - -bool -KVstore::removeKey(ISTree* _p_btree, unsigned _key) -{ - return _p_btree->remove(_key); -} - -vector -KVstore::intersect(const 
unsigned* _list1, const unsigned* _list2, unsigned _len1, unsigned _len2) -{ - unsigned i = 0, j = 0; - vector ret; - while (i < _len1 && j < _len2) { - if (_list1[i] < _list2[j]) { - i++; - } - else if (_list1[i] > _list2[j]) { - j++; - } - else { - ret.push_back(_list1[i]); - i++; - j++; - } - } - return ret; -} - -unsigned -KVstore::binarySearch(unsigned _key, const unsigned* _list, unsigned _list_len, int _step) -{ - unsigned _left = 0; - unsigned _right = _list_len - 1; - unsigned _mid; - while (_left <= _right) { - _mid = (_right - _left) / 2 + _left; - if (_key == _list[_step * _mid]) { - return _mid; - } - if (_key < _list[_step * _mid]) { - _right = _mid - 1; - } - else { - _left = _mid + 1; - } - } - - //return -1; - return INVALID; -} - -bool -KVstore::isEntity(TYPE_ENTITY_LITERAL_ID id) -{ - return id < Util::LITERAL_FIRST_ID; -} - -//TODO+BETTER: adjust the buffer size according to current memory usage(global memory manager) -string KVstore::s_entity2id = "s_entity2id"; -string KVstore::s_id2entity = "s_id2entity"; -unsigned short KVstore::buffer_entity2id_build = 8; -unsigned short KVstore::buffer_id2entity_build = 8; -unsigned short KVstore::buffer_entity2id_query = 2; -unsigned short KVstore::buffer_id2entity_query = 1; - -string KVstore::s_predicate2id = "s_predicate2id"; -string KVstore::s_id2predicate = "s_id2predicate"; -unsigned short KVstore::buffer_predicate2id_build = 8; -unsigned short KVstore::buffer_id2predicate_build = 8; -unsigned short KVstore::buffer_predicate2id_query = 1; -unsigned short KVstore::buffer_id2predicate_query = 1; - -string KVstore::s_literal2id = "s_literal2id"; -string KVstore::s_id2literal = "s_id2literal"; -unsigned short KVstore::buffer_literal2id_build = 8; -unsigned short KVstore::buffer_id2literal_build = 8; -unsigned short KVstore::buffer_literal2id_query = 2; -unsigned short KVstore::buffer_id2literal_query = 1; - -string KVstore::s_sID2values = "s_sID2values"; -string KVstore::s_oID2values = "s_oID2values"; 
-string KVstore::s_pID2values = "s_pID2values"; -unsigned short KVstore::buffer_sID2values_build = 32; -unsigned short KVstore::buffer_oID2values_build = 32; -unsigned short KVstore::buffer_pID2values_build = 16; -unsigned short KVstore::buffer_sID2values_query = 16; -unsigned short KVstore::buffer_oID2values_query = 16; -unsigned short KVstore::buffer_pID2values_query = 8; - +/*============================================================================= +# Filename: KVstore.cpp +# Author: Bookug Lobert +# Mail: 1181955272@qq.com +# Last Modified: 2015-10-23 14:25 +# Description: Modified By Wang Libo +=============================================================================*/ + +#include "KVstore.h" + +using namespace std; + +//sets store_path as the root dir of this KVstore +//initial all Tree pointers as NULL +KVstore::KVstore(string _store_path) +{ + this->store_path = _store_path; + + this->entity2id = NULL; + this->id2entity = NULL; + + this->predicate2id = NULL; + this->id2predicate = NULL; + + this->literal2id = NULL; + this->id2literal = NULL; + + this->subID2values = NULL; + this->preID2values = NULL; + this->objID2values = NULL; +} + +//Release all the memory used in this KVstore before destruction +KVstore::~KVstore() +{ + this->flush(); + this->release(); +} + +//Flush all modified parts into the disk, which will not release any memory +//Does nothing to null tree pointers or parts that has not been modified +void +KVstore::flush() +{ + this->flush(this->entity2id); + this->flush(this->id2entity); + + this->flush(this->literal2id); + this->flush(this->id2literal); + + this->flush(this->predicate2id); + this->flush(this->id2predicate); + + this->flush(this->subID2values); + this->flush(this->preID2values); + this->flush(this->objID2values); +} + +void +KVstore::release() +{ + delete this->entity2id; + this->entity2id = NULL; + delete this->id2entity; + this->id2entity = NULL; + + delete this->literal2id; + this->literal2id = NULL; + delete 
this->id2literal; + this->id2literal = NULL; + + delete this->predicate2id; + this->predicate2id = NULL; + delete this->id2predicate; + this->id2predicate = NULL; + + delete this->subID2values; + this->subID2values = NULL; + delete this->preID2values; + this->preID2values = NULL; + delete this->objID2values; + this->objID2values = NULL; +} + +void +KVstore::open() +{ + cout << "open KVstore" << endl; + + this->open_entity2id(KVstore::READ_WRITE_MODE); + this->open_id2entity(KVstore::READ_WRITE_MODE); + + this->open_literal2id(KVstore::READ_WRITE_MODE); + this->open_id2literal(KVstore::READ_WRITE_MODE); + + this->open_predicate2id(KVstore::READ_WRITE_MODE); + this->open_id2predicate(KVstore::READ_WRITE_MODE); + + this->open_subID2values(KVstore::READ_WRITE_MODE); + this->open_objID2values(KVstore::READ_WRITE_MODE); + this->open_preID2values(KVstore::READ_WRITE_MODE); +} + +unsigned +KVstore::getEntityDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const +{ + return this->getEntityInDegree(_entity_id) + this->getEntityOutDegree(_entity_id); +} + +unsigned +KVstore::getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const +{ + //cout << "In getEntityInDegree " << _entity_id << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len); + + int ret = 0; + if (_get) + { + ret = _tmp[0]; + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; +} + +unsigned +KVstore::getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const +{ + //cout << "In getEntityOutDegree " << _entity_id << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len); + + int ret = 0; + if (_get) + { + ret = _tmp[0]; + } + + //if this is a long list, then we should remove itself after 
copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; +} + +unsigned +KVstore::getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const +{ + //cout << "In getLiteralDegree " << _literal_id << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len); + + int ret = 0; + if (_get) + { + ret = _tmp[0]; + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; +} + +unsigned +KVstore::getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const +{ + //cout << "In getPredicate Degree " << _predicate_id << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len); + + int ret = 0; + if (_get) + { + ret = _tmp[0]; + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; +} + +unsigned +KVstore::getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid) const +{ + //cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); + + int ret = 0; + if(_get) + { + int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2); + if (_result != -1) + { + int _offset = _tmp[4 + 2 * _result]; + int _offset_next; + if (_result == _tmp[1] - 1) + { + _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; + } + else + { + _offset_next = _tmp[6 + 2 * _result]; + } + ret = _offset_next - _offset; + } + } + + //if this is a long list, then we should remove itself after 
copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; +} + +unsigned +KVstore::getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid) const +{ + //cout << "In getObjectPredicateDegree " << _objid << _preid << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); + + int ret = 0; + if (_get) + { + int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2); + if (_result != -1) + { + int _offset = _tmp[3 + 2 * _result]; + int _offset_next; + if (_result == _tmp[1] - 1) + { + _offset_next = 2 + 2 * _tmp[1] + _tmp[0]; + } + else + { + _offset_next = _tmp[5 + 2 * _result]; + } + ret = _offset_next - _offset; + } + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return ret; +} + +bool +KVstore::updateTupleslist_insert(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) +{ + //int* _tmp = NULL; + //int _len = 0; + //bool _get; + //cout << "Inserting:\t" << _sub_id << '\t' << _pre_id << '\t' << _obj_id << endl; + //cout << "Before insertion:" << endl; + //_get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "SubID " << _sub_id << ": doesn't exist." << endl; + //} + //else { + // cout << "SubID " << _sub_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //_get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "ObjID " << _obj_id << ": doesn't exist." 
<< endl; + //} + //else { + // cout << "ObjID " << _obj_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //_get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "PreID " << _pre_id << ": doesn't exist." << endl; + //} + //else { + // cout << "PreID " << _pre_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //bool flag = this->updateInsert_s2values(_sub_id, _pre_id, _obj_id) + // && this->updateInsert_o2values(_sub_id, _pre_id, _obj_id) + // && this->updateInsert_p2values(_sub_id, _pre_id, _obj_id); + //cout << "After insertion:" << endl; + //_get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "SubID " << _sub_id << ": doesn't exist." << endl; + //} + //else { + // cout << "SubID " << _sub_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //_get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "ObjID " << _obj_id << ": doesn't exist." << endl; + //} + //else { + // cout << "ObjID " << _obj_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //_get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "PreID " << _pre_id << ": doesn't exist." 
<< endl; + //} + //else { + // cout << "PreID " << _pre_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //return flag; + return this->updateInsert_s2values(_sub_id, _pre_id, _obj_id) + && this->updateInsert_o2values(_sub_id, _pre_id, _obj_id) + && this->updateInsert_p2values(_sub_id, _pre_id, _obj_id); +} + +bool +KVstore::updateTupleslist_remove(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) +{ + //int* _tmp = NULL; + //int _len = 0; + //bool _get; + //cout << "Removing:\t" << _sub_id << '\t' << _pre_id << '\t' << _obj_id << endl; + //cout << "Before removal:" << endl; + //_get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "SubID " << _sub_id << ": doesn't exist." << endl; + //} + //else { + // cout << "SubID " << _sub_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //_get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "ObjID " << _obj_id << ": doesn't exist." << endl; + //} + //else { + // cout << "ObjID " << _obj_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //_get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "PreID " << _pre_id << ": doesn't exist." 
<< endl; + //} + //else { + // cout << "PreID " << _pre_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //bool flag = this->updateRemove_s2values(_sub_id, _pre_id, _obj_id) + // && this->updateRemove_o2values(_sub_id, _pre_id, _obj_id) + // && this->updateRemove_p2values(_sub_id, _pre_id, _obj_id); + //cout << "After removal:" << endl; + //_get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "SubID " << _sub_id << ": doesn't exist." << endl; + //} + //else { + // cout << "SubID " << _sub_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //_get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "ObjID " << _obj_id << ": doesn't exist." << endl; + //} + //else { + // cout << "ObjID " << _obj_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //_get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); + //if (!_get) { + // cout << "PreID " << _pre_id << ": doesn't exist." 
<< endl; + //} + //else { + // cout << "PreID " << _pre_id << ':'; + // for (unsigned i = 0; i < _len / sizeof(int); i++) { + // cout << '\t' << _tmp[i]; + // } + // cout << endl; + //} + //return flag; + return this->updateRemove_s2values(_sub_id, _pre_id, _obj_id) + && this->updateRemove_o2values(_sub_id, _pre_id, _obj_id) + && this->updateRemove_p2values(_sub_id, _pre_id, _obj_id); +} + +bool +KVstore::updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) +{ + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); + bool _is_entity = Util::is_entity_ele(_obj_id); + + //subID doesn't exist + if (!_get) + { + //unsigned _values[6]; + unsigned* _values = new unsigned[6]; + _values[0] = 1; + _values[1] = 1; + _values[2] = _is_entity ? 1 : 0; + _values[3] = _pre_id; + _values[4] = 5; + _values[5] = _obj_id; + //NOTICE: not use array in stack here, otherwise it will be freed, and data in B+Tree, too + this->addValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(int) * 6); + } + + //subID exists + else + { + unsigned* _values; + unsigned _values_len; + unsigned _position = KVstore::binarySearch(_pre_id, _tmp + 3, _tmp[1], 2); + + //preID doesn't exist + if (_position == INVALID) + //if (_position == -1) + { + _values_len = _len / sizeof(unsigned) + 3; + _values = new unsigned[_values_len]; + _values[0] = _tmp[0] + 1; + _values[1] = _tmp[1] + 1; + _values[2] = _tmp[2] + (_is_entity ? 
1 : 0); + unsigned i, j; + for (i = 0, j = 3; i < _tmp[1] && _tmp[3 + 2 * i] < _pre_id; i++, j += 2) { + _values[j] = _tmp[3 + 2 * i]; + _values[j + 1] = _tmp[4 + 2 * i] + 2; + } + _values[j] = _pre_id; + unsigned _offset_old; + if (i == _tmp[1]) { + _offset_old = 3 + 2 * _tmp[1] + _tmp[0]; + } + else { + _offset_old = _tmp[4 + 2 * i]; + } + _values[j + 1] = _offset_old + 2; + j += 2; + for (; i < _tmp[1]; i++, j += 2) { + _values[j] = _tmp[3 + 2 * i]; + _values[j + 1] = _tmp[4 + 2 * i] + 3; + } + for (i = 3 + 2 * _tmp[1]; i < 3 + 2 * _tmp[1] + _tmp[0]; i++, j++) { + if (i == _offset_old) { + _values[j] = _obj_id; + j++; + } + _values[j] = _tmp[i]; + } + if (i == _offset_old) { + _values[j] = _obj_id; + } + } + + //preID exists + else + { + _values_len = _len / sizeof(unsigned) + 1; + _values = new unsigned[_values_len]; + memcpy(_values, _tmp, sizeof(unsigned) * _tmp[4 + 2 * _position]); + _values[0]++; + if (_is_entity) { + _values[2]++; + } + for (unsigned i = _position + 1; i < _tmp[1]; i++) { + _values[4 + 2 * i]++; + } + unsigned i, j; + unsigned right; + if (_position == _tmp[1] - 1) { + right = 3 + 2 * _tmp[1] + _tmp[0]; + } + else { + right = _tmp[6 + 2 * _position]; + } + for (i = _tmp[4 + 2 * _position], j = _tmp[4 + 2 * _position]; i < right && _tmp[i] < _obj_id; i++, j++) { + _values[j] = _tmp[i]; + } + _values[j] = _obj_id; + j++; + for (; i < 3 + 2 * _tmp[1] + _tmp[0]; i++, j++) { + _values[j] = _tmp[i]; + } + } + + this->setValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(unsigned) * _values_len); + //delete[] _values; + } + + return true; +} + +bool +KVstore::updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) +{ + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len); + bool _is_entity = Util::is_entity_ele(_obj_id); + + if (!_get) + { + return false; + } + + //subID doesn't exist after removal + if 
(_tmp[0] == 1) + { + this->removeKey(this->subID2values, _sub_id); + } + //subID still exists after removal + else + { + unsigned* _values; + unsigned _values_len; + unsigned _position = KVstore::binarySearch(_pre_id, _tmp + 3, _tmp[1], 2); + unsigned _oidlen_sp; + //DEBUG: if _tmp[1] -1 < 0?? + if (_position == _tmp[1] - 1) + { + _oidlen_sp = 3 + 2 * _tmp[1] + _tmp[0] - _tmp[4 + 2 * _position]; + } + else + { + _oidlen_sp = _tmp[6 + 2 * _position] - _tmp[4 + 2 * _position]; + } + + //preID doesn't exist after removal + if (_oidlen_sp == 1) { + _values_len = _len / sizeof(unsigned) - 3; + _values = new unsigned[_values_len]; + memcpy(_values, _tmp, sizeof(unsigned) * (3 + 2 * _position)); + _values[0]--; + _values[1]--; + if (_is_entity) { + _values[2]--; + } + for (unsigned i = 0; i < _position; i++) { + _values[4 + 2 * i] -= 2; + } + unsigned i, j; + for (i = 5 + 2 * _position, j = 3 + 2 * _position; i < 3 + 2 * _tmp[1]; i += 2, j += 2) { + _values[j] = _tmp[i]; + _values[j + 1] = _tmp[i + 1] - 3; + } + for (; i < 3 + 2 * _tmp[1] + _tmp[0]; i++, j++) { + if (i == _tmp[4 + 2 * _position]) { + j--; + continue; + } + _values[j] = _tmp[i]; + } + } + + //preID still exists after removal + else { + _values_len = _len / sizeof(unsigned) - 1; + _values = new unsigned[_values_len]; + memcpy(_values, _tmp, sizeof(unsigned) * _tmp[4 + 2 * _position]); + _values[0]--; + if (_is_entity) { + _values[2]--; + } + for (unsigned i = _position + 1; i < _tmp[1]; i++) { + _values[4 + 2 * i]--; + } + unsigned i, j; + for (i = _tmp[4 + 2 * _position], j = _tmp[4 + 2 * _position]; + i < 3 + 2 * _tmp[1] + _tmp[0] && _tmp[i] < _obj_id; i++, j++) { + _values[j] = _tmp[i]; + } + i++; + for (; i < 3 + 2 * _tmp[1] + _tmp[0]; i++, j++) { + _values[j] = _tmp[i]; + } + } + + this->setValueByKey(this->subID2values, _sub_id, (char*)_values, sizeof(unsigned) * _values_len); + //delete[] _values; + } + + return true; +} + +//TODO: TO BE IMPROVED +bool 
+KVstore::updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist) +{ + vector::const_iterator iter = _pidoidlist.begin(); + while (iter < _pidoidlist.end()) { + TYPE_PREDICATE_ID _preid = *iter; + iter++; + TYPE_ENTITY_LITERAL_ID _objid = *iter; + iter++; + this->updateInsert_s2values(_subid, _preid, _objid); + } + return true; +} + +//TODO: TO BE IMPROVED +bool +KVstore::updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist) +{ + vector::const_iterator iter = _pidoidlist.begin(); + while (iter < _pidoidlist.end()) { + TYPE_PREDICATE_ID _preid = *iter; + iter++; + TYPE_ENTITY_LITERAL_ID _objid = *iter; + iter++; + this->updateRemove_s2values(_subid, _preid, _objid); + } + return true; +} + +bool +KVstore::updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) +{ + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); + + //objID doesn't exist + if (!_get) { + //unsigned _values[5]; + unsigned* _values = new unsigned[5]; + _values[0] = 1; + _values[1] = 1; + _values[2] = _pre_id; + _values[3] = 4; + _values[4] = _sub_id; + this->addValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(unsigned) * 5); + } + + //objID exists + else { + unsigned* _values; + unsigned _values_len; + unsigned _position = KVstore::binarySearch(_pre_id, _tmp + 2, _tmp[1], 2); + + //preID doesn't exist + if (_position == -1) { + _values_len = _len / sizeof(unsigned) + 3; + _values = new unsigned[_values_len]; + _values[0] = _tmp[0] + 1; + _values[1] = _tmp[1] + 1; + unsigned i, j; + for (i = 0, j = 2; i < _tmp[1] && _tmp[2 + 2 * i] < _pre_id; i++, j += 2) { + _values[j] = _tmp[2 + 2 * i]; + _values[j + 1] = _tmp[3 + 2 * i] + 2; + } + _values[j] = _pre_id; + unsigned _offset_old; + if (i == _tmp[1]) { + _offset_old = 2 + 2 * _tmp[1] + _tmp[0]; + } + else { + _offset_old = _tmp[3 + 2 * i]; + } + 
_values[j + 1] = _offset_old + 2; + j += 2; + for (; i < _tmp[1]; i++, j += 2) { + _values[j] = _tmp[2 + 2 * i]; + _values[j + 1] = _tmp[3 + 2 * i] + 3; + } + for (i = 2 + 2 * _tmp[1]; i < 2 + 2 * _tmp[1] + _tmp[0]; i++, j++) { + if (i == _offset_old) { + _values[j] = _sub_id; + j++; + } + _values[j] = _tmp[i]; + } + if (i == _offset_old) { + _values[j] = _sub_id; + j++; + } + } + + //preID exists + else { + _values_len = _len / sizeof(unsigned) + 1; + _values = new unsigned[_values_len]; + memcpy(_values, _tmp, sizeof(unsigned) * _tmp[3 + 2 * _position]); + _values[0]++; + for (unsigned i = _position + 1; i < _tmp[1]; i++) { + _values[3 + 2 * i]++; + } + unsigned i, j; + unsigned right; + if (_position == _tmp[1] - 1) { + right = 2 + 2 * _tmp[1] + _tmp[0]; + } + else { + right = _tmp[5 + 2 * _position]; + } + for (i = _tmp[3 + 2 * _position], j = _tmp[3 + 2 * _position]; i < right && _tmp[i] < _sub_id; i++, j++) { + _values[j] = _tmp[i]; + } + _values[j] = _sub_id; + j++; + for (; i < 2 + 2 * _tmp[1] + _tmp[0]; i++, j++) { + _values[j] = _tmp[i]; + } + } + + this->setValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(unsigned) * _values_len); + //delete[] _values; + } + + return true; +} + +bool +KVstore::updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) +{ + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len); + + if (!_get) { + return false; + } + + //objID doesn't exist after removal + if (_tmp[0] == 1) { + this->removeKey(this->objID2values, _obj_id); + } + + //objID still exists after removal + else { + unsigned* _values; + unsigned _values_len; + unsigned _position = KVstore::binarySearch(_pre_id, _tmp + 2, _tmp[1], 2); + unsigned _sidlen_op; + if (_position == _tmp[1] - 1) { + _sidlen_op = 2 + 2 * _tmp[1] + _tmp[0] - _tmp[3 + 2 * _position]; + } + else { + _sidlen_op = _tmp[5 + 2 * _position] - _tmp[3 + 2 * 
_position]; + } + + //preID doesn't exist after removal + if (_sidlen_op == 1) { + _values_len = _len / sizeof(unsigned) - 3; + _values = new unsigned[_values_len]; + memcpy(_values, _tmp, sizeof(unsigned) * (2 + 2 * _position)); + _values[0]--; + _values[1]--; + for (unsigned i = 0; i < _position; i++) { + _values[3 + 2 * i] -= 2; + } + unsigned i, j; + for (i = 4 + 2 * _position, j = 2 + 2 * _position; i < 2 + 2 * _tmp[1]; i += 2, j += 2) { + _values[j] = _tmp[i]; + _values[j + 1] = _tmp[i + 1] - 3; + } + for (; i < 2 + 2 * _tmp[1] + _tmp[0]; i++, j++) { + if (i == _tmp[3 + 2 * _position]) { + j--; + continue; + } + _values[j] = _tmp[i]; + } + } + + //preID still exists after removal + else { + _values_len = _len / sizeof(unsigned) - 1; + _values = new unsigned[_values_len]; + memcpy(_values, _tmp, sizeof(unsigned) * _tmp[3 + 2 * _position]); + _values[0]--; + for (unsigned i = _position + 1; i < _tmp[1]; i++) { + _values[3 + 2 * i]--; + } + unsigned i, j; + for (i = _tmp[3 + 2 * _position], j = _tmp[3 + 2 * _position]; + i < 2 + 2 * _tmp[1] + _tmp[0] && _tmp[i] < _sub_id; i++, j++) { + _values[j] = _tmp[i]; + } + i++; + for (; i < 2 + 2 * _tmp[1] + _tmp[0]; i++, j++) { + _values[j] = _tmp[i]; + } + } + + this->setValueByKey(this->objID2values, _obj_id, (char*)_values, sizeof(unsigned) * _values_len); + //delete[] _values; + } + + return true; +} + +//TODO: TO BE IMPROVED +bool +KVstore::updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist) +{ + vector::const_iterator iter = _pidsidlist.begin(); + while (iter < _pidsidlist.end()) { + TYPE_PREDICATE_ID _preid = *iter; + iter++; + TYPE_ENTITY_LITERAL_ID _subid = *iter; + iter++; + this->updateInsert_o2values(_subid, _preid, _objid); + } + return true; +} + +//TODO: TO BE IMPROVED +bool +KVstore::updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist) +{ + vector::const_iterator iter = _pidsidlist.begin(); + while (iter < _pidsidlist.end()) { + 
TYPE_PREDICATE_ID _preid = *iter; + iter++; + TYPE_ENTITY_LITERAL_ID _subid = *iter; + iter++; + this->updateRemove_o2values(_subid, _preid, _objid); + } + return true; +} + +bool +KVstore::updateInsert_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) +{ + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); + + //preid doesn't exist + if (!_get) { + //unsigned _values[3]; + unsigned* _values = new unsigned[3]; + _values[0] = 1; + _values[1] = _sub_id; + _values[2] = _obj_id; + this->addValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(unsigned) * 3); + } + + //preid exists + else { + unsigned _values_len = _len / sizeof(unsigned) + 2; + unsigned* _values = new unsigned[_values_len]; + unsigned i, j; + _values[0] = _tmp[0] + 1; + for (i = 1, j = 1; + i < 1 + _tmp[0] && (_tmp[i] < _sub_id || (_tmp[i] == _sub_id && _tmp[i + _tmp[0]] < _obj_id)); + i++, j++) { + _values[j] = _tmp[i]; + _values[j + _tmp[0] + 1] = _tmp[i + _tmp[0]]; + } + _values[j] = _sub_id; + _values[j + _tmp[0] + 1] = _obj_id; + j++; + for (; i < 1 + _tmp[0]; i++, j++) { + _values[j] = _tmp[i]; + _values[j + _tmp[0] + 1] = _tmp[i + _tmp[0]]; + } + this->setValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(unsigned) * _values_len); + //delete[] _values; + } + + return true; +} + +bool +KVstore::updateRemove_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) +{ + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len); + + if (!_get) { + return false; + } + + //preid doesn't exist after removal + if (_tmp[0] == 1) { + this->removeKey(this->preID2values, _pre_id); + } + + //preid still exists after removal + else { + unsigned _values_len = _len / sizeof(unsigned) - 2; + unsigned* _values = new unsigned[_values_len]; + unsigned i, j; + 
_values[0] = _tmp[0] - 1; + for (i = 1, j = 1; + i < 1 + _tmp[0] && (_tmp[i] < _sub_id || (_tmp[i] == _sub_id && _tmp[i + _tmp[0]] < _obj_id)); + i++, j++) { + _values[j] = _tmp[i]; + _values[j + _tmp[0] - 1] = _tmp[i + _tmp[0]]; + } + i++; + for (; i < 1 + _tmp[0]; i++, j++) { + _values[j] = _tmp[i]; + _values[j + _tmp[0] - 1] = _tmp[i + _tmp[0]]; + } + this->setValueByKey(this->preID2values, _pre_id, (char*)_values, sizeof(unsigned) * _values_len); + //delete[] _values; + } + + return true; +} + +//TODO: TO BE IMPROVED +bool +KVstore::updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist) +{ + vector::const_iterator iter = _sidoidlist.begin(); + while (iter < _sidoidlist.end()) { + TYPE_ENTITY_LITERAL_ID _subid = *iter; + iter++; + TYPE_ENTITY_LITERAL_ID _objid = *iter; + iter++; + this->updateInsert_p2values(_subid, _preid, _objid); + } + return true; +} + +//TODO: TO BE IMPROVED +bool +KVstore::updateRemove_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist) +{ + vector::const_iterator iter = _sidoidlist.begin(); + while (iter < _sidoidlist.end()) { + TYPE_ENTITY_LITERAL_ID _subid = *iter; + iter++; + TYPE_ENTITY_LITERAL_ID _objid = *iter; + iter++; + this->updateRemove_p2values(_subid, _preid, _objid); + } + return true; +} + +//for entity2id +//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE +bool +KVstore::open_entity2id(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_entity2id_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_entity2id_query; + } + else + { + cerr << "Invalid open mode in open_entity2id, mode = " << _mode << endl; + return false; + } + + return this->open(this->entity2id, KVstore::s_entity2id, _mode, buffer_size); +} + +bool +KVstore::close_entity2id() +{ + if (this->entity2id == NULL) + { + return true; + } + + this->entity2id->save(); + 
delete this->entity2id; + this->entity2id = NULL; + + return true; +} + +bool +KVstore::subIDByEntity(string _entity) +{ + //NOTICE: no need to copy _entity to a char* buffer + //_entity will not be released befor ethis function ends + //so _entity.c_str() is a valid const char* + return this->entity2id->remove(_entity.c_str(), _entity.length()); +} + +TYPE_ENTITY_LITERAL_ID +KVstore::getIDByEntity(string _entity) const +{ + return this->getIDByStr(this->entity2id, _entity.c_str(), _entity.length()); +} + +bool +KVstore::setIDByEntity(string _entity, TYPE_ENTITY_LITERAL_ID _id) +{ + //return this->addValueByKey(this->entity2id, _entity.c_str(), _entity.length(), _id); + //int len = _entity.length() + 1; + int len = _entity.length(); + char* str = new char[len]; + memcpy(str, _entity.c_str(), len); + return this->addValueByKey(this->entity2id, str, len, _id); +} + +//for id2entity +//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE +bool +KVstore::open_id2entity(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2entity_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2entity_query; + } + else + { + cerr << "Invalid open mode in open_id2entity, mode = " << _mode << endl; + return false; + } + + return this->open(this->id2entity, KVstore::s_id2entity, _mode, buffer_size); +} + +bool +KVstore::close_id2entity() +{ + if (this->id2entity == NULL) + { + return true; + } + + this->id2entity->save(); + delete this->id2entity; + this->id2entity = NULL; + + return true; +} + +bool +KVstore::subEntityByID(TYPE_ENTITY_LITERAL_ID _id) +{ + return this->id2entity->remove(_id); +} + +string +KVstore::getEntityByID(TYPE_ENTITY_LITERAL_ID _id) const +{ + char* _tmp = NULL; + unsigned _len = 0; + + bool _get = this->getValueByKey(this->id2entity, _id, _tmp, _len); + if (!_get) + { + return ""; + } + + //NOTICE: no need to 
add \0 at last if we indicate the length + string _ret = string(_tmp, _len); + + return _ret; +} + +bool +KVstore::setEntityByID(TYPE_ENTITY_LITERAL_ID _id, string _entity) +{ + //return this->addValueByKey(this->id2entity, _id, _entity.c_str(), _entity.length()); + //int len = _entity.length() + 1; + int len = _entity.length(); + char* str = new char[len]; + memcpy(str, _entity.c_str(), len); + + return this->addValueByKey(this->id2entity, _id, str, len); +} + +//for predicate2id +//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE +bool +KVstore::open_predicate2id(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_predicate2id_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_predicate2id_query; + } + else + { + cerr << "Invalid open mode in open_predicate2id, mode = " << _mode << endl; + return false; + } + + return this->open(this->predicate2id, KVstore::s_predicate2id, _mode, buffer_size); +} + +bool +KVstore::close_predicate2id() +{ + if (this->predicate2id == NULL) + { + return true; + } + + this->predicate2id->save(); + delete this->predicate2id; + this->predicate2id = NULL; + + return true; +} + +bool +KVstore::subIDByPredicate(string _predicate) +{ + return this->predicate2id->remove(_predicate.c_str(), _predicate.length()); +} + +TYPE_PREDICATE_ID +KVstore::getIDByPredicate(string _predicate) const +{ + return this->getIDByStr(this->predicate2id, _predicate.c_str(), _predicate.length()); +} + +bool +KVstore::setIDByPredicate(string _predicate, TYPE_PREDICATE_ID _id) +{ + //return this->addValueByKey(this->predicate2id, _predicate.c_str(), _predicate.length(), _id); + //int len = _predicate.length() + 1; + int len = _predicate.length(); + char* str = new char[len]; + memcpy(str, _predicate.c_str(), len); + return this->addValueByKey(this->predicate2id, str, len, _id); +} + +//for id2predicate 
+//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE +bool +KVstore::open_id2predicate(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2predicate_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2predicate_query; + } + else + { + cerr << "Invalid open mode in open_id2predicate, mode = " << _mode << endl; + return false; + } + + return this->open(this->id2predicate, KVstore::s_id2predicate, _mode, buffer_size); +} + +bool +KVstore::close_id2predicate() +{ + if (this->id2predicate == NULL) + { + return true; + } + + this->id2predicate->save(); + delete this->id2predicate; + this->id2predicate = NULL; + + return true; +} + +bool +KVstore::subPredicateByID(TYPE_PREDICATE_ID _id) +{ + return this->id2predicate->remove(_id); +} + +string +KVstore::getPredicateByID(TYPE_PREDICATE_ID _id) const +{ + char* _tmp = NULL; + unsigned _len = 0; + + bool _get = this->getValueByKey(this->id2predicate, _id, _tmp, _len); + if (!_get) + { + return ""; + } + string _ret = string(_tmp, _len); + + return _ret; +} + +bool +KVstore::setPredicateByID(TYPE_PREDICATE_ID _id, string _predicate) +{ + //return this->addValueByKey(this->id2predicate, _id, _predicate.c_str(), _predicate.length()); + //int len = _predicate.length() + 1; + int len = _predicate.length(); + char* str = new char[len]; + memcpy(str, _predicate.c_str(), len); + + return this->addValueByKey(this->id2predicate, _id, str, len); +} + +//for literal2id +//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE +bool +KVstore::open_literal2id(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_literal2id_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_literal2id_query; + } + else + { + cerr << "Invalid open mode 
in open_literal2id, mode = " << _mode << endl; + return false; + } + + return this->open(this->literal2id, KVstore::s_literal2id, _mode, buffer_size); +} + +bool +KVstore::close_literal2id() +{ + if (this->literal2id == NULL) + { + return true; + } + + this->literal2id->save(); + delete this->literal2id; + this->literal2id = NULL; + + return true; +} + +bool +KVstore::subIDByLiteral(string _literal) +{ + return this->literal2id->remove(_literal.c_str(), _literal.length()); +} + +TYPE_ENTITY_LITERAL_ID +KVstore::getIDByLiteral(string _literal) const +{ + return this->getIDByStr(this->literal2id, _literal.c_str(), _literal.length()); +} + +bool +KVstore::setIDByLiteral(string _literal, TYPE_ENTITY_LITERAL_ID _id) +{ + //return this->addValueByKey(this->literal2id, _literal.c_str(), _literal.length(), _id); + //int len = _literal.length() + 1; + int len = _literal.length(); + char* str = new char[len]; + memcpy(str, _literal.c_str(), len); + + return this->addValueByKey(this->literal2id, str, len, _id); +} + +//for id2literal +//_mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE +bool +KVstore::open_id2literal(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2literal_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_id2literal_query; + } + else + { + cerr << "Invalid open mode in open_id2literal, mode = " << _mode << endl; + return false; + } + + return this->open(this->id2literal, KVstore::s_id2literal, _mode, buffer_size); +} + +bool +KVstore::close_id2literal() +{ + if (this->id2literal == NULL) + { + return true; + } + + this->id2literal->save(); + delete this->id2literal; + this->id2literal = NULL; + + return true; +} + +bool +KVstore::subLiteralByID(TYPE_ENTITY_LITERAL_ID _id) +{ + return this->id2literal->remove(_id); +} + +string +KVstore::getLiteralByID(TYPE_ENTITY_LITERAL_ID _id) const +{ + char* 
_tmp = NULL; + unsigned _len = 0; + + bool _get = this->getValueByKey(this->id2literal, _id, _tmp, _len); + if (!_get) + { + //NOTICE:here assumes that all literals cannot be empty: "" + return ""; + } + string _ret = string(_tmp, _len); + + return _ret; +} + +bool +KVstore::setLiteralByID(TYPE_ENTITY_LITERAL_ID _id, string _literal) +{ + //return this->addValueByKey(this->id2literal, _id, _literal.c_str(), _literal.length()); + //int len = _literal.length() + 1; + int len = _literal.length(); + char* str = new char[len]; + memcpy(str, _literal.c_str(), len); + + return this->addValueByKey(this->id2literal, _id, str, len); +} + +bool +KVstore::open_subID2values(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_sID2values_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_sID2values_query; + } + else + { + cerr << "Invalid open mode in open_subID2values, mode = " << _mode << endl; + return false; + } + + return this->open(this->subID2values, KVstore::s_sID2values, _mode, buffer_size); +} + +bool +KVstore::close_subID2values() +{ + if (this->subID2values == NULL) + { + return true; + } + + this->subID2values->save(); + delete this->subID2values; + this->subID2values = NULL; + + return true; +} + +//STRUCT of s2xx: triple_number pre_num entity_border p1 offset1 p2 offset2 ... pn offsetn +//p1-list(in offset1) p2-list(in offset2) ... pn-list(in offsetn) +//(the final whole list is a unsorted olist) +bool +KVstore::build_subID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num) +{ + cout << "Begin building subID2values..." 
<< endl; + //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_spo_cmp); + vector _oidlist_s; + vector _pidoffsetlist_s; + //NOTICE: this is used for entity-literal border, but not used now + //it is only set for the whole olist in s2po, not for sp2o + unsigned _entity_num = 0; + + //true means the next sub is a different one from the current one + bool _sub_change = true; + //true means the next is different from the current pair + bool _sub_pre_change = true; + //true means the next pre is different from the current one + bool _pre_change = true; + + this->open_subID2values(KVstore::CREATE_MODE); + + //NOTICE: i*3 + j maybe break the unsigned limit + //for (unsigned long i = 0; i < _triples_num; i++) + for (TYPE_TRIPLE_NUM i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i].subid != _p_id_tuples[i+1].subid + || _p_id_tuples[i].preid != _p_id_tuples[i+1].preid || _p_id_tuples[i].objid != _p_id_tuples[i+1].objid) + { + if (_sub_change) + { + _pidoffsetlist_s.clear(); + _oidlist_s.clear(); + _entity_num = 0; + } + + TYPE_ENTITY_LITERAL_ID _sub_id = _p_id_tuples[i].subid; + TYPE_PREDICATE_ID _pre_id = _p_id_tuples[i].preid; + TYPE_ENTITY_LITERAL_ID _obj_id = _p_id_tuples[i].objid; + + if (_sub_pre_change) + { + _pidoffsetlist_s.push_back(_pre_id); + _pidoffsetlist_s.push_back(_oidlist_s.size()); + } + + _oidlist_s.push_back(_obj_id); + if (Util::is_entity_ele(_obj_id)) + { + _entity_num++; + } + + _sub_change = (i + 1 == _triples_num) || (_p_id_tuples[i].subid != _p_id_tuples[i+1].subid); + _pre_change = (i + 1 == _triples_num) || (_p_id_tuples[i].preid != _p_id_tuples[i+1].preid); + _sub_pre_change = _sub_change || _pre_change; + + if (_sub_change) + { + for (unsigned j = 1; j < _pidoffsetlist_s.size(); j += 2) + { + _pidoffsetlist_s[j] += 3 + _pidoffsetlist_s.size(); + } + unsigned* _entrylist_s = new unsigned[3 + _pidoffsetlist_s.size() + _oidlist_s.size()]; + //triples number + _entrylist_s[0] = _oidlist_s.size(); + //pre number 
+ _entrylist_s[1] = _pidoffsetlist_s.size() / 2; + //entity number + _entrylist_s[2] = _entity_num; + unsigned j, k; + //pidoffsetlist + for (j = 3, k = 0; k < _pidoffsetlist_s.size(); j++, k++) + { + _entrylist_s[j] = _pidoffsetlist_s[k]; + } + //unsorted oidlist + for (k = 0; k < _oidlist_s.size(); j++, k++) + { + _entrylist_s[j] = _oidlist_s[k]; + } + + this->addValueByKey(this->subID2values, _sub_id, (char*)_entrylist_s, sizeof(unsigned) * j); + //delete[] _entrylist_s; + } + } + } + + this->close_subID2values(); + cout << "Finished building subID2values" << endl; + + return true; +} + +bool +KVstore::getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getpreIDlistBysubID " << _subid << endl; + if (!Util::is_entity_ele(_subid)) { + _preidlist = NULL; + _list_len = 0; + return false; + } + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); + + if (!_get) + { + _preidlist = NULL; + _list_len = 0; + return false; + } + + _list_len = _tmp[1]; + _preidlist = new unsigned[_list_len]; + for (unsigned i = 0; i < _list_len; i++) { + _preidlist[i] = _tmp[2 * i + 3]; + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getobjIDlistBysubID " << _subid << endl; + if (!Util::is_entity_ele(_subid)) { + _objidlist = NULL; + _list_len = 0; + return false; + } + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); + if (!_get) + { + _objidlist = NULL; + _list_len = 0; + return false; + } + + _list_len = _tmp[0]; + _objidlist = 
new unsigned[_list_len]; + memcpy(_objidlist, _tmp + 3 + 2 * _tmp[1], sizeof(unsigned) * _list_len); + Util::sort(_objidlist, _list_len); + if (_no_duplicate) { + _list_len = Util::removeDuplicate(_objidlist, _list_len); + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getobjIDlistBysubIDpreID " << _subid << ' ' << _preid << endl; + if (!Util::is_entity_ele(_subid)) { + _objidlist = NULL; + _list_len = 0; + return false; + } + unsigned* _tmp = NULL; + + unsigned _len = 0; + bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); + if (!_get) { + _objidlist = NULL; + _list_len = 0; + return false; + } + unsigned _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2); + //if (_result == -1) + if (_result == INVALID) + { + _objidlist = NULL; + _list_len = 0; + return false; + } + unsigned _offset = _tmp[4 + 2 * _result]; + unsigned _offset_next; + if (_result == _tmp[1] - 1) { + _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; + } + else { + _offset_next = _tmp[6 + 2 * _result]; + } + _list_len = _offset_next - _offset; + _objidlist = new unsigned[_list_len]; + memcpy(_objidlist, _tmp + _offset, sizeof(unsigned) * _list_len); + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preid_objidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getpreIDobjIDlistBysubID " << _subid << endl; + if (!Util::is_entity_ele(_subid)) + { + 
_preid_objidlist = NULL; + _list_len = 0; + return false; + } + unsigned* _tmp = NULL; + + unsigned _len = 0; + bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); + if (!_get) { + _preid_objidlist = NULL; + _list_len = 0; + return false; + } + + _list_len = 2 * _tmp[0]; + _preid_objidlist = new unsigned[_list_len]; + unsigned _offset_next; + unsigned j = 0; + for (unsigned i = 0; i < _tmp[1]; i++) { + if (i == _tmp[1] - 1) { + _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; + } + else { + _offset_next = _tmp[6 + 2 * i]; + } + for (; 3 + 2 * _tmp[1] + j < _offset_next; j++) { + _preid_objidlist[2 * j] = _tmp[3 + 2 * i]; + _preid_objidlist[2 * j + 1] = _tmp[3 + 2 * _tmp[1] + j]; + } + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::open_objID2values(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_oID2values_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) + { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_oID2values_query; + } + else + { + cerr << "Invalid open mode in open_objID2values, mode = " << _mode << endl; + return false; + } + + return this->open(this->objID2values, KVstore::s_oID2values, _mode, buffer_size); +} + +bool +KVstore::close_objID2values() +{ + if (this->objID2values == NULL) + { + return true; + } + + this->objID2values->save(); + delete this->objID2values; + this->objID2values = NULL; + + return true; +} + +//NOTICE: do not need entity border here, because no literal in o2pslist +//STRUCT of o2xx: triple_number pre_num p1 offset1 p2 offset2 ... pn offsetn +//p1-list(in offset1) p2-list(in offset2) ... 
pn-list(in offsetn) +//(the final whole list is a unsorted slist) +bool +KVstore::build_objID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num) +{ + cout << "Begin building objID2values..." << endl; + //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_ops_cmp); + vector _sidlist_o; + vector _pidoffsetlist_o; + + //true means the next obj is a different one from the current one + bool _obj_change = true; + //true means the next is different from the current pair + bool _obj_pre_change = true; + //true means the next pre is different from the current one + bool _pre_change = true; + + this->open_objID2values(KVstore::CREATE_MODE); + + //for (unsigned long i = 0; i < _triples_num; i++) + for (TYPE_TRIPLE_NUM i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i].subid != _p_id_tuples[i+1].subid + || _p_id_tuples[i].preid != _p_id_tuples[i+1].preid || _p_id_tuples[i].objid != _p_id_tuples[i+1].objid) { + if (_obj_change) { + _pidoffsetlist_o.clear(); + _sidlist_o.clear(); + } + + TYPE_ENTITY_LITERAL_ID _sub_id = _p_id_tuples[i].subid; + TYPE_PREDICATE_ID _pre_id = _p_id_tuples[i].preid; + TYPE_ENTITY_LITERAL_ID _obj_id = _p_id_tuples[i].objid; + + if (_obj_pre_change) { + _pidoffsetlist_o.push_back(_pre_id); + _pidoffsetlist_o.push_back(_sidlist_o.size()); + } + + _sidlist_o.push_back(_sub_id); + + _obj_change = (i + 1 == _triples_num) || (_p_id_tuples[i].objid != _p_id_tuples[i+1].objid); + _pre_change = (i + 1 == _triples_num) || (_p_id_tuples[i].preid != _p_id_tuples[i+1].preid); + _obj_pre_change = _obj_change || _pre_change; + + if (_obj_change) { + for (unsigned j = 1; j < _pidoffsetlist_o.size(); j += 2) { + _pidoffsetlist_o[j] += 2 + _pidoffsetlist_o.size(); + } + unsigned* _entrylist_o = new unsigned[2 + _pidoffsetlist_o.size() + _sidlist_o.size()]; + //triples number + _entrylist_o[0] = _sidlist_o.size(); + //pre number + _entrylist_o[1] = _pidoffsetlist_o.size() / 2; + unsigned j, k; + //pidoffsetlist + for (j = 2, 
k = 0; k < _pidoffsetlist_o.size(); j++, k++) { + _entrylist_o[j] = _pidoffsetlist_o[k]; + } + //unsorted sidlist + for (k = 0; k < _sidlist_o.size(); j++, k++) { + _entrylist_o[j] = _sidlist_o[k]; + } + this->addValueByKey(this->objID2values, _obj_id, (char*)_entrylist_o, sizeof(unsigned) * j); + //delete[] _entrylist_o; + } + } + } + + this->close_objID2values(); + cout << "Finished building objID2values" << endl; + return true; +} + +bool +KVstore::getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getpreIDlistByobjID " << _objid << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); + if (!_get) { + _preidlist = NULL; + _list_len = 0; + return false; + } + _list_len = _tmp[1]; + _preidlist = new unsigned[_list_len]; + for (unsigned i = 0; i < _list_len; i++) { + _preidlist[i] = _tmp[2 * i + 2]; + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getsubIDlistByobjID " << _objid << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); + if (!_get) { + _subidlist = NULL; + _list_len = 0; + return false; + } + + _list_len = _tmp[0]; + _subidlist = new unsigned[_list_len]; + memcpy(_subidlist, _tmp + 2 + 2 * _tmp[1], sizeof(unsigned) * _list_len); + Util::sort(_subidlist, _list_len); + if (_no_duplicate) { + _list_len = Util::removeDuplicate(_subidlist, _list_len); + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + 
if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getsubIDlistByobjIDpreID " << _objid << ' ' << _preid << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); + if (!_get) { + _subidlist = NULL; + _list_len = 0; + return false; + } + unsigned _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2); + //if (_result == -1) + if (_result == INVALID) + { + _subidlist = NULL; + _list_len = 0; + return false; + } + unsigned _offset = _tmp[3 + 2 * _result]; + unsigned _offset_next; + + if (_result == _tmp[1] - 1) { + _offset_next = 2 + 2 * _tmp[1] + _tmp[0]; + } + else { + _offset_next = _tmp[5 + 2 * _result]; + } + _list_len = _offset_next - _offset; + _subidlist = new unsigned[_list_len]; + memcpy(_subidlist, _tmp + _offset, sizeof(unsigned) * _list_len); + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preid_subidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getpreIDsubIDlistByobjID " << _objid << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); + if (!_get) { + _preid_subidlist = NULL; + _list_len = 0; + return false; + } + + _list_len = 2 * _tmp[0]; + _preid_subidlist = new unsigned[_list_len]; + unsigned _offset_next; + unsigned j = 0; + for (unsigned i = 0; i < _tmp[1]; i++) { + if (i == _tmp[1] - 1) { + _offset_next = 2 + 2 * _tmp[1] + _tmp[0]; + } + else { + _offset_next = _tmp[5 + 2 * i]; + } 
+ for (; 2 + 2 * _tmp[1] + j < _offset_next; j++) { + _preid_subidlist[2 * j] = _tmp[2 + 2 * i]; + _preid_subidlist[2 * j + 1] = _tmp[2 + 2 * _tmp[1] + j]; + } + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::open_preID2values(int _mode) +{ + unsigned long long buffer_size; + if (_mode == KVstore::CREATE_MODE) { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_pID2values_build; + } + else if (_mode == KVstore::READ_WRITE_MODE) { + buffer_size = Util::MAX_BUFFER_SIZE * buffer_pID2values_query; + } + else { + cerr << "Invalid open mode in open_preID2values, mode = " << _mode << endl; + return false; + } + return this->open(this->preID2values, KVstore::s_pID2values, _mode, buffer_size); +} + +bool +KVstore::close_preID2values() +{ + if (this->preID2values == NULL) { + return true; + } + + this->preID2values->save(); + delete this->preID2values; + this->preID2values = NULL; + + return true; +} + +bool +KVstore::build_preID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num) +//NOTICE: if we sort sidlist, then oidlist is not sorted; otherwise if we sort oidlist, then sidlist is not sorted +//STRUCT of p2xx: triple_number sidlist oidlist(not sorted, linked with sidlist one by one) +{ + cout << "Begin building preID2values..." 
<< endl; + //qsort(_p_id_tuples, _triples_num, sizeof(int*), Util::_pso_cmp); + vector _sidlist_p; + vector _oidlist_p; + + //true means the next pre is different from the current one + bool _pre_change = true; + + this->open_preID2values(KVstore::CREATE_MODE); + + //for (unsigned long i = 0; i < _triples_num; i++) + for (TYPE_TRIPLE_NUM i = 0; i < _triples_num; i++) + { + if (i + 1 == _triples_num || _p_id_tuples[i].subid != _p_id_tuples[i+1].subid + || _p_id_tuples[i].preid != _p_id_tuples[i+1].preid || _p_id_tuples[i].objid != _p_id_tuples[i+1].objid) { + if (_pre_change) { + _sidlist_p.clear(); + _oidlist_p.clear(); + } + + TYPE_ENTITY_LITERAL_ID _sub_id = _p_id_tuples[i].subid; + TYPE_PREDICATE_ID _pre_id = _p_id_tuples[i].preid; + TYPE_ENTITY_LITERAL_ID _obj_id = _p_id_tuples[i].objid; + + _sidlist_p.push_back(_sub_id); + _oidlist_p.push_back(_obj_id); + + _pre_change = (i + 1 == _triples_num) || (_p_id_tuples[i].preid != _p_id_tuples[i+1].preid); + + if (_pre_change) { + unsigned* _entrylist_p = new unsigned[1 + _sidlist_p.size() * 2]; + //triples number + _entrylist_p[0] = _sidlist_p.size(); + unsigned j, k; + //sidlist + for (j = 1, k = 0; k < _sidlist_p.size(); j++, k++) { + _entrylist_p[j] = _sidlist_p[k]; + } + //unsorted oidlist + for (k = 0; k < _oidlist_p.size(); j++, k++) { + _entrylist_p[j] = _oidlist_p[k]; + } + this->addValueByKey(this->preID2values, _pre_id, (char*)_entrylist_p, sizeof(unsigned) * j); + //delete[] _entrylist_p; + } + } + } + + this->close_preID2values(); + cout << "Finished building preID2values" << endl; + return true; +} + +bool +KVstore::getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate) const +{ + //cout << "In getsubIDlistBypreID " << _preid << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); + if (!_get) { + _subidlist = NULL; + _list_len = 0; + return false; + } + + _list_len = 
_tmp[0]; + _subidlist = new unsigned[_list_len]; + memcpy(_subidlist, _tmp + 1, sizeof(unsigned) * _list_len); + if (_no_duplicate) { + _list_len = Util::removeDuplicate(_subidlist, _list_len); + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate) const { + //cout << "In getobjIDlistBypreID " << _preid << endl; + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); + if (!_get) { + _objidlist = NULL; + _list_len = 0; + return false; + } + + _list_len = _tmp[0]; + _objidlist = new unsigned[_list_len]; + memcpy(_objidlist, _tmp + 1 + _tmp[0], sizeof(unsigned) * _list_len); + Util::sort(_objidlist, _list_len); + if (_no_duplicate) { + _list_len = Util::removeDuplicate(_objidlist, _list_len); + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + return true; +} + +bool +KVstore::getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_objidlist, unsigned& _list_len, bool _no_duplicate) const +{ +#ifdef DEBUG_KVSTORE + cout << "In getsubIDobjIDlistBypreID " << _preid << endl; +#endif + unsigned* _tmp = NULL; + unsigned _len = 0; + bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len); +#ifdef DEBUG_KVSTORE + cout<<"the length of list: "<<_len<getpreIDlistBysubID(_subid, list1, len1, true)) { + _preidlist = NULL; + _list_len = 0; + return false; + } + + if (!this->getpreIDlistByobjID(_objid, list2, len2, true)) { + _preidlist = NULL; + _list_len = 0; + return false; + } + vector list = KVstore::intersect(list1, list2, len1, 
len2); + delete[] list1; + delete[] list2; + unsigned len = list.size(); + if (len == 0) { + _preidlist = NULL; + _list_len = 0; + return false; + } + unsigned* _tmp = NULL; + unsigned _len = 0; + this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); + _list_len = len; + unsigned _result = 0; + for (unsigned i = 0; i < len; i++) { + TYPE_PREDICATE_ID _preid = list[i]; + for (; _result < _tmp[1]; _result++) { + if (_tmp[3 + 2 * _result] == _preid) { + break; + } + } + if (_result == _tmp[1]) { + for (unsigned j = i; j < len; j++) { + list[j] = -1; + _list_len--; + } + break; + } + unsigned _offset = _tmp[4 + 2 * _result]; + unsigned _offset_next; + if (_result == _tmp[1] - 1) { + _offset_next = 3 + 2 * _tmp[1] + _tmp[0]; + } + else { + _offset_next = _tmp[6 + 2 * _result]; + } + if (KVstore::binarySearch(_objid, _tmp + _offset, _offset_next - _offset) == -1) { + list[i] = -1; + _list_len--; + } + } + + //if this is a long list, then we should remove itself after copying + //otherwise, we should not free the list memory + if(VList::isLongList(_len)) + { + delete[] _tmp; + //_tmp = NULL; + } + + if (_list_len == 0) { + _preidlist = NULL; + return false; + } + _preidlist = new unsigned[_list_len]; + unsigned i = 0, j = 0; + while (i < len) { + if (list[i] != -1) { + _preidlist[j] = list[i]; + i++; + j++; + } + else { + i++; + } + } + + return true; +} + + +bool +KVstore::open(SITree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) +{ + if (_p_btree != NULL) { + return false; + } + string smode; + if (_mode == KVstore::CREATE_MODE) { + smode = "build"; + } + else if (_mode == KVstore::READ_WRITE_MODE) { + smode = "open"; + } + else { + cerr << "Invalid open mode of: " << _tree_name << " mode = " << _mode << endl; + return false; + } + _p_btree = new SITree(this->store_path, _tree_name, smode, _buffer_size); + return true; +} + +bool +KVstore::open(ISTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) 
+{ + if (_p_btree != NULL) { + return false; + } + string smode; + if (_mode == KVstore::CREATE_MODE) { + smode = "build"; + } + else if (_mode == KVstore::READ_WRITE_MODE) { + smode = "open"; + } + else { + cerr << "Invalid open mode of: " << _tree_name << " mode = " << _mode << endl; + return false; + } + _p_btree = new ISTree(this->store_path, _tree_name, smode, _buffer_size); + return true; +} + +bool +KVstore::open(IVTree*& _p_btree, string _tree_name, int _mode, unsigned long long _buffer_size) +{ + if (_p_btree != NULL) { + return false; + } + string smode; + if (_mode == KVstore::CREATE_MODE) { + smode = "build"; + } + else if (_mode == KVstore::READ_WRITE_MODE) { + smode = "open"; + } + else { + cerr << "Invalid open mode of: " << _tree_name << " mode = " << _mode << endl; + return false; + } + _p_btree = new IVTree(this->store_path, _tree_name, smode, _buffer_size); + + return true; +} + +void +KVstore::flush(SITree* _p_btree) +{ + if (_p_btree != NULL) + { + _p_btree->save(); + } +} + +void +KVstore::flush(ISTree* _p_btree) +{ + if (_p_btree != NULL) + { + _p_btree->save(); + } +} + +void +KVstore::flush(IVTree* _p_btree) +{ + if (_p_btree != NULL) + { + _p_btree->save(); + } +} + +bool +KVstore::addValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val) +{ + return _p_btree->insert(_key, _klen, _val); +} + +bool +KVstore::addValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen) +{ + return _p_btree->insert(_key, _val, _vlen); +} + +bool +KVstore::addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen) +{ + return _p_btree->insert(_key, _val, _vlen); +} + +bool +KVstore::setValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val) +{ + return _p_btree->modify(_key, _klen, _val); +} + +bool +KVstore::setValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen) +{ + return _p_btree->modify(_key, _val, _vlen); +} + +bool +KVstore::setValueByKey(IVTree* _p_btree, unsigned _key, 
char* _val, unsigned _vlen) +{ + return _p_btree->modify(_key, _val, _vlen); +} + +bool +KVstore::getValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned* _val) const +{ + return _p_btree->search(_key, _klen, _val); +} + +bool +KVstore::getValueByKey(ISTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const +{ + return _p_btree->search(_key, _val, _vlen); +} + +bool +KVstore::getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const +{ + return _p_btree->search(_key, _val, _vlen); +} + +TYPE_ENTITY_LITERAL_ID +KVstore::getIDByStr(SITree* _p_btree, const char* _key, unsigned _klen) const +{ + unsigned val = 0; + bool ret = _p_btree->search(_key, _klen, &val); + if (!ret) + { + //return -1; + return INVALID; + } + + return val; +} + +bool +KVstore::removeKey(SITree* _p_btree, const char* _key, unsigned _klen) +{ + return _p_btree->remove(_key, _klen); +} + +bool +KVstore::removeKey(ISTree* _p_btree, unsigned _key) +{ + return _p_btree->remove(_key); +} + +bool +KVstore::removeKey(IVTree* _p_btree, unsigned _key) +{ + return _p_btree->remove(_key); +} + +vector +KVstore::intersect(const unsigned* _list1, const unsigned* _list2, unsigned _len1, unsigned _len2) +{ + unsigned i = 0, j = 0; + vector ret; + while (i < _len1 && j < _len2) { + if (_list1[i] < _list2[j]) { + i++; + } + else if (_list1[i] > _list2[j]) { + j++; + } + else { + ret.push_back(_list1[i]); + i++; + j++; + } + } + return ret; +} + +unsigned +KVstore::binarySearch(unsigned _key, const unsigned* _list, unsigned _list_len, int _step) +{ + unsigned _left = 0; + unsigned _right = _list_len - 1; + unsigned _mid; + while (_left <= _right) { + _mid = (_right - _left) / 2 + _left; + if (_key == _list[_step * _mid]) { + return _mid; + } + if (_key < _list[_step * _mid]) { + _right = _mid - 1; + } + else { + _left = _mid + 1; + } + } + + //return -1; + return INVALID; +} + +bool +KVstore::isEntity(TYPE_ENTITY_LITERAL_ID id) +{ + return id < 
Util::LITERAL_FIRST_ID; +} + +//TODO+BETTER: adjust the buffer size according to current memory usage(global memory manager) +//better to adjust these parameters according to memory usage and entity num +//need a memory manager first +string KVstore::s_entity2id = "s_entity2id"; +string KVstore::s_id2entity = "s_id2entity"; +unsigned short KVstore::buffer_entity2id_build = 8; +unsigned short KVstore::buffer_id2entity_build = 8; +unsigned short KVstore::buffer_entity2id_query = 2; +unsigned short KVstore::buffer_id2entity_query = 1; + +string KVstore::s_predicate2id = "s_predicate2id"; +string KVstore::s_id2predicate = "s_id2predicate"; +unsigned short KVstore::buffer_predicate2id_build = 8; +unsigned short KVstore::buffer_id2predicate_build = 8; +unsigned short KVstore::buffer_predicate2id_query = 1; +unsigned short KVstore::buffer_id2predicate_query = 1; + +string KVstore::s_literal2id = "s_literal2id"; +string KVstore::s_id2literal = "s_id2literal"; +unsigned short KVstore::buffer_literal2id_build = 8; +unsigned short KVstore::buffer_id2literal_build = 8; +unsigned short KVstore::buffer_literal2id_query = 2; +unsigned short KVstore::buffer_id2literal_query = 1; + +string KVstore::s_sID2values = "s_sID2values"; +string KVstore::s_oID2values = "s_oID2values"; +string KVstore::s_pID2values = "s_pID2values"; +unsigned short KVstore::buffer_sID2values_build = 32; +unsigned short KVstore::buffer_oID2values_build = 32; +unsigned short KVstore::buffer_pID2values_build = 16; +unsigned short KVstore::buffer_sID2values_query = 16; +unsigned short KVstore::buffer_oID2values_query = 16; +unsigned short KVstore::buffer_pID2values_query = 8; + diff --git a/KVstore/KVstore.h b/KVstore/KVstore.h index a305058..d66cf16 100644 --- a/KVstore/KVstore.h +++ b/KVstore/KVstore.h @@ -1,208 +1,229 @@ -/*============================================================================= -# Filename: KVstore.h -# Author: Bookug Lobert -# Mail: 1181955272@qq.com -# Last Modified: 2015-10-23 14:23 
-# Description: Modified by Wang Libo -=============================================================================*/ - -#ifndef _KVSTORE_KVSTORE_H -#define _KVSTORE_KVSTORE_H - -#include "../Util/Util.h" -#include "Tree.h" - -class KVstore -{ -public: - static const int READ_WRITE_MODE = 1; //Open a B tree, which must exist - static const int CREATE_MODE = 2; //Build a new B tree and delete existing ones (if any) - - KVstore(std::string _store_path = "."); - ~KVstore(); - void flush(); - void release(); - void open(); - - //=============================================================================== - - //including IN-neighbor & OUT-neighbor - unsigned getEntityDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; - unsigned getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; - unsigned getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; - - unsigned getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const; - unsigned getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const; - - unsigned getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid) const; - unsigned getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid) const; - - //=============================================================================== - //Before calling these functions, we are sure that the triples doesn't exist. 
- - bool updateTupleslist_insert(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); - bool updateTupleslist_remove(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); - - bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); - bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); - bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist); - bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist); - - bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); - bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); - bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist); - bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist); - - bool updateInsert_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); - bool updateRemove_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); - bool updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist); - bool updateRemove_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist); - - //=============================================================================== - - //for entity2id - bool open_entity2id(int _mode); - bool close_entity2id(); - bool subIDByEntity(std::string _entity); - TYPE_ENTITY_LITERAL_ID getIDByEntity(std::string _entity) const; - bool setIDByEntity(std::string _entity, TYPE_ENTITY_LITERAL_ID _id); - - //for id2entity - bool open_id2entity(int _mode); - bool close_id2entity(); - bool subEntityByID(TYPE_ENTITY_LITERAL_ID 
_id); - std::string getEntityByID(TYPE_ENTITY_LITERAL_ID _id) const; - bool setEntityByID(TYPE_ENTITY_LITERAL_ID _id, std::string _entity); - - //for predicate2id - bool open_predicate2id(int _mode); - bool close_predicate2id(); - bool subIDByPredicate(std::string _predicate); - TYPE_PREDICATE_ID getIDByPredicate(std::string _predicate) const; - bool setIDByPredicate(std::string _predicate, TYPE_PREDICATE_ID _id); - - //for id2predicate - bool open_id2predicate(int _mode); - bool close_id2predicate(); - bool subPredicateByID(TYPE_PREDICATE_ID _id); - std::string getPredicateByID(TYPE_PREDICATE_ID _id) const; - bool setPredicateByID(TYPE_PREDICATE_ID _id, std::string _predicate); - - //for literal2id - bool open_literal2id(int _mode); - bool close_literal2id(); - bool subIDByLiteral(std::string _literal); - TYPE_ENTITY_LITERAL_ID getIDByLiteral(std::string _literal) const; - bool setIDByLiteral(std::string _literal, TYPE_ENTITY_LITERAL_ID _id); - - //for id2literal - bool open_id2literal(int _mode); - bool close_id2literal(); - bool subLiteralByID(TYPE_ENTITY_LITERAL_ID _id); - std::string getLiteralByID(TYPE_ENTITY_LITERAL_ID _id) const; - bool setLiteralByID(TYPE_ENTITY_LITERAL_ID _id, std::string _literal); - - //=============================================================================== - - //for subID2values - bool open_subID2values(int _mode); - bool close_subID2values(); - bool build_subID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); - bool getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; - bool getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; - bool getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; - bool getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, 
unsigned*& _preid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const; - - //for objID2values - bool open_objID2values(int _mode); - bool close_objID2values(); - bool build_objID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); - bool getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; - bool getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; - bool getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; - bool getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preid_subidlist, unsigned& _list_len, bool _no_duplicate = false) const; - - //for preID2values - bool open_preID2values(int _mode); - bool close_preID2values(); - bool build_preID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); - bool getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; - bool getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; - bool getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const; - - //for so2p - bool getpreIDlistBysubIDobjID(TYPE_ENTITY_LITERAL_ID _subID, TYPE_ENTITY_LITERAL_ID _objID, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; - - -private: - std::string store_path; - - SITree* entity2id; - ISTree* id2entity; - static std::string s_entity2id; - static std::string s_id2entity; - static unsigned short buffer_entity2id_build; - static unsigned short buffer_id2entity_build; - static unsigned short buffer_entity2id_query; - static unsigned short buffer_id2entity_query; - - SITree* predicate2id; - ISTree* 
id2predicate; - static std::string s_predicate2id; - static std::string s_id2predicate; - static unsigned short buffer_predicate2id_build; - static unsigned short buffer_id2predicate_build; - static unsigned short buffer_predicate2id_query; - static unsigned short buffer_id2predicate_query; - - SITree* literal2id; - ISTree* id2literal; - static std::string s_literal2id; - static std::string s_id2literal; - static unsigned short buffer_literal2id_build; - static unsigned short buffer_id2literal_build; - static unsigned short buffer_literal2id_query; - static unsigned short buffer_id2literal_query; - - ISTree* subID2values; - ISTree* objID2values; - ISTree* preID2values; - static std::string s_sID2values; - static std::string s_oID2values; - static std::string s_pID2values; - static unsigned short buffer_sID2values_build; - static unsigned short buffer_oID2values_build; - static unsigned short buffer_pID2values_build; - static unsigned short buffer_sID2values_query; - static unsigned short buffer_oID2values_query; - static unsigned short buffer_pID2values_query; - - //=============================================================================== - - bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); - bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); - - void flush(SITree* _p_btree); - void flush(ISTree* _p_btree); - - bool addValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned _val); - bool addValueByKey(ISTree* _p_btree, unsigned _key, const char* _val, unsigned _vlen); - - bool setValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned _val); - bool setValueByKey(ISTree* _p_btree, unsigned _key, const char* _val, unsigned _vlen); - - bool getValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned* _val) const; - bool getValueByKey(ISTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const; - - 
TYPE_ENTITY_LITERAL_ID getIDByStr(SITree* _p_btree, const char* _key, unsigned _klen) const; - - bool removeKey(SITree* _p_btree, const char* _key, unsigned _klen); - bool removeKey(ISTree* _p_btree, unsigned _key); - - static std::vector intersect(const unsigned* _list1, const unsigned* _list2, unsigned _len1, unsigned _len2); - static unsigned binarySearch(unsigned key, const unsigned* _list, unsigned _list_len, int step = 1); - static bool isEntity(TYPE_ENTITY_LITERAL_ID id); -}; - -#endif //_KVSTORE_KVSTORE_H - +/*============================================================================= +# Filename: KVstore.h +# Author: Bookug Lobert +# Mail: 1181955272@qq.com +# Last Modified: 2015-10-23 14:23 +# Description: Modified by Wang Libo +=============================================================================*/ + +#ifndef _KVSTORE_KVSTORE_H +#define _KVSTORE_KVSTORE_H + +#include "../Util/Util.h" +#include "../Util/VList.h" +#include "Tree.h" + +//TODO: is it needed to keep a length in Bstr?? especially for IVTree? +//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment) +//add a \0 in tail: only add 1 char +//QUERY: but to count the length each time maybe very costly? +//No, because triple num is stored in char* now!!!! we do not need to save it again +//TODO: entity_border in s2values list is not needed!!! not waste memory here +// +//QUERY: but to implement vlist, we need a unsigned flag +//What is more, we need to store the string in disk, how can we store it if without the length? +//unsigned type stored as chars, maybe will have '\0' +//In memory, we do not know when the oidlist ends if without the original length (butthe triple num will answer this!) 
+ +class KVstore +{ +public: + static const int READ_WRITE_MODE = 1; //Open a B tree, which must exist + static const int CREATE_MODE = 2; //Build a new B tree and delete existing ones (if any) + + KVstore(std::string _store_path = "."); + ~KVstore(); + void flush(); + void release(); + void open(); + + //=============================================================================== + + //including IN-neighbor & OUT-neighbor + unsigned getEntityDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; + unsigned getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; + unsigned getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const; + + unsigned getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const; + unsigned getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const; + + unsigned getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid) const; + unsigned getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid) const; + + //=============================================================================== + //Before calling these functions, we are sure that the triples doesn't exist. 
+ + bool updateTupleslist_insert(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); + bool updateTupleslist_remove(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); + + bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); + bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); + bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist); + bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector& _pidoidlist); + + bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); + bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); + bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist); + bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector& _pidsidlist); + + bool updateInsert_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); + bool updateRemove_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id); + bool updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist); + bool updateRemove_p2values(TYPE_PREDICATE_ID _preid, const std::vector& _sidoidlist); + + //=============================================================================== + + //for entity2id + bool open_entity2id(int _mode); + bool close_entity2id(); + bool subIDByEntity(std::string _entity); + TYPE_ENTITY_LITERAL_ID getIDByEntity(std::string _entity) const; + bool setIDByEntity(std::string _entity, TYPE_ENTITY_LITERAL_ID _id); + + //for id2entity + bool open_id2entity(int _mode); + bool close_id2entity(); + bool subEntityByID(TYPE_ENTITY_LITERAL_ID 
_id); + std::string getEntityByID(TYPE_ENTITY_LITERAL_ID _id) const; + bool setEntityByID(TYPE_ENTITY_LITERAL_ID _id, std::string _entity); + + //for predicate2id + bool open_predicate2id(int _mode); + bool close_predicate2id(); + bool subIDByPredicate(std::string _predicate); + TYPE_PREDICATE_ID getIDByPredicate(std::string _predicate) const; + bool setIDByPredicate(std::string _predicate, TYPE_PREDICATE_ID _id); + + //for id2predicate + bool open_id2predicate(int _mode); + bool close_id2predicate(); + bool subPredicateByID(TYPE_PREDICATE_ID _id); + std::string getPredicateByID(TYPE_PREDICATE_ID _id) const; + bool setPredicateByID(TYPE_PREDICATE_ID _id, std::string _predicate); + + //for literal2id + bool open_literal2id(int _mode); + bool close_literal2id(); + bool subIDByLiteral(std::string _literal); + TYPE_ENTITY_LITERAL_ID getIDByLiteral(std::string _literal) const; + bool setIDByLiteral(std::string _literal, TYPE_ENTITY_LITERAL_ID _id); + + //for id2literal + bool open_id2literal(int _mode); + bool close_id2literal(); + bool subLiteralByID(TYPE_ENTITY_LITERAL_ID _id); + std::string getLiteralByID(TYPE_ENTITY_LITERAL_ID _id) const; + bool setLiteralByID(TYPE_ENTITY_LITERAL_ID _id, std::string _literal); + + //=============================================================================== + + //for subID2values + bool open_subID2values(int _mode); + bool close_subID2values(); + bool build_subID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); + bool getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; + bool getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; + bool getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; + bool getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, 
unsigned*& _preid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const; + + //for objID2values + bool open_objID2values(int _mode); + bool close_objID2values(); + bool build_objID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); + bool getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; + bool getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; + bool getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; + bool getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preid_subidlist, unsigned& _list_len, bool _no_duplicate = false) const; + + //for preID2values + bool open_preID2values(int _mode); + bool close_preID2values(); + bool build_preID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num); + bool getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const; + bool getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const; + bool getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const; + + //for so2p + bool getpreIDlistBysubIDobjID(TYPE_ENTITY_LITERAL_ID _subID, TYPE_ENTITY_LITERAL_ID _objID, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const; + + +private: + std::string store_path; + + SITree* entity2id; + ISTree* id2entity; + static std::string s_entity2id; + static std::string s_id2entity; + static unsigned short buffer_entity2id_build; + static unsigned short buffer_id2entity_build; + static unsigned short buffer_entity2id_query; + static unsigned short buffer_id2entity_query; + + SITree* predicate2id; + ISTree* 
id2predicate; + static std::string s_predicate2id; + static std::string s_id2predicate; + static unsigned short buffer_predicate2id_build; + static unsigned short buffer_id2predicate_build; + static unsigned short buffer_predicate2id_query; + static unsigned short buffer_id2predicate_query; + + SITree* literal2id; + ISTree* id2literal; + static std::string s_literal2id; + static std::string s_id2literal; + static unsigned short buffer_literal2id_build; + static unsigned short buffer_id2literal_build; + static unsigned short buffer_literal2id_query; + static unsigned short buffer_id2literal_query; + + IVTree* subID2values; + IVTree* objID2values; + IVTree* preID2values; + static std::string s_sID2values; + static std::string s_oID2values; + static std::string s_pID2values; + static unsigned short buffer_sID2values_build; + static unsigned short buffer_oID2values_build; + static unsigned short buffer_pID2values_build; + static unsigned short buffer_sID2values_query; + static unsigned short buffer_oID2values_query; + static unsigned short buffer_pID2values_query; + + //=============================================================================== + + bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); + bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); + bool open(IVTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size); + + void flush(SITree* _p_btree); + void flush(ISTree* _p_btree); + void flush(IVTree* _p_btree); + + bool addValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val); + bool addValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); + bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); + + bool setValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val); + bool setValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); + bool 
setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen); + + bool getValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned* _val) const; + bool getValueByKey(ISTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const; + bool getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const; + + + + TYPE_ENTITY_LITERAL_ID getIDByStr(SITree* _p_btree, const char* _key, unsigned _klen) const; + + bool removeKey(SITree* _p_btree, const char* _key, unsigned _klen); + bool removeKey(ISTree* _p_btree, unsigned _key); + bool removeKey(IVTree* _p_btree, unsigned _key); + + static std::vector intersect(const unsigned* _list1, const unsigned* _list2, unsigned _len1, unsigned _len2); + static unsigned binarySearch(unsigned key, const unsigned* _list, unsigned _list_len, int step = 1); + static bool isEntity(TYPE_ENTITY_LITERAL_ID id); +}; + +#endif //_KVSTORE_KVSTORE_H + diff --git a/KVstore/SITree/SITree.cpp b/KVstore/SITree/SITree.cpp index c44602d..4eaf8f3 100644 --- a/KVstore/SITree/SITree.cpp +++ b/KVstore/SITree/SITree.cpp @@ -20,7 +20,7 @@ SITree::SITree() TSM = NULL; storepath = ""; filename = ""; - transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; + //transfer_size[0] = transfer_size[1] = transfer_size[2] = 0; this->request = 0; } @@ -36,10 +36,10 @@ SITree::SITree(string _storepath, string _filename, string _mode, unsigned long this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail); else this->root = NULL; - this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); - this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M + //this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE)); + 
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE)); + //this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M this->request = 0; } @@ -49,30 +49,30 @@ SITree::getFilePath() return storepath + "/" + filename; } -void //WARN: not check _str and _len -SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) -{ - if (_index > 2) - return; - /* - if(_str == NULL || _len == 0) - { - printf("error in CopyToTransfer: empty string\n"); - return; - } - */ - //unsigned length = _bstr->getLen(); - unsigned length = _len; - if (length + 1 > this->transfer_size[_index]) - { - transfer[_index].release(); - transfer[_index].setStr((char*)malloc(length + 1)); - this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 - } - memcpy(this->transfer[_index].getStr(), _str, length); - this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore - this->transfer[_index].setLen(length); -} +//void //WARN: not check _str and _len +//SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index) +//{ + //if (_index > 2) + //return; + //[> + //if(_str == NULL || _len == 0) + //{ + //printf("error in CopyToTransfer: empty string\n"); + //return; + //} + //*/ + ////unsigned length = _bstr->getLen(); + //unsigned length = _len; + //if (length + 1 > this->transfer_size[_index]) + //{ + //transfer[_index].release(); + //transfer[_index].setStr((char*)malloc(length + 1)); + //this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0 + //} + //memcpy(this->transfer[_index].getStr(), _str, length); + //this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore + //this->transfer[_index].setLen(length); +//} unsigned SITree::getHeight() const @@ -110,33 +110,39 @@ SITree::search(const char* _str, unsigned _len, unsigned* _val) //*_val = -1; return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, 
_len, 1); request = 0; - Bstr bstr = this->transfer[1]; //not to modify its memory + //Bstr bstr = this->transfer[1]; //not to modify its memory + //Bstr bstr(_str, _len, true); int store; - SINode* ret = this->find(&transfer[1], &store, false); - if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found + SINode* ret = this->find(_str, _len, &store, false); + if (ret == NULL || store == -1) //tree is empty or not found + { + //bstr.clear(); + return false; + } + const Bstr* tmp = ret->getKey(store); + if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found { - bstr.clear(); return false; } *_val = ret->getValue(store); this->TSM->request(request); - bstr.clear(); + //bstr.clear(); return true; } bool -SITree::insert(const char* _str, unsigned _len, unsigned _val) +SITree::insert(char* _str, unsigned _len, unsigned _val) { if (_str == NULL || _len == 0) { printf("error in SITree-insert: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); this->request = 0; SINode* ret; @@ -171,8 +177,8 @@ SITree::insert(const char* _str, unsigned _len, unsigned _val) SINode* p = this->root; SINode* q; int i; - const Bstr* _key = &transfer[1]; - Bstr bstr = *_key; + //const Bstr* _key = &transfer[1]; + //Bstr bstr = *_key; while (!p->isLeaf()) { //j = p->getNum(); @@ -180,7 +186,7 @@ SITree::insert(const char* _str, unsigned _len, unsigned _val) //if(bstr < *(p->getKey(i))) //break; //NOTICE: using binary search is better here - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); q = p->getChild(i); this->prepare(q); @@ -197,7 +203,10 @@ SITree::insert(const char* _str, unsigned _len, unsigned _val) this->TSM->updateHeap(ret, ret->getRank(), false); this->TSM->updateHeap(q, q->getRank(), true); this->TSM->updateHeap(p, p->getRank(), true); - if (bstr < *(p->getKey(i))) + //if (bstr < *(p->getKey(i))) + const Bstr* tmp = p->getKey(i); 
+ int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen()); + if (cmp_res < 0) p = q; else p = ret; @@ -213,24 +222,34 @@ SITree::insert(const char* _str, unsigned _len, unsigned _val) //for(i = 0; i < j; ++i) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); //insert existing key is ok, but not inserted in //however, the tree-shape may change due to possible split in former code bool ifexist = false; - if (i > 0 && bstr == *(p->getKey(i - 1))) - ifexist = true; - else + //if (i > 0 && bstr == *(p->getKey(i - 1))) + if (i > 0) { - p->addKey(_key, i, true); + const Bstr* tmp = p->getKey(i-1); + int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen()); + if(cmp_res == 0) + { + ifexist = true; + } + } + + if(!ifexist) + { + p->addKey(_str, _len, i, true); p->addValue(_val, i); p->addNum(); - request += _key->getLen(); + request += _len; p->setDirty(); this->TSM->updateHeap(p, p->getRank(), true); } + this->TSM->request(request); - bstr.clear(); //NOTICE: must be cleared! + //bstr.clear(); //NOTICE: must be cleared! 
return !ifexist; //QUERY(which case:return false) } @@ -243,35 +262,42 @@ SITree::modify(const char* _str, unsigned _len, unsigned _val) printf("error in SITree-modify: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); this->request = 0; - const Bstr* _key = &transfer[1]; - Bstr bstr = *_key; + //const Bstr* _key = &transfer[1]; + //Bstr bstr = *_key; int store; - SINode* ret = this->find(_key, &store, true); - if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found + SINode* ret = this->find(_str, _len, &store, true); + if (ret == NULL || store == -1) //tree is empty or not found { - bstr.clear(); + //bstr.clear(); return false; } + const Bstr* tmp = ret->getKey(store); + if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found + { + return false; + } + ret->setValue(_val, store); ret->setDirty(); this->TSM->request(request); - bstr.clear(); + //bstr.clear(); return true; } //this function is useful for search and modify, and range-query SINode* //return the first key's position that >= *_key -SITree::find(const Bstr* _key, int* _store, bool ifmodify) +SITree::find(const char* _str, unsigned _len, int* _store, bool ifmodify) { //to assign value for this->bstr, function shouldn't be const! 
if (this->root == NULL) return NULL; //SITree Is Empty + SINode* p = root; int i, j; - Bstr bstr = *_key; //local Bstr: multiple delete + //Bstr bstr = *_key; //local Bstr: multiple delete while (!p->isLeaf()) { if (ifmodify) @@ -280,7 +306,7 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify) //for(i = 0; i < j; ++i) //BETTER(Binary-Search) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); p = p->getChild(i); this->prepare(p); @@ -290,13 +316,14 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify) //for(i = 0; i < j; ++i) //if(bstr <= *(p->getKey(i))) //break; - i = p->searchKey_lessEqual(bstr); + i = p->searchKey_lessEqual(_str, _len); if (i == j) *_store = -1; //Not Found else *_store = i; - bstr.clear(); + + //bstr.clear(); return p; } @@ -316,24 +343,25 @@ SITree::remove(const char* _str, unsigned _len) printf("error in SITree-remove: empty string\n"); return false; } - this->CopyToTransfer(_str, _len, 1); + //this->CopyToTransfer(_str, _len, 1); request = 0; - const Bstr* _key = &transfer[1]; + //const Bstr* _key = &transfer[1]; SINode* ret; if (this->root == NULL) //tree is empty return false; + SINode* p = this->root; SINode* q; int i, j; - Bstr bstr = *_key; + //Bstr bstr = *_key; while (!p->isLeaf()) { j = p->getNum(); //for(i = 0; i < j; ++i) //if(bstr < *(p->getKey(i))) //break; - i = p->searchKey_less(bstr); + i = p->searchKey_less(_str, _len); q = p->getChild(i); this->prepare(q); @@ -347,6 +375,7 @@ SITree::remove(const char* _str, unsigned _len) if (ret != NULL) this->TSM->updateHeap(ret, 0, true);//non-sense node this->TSM->updateHeap(q, q->getRank(), true); + if (q->isLeaf()) { if (q->getPrev() == NULL) @@ -354,6 +383,7 @@ SITree::remove(const char* _str, unsigned _len) if (q->getNext() == NULL) this->leaves_tail = q; } + if (p->getNum() == 0) //root shrinks { //this->leaves_head = q; @@ -369,7 +399,7 @@ SITree::remove(const char* _str, unsigned _len) } bool flag = false; - 
i = p->searchKey_equal(bstr); + i = p->searchKey_equal(_str, _len); //WARN+NOTICE:here must check, because the key to remove maybe not exist if (i != (int)p->getNum()) { @@ -390,7 +420,7 @@ SITree::remove(const char* _str, unsigned _len) } this->TSM->request(request); - bstr.clear(); + //bstr.clear(); return flag; //i == j, not found } diff --git a/KVstore/SITree/SITree.h b/KVstore/SITree/SITree.h index 0d2aa30..3f5d924 100644 --- a/KVstore/SITree/SITree.h +++ b/KVstore/SITree/SITree.h @@ -3,7 +3,7 @@ # Author: syzz # Mail: 1181955272@qq.com # Last Modified: 2015-04-26 16:44 -# Description: struct and interface of the B+ tree +# Description: string2ID, including entity2id, literal2id, predicate2id =============================================================================*/ #ifndef _KVSTORE_SITREE_SITREE_H @@ -36,13 +36,19 @@ private: //so lock is a must. Add lock to transfer is better than to add //lock to every key/value. However, modify requires a lock for a //key/value, and multiple search for different keys are ok!!! - Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char* - unsigned transfer_size[3]; + //Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char* + //unsigned transfer_size[3]; + + //TODO: in all B+ trees, updat eoperation should lock the whole tree, while search operations not + //However, the transfer bstr maybe cause the parallism error!!!! + //Why we need the transfer? 
It is ok to pass the original string pointer to return + //A problem is that before the caller ends, the tree can not be modified(so a read-writ elock is required) + std::string storepath; std::string filename; //ok for user to change /* some private functions */ std::string getFilePath(); //in UNIX system - void CopyToTransfer(const char* _str, unsigned _len, unsigned _index); + //void CopyToTransfer(const char* _str, unsigned _len, unsigned _index); void release(SINode* _np) const; //tree's operations should be atom(if read nodes) @@ -59,9 +65,10 @@ public: SINode* getRoot() const; //insert, search, remove, set bool search(const char* _str, unsigned _len, unsigned* _val); - bool insert(const char* _str, unsigned _len, unsigned _val); + bool insert(char* _str, unsigned _len, unsigned _val); bool modify(const char* _str, unsigned _len, unsigned _val); SINode* find(const Bstr* _key, int* store, bool ifmodify); + SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify); bool remove(const char* _str, unsigned _len); bool save(); ~SITree(); diff --git a/KVstore/SITree/node/SILeafNode.h b/KVstore/SITree/node/SILeafNode.h index 599b35a..1c3f14b 100644 --- a/KVstore/SITree/node/SILeafNode.h +++ b/KVstore/SITree/node/SILeafNode.h @@ -30,15 +30,21 @@ public: unsigned getValue(int _index) const; bool setValue(unsigned _val, int _index); bool addValue(unsigned _val, int _index); + bool subValue(int _index); + void setPrev(SINode* _prev); void setNext(SINode* _next); + unsigned getSize() const; + SINode* split(SINode* _father, int _index); SINode* coalesce(SINode* _father, int _index); + void release(); ~SILeafNode(); void print(std::string s); //DEBUG + /*non-sense virtual function Node* getChild(int _index) const; bool addChild(Node* _child, int _index); diff --git a/KVstore/SITree/node/SINode.cpp b/KVstore/SITree/node/SINode.cpp index 6f1a937..df7e0c7 100644 --- a/KVstore/SITree/node/SINode.cpp +++ b/KVstore/SITree/node/SINode.cpp @@ -255,6 +255,27 @@ 
SINode::addKey(const Bstr* _key, int _index, bool ifcopy) return true; } +bool +SINode::addKey(char* _str, unsigned _len, int _index, bool ifcopy) +{ + int num = this->getNum(); + if (_index < 0 || _index > num) + { + print(string("error in addKey: Invalid index ") + Util::int2string(_index)); + return false; + } + int i; + //NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!! + //however. tree operations ensure that: when node is full, not add but split first! + for (i = num - 1; i >= _index; --i) + keys[i + 1] = keys[i]; + + keys[_index].setStr(_str); + keys[_index].setLen(_len); + + return true; +} + bool SINode::subKey(int _index, bool ifdel) { @@ -330,3 +351,55 @@ SINode::searchKey_lessEqual(const Bstr& _bstr) const return ret; } +int +SINode::searchKey_less(const char* _str, unsigned _len) const +{ + int num = this->getNum(); + + int low = 0, high = num - 1, mid = -1; + while (low <= high) + { + mid = (low + high) / 2; + //if (this->keys[mid] > _bstr) + if (Util::compare(this->keys[mid].getStr(), this->keys[mid].getLen(), _str, _len) > 0) + { + if (low == mid) + break; + high = mid; + } + else + { + low = mid + 1; + } + } + + return low; +} + +int +SINode::searchKey_equal(const char* _str, unsigned _len) const +{ + int num = this->getNum(); + //for(i = 0; i < num; ++i) + // if(bstr == *(p->getKey(i))) + // { + + int ret = this->searchKey_less(_str, _len); + //if (ret > 0 && this->keys[ret - 1] == _bstr) + if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0) + return ret - 1; + else + return num; +} + +int +SINode::searchKey_lessEqual(const char* _str, unsigned _len) const +{ + int ret = this->searchKey_less(_str, _len); + //if (ret > 0 && this->keys[ret - 1] == _bstr) + if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0) + return ret - 1; + else + return ret; +} + diff --git a/KVstore/SITree/node/SINode.h b/KVstore/SITree/node/SINode.h 
index 4780d52..dd4169e 100644 --- a/KVstore/SITree/node/SINode.h +++ b/KVstore/SITree/node/SINode.h @@ -64,9 +64,11 @@ public: void setStore(unsigned _store); unsigned getFlag() const; void setFlag(unsigned _flag); + const Bstr* getKey(int _index) const; //need to check the index bool setKey(const Bstr* _key, int _index, bool ifcopy = false); bool addKey(const Bstr* _key, int _index, bool ifcopy = false); + bool addKey(char* _str, unsigned _len, int _index, bool ifcopy = false); bool subKey(int _index, bool ifdel = false); //several binary key search utilities @@ -74,7 +76,12 @@ public: int searchKey_equal(const Bstr& _bstr) const; int searchKey_lessEqual(const Bstr& _bstr) const; + int searchKey_less(const char* _str, unsigned _len) const; + int searchKey_equal(const char* _str, unsigned _len) const; + int searchKey_lessEqual(const char* _str, unsigned _len) const; + //virtual functions: polymorphic + //NOTICE: not pure-virtual, not required to be implemented again, can be used now virtual SINode* getChild(int _index) const { return NULL; }; virtual bool setChild(SINode* _child, int _index) { return true; }; virtual bool addChild(SINode* _child, int _index) { return true; }; @@ -87,6 +94,8 @@ public: virtual bool subValue(int _index) { return true; }; virtual void setPrev(SINode* _prev) {}; virtual void setNext(SINode* _next) {}; + + //NOTICE: pure-virtual, must to be implemented again in the sub-class virtual void Virtual() = 0; virtual void Normal() = 0; virtual unsigned getSize() const = 0; //return all memory owned diff --git a/KVstore/SITree/storage/SIStorage.cpp b/KVstore/SITree/storage/SIStorage.cpp index 30b55bc..442791d 100644 --- a/KVstore/SITree/storage/SIStorage.cpp +++ b/KVstore/SITree/storage/SIStorage.cpp @@ -427,7 +427,8 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next) fread(&len, sizeof(unsigned), 1, this->treefp); this->ReadAlign(_next); //this->request(len); - char* s = (char*)malloc(len); + //char* s = (char*)malloc(len); + char* s = new 
char[len]; _bp->setLen(len); for (i = 0; i + 4 < len; i += 4) { diff --git a/KVstore/SITree/storage/SIStorage.h b/KVstore/SITree/storage/SIStorage.h index 9f9b601..56f421e 100644 --- a/KVstore/SITree/storage/SIStorage.h +++ b/KVstore/SITree/storage/SIStorage.h @@ -13,6 +13,14 @@ #include "../node/SILeafNode.h" #include "../heap/SIHeap.h" +//TODO: whether to use heap or not, is a big question +//For single-query application, it seems that LRU list like VSTree is a better choice(no much cost in the buffer itself) +//But in multiple-queries case, things maybe different +//BETTER: +//add a heap position in node, to speed up the node-pointer searching +//lower the update times of heap, if the size is 128M, then each update is 27 at most +//if not update in time, then the heap maybe not be a heap, then why do we use heap? why not a simple array? + //It controls read, write, swap class SIStorage { diff --git a/KVstore/Tree.h b/KVstore/Tree.h index 528d2eb..2574b2f 100644 --- a/KVstore/Tree.h +++ b/KVstore/Tree.h @@ -1,4 +1,5 @@ //headers wrapper for all kinds of BPlusTree #include "ISTree/ISTree.h" -#include "SITree/SITree.h" \ No newline at end of file +#include "SITree/SITree.h" +#include "IVTree/IVTree.h" diff --git a/Main/gserver.cpp b/Main/gserver.cpp index f8f6248..d6ae3d2 100644 --- a/Main/gserver.cpp +++ b/Main/gserver.cpp @@ -11,9 +11,9 @@ using namespace std; -#define GSERVER_PORT_FILE "bin/.gserver_port" -#define GSERVER_PORT_SWAP "bin/.gserver_port.swap" -#define GSERVER_LOG "logs/gserver.log" +//#define GSERVER_PORT_FILE "bin/.gserver_port" +//#define GSERVER_PORT_SWAP "bin/.gserver_port.swap" +//#define GSERVER_LOG "logs/gserver.log" bool isOnlyProcess(const char* argv0); void checkSwap(); @@ -61,7 +61,7 @@ int main(int argc, char* argv[]) unsigned short port = Socket::DEFAULT_CONNECT_PORT; if (argc == 3) { if (!Util::isValidPort(string(argv[2]))) { - cout << "Invalid port: " << argv[2] << endl; + cerr << "Invalid port: " << argv[2] << endl; return -1; } 
else { @@ -70,9 +70,9 @@ int main(int argc, char* argv[]) } } if (!isOnlyProcess(argv[0])) { - ofstream out(GSERVER_PORT_SWAP, ios::out); + ofstream out(Util::gserver_port_swap.c_str()); if (!out) { - cout << "Failed to change port!" << endl; + cerr << "Failed to change port!" << endl; return -1; } out << port; @@ -80,9 +80,9 @@ int main(int argc, char* argv[]) cout << "Port will be changed to " << port << " after the current server stops or restarts." << endl; return 0; } - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (!out) { - cout << "Failed to change port!" << endl; + cerr << "Failed to change port!" << endl; return -1; } out << port; @@ -93,10 +93,15 @@ int main(int argc, char* argv[]) if (mode == "-s" || mode == "--start") { if (!isOnlyProcess(argv[0])) { - cout << "gServer already running!" << endl; + cerr << "gServer already running!" << endl; return -1; } if (startServer()) { + sleep(1); + if (isOnlyProcess(argv[0])) { + cerr << "Server stopped unexpectedly. Check for port conflicts!" << endl; + return -1; + } return 0; } else { @@ -106,7 +111,7 @@ int main(int argc, char* argv[]) if (mode == "-t" || mode == "--stop") { if (isOnlyProcess(argv[0])) { - cout << "gServer not running!" << endl; + cerr << "gServer not running!" << endl; return -1; } if (stopServer()) { @@ -119,7 +124,7 @@ int main(int argc, char* argv[]) if (mode == "-r" || mode == "--restart") { if (isOnlyProcess(argv[0])) { - cout << "gServer not running!" << endl; + cerr << "gServer not running!" << endl; return -1; } if (!stopServer()) { @@ -133,14 +138,14 @@ int main(int argc, char* argv[]) if (mode == "-P" || mode == "--printport") { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE); + ifstream in(Util::gserver_port_file.c_str()); if (in) { in >> port; in.close(); } cout << "Current connection port is " << port << '.' 
<< endl; unsigned short portSwap = 0; - ifstream inSwap(GSERVER_PORT_SWAP); + ifstream inSwap(Util::gserver_port_swap.c_str()); if (inSwap) { inSwap >> portSwap; inSwap.close(); @@ -153,14 +158,14 @@ int main(int argc, char* argv[]) if (mode == "-k" || mode == "--kill") { if (isOnlyProcess(argv[0])) { - cout << "No process to kill!" << endl; + cerr << "No process to kill!" << endl; return -1; } execl("/usr/bin/killall", "killall", Util::getExactPath(argv[0]).c_str(), NULL); return 0; } - cout << "Invalid arguments! Input \"bin/gserver -h\" for help." << endl; + cerr << "Invalid arguments! Type \"bin/gserver -h\" for help." << endl; return -1; } @@ -169,38 +174,38 @@ bool isOnlyProcess(const char* argv0) { } void checkSwap() { - if (access(GSERVER_PORT_SWAP, 00) != 0) { + if (access(Util::gserver_port_swap.c_str(), 00) != 0) { return; } - ifstream in(GSERVER_PORT_SWAP, ios::in); + ifstream in(Util::gserver_port_swap.c_str()); if (!in) { - cout << "Failed in checkSwap(), port may not be changed." << endl; + cerr << "Failed in checkSwap(), port may not be changed." << endl; return; } unsigned short port; in >> port; in.close(); - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (!out) { - cout << "Failed in checkSwap(), port may not be changed." << endl; + cerr << "Failed in checkSwap(), port may not be changed." 
<< endl; return; } out << port; out.close(); - chmod(GSERVER_PORT_FILE, 0644); - string cmd = string("rm ") + GSERVER_PORT_SWAP; + chmod(Util::gserver_port_file.c_str(), 0644); + string cmd = string("rm ") + Util::gserver_port_swap; system(cmd.c_str()); } bool startServer() { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE, ios::in); + ifstream in(Util::gserver_port_file.c_str()); if (!in) { - ofstream out(GSERVER_PORT_FILE, ios::out); + ofstream out(Util::gserver_port_file.c_str()); if (out) { out << port; out.close(); - chmod(GSERVER_PORT_FILE, 0644); + chmod(Util::gserver_port_file.c_str(), 0644); } } else { @@ -215,47 +220,75 @@ bool startServer() { if (!Util::dir_exist("logs")) { Util::create_dir("logs"); } - freopen(GSERVER_LOG, "a", stdout); - freopen(GSERVER_LOG, "a", stderr); - Server server(port); - if (!server.createConnection()) { - cout << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl; - return false; + freopen(Util::gserver_log.c_str(), "a", stdout); + freopen(Util::gserver_log.c_str(), "a", stderr); + + int status; + + while (true) { + fpid = fork(); + + // child, main process + if (fpid == 0) { + Server server(port); + if (!server.createConnection()) { + cerr << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl; + return false; + } + cout << Util::getTimeString() << "Server started at port " << port << '.' << endl; + server.listen(); + exit(0); + return true; + } + + // parent, deamon process + else if (fpid > 0) { + waitpid(fpid, &status, 0); + if (WIFEXITED(status)) { + exit(0); + return true; + } + cerr << Util::getTimeString() << "Server stopped abnormally, restarting server..." << endl; + } + + // fork failure + else { + cerr << Util::getTimeString() << "Failed to start server: deamon fork failure." << endl; + return false; + } } - cout << Util::getTimeString() << "Server started at port " << port << '.' 
<< endl; - server.listen(); - exit(0); - return true; } + // parent else if (fpid > 0) { cout << "Server started at port " << port << '.' << endl; return true; } + // fork failure else { - cout << "Failed to start server at port " << port << '.' << endl; + cerr << "Failed to start server at port " << port << '.' << endl; return false; } } bool stopServer() { unsigned short port = Socket::DEFAULT_CONNECT_PORT; - ifstream in(GSERVER_PORT_FILE, ios::in); + ifstream in(Util::gserver_port_file.c_str()); if (in) { in >> port; in.close(); } Socket socket; if (!socket.create() || !socket.connect("127.0.0.1", port) || !socket.send("stop")) { - cout << "Failed to stop server at port " << port << '.' << endl; + cerr << "Failed to stop server at port " << port << '.' << endl; return false; } string recv_msg; socket.recv(recv_msg); socket.close(); if (recv_msg != "server stopped.") { - cout << "Failed to stop server at port " << port << '.' << endl; + cerr << "Failed to stop server at port " << port << '.' << endl; return false; } cout << "Server stopped at port " << port << '.' 
<< endl; diff --git a/NOTES.md b/NOTES.md index 5484c9d..29c62b6 100644 --- a/NOTES.md +++ b/NOTES.md @@ -7,6 +7,11 @@ 在使用gserver时,不能在数据库没有unload时再用gbuild或其他命令修改数据库,仅限于C/S模式 将IRC聊天放到gstore文档上,freenode #gStore +storage中大量使用long类型,文件大小也可能达到64G,最好在64位机器上运行。 + +# 推广 + +必须建立一个官方网站,可以展示下团队、demo,需要建立社区/论坛并维护 另外要有桌面应用或者网页应用,以可视化的方式操作数据库,类似virtuoso和neo4j那种 server 118.89.115.42 gstore-pku.com @@ -86,14 +91,18 @@ http://blog.csdn.net/infoworld/article/details/8670951 要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned) 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧 -同时将ID的编码改为unsigned,无效标志-1改为最大值的宏, triple数目的类型也要改为unsigned -注意pre的ID还可以为-2,或者对于pre仍然用int,或者改函数的返回值为long long (还有一些没有用-1而是>=0) +type分支中query过程可能还有问题,需要修改Query/里面的类型,另外stringindex中也要修改,分界线已经是20亿且非法不再是-1 +remove signature.binary, 合并两个分支type value +vstree在build和query时可以用不同大小的缓存,来加速build过程 --- 将B+tree中叶节点的大的value分离出来,新建一套缓存,使用block机制,标记length为0表示未读取 类型bstr的length问题也需要解决(新建Istr类型) 如果把类型直接改成long long,空间开销一下子就上升了一倍 解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用long long*和unsigned来表示,这样最高可支持到40亿triple 注意:在B+树中是以long long*的方式存,但读出后应该全部换成unsigned*和unsigned搭配的方式(最长支持20亿个po对) +UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍 +解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用unsigned long long*和unsigned来表示,这样最高可支持到40亿triple +(其实这个不是特别必要,很少会有这种情况,我们处理的triple数目一般限制在20亿,就算是type这种边,po对数也就是跟entity数目持平,很难达到5亿) --- 那么是否可以调整entity与literal的分界线,如果entity数目一般都比literal数目多的话 直接把literal从大到小编号,可在ID模块中指定顺序,这样每个Datbase模块应该有自己独特的分界线,其他模块用时也需要注意 @@ -469,6 +478,8 @@ build db error if triple num > 500M # BETTER +#### 添加数据访问层,数据范式和生成数据访问的源码 + #### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询,返回空值! #### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?) @@ -519,6 +530,8 @@ http://www.oschina.net/question/188977_58777 # ADVICE +#### 考虑利用hdfs或者hbase,这样就可以利用各公司已有的数据库系统,但这是否会和已有的内外存交换冲突? + #### 数值型查询 实数域 [-bound, bound] 类型很难匹配,有必要单独编码么? 
数据集中不应有范围 Query中编码过滤后还需验证 x>a, x=, <=, a时不直接取字符串,而是转换为数值并编码 @@ -608,3 +621,19 @@ http://www.hprd.org/download/ ## GIT USAGE https://git-scm.com/book/zh/v1/%E8%B5%B7%E6%AD%A5-%E5%88%9D%E6%AC%A1%E8%BF%90%E8%A1%8C-Git-%E5%89%8D%E7%9A%84%E9%85%8D%E7%BD%AE +#### how to commit a message + +package.json +http://www.json.cn/ +https://www.oschina.net/news/69705/git-commit-message-and-changelog-guide +https://sanwen8.cn/p/44eCof7.html + +1. commit one by one, a commit just do one thing + +2. place a empty line between head and body, body and footer + +3. the first letter of header should be in uppercase, and the header should not be too long, just a wonderful summary +FIX: ... ADD:... REF:... 代码重构 SUB:... + +4. each line should not be too long, add your real name and the influence in footer(maybe cause the code struct to change) + diff --git a/Query/BasicQuery.h b/Query/BasicQuery.h index 503ebd8..e97bb04 100644 --- a/Query/BasicQuery.h +++ b/Query/BasicQuery.h @@ -175,8 +175,8 @@ private: map selected_var_position; public: - static const int MAX_VAR_NUM = 10; - static const int MAX_PRE_VAR_NUM = 10; + static const int MAX_VAR_NUM = 20; + static const int MAX_PRE_VAR_NUM = 20; static const char NOT_JUST_SELECT = 'a'; static const char SELECT_VAR = 's'; diff --git a/Util/Bstr.cpp b/Util/Bstr.cpp index 2ddc7ae..c81ab29 100644 --- a/Util/Bstr.cpp +++ b/Util/Bstr.cpp @@ -17,13 +17,20 @@ Bstr::Bstr() this->str = NULL; } -Bstr::Bstr(const char* _str, unsigned _len) +Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy) { //WARN: if need a string .please add '\0' in your own! this->length = _len; - //DEBUG:if copy memory? 
- //this->str = _str; //not valid:const char* -> char* - this->str = (char*)malloc(_len); + + //if(_nocopy) + //{ + //this->str = _str; //not valid:const char* -> char* + //return; + //} + + //NOTICE: we decide to use new/delete in global area + //this->str = (char*)malloc(_len); + this->str = new char[_len]; memcpy(this->str, _str, sizeof(char) * _len); //this->str[_len]='\0'; } @@ -116,6 +123,15 @@ Bstr::operator != (const Bstr& _bstr) unsigned Bstr::getLen() const { +//WARN: we should not include too complicate logic here!!!! + + //NOTICE: this is for VList + //if(this->isBstrLongList()) + ////if(this->str == NULL) + //{ + //return 0; + //} + return length; } @@ -146,15 +162,18 @@ Bstr::copy(const Bstr* _bp) this->length = _bp->getLen(); //DEBUG!!! //cerr<<"bstr length: "<length<str = (char*)malloc(this->length); - memcpy(this->str, _bp->getStr(), this->length); + + //this->str = (char*)malloc(this->length); + this->str = new char[this->length]; + memcpy(this->str, _bp->getStr(), sizeof(char) * this->length); } void Bstr::copy(const char* _str, unsigned _len) { this->length = _len; - this->str = (char*)malloc(this->length); + //this->str = (char*)malloc(this->length); + this->str = new char[this->length]; memcpy(this->str, _str, this->length); } @@ -168,7 +187,8 @@ Bstr::clear() void Bstr::release() { - free(this->str); //ok to be null, do nothing + //free(this->str); //ok to be null, do nothing + delete[] this->str; clear(); } @@ -203,3 +223,9 @@ Bstr::print(string s) const //#endif } +bool +Bstr::isBstrLongList() const +{ + return this->str == NULL; +} + diff --git a/Util/Bstr.h b/Util/Bstr.h index 1d93bc6..aaaf84a 100644 --- a/Util/Bstr.h +++ b/Util/Bstr.h @@ -18,13 +18,14 @@ class Bstr { private: char* str; //pointers consume 8 byte in 64-bit system + //TODO: the length maybe not needed unsigned length; public: Bstr(); //if copy memory, then use const char*, but slow //else, can not use const char* -> char* - Bstr(const char* _str, unsigned _len); + 
Bstr(const char* _str, unsigned _len, bool _nocopy = false); //Bstr(char* _str, unsigned _len); Bstr(const Bstr& _bstr); //Bstr& operate = (const Bstr& _bstr); @@ -47,6 +48,9 @@ public: //int write(FILE* _fp); ~Bstr(); void print(std::string s) const; //DEBUG + + //judge if this Bstr represent a long list value, and waiting to be each time on need + bool isBstrLongList() const; }; #endif // _UTIL_BSTR_H diff --git a/Util/Stream.cpp b/Util/Stream.cpp index a4003b1..cc612c1 100644 --- a/Util/Stream.cpp +++ b/Util/Stream.cpp @@ -58,7 +58,8 @@ Stream::Stream(std::vector& _keys, std::vector& _d this->record_size = new unsigned[this->colnum]; for(unsigned i = 0; i < this->colnum; ++i) { - this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE)); + char* tmptr = new char[Util::TRANSFER_SIZE]; + this->record[i].setStr(tmptr); this->record_size[i] = Util::TRANSFER_SIZE; } @@ -148,7 +149,8 @@ Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx) if(length + 1 > this->record_size[_idx]) { this->record[_idx].release(); - this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char))); + char* tmptr = new char[length+1]; + this->record[_idx].setStr(tmptr); this->record_size[_idx] = length + 1; //one more byte: convenient to add \0 } @@ -187,7 +189,8 @@ Stream::outputCache() { unsigned len; fread(&len, sizeof(unsigned), 1, this->tempfp); - char* p = (char*)malloc(len * sizeof(char)); + //char* p = (char*)malloc(len * sizeof(char)); + char* p = new char[len]; fread(p, sizeof(char), len, this->tempfp); bp[i].setLen(len); bp[i].setStr(p); @@ -320,13 +323,16 @@ Stream::read() //FILE* fp = (FILE*)(this->ans); for(unsigned i = 0; i < this->colnum; ++i) { - //BETTER:alloca and reuse the space in Bstr? + //BETTER:alloc and reuse the space in Bstr? 
unsigned len; fread(&len, sizeof(unsigned), 1, this->ansDisk); - char* s = (char*)calloc(len + 1, sizeof(char)); + //char* s = (char*)calloc(len + 1, sizeof(char)); + char* s = new char[len+1]; fread(s, sizeof(char), len, this->ansDisk); + s[len] = '\0'; this->copyToRecord(s, len, i); - free(s); + //free(s); + delete[] s; } } this->xpos++; @@ -420,7 +426,9 @@ Stream::mergeSort() #endif break; } - s = (char*)malloc(sizeof(char) * len); + + //s = (char*)malloc(sizeof(char) * len); + s = new char[len]; fread(s, sizeof(char), len, tp); bp[i].setLen(len); bp[i].setStr(s); diff --git a/Util/Util.cpp b/Util/Util.cpp index 1cb29aa..0ae091f 100644 --- a/Util/Util.cpp +++ b/Util/Util.cpp @@ -1,1621 +1,1639 @@ -/*============================================================================= -# Filename: Util.cpp -# Author: Bookug Lobert -# Mail: 1181955272@qq.com -# Last Modified: 2015-10-16 10:43 -# Description: -1. firstly written by liyouhuan, modified by zengli -2. achieve functions in Util.h -=============================================================================*/ - -#include "Util.h" - -using namespace std; - -//================================================================================================================== -//configure() to config the basic options of gStore system -//================================================================================================================== - -//string Util::profile = "../init.conf"; -string Util::profile = "init.conf"; - -map Util::global_config; - -//database home directory, which is an absolute path by config -//TODO:everywhere using database, the prefix should be it -//string Util::db_home = "."; - -//false:single true:distribute -//bool Util::gstore_mode = false; - -//control the debug information -//string Util::debug_level = "simple"; - -//database placed in which path -//string Util::db_path = "."; - -//the suffix to be added to database name -//string Util::db_suffix = ".db"; - -//the maxium 
buffer size assigned to gStore system -//string Util::buffer_maxium = "100"; //the unit is GB - -//the maxium thread num assigned to gStore system -//string Util::thread_maxium = "1000"; - -//if record logs in gStore system(to be recoverable or faster) -//string Util::operation_logs = "true"; - -//================================================================================================================== - -//NOTICE:used in Database, Join and Strategy -//int Util::triple_num = 0; -//int Util::pre_num = 0; -//int Util::entity_num = 0; -//int Util::literal_num = 0; - -//string Util::tmp_path = "../.tmp/"; -//string Util::debug_path = "../.debug/"; -string Util::tmp_path = ".tmp/"; -string Util::debug_path = ".debug/"; - -//QUERY: assign all in Util()? -//BETTER:assigned in KVstore, not one tree? -FILE* Util::debug_kvstore = NULL; //used by KVstore -FILE* Util::debug_database = NULL; //used by Database -FILE* Util::debug_vstree = NULL; //used by VSTree - -//set hash table -HashFunction Util::hash[] = { Util::simpleHash, Util::APHash, Util::BKDRHash, Util::DJBHash, Util::ELFHash, \ - Util::DEKHash, Util::BPHash, Util::FNVHash, Util::HFLPHash, Util::HFHash, Util::JSHash, \ - Util::PJWHash, Util::RSHash, Util::SDBMHash, Util::StrHash, Util::TianlHash, NULL}; - -//remove spaces in the left -char* -Util::l_trim(char* szOutput, const char* szInput) -{ - assert(szInput != NULL); - assert(szOutput != NULL); - assert(szOutput != szInput); - for (; *szInput != '\0' && isspace(*szInput); ++szInput); - return strcpy(szOutput, szInput); -} - -//remove spaces in the right -char* -Util::r_trim(char *szOutput, const char* szInput) -{ - char *p = NULL; - assert(szInput != NULL); - assert(szOutput != NULL); - assert(szOutput != szInput); - strcpy(szOutput, szInput); - for(p = szOutput + strlen(szOutput) - 1; p >= szOutput && isspace(*p); --p); - *(++p) = '\0'; - return szOutput; -} - -//remove spaces in the two sides -char* -Util::a_trim(char * szOutput, const char * szInput) -{ 
- char *p = NULL; - assert(szInput != NULL); - assert(szOutput != NULL); - l_trim(szOutput, szInput); - for (p = szOutput + strlen(szOutput) - 1; p >= szOutput && isspace(*p); --p); - *(++p) = '\0'; - return szOutput; -} - -bool -Util::configure() -{ - const unsigned len = 505; - char *buf, *c; - char buf_i[len], buf_o[len]; - FILE *fp = NULL; - char keyname[len]; - char keyval[len]; - - //initialize the settings - Util::global_config["gstore_mode"] = "single"; - //NOTICE+BETTER+TODO:use macro is better to avoid too many judging on this variable(add a DEBUG macro at the outer) - Util::global_config["debug_level"] = "simple"; - Util::global_config["db_home"] = "."; - Util::global_config["db_suffix"] = ".db"; - Util::global_config["buffer_maxium"] = "100"; - Util::global_config["thread_maxium"] = "1000"; - //TODO:to be recoverable - Util::global_config["operation_logs"] = "true"; - -#ifdef DEBUG - fprintf(stderr, "profile: %s\n", profile.c_str()); -#endif - if((fp = fopen(profile.c_str(), "r")) == NULL) //NOTICE: this is not a binary file - { -#ifdef DEBUG - fprintf(stderr, "openfile [%s] error [%s]\n", profile.c_str(), strerror(errno)); -#endif - return false; - } - fseek(fp, 0, SEEK_SET); - - while(!feof(fp) && fgets(buf_i, len, fp) != NULL) - { - //fprintf(stderr, "buffer: %s\n", buf_i); - Util::l_trim(buf_o, buf_i); - if(strlen(buf_o) <= 0) - continue; - buf = NULL; - buf = buf_o; - if(buf[0] == '#') - { - continue; - } - else if(buf[0] == '[') - { - continue; - } - if((c = (char*)strchr(buf, '=')) == NULL) - continue; - memset(keyname, 0, sizeof(keyname)); - sscanf(buf, "%[^=|^ |^\t]", keyname); -#ifdef DEBUG - //fprintf(stderr, "keyname: %s\n", keyname); -#endif - sscanf(++c, "%[^\n]", keyval); - char *keyval_o = (char *)calloc(strlen(keyval) + 1, sizeof(char)); - if(keyval_o != NULL) - { - Util::a_trim(keyval_o, keyval); -#ifdef DEBUG - //fprintf(stderr, "keyval: %s\n", keyval_o); -#endif - if(keyval_o && strlen(keyval_o) > 0) - { - //strcpy(keyval, keyval_o); 
- global_config[string(keyname)] = string(keyval_o); - } - xfree(keyval_o); - } - } - - fclose(fp); - //display all settings here - cout<<"the current settings are as below: "<::iterator it = global_config.begin(); it != global_config.end(); ++it) - { - cout<first<<" : "<second<> 2)); - } - - return (key & 0x7FFFFFFF); -} - -unsigned -Util::PJWHash(const char *_str) -{ - unsigned int bits_in_unsigned_int = (unsigned int)(sizeof(unsigned int) * 8); - unsigned int three_quarters = (unsigned int)((bits_in_unsigned_int * 3) / 4); - unsigned int one_eighth = (unsigned int)(bits_in_unsigned_int / 8); - - unsigned int high_bits = (unsigned int)(0xFFFFFFFF) << (bits_in_unsigned_int - one_eighth); - unsigned int key = 0; - unsigned int test = 0; - - while (*_str) - { - key = (key << one_eighth) + (*_str++); - if ((test = key & high_bits) != 0) - { - key = ((key ^ (test >> three_quarters)) & (~high_bits)); - } - } - - return (key & 0x7FFFFFFF); -} - -unsigned -Util::ELFHash(const char *_str) -{ - unsigned int key = 0; - unsigned int x = 0; - - while (*_str) - { - key = (key << 4) + (*_str++); - if ((x = key & 0xF0000000L) != 0) - { - key ^= (x >> 24); - key &= ~x; - } - } - - return (key & 0x7FFFFFFF); -} - -unsigned -Util::SDBMHash(const char *_str) -{ - unsigned int key = 0; - - while (*_str) - { - key = (*_str++) + (key << 6) + (key << 16) - key; - } - - return (key & 0x7FFFFFFF); -} - -unsigned -Util::DJBHash(const char *_str) -{ - unsigned int key = 5381; - while (*_str) { - key += (key << 5) + (*_str++); - } - return (key & 0x7FFFFFFF); -} - -unsigned -Util::APHash(const char *_str) -{ - unsigned int key = 0; - int i; - - for (i=0; *_str; i++) - { - if ((i & 1) == 0) - { - key ^= ((key << 7) ^ (*_str++) ^ (key >> 3)); - } - else - { - key ^= (~((key << 11) ^ (*_str++) ^ (key >> 5))); - } - } - - return (key & 0x7FFFFFFF); -} - -unsigned -Util::DEKHash(const char* _str) -{ - unsigned int hash = strlen(_str); - for(; *_str; _str++) - { - hash = ((hash << 5) ^ (hash >> 
27)) ^ (*_str); - } - return hash; -} - -unsigned -Util::BPHash(const char* _str) -{ - unsigned int hash = 0; - for(; *_str; _str++) - { - hash = hash << 7 ^ (*_str); - } - - return hash; -} - -unsigned -Util::FNVHash(const char* _str) -{ - const unsigned int fnv_prime = 0x811C9DC5; - unsigned int hash = 0; - - for(; *_str; _str++) - { - hash *= fnv_prime; - hash ^= (*_str); - } - - return hash; -} - -unsigned -Util::HFLPHash(const char* _str) -{ - unsigned int n = 0; - char* b = (char*)&n; - unsigned int len = strlen(_str); - for(unsigned i = 0; i < len; ++i) - { - b[i%4] ^= _str[i]; - } - return n%len; -} - -unsigned -Util::HFHash(const char* _str) -{ - int result=0; - const char* ptr = _str; - int c; - unsigned int len = strlen(_str); - for(int i=1; (c=*ptr++); i++) - result += c*3*i; - if (result<0) - result = -result; - return result%len; -} - -unsigned -Util::StrHash(const char* _str) -{ - register unsigned int h; - register unsigned char *p; - for(h = 0, p = (unsigned char *)_str; *p; p++) - { - h = 31 * h + *p; - } - - return h; - -} - -unsigned -Util::TianlHash(const char* _str) -{ - unsigned long urlHashValue=0; - int ilength=strlen(_str); - int i; - unsigned char ucChar; - if(!ilength) { - return 0; - } - if(ilength<=256) { - urlHashValue=16777216*(ilength-1); - } else { - urlHashValue = 42781900080; - } - if(ilength<=96) { - for(i=1; i<=ilength; i++) { - ucChar = _str[i-1]; - if(ucChar<='Z'&&ucChar>='A') { - ucChar=ucChar+32; - } - urlHashValue+=(3*i*ucChar*ucChar+5*i*ucChar+7*i+11*ucChar)%1677216; - } - } else { - for(i=1; i<=96; i++) - { - ucChar = _str[i+ilength-96-1]; - if(ucChar<='Z'&&ucChar>='A') - { - ucChar=ucChar+32; - } - urlHashValue+=(3*i*ucChar*ucChar+5*i*ucChar+7*i+11*ucChar)%1677216; - } - } - - return urlHashValue; -} - -//NOTICE: the time of log() and sqrt() in C can be seen as constant - -//NOTICE:_b must >= 1 -double -Util::logarithm(double _a, double _b) -{ - //REFRENCE: http://blog.csdn.net/liyuanbhu/article/details/8997850 - //a>0 
!= 1; b>0 (b>=2 using log/log10/change, 1= 2 - return log(_b) / under; - return -1.0; -} - -void -Util::intersect(unsigned*& _id_list, unsigned& _id_list_len, const unsigned* _list1, unsigned _len1, const unsigned* _list2, unsigned _len2) -{ - vector res; - //cout<<"intersect prevar: "<<_len1<<" "<<_len2<0 m=nk(02 - //k<=k0 binary search; k>k0 intersect - int method = -1; //0: intersect 1: search in list1 2: search in list2 - unsigned n = _len1; - double k = 0; - if(n < _len2) - { - k = (double)n / (double)_len2; - n = _len2; - method = 2; - } - else - { - k = (double)_len2 / (double)n; - method = 1; - } - if(n <= 2) - method = 0; - else - { - double limit = Util::logarithm(n/2, 2); - if(k > limit) - method = 0; - } - - switch(method) - { - case 0: - { //this bracket is needed if vars are defined in case - unsigned id_i = 0; - unsigned num = _len1; - for(unsigned i = 0; i < num; ++i) - { - unsigned can_id = _list1[i]; - while((id_i < _len2) && (_list2[id_i] < can_id)) - { - id_i ++; - } - - if(id_i == _len2) - { - break; - } - - if(can_id == _list2[id_i]) - { - res.push_back(can_id); - id_i ++; - } - } - break; - } - case 1: - { - for(unsigned i = 0; i < _len2; ++i) - { - if(Util::bsearch_int_uporder(_list2[i], _list1, _len1) != INVALID) - res.push_back(_list2[i]); - } - break; - } - case 2: - { - unsigned m = _len1, i; - for(i = 0; i < m; ++i) - { - unsigned t = _list1[i]; - if(Util::bsearch_int_uporder(t, _list2, _len2) != INVALID) - res.push_back(t); - } - break; - } - default: - cerr << "no such method in Util::intersect()" << endl; - break; - } - - _id_list_len = res.size(); - - if (_id_list_len == 0) { - _id_list = NULL; - } - else { - _id_list = new unsigned[_id_list_len]; - for (unsigned i = 0; i < _id_list_len; ++i) - _id_list[i] = res[i]; - } - delete[] _list1; - delete[] _list2; -} - -int -Util::compIIpair(int _a1, int _b1, int _a2, int _b2) -{ - if(_a1 == _a2 && _b1 == _b2) - return 0; - else if(_a1 < _a2 || (_a1 == _a2 && _b1 <= _b2)) - return -1; - 
else - return 1; -} - -bool -Util::isValidPort(string str) -{ - //valid port number: 0 - 65535 - if(str.length() < 1 || str.length() > 5) - { - return false; - } - - unsigned i; - for(i = 0; i < str.length(); i++) - { - if(str[i] < '0' || str[i] > '9') - { - return false; - } - } - - int port = Util::string2int(str); - if(port < 0 || port>65535) - { - return false; - } - - return true; -} - -bool -Util::isValidIP(string str) -{ - if(str == "localhost") - { - return true; - } - return (Util::isValidIPV4(str) || Util::isValidIPV6(str)); -} - -bool -Util::isValidIPV4(string str) -{ - regex_t reg; - char pattern[] = "^(([01]?[0-9][0-9]?|2[0-4][0-9]|25[0-5])\\.){3}([01]?[0-9][0-9]?|2[0-4][0-9]|25[0-5])$"; - regcomp(®, pattern, REG_EXTENDED | REG_NOSUB); - regmatch_t pm[1]; - int status = regexec(®, str.c_str(), 1, pm, 0); - regfree(®); - if(status == REG_NOMATCH) - { - return false; - } - return true; -} - -bool -Util::isValidIPV6(string str) -{ - //TO BE IMPLEMENTED - return false; -} - -string -Util::getTimeString() { - static const int max = 20; // max length of time string - char time_str[max]; - time_t timep; - time(&timep); - strftime(time_str, max, "%Y%m%d %H:%M:%S\t", gmtime(&timep)); - return string(time_str); -} - -string -Util::node2string(const char* _raw_str) { - string _output; - unsigned _first_quote = 0; - unsigned _last_quote = 0; - bool _has_quote = false; - for (unsigned i = 0; _raw_str[i] != '\0'; i++) { - if (_raw_str[i] == '\"') { - if (!_has_quote) { - _first_quote = i; - _last_quote = i; - _has_quote = true; - } - else { - _last_quote = i; - } - } - } - if (_first_quote==_last_quote) { - _output += _raw_str; - return _output; - } - for (unsigned i = 0; i <= _first_quote; i++) { - _output += _raw_str[i]; - } - for (unsigned i = _first_quote + 1; i < _last_quote; i++) { - switch (_raw_str[i]) { - case '\n': - _output += "\\n"; - break; - case '\r': - _output += "\\r"; - break; - case '\t': - _output += "\\t"; - break; - case '\"': - _output += 
"\\\""; - break; - case '\\': - _output += "\\\\"; - break; - default: - _output += _raw_str[i]; - } - } - for (unsigned i = _last_quote; _raw_str[i] != 0; i++) { - _output += _raw_str[i]; - } - return _output; -} - -int -Util::_spo_cmp(const void* _a, const void* _b) -{ - int** _p_a = (int**)_a; - int** _p_b = (int**)_b; - - int _sub_id_a = (*_p_a)[0]; - int _sub_id_b = (*_p_b)[0]; - if (_sub_id_a != _sub_id_b) { - return _sub_id_a - _sub_id_b; - } - - int _pre_id_a = (*_p_a)[1]; - int _pre_id_b = (*_p_b)[1]; - if (_pre_id_a != _pre_id_b) { - return _pre_id_a - _pre_id_b; - } - - int _obj_id_a = (*_p_a)[2]; - int _obj_id_b = (*_p_b)[2]; - if (_obj_id_a != _obj_id_b) { - return _obj_id_a - _obj_id_b; - } - - return 0; -} - -int -Util::_ops_cmp(const void* _a, const void* _b) -{ - int** _p_a = (int**)_a; - int** _p_b = (int**)_b; - - int _obj_id_a = (*_p_a)[2]; - int _obj_id_b = (*_p_b)[2]; - if (_obj_id_a != _obj_id_b) { - return _obj_id_a - _obj_id_b; - } - - int _pre_id_a = (*_p_a)[1]; - int _pre_id_b = (*_p_b)[1]; - if (_pre_id_a != _pre_id_b) { - return _pre_id_a - _pre_id_b; - } - - int _sub_id_a = (*_p_a)[0]; - int _sub_id_b = (*_p_b)[0]; - if (_sub_id_a != _sub_id_b) { - return _sub_id_a - _sub_id_b; - } - - return 0; -} - -int -Util::_pso_cmp(const void* _a, const void* _b) -{ - int** _p_a = (int**)_a; - int** _p_b = (int**)_b; - - int _pre_id_a = (*_p_a)[1]; - int _pre_id_b = (*_p_b)[1]; - if (_pre_id_a != _pre_id_b) { - return _pre_id_a - _pre_id_b; - } - - int _sub_id_a = (*_p_a)[0]; - int _sub_id_b = (*_p_b)[0]; - if (_sub_id_a != _sub_id_b) { - return _sub_id_a - _sub_id_b; - } - - int _obj_id_a = (*_p_a)[2]; - int _obj_id_b = (*_p_b)[2]; - if (_obj_id_a != _obj_id_b) { - return _obj_id_a - _obj_id_b; - } - - return 0; -} - -bool -Util::spo_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b) -{ - if(a.subid != b.subid) - { - return a.subid < b.subid; - } - - if(a.preid != b.preid) - { - return a.preid < b.preid; - } - - if(a.objid != b.objid) - { - 
return a.objid < b.objid; - } - - //all are equal, no need to sort this two - return false; -} - -bool -Util::ops_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b) -{ - if(a.objid != b.objid) - { - return a.objid < b.objid; - } - - if(a.preid != b.preid) - { - return a.preid < b.preid; - } - - if(a.subid != b.subid) - { - return a.subid < b.subid; - } - - //all are equal, no need to sort this two - return false; -} - -bool -Util::pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b) -{ - if(a.preid != b.preid) - { - return a.preid < b.preid; - } - - if(a.subid != b.subid) - { - return a.subid < b.subid; - } - - if(a.objid != b.objid) - { - return a.objid < b.objid; - } - - //all are equal, no need to sort this two - return false; -} - -void -Util::empty_file(const char* _fname) -{ - FILE * fp; - //NOTICE: if exist, then overwrite and create a empty file - fp = fopen(_fname, "w"); - if(fp == NULL) - { - printf("do empty file %s failed\n", _fname); - } - else - { - fclose(fp); - } -} - +/*============================================================================= +# Filename: Util.cpp +# Author: Bookug Lobert +# Mail: 1181955272@qq.com +# Last Modified: 2015-10-16 10:43 +# Description: +1. firstly written by liyouhuan, modified by zengli +2. 
achieve functions in Util.h +=============================================================================*/ + +#include "Util.h" + +using namespace std; + +//================================================================================================================== +//configure() to config the basic options of gStore system +//================================================================================================================== + +//string Util::profile = "../init.conf"; +string Util::profile = "init.conf"; + +map Util::global_config; + +//database home directory, which is an absolute path by config +//TODO:everywhere using database, the prefix should be it +//string Util::db_home = "."; + +//false:single true:distribute +//bool Util::gstore_mode = false; + +//control the debug information +//string Util::debug_level = "simple"; + +//database placed in which path +//string Util::db_path = "."; + +//the suffix to be added to database name +//string Util::db_suffix = ".db"; + +//the maxium buffer size assigned to gStore system +//string Util::buffer_maxium = "100"; //the unit is GB + +//the maxium thread num assigned to gStore system +//string Util::thread_maxium = "1000"; + +//if record logs in gStore system(to be recoverable or faster) +//string Util::operation_logs = "true"; + +//================================================================================================================== + +string Util::gserver_port_file = "bin/.gserver_port"; +string Util::gserver_port_swap = "bin/.gserver_port.swap"; +string Util::gserver_log = "logs/gserver.log"; + +//NOTICE:used in Database, Join and Strategy +//int Util::triple_num = 0; +//int Util::pre_num = 0; +//int Util::entity_num = 0; +//int Util::literal_num = 0; + +//string Util::tmp_path = "../.tmp/"; +//string Util::debug_path = "../.debug/"; +string Util::tmp_path = ".tmp/"; +string Util::debug_path = ".debug/"; + +//QUERY: assign all in Util()? +//BETTER:assigned in KVstore, not one tree? 
+FILE* Util::debug_kvstore = NULL; //used by KVstore +FILE* Util::debug_database = NULL; //used by Database +FILE* Util::debug_vstree = NULL; //used by VSTree + +//set hash table +HashFunction Util::hash[] = { Util::simpleHash, Util::APHash, Util::BKDRHash, Util::DJBHash, Util::ELFHash, \ + Util::DEKHash, Util::BPHash, Util::FNVHash, Util::HFLPHash, Util::HFHash, Util::JSHash, \ + Util::PJWHash, Util::RSHash, Util::SDBMHash, Util::StrHash, Util::TianlHash, NULL}; + +//remove spaces in the left +char* +Util::l_trim(char* szOutput, const char* szInput) +{ + assert(szInput != NULL); + assert(szOutput != NULL); + assert(szOutput != szInput); + for (; *szInput != '\0' && isspace(*szInput); ++szInput); + return strcpy(szOutput, szInput); +} + +//remove spaces in the right +char* +Util::r_trim(char *szOutput, const char* szInput) +{ + char *p = NULL; + assert(szInput != NULL); + assert(szOutput != NULL); + assert(szOutput != szInput); + strcpy(szOutput, szInput); + for(p = szOutput + strlen(szOutput) - 1; p >= szOutput && isspace(*p); --p); + *(++p) = '\0'; + return szOutput; +} + +//remove spaces in the two sides +char* +Util::a_trim(char * szOutput, const char * szInput) +{ + char *p = NULL; + assert(szInput != NULL); + assert(szOutput != NULL); + l_trim(szOutput, szInput); + for (p = szOutput + strlen(szOutput) - 1; p >= szOutput && isspace(*p); --p); + *(++p) = '\0'; + return szOutput; +} + +bool +Util::configure() +{ + const unsigned len = 505; + char *buf, *c; + char buf_i[len], buf_o[len]; + FILE *fp = NULL; + char keyname[len]; + char keyval[len]; + + //initialize the settings + Util::global_config["gstore_mode"] = "single"; + //NOTICE+BETTER+TODO:use macro is better to avoid too many judging on this variable(add a DEBUG macro at the outer) + Util::global_config["debug_level"] = "simple"; + Util::global_config["db_home"] = "."; + Util::global_config["db_suffix"] = ".db"; + Util::global_config["buffer_maxium"] = "100"; + Util::global_config["thread_maxium"] = 
"1000"; + //TODO:to be recoverable + Util::global_config["operation_logs"] = "true"; + +#ifdef DEBUG + fprintf(stderr, "profile: %s\n", profile.c_str()); +#endif + if((fp = fopen(profile.c_str(), "r")) == NULL) //NOTICE: this is not a binary file + { +#ifdef DEBUG + fprintf(stderr, "openfile [%s] error [%s]\n", profile.c_str(), strerror(errno)); +#endif + return false; + } + fseek(fp, 0, SEEK_SET); + + while(!feof(fp) && fgets(buf_i, len, fp) != NULL) + { + //fprintf(stderr, "buffer: %s\n", buf_i); + Util::l_trim(buf_o, buf_i); + if(strlen(buf_o) <= 0) + continue; + buf = NULL; + buf = buf_o; + if(buf[0] == '#') + { + continue; + } + else if(buf[0] == '[') + { + continue; + } + if((c = (char*)strchr(buf, '=')) == NULL) + continue; + memset(keyname, 0, sizeof(keyname)); + sscanf(buf, "%[^=|^ |^\t]", keyname); +#ifdef DEBUG + //fprintf(stderr, "keyname: %s\n", keyname); +#endif + sscanf(++c, "%[^\n]", keyval); + char *keyval_o = (char *)calloc(strlen(keyval) + 1, sizeof(char)); + if(keyval_o != NULL) + { + Util::a_trim(keyval_o, keyval); +#ifdef DEBUG + //fprintf(stderr, "keyval: %s\n", keyval_o); +#endif + if(keyval_o && strlen(keyval_o) > 0) + { + //strcpy(keyval, keyval_o); + global_config[string(keyname)] = string(keyval_o); + } + xfree(keyval_o); + } + } + + fclose(fp); + //display all settings here + cout<<"the current settings are as below: "<::iterator it = global_config.begin(); it != global_config.end(); ++it) + { + cout<first<<" : "<second<> 2)); + } + + return (key & 0x7FFFFFFF); +} + +unsigned +Util::PJWHash(const char *_str) +{ + unsigned int bits_in_unsigned_int = (unsigned int)(sizeof(unsigned int) * 8); + unsigned int three_quarters = (unsigned int)((bits_in_unsigned_int * 3) / 4); + unsigned int one_eighth = (unsigned int)(bits_in_unsigned_int / 8); + + unsigned int high_bits = (unsigned int)(0xFFFFFFFF) << (bits_in_unsigned_int - one_eighth); + unsigned int key = 0; + unsigned int test = 0; + + while (*_str) + { + key = (key << one_eighth) + 
(*_str++); + if ((test = key & high_bits) != 0) + { + key = ((key ^ (test >> three_quarters)) & (~high_bits)); + } + } + + return (key & 0x7FFFFFFF); +} + +unsigned +Util::ELFHash(const char *_str) +{ + unsigned int key = 0; + unsigned int x = 0; + + while (*_str) + { + key = (key << 4) + (*_str++); + if ((x = key & 0xF0000000L) != 0) + { + key ^= (x >> 24); + key &= ~x; + } + } + + return (key & 0x7FFFFFFF); +} + +unsigned +Util::SDBMHash(const char *_str) +{ + unsigned int key = 0; + + while (*_str) + { + key = (*_str++) + (key << 6) + (key << 16) - key; + } + + return (key & 0x7FFFFFFF); +} + +unsigned +Util::DJBHash(const char *_str) +{ + unsigned int key = 5381; + while (*_str) { + key += (key << 5) + (*_str++); + } + return (key & 0x7FFFFFFF); +} + +unsigned +Util::APHash(const char *_str) +{ + unsigned int key = 0; + int i; + + for (i=0; *_str; i++) + { + if ((i & 1) == 0) + { + key ^= ((key << 7) ^ (*_str++) ^ (key >> 3)); + } + else + { + key ^= (~((key << 11) ^ (*_str++) ^ (key >> 5))); + } + } + + return (key & 0x7FFFFFFF); +} + +unsigned +Util::DEKHash(const char* _str) +{ + unsigned int hash = strlen(_str); + for(; *_str; _str++) + { + hash = ((hash << 5) ^ (hash >> 27)) ^ (*_str); + } + return hash; +} + +unsigned +Util::BPHash(const char* _str) +{ + unsigned int hash = 0; + for(; *_str; _str++) + { + hash = hash << 7 ^ (*_str); + } + + return hash; +} + +unsigned +Util::FNVHash(const char* _str) +{ + const unsigned int fnv_prime = 0x811C9DC5; + unsigned int hash = 0; + + for(; *_str; _str++) + { + hash *= fnv_prime; + hash ^= (*_str); + } + + return hash; +} + +unsigned +Util::HFLPHash(const char* _str) +{ + unsigned int n = 0; + char* b = (char*)&n; + unsigned int len = strlen(_str); + for(unsigned i = 0; i < len; ++i) + { + b[i%4] ^= _str[i]; + } + return n%len; +} + +unsigned +Util::HFHash(const char* _str) +{ + int result=0; + const char* ptr = _str; + int c; + unsigned int len = strlen(_str); + for(int i=1; (c=*ptr++); i++) + result += c*3*i; + 
if (result<0) + result = -result; + return result%len; +} + +unsigned +Util::StrHash(const char* _str) +{ + register unsigned int h; + register unsigned char *p; + for(h = 0, p = (unsigned char *)_str; *p; p++) + { + h = 31 * h + *p; + } + + return h; + +} + +unsigned +Util::TianlHash(const char* _str) +{ + unsigned long urlHashValue=0; + int ilength=strlen(_str); + int i; + unsigned char ucChar; + if(!ilength) { + return 0; + } + if(ilength<=256) { + urlHashValue=16777216*(ilength-1); + } else { + urlHashValue = 42781900080; + } + if(ilength<=96) { + for(i=1; i<=ilength; i++) { + ucChar = _str[i-1]; + if(ucChar<='Z'&&ucChar>='A') { + ucChar=ucChar+32; + } + urlHashValue+=(3*i*ucChar*ucChar+5*i*ucChar+7*i+11*ucChar)%1677216; + } + } else { + for(i=1; i<=96; i++) + { + ucChar = _str[i+ilength-96-1]; + if(ucChar<='Z'&&ucChar>='A') + { + ucChar=ucChar+32; + } + urlHashValue+=(3*i*ucChar*ucChar+5*i*ucChar+7*i+11*ucChar)%1677216; + } + } + + return urlHashValue; +} + +//NOTICE: the time of log() and sqrt() in C can be seen as constant + +//NOTICE:_b must >= 1 +double +Util::logarithm(double _a, double _b) +{ + //REFRENCE: http://blog.csdn.net/liyuanbhu/article/details/8997850 + //a>0 != 1; b>0 (b>=2 using log/log10/change, 1= 2 + return log(_b) / under; + return -1.0; +} + +void +Util::intersect(unsigned*& _id_list, unsigned& _id_list_len, const unsigned* _list1, unsigned _len1, const unsigned* _list2, unsigned _len2) +{ + vector res; + //cout<<"intersect prevar: "<<_len1<<" "<<_len2<0 m=nk(02 + //k<=k0 binary search; k>k0 intersect + int method = -1; //0: intersect 1: search in list1 2: search in list2 + unsigned n = _len1; + double k = 0; + if(n < _len2) + { + k = (double)n / (double)_len2; + n = _len2; + method = 2; + } + else + { + k = (double)_len2 / (double)n; + method = 1; + } + if(n <= 2) + method = 0; + else + { + double limit = Util::logarithm(n/2, 2); + if(k > limit) + method = 0; + } + + switch(method) + { + case 0: + { //this bracket is needed if vars are 
defined in case + unsigned id_i = 0; + unsigned num = _len1; + for(unsigned i = 0; i < num; ++i) + { + unsigned can_id = _list1[i]; + while((id_i < _len2) && (_list2[id_i] < can_id)) + { + id_i ++; + } + + if(id_i == _len2) + { + break; + } + + if(can_id == _list2[id_i]) + { + res.push_back(can_id); + id_i ++; + } + } + break; + } + case 1: + { + for(unsigned i = 0; i < _len2; ++i) + { + if(Util::bsearch_int_uporder(_list2[i], _list1, _len1) != INVALID) + res.push_back(_list2[i]); + } + break; + } + case 2: + { + unsigned m = _len1, i; + for(i = 0; i < m; ++i) + { + unsigned t = _list1[i]; + if(Util::bsearch_int_uporder(t, _list2, _len2) != INVALID) + res.push_back(t); + } + break; + } + default: + cerr << "no such method in Util::intersect()" << endl; + break; + } + + _id_list_len = res.size(); + + if (_id_list_len == 0) { + _id_list = NULL; + } + else { + _id_list = new unsigned[_id_list_len]; + for (unsigned i = 0; i < _id_list_len; ++i) + _id_list[i] = res[i]; + } + delete[] _list1; + delete[] _list2; +} + +int +Util::compIIpair(int _a1, int _b1, int _a2, int _b2) +{ + if(_a1 == _a2 && _b1 == _b2) + return 0; + else if(_a1 < _a2 || (_a1 == _a2 && _b1 <= _b2)) + return -1; + else + return 1; +} + +bool +Util::isValidPort(string str) +{ + //valid port number: 0 - 65535 + if(str.length() < 1 || str.length() > 5) + { + return false; + } + + unsigned i; + for(i = 0; i < str.length(); i++) + { + if(str[i] < '0' || str[i] > '9') + { + return false; + } + } + + int port = Util::string2int(str); + if(port < 0 || port>65535) + { + return false; + } + + return true; +} + +bool +Util::isValidIP(string str) +{ + if(str == "localhost") + { + return true; + } + return (Util::isValidIPV4(str) || Util::isValidIPV6(str)); +} + +bool +Util::isValidIPV4(string str) +{ + regex_t reg; + char pattern[] = "^(([01]?[0-9][0-9]?|2[0-4][0-9]|25[0-5])\\.){3}([01]?[0-9][0-9]?|2[0-4][0-9]|25[0-5])$"; + regcomp(®, pattern, REG_EXTENDED | REG_NOSUB); + regmatch_t pm[1]; + int status = 
regexec(®, str.c_str(), 1, pm, 0); + regfree(®); + if(status == REG_NOMATCH) + { + return false; + } + return true; +} + +bool +Util::isValidIPV6(string str) +{ + //TO BE IMPLEMENTED + return false; +} + +string +Util::getTimeString() { + static const int max = 20; // max length of time string + char time_str[max]; + time_t timep; + time(&timep); + strftime(time_str, max, "%Y%m%d %H:%M:%S\t", gmtime(&timep)); + return string(time_str); +} + +string +Util::node2string(const char* _raw_str) { + string _output; + unsigned _first_quote = 0; + unsigned _last_quote = 0; + bool _has_quote = false; + for (unsigned i = 0; _raw_str[i] != '\0'; i++) { + if (_raw_str[i] == '\"') { + if (!_has_quote) { + _first_quote = i; + _last_quote = i; + _has_quote = true; + } + else { + _last_quote = i; + } + } + } + if (_first_quote==_last_quote) { + _output += _raw_str; + return _output; + } + for (unsigned i = 0; i <= _first_quote; i++) { + _output += _raw_str[i]; + } + for (unsigned i = _first_quote + 1; i < _last_quote; i++) { + switch (_raw_str[i]) { + case '\n': + _output += "\\n"; + break; + case '\r': + _output += "\\r"; + break; + case '\t': + _output += "\\t"; + break; + case '\"': + _output += "\\\""; + break; + case '\\': + _output += "\\\\"; + break; + default: + _output += _raw_str[i]; + } + } + for (unsigned i = _last_quote; _raw_str[i] != 0; i++) { + _output += _raw_str[i]; + } + return _output; +} + +int +Util::_spo_cmp(const void* _a, const void* _b) +{ + int** _p_a = (int**)_a; + int** _p_b = (int**)_b; + + int _sub_id_a = (*_p_a)[0]; + int _sub_id_b = (*_p_b)[0]; + if (_sub_id_a != _sub_id_b) { + return _sub_id_a - _sub_id_b; + } + + int _pre_id_a = (*_p_a)[1]; + int _pre_id_b = (*_p_b)[1]; + if (_pre_id_a != _pre_id_b) { + return _pre_id_a - _pre_id_b; + } + + int _obj_id_a = (*_p_a)[2]; + int _obj_id_b = (*_p_b)[2]; + if (_obj_id_a != _obj_id_b) { + return _obj_id_a - _obj_id_b; + } + + return 0; +} + +int +Util::_ops_cmp(const void* _a, const void* _b) +{ + int** 
_p_a = (int**)_a; + int** _p_b = (int**)_b; + + int _obj_id_a = (*_p_a)[2]; + int _obj_id_b = (*_p_b)[2]; + if (_obj_id_a != _obj_id_b) { + return _obj_id_a - _obj_id_b; + } + + int _pre_id_a = (*_p_a)[1]; + int _pre_id_b = (*_p_b)[1]; + if (_pre_id_a != _pre_id_b) { + return _pre_id_a - _pre_id_b; + } + + int _sub_id_a = (*_p_a)[0]; + int _sub_id_b = (*_p_b)[0]; + if (_sub_id_a != _sub_id_b) { + return _sub_id_a - _sub_id_b; + } + + return 0; +} + +int +Util::_pso_cmp(const void* _a, const void* _b) +{ + int** _p_a = (int**)_a; + int** _p_b = (int**)_b; + + int _pre_id_a = (*_p_a)[1]; + int _pre_id_b = (*_p_b)[1]; + if (_pre_id_a != _pre_id_b) { + return _pre_id_a - _pre_id_b; + } + + int _sub_id_a = (*_p_a)[0]; + int _sub_id_b = (*_p_b)[0]; + if (_sub_id_a != _sub_id_b) { + return _sub_id_a - _sub_id_b; + } + + int _obj_id_a = (*_p_a)[2]; + int _obj_id_b = (*_p_b)[2]; + if (_obj_id_a != _obj_id_b) { + return _obj_id_a - _obj_id_b; + } + + return 0; +} + +bool +Util::spo_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b) +{ + if(a.subid != b.subid) + { + return a.subid < b.subid; + } + + if(a.preid != b.preid) + { + return a.preid < b.preid; + } + + if(a.objid != b.objid) + { + return a.objid < b.objid; + } + + //all are equal, no need to sort this two + return false; +} + +bool +Util::ops_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b) +{ + if(a.objid != b.objid) + { + return a.objid < b.objid; + } + + if(a.preid != b.preid) + { + return a.preid < b.preid; + } + + if(a.subid != b.subid) + { + return a.subid < b.subid; + } + + //all are equal, no need to sort this two + return false; +} + +bool +Util::pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b) +{ + if(a.preid != b.preid) + { + return a.preid < b.preid; + } + + if(a.subid != b.subid) + { + return a.subid < b.subid; + } + + if(a.objid != b.objid) + { + return a.objid < b.objid; + } + + //all are equal, no need to sort this two + return false; +} + +void +Util::empty_file(const char* _fname) +{ + FILE * 
fp; + //NOTICE: if exist, then overwrite and create a empty file + fp = fopen(_fname, "w"); + if(fp == NULL) + { + printf("do empty file %s failed\n", _fname); + } + else + { + fclose(fp); + } +} + diff --git a/Util/Util.h b/Util/Util.h index 9b9800a..a2b0a56 100644 --- a/Util/Util.h +++ b/Util/Util.h @@ -37,6 +37,7 @@ in the sparql query can point to the same node in data graph) #include #include #include +#include #include #include @@ -86,10 +87,11 @@ in the sparql query can point to the same node in data graph) //#define DEBUG_JOIN //#define DEBUG_STREAM //#define DEBUG_PRECISE 1 all information -//#define DEBUG_KVSTORE 1 //in KVstore +#define DEBUG_KVSTORE 1 //in KVstore //#define DEBUG_VSTREE 1 //in Database //#define DEBUG_LRUCACHE 1 //#define DEBUG_DATABASE 1 //in Database +//#define DEBUG_VLIST 1 // // @@ -123,6 +125,12 @@ in the sparql query can point to the same node in data graph) #endif #endif +#ifdef DEBUG_VLIST +#ifndef DEBUG +#define DEBUG +#endif +#endif + #ifndef DEBUG //#define DEBUG #endif @@ -247,8 +255,11 @@ public: static std::string getTimeString(); static std::string node2string(const char* _raw_str); - static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID); + static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID id); + static bool is_entity_ele(TYPE_ENTITY_LITERAL_ID id); + static unsigned removeDuplicate(unsigned*, unsigned); + static std::string getQueryFromFile(const char* _file_path); static std::string getSystemOutput(std::string cmd); static std::string getExactPath(const char* path); @@ -316,6 +327,10 @@ public: static FILE* debug_database; static FILE* debug_vstree; + static std::string gserver_port_file; + static std::string gserver_port_swap; + static std::string gserver_log; + private: static bool isValidIPV4(std::string); diff --git a/Util/VList.cpp b/Util/VList.cpp new file mode 100644 index 0000000..6407d86 --- /dev/null +++ b/Util/VList.cpp @@ -0,0 +1,348 @@ 
+/*============================================================================= +# Filename: VList.cpp +# Author: Bookug Lobert +# Mail: zengli-bookug@pku.edu.cn +# Last Modified: 2017-03-27 15:47 +# Description: +=============================================================================*/ + +#include "VList.h" + +using namespace std; + +bool +VList::isLongList(unsigned _len) +{ + return _len > VList::LENGTH_BORDER; +} + +VList::VList() +{ //not use ../logs/, notice the location of program + cur_block_num = SET_BLOCK_NUM; + filepath = ""; + freelist = NULL; + max_buffer_size = Util::MAX_BUFFER_SIZE; + freemem = max_buffer_size; +} + +VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size) +{ + cur_block_num = SET_BLOCK_NUM; //initialize + this->filepath = _filepath; + + if (_mode == string("build")) + valfp = fopen(_filepath.c_str(), "w+b"); + else if (_mode == string("open")) + valfp = fopen(_filepath.c_str(), "r+b"); + else + { + cout<max_buffer_size = _buffer_size; + this->freemem = this->max_buffer_size; + this->freelist = new BlockInfo; //null-head + + //TODO: read/write by char is too slow, how about read all and deal , then clear? + // + //BETTER: hwo about assign IDs in a dynamic way? + //limitID freelist + //QUETY: can free id list consume very large memory?? + + unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE + BlockInfo* bp; + if (_mode == "build") + { //write basic information + i = 0; + fwrite(&cur_block_num, sizeof(unsigned), 1, this->valfp); //current block num + //NOTICE: use a 1M block for a unsigned?? not ok! 
+ fseek(this->valfp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + fputc(0, this->valfp); + for (k = 0; k < 8; ++k) + { + bp->next = new BlockInfo(i * 8 + k + 1, NULL); + bp = bp->next; + } + } + } + else //_mode == "open" + { + //read basic information + char c; + fread(&cur_block_num, sizeof(unsigned), 1, this->valfp); + fseek(this->valfp, BLOCK_SIZE, SEEK_SET); + bp = this->freelist; + j = cur_block_num / 8; + for (i = 0; i < j; ++i) + { + c = fgetc(valfp); + for (k = 0; k < 8; ++k) + { + if ((c & (1 << k)) == 0) + { + bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL); + bp = bp->next; + } + } + } + } + + //NOTICE: we do not need to alloc the blocks for free block id list, AllocBlock is only for later blocks +} + +long //8-byte in 64-bit machine +VList::Address(unsigned _blocknum) const //BETTER: inline function +{ + if (_blocknum == 0) + return 0; + else if (_blocknum > cur_block_num) + { + //print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum)); + return -1; //address should be non-negative + } + //NOTICE: here should explictly use long + return (long)(this->SuperNum + _blocknum - 1) * (long)BLOCK_SIZE; +} + +unsigned +VList::Blocknum(long address) const +{ + return (address / BLOCK_SIZE) + 1 - this->SuperNum; +} + +unsigned +VList::AllocBlock() +{ + BlockInfo* p = this->freelist->next; + if (p == NULL) + { + for (unsigned i = 0; i < SET_BLOCK_INC; ++i) + { + cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM + this->FreeBlock(cur_block_num); + } + p = this->freelist->next; + } + unsigned t = p->num; + this->freelist->next = p->next; + delete p; + + return t; +} + +void +VList::FreeBlock(unsigned _blocknum) +{ //QUERY: head-sub and tail-add will be better? 
+ BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next); + this->freelist->next = bp; +} + +//NOTICE: all reads are aligned to 4 bytes(including a string) +//a string may acrossseveral blocks +// +//NOTICE: not use buffer, read/write on need, update at once, so no need to write back at last +//NOTICE: the next is placed at the begin of a block + + +void +VList::ReadAlign(unsigned* _next) +{ + if (ftell(valfp) % BLOCK_SIZE == 0) + { + fseek(valfp, Address(*_next), SEEK_SET); + fread(_next, sizeof(unsigned), 1, valfp); + } +} + +void +VList::WriteAlign(unsigned* _curnum) +{ + if (ftell(valfp) % BLOCK_SIZE == 0) + { + unsigned blocknum = this->AllocBlock(); + fseek(valfp, Address(*_curnum), SEEK_SET); + fwrite(&blocknum, sizeof(unsigned), 1, valfp); + fseek(valfp, Address(blocknum) + 4, SEEK_SET); + *_curnum = blocknum; + } +} + +bool +VList::readValue(unsigned _block_num, char*& _str, unsigned& _len) +{ +#ifdef DEBUG_VLIST + cout<<"to get value of block num: "<<_block_num<readBstr(_str, _len, &next); + + return true; +} + +unsigned +VList::writeValue(const char* _str, unsigned _len) +{ + unsigned blocknum = this->AllocBlock(); + unsigned curnum = blocknum; + + //NOTICE: here we must skip the next position first + fseek(valfp, Address(curnum) + 4, SEEK_SET); + this->writeBstr(_str, _len, &curnum); + +#ifdef DEBUG_VLIST + cout<<"to write value - block num: "<valfp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, valfp); + + while (store != 0) + { + this->FreeBlock(store); + store = next; + fseek(valfp, Address(store), SEEK_SET); + fread(&next, sizeof(unsigned), 1, valfp); + } + + return true; +} + +bool +VList::readBstr(char*& _str, unsigned& _len, unsigned* _next) +{ + //long address; + unsigned len, i, j; + fread(&len, sizeof(unsigned), 1, this->valfp); +#ifdef DEBUG_VLIST + cout<<"the length of value: "<ReadAlign(_next); + + //char* s = (char*)malloc(len); + char* s = new char[len]; + _len = len; + + for (i = 0; i + 4 < len; i += 4) + { + 
fread(s + i, sizeof(char), 4, valfp); + this->ReadAlign(_next); + } + while (i < len) + { + fread(s + i, sizeof(char), 1, valfp); //BETTER + i++; + } + + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(valfp, j, SEEK_CUR); + + //NOTICE+DEBUG: I think no need to align here, later no data to read + //(if need to read, then fseek again to find a new value) + //this->ReadAlign(_next); + + _str = s; + return true; +} + +bool +VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum) +{ + unsigned i, j, len = _len; + fwrite(&len, sizeof(unsigned), 1, valfp); + this->WriteAlign(_curnum); + //cout<<"to write bstr, length: "<WriteAlign(_curnum); + } + while (i < len) + { + fwrite(s + i, sizeof(char), 1, valfp); + i++; + } + + j = len % 4; + if (j > 0) + j = 4 - j; + fseek(valfp, j, SEEK_CUR); + + //NOTICE+DEBUG: I think no need to align here, later no data to write + //(if need to write, then fseek again to write a new value) + //this->WriteAlign(_curnum); + fseek(valfp, Address(*_curnum), SEEK_SET); + unsigned t = 0; + fwrite(&t, sizeof(unsigned), 1, valfp); + + return true; +} + +VList::~VList() +{ + //write the info back + fseek(this->valfp, 0, SEEK_SET); + fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num + fseek(valfp, BLOCK_SIZE, SEEK_SET); + int i, j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE; + for (i = 0; i < j; ++i) + { + //reset to 1 first + fputc(0xff, valfp); + } + char c; + BlockInfo* bp = this->freelist->next; + while (bp != NULL) + { + //if not-use then set 0, aligned to byte! 
+#ifdef DEBUG_KVSTORE + if (bp->num > cur_block_num) + { + printf("blocks num exceed, cur_block_num: %u\n", cur_block_num); + exit(1); + } +#endif + j = bp->num - 1; + i = j / 8; + j = 7 - j % 8; + fseek(valfp, BLOCK_SIZE + i, SEEK_SET); + c = fgetc(valfp); + fseek(valfp, -1, SEEK_CUR); + fputc(c & ~(1 << j), valfp); + bp = bp->next; + } + + bp = this->freelist; + BlockInfo* next; + while (bp != NULL) + { + next = bp->next; + delete bp; + bp = next; + } + fclose(this->valfp); +} + diff --git a/Util/VList.h b/Util/VList.h new file mode 100644 index 0000000..2281391 --- /dev/null +++ b/Util/VList.h @@ -0,0 +1,84 @@ +/*============================================================================= +# Filename: VList.h +# Author: Bookug Lobert +# Mail: zengli-bookug@pku.edu.cn +# Last Modified: 2017-03-27 15:40 +# Description: +=============================================================================*/ + +#ifndef _UTIL_VLIST_H +#define _UTIL_VLIST_H + +#include "Util.h" +#include "Bstr.h" + +//NOTICE: not keep long list in memory, read each time +//but when can you free the long list(kvstore should release it after parsing) +// +//CONSIDER: if to keep long list in memory, should adjust the bstr in memory: +//unsigned: 0 char*: an object (if in memory, if modified, length, content, block num) +//when reading a long list in a node, generate the object first, and the object will tell you whether +//the list is in mmeory or not + +//BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts) + +//STRUCT: a long list real-address is the block ID in file2(only for long value lists, a list across several 1M blocks) +//tree-value Bstr: unsigned=the real address char*=NULL +//in disk: +//file1 is tree file, the long list is represented as: 0 real-address +//NOTICE: long list is not kept in mmeory for cache, it is read/update each time on need! 
+ +//TODO: use fread/fwrite here instead of fgetc/fputc +//including other trees + +class VList +{ +public: + //NOTICE:the border is 10^6, but the block is larger, 1M + static const unsigned LENGTH_BORDER = 1000000; + //static const unsigned LENGTH_BORDER = 1000; + static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block + static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num + //below two constants: must can be exactly divided by 8 + static const unsigned SET_BLOCK_NUM = 1 << 3; //initial blocks num + static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc + static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; + +private: + unsigned long long max_buffer_size; + unsigned cur_block_num; + std::string filepath; + BlockInfo* freelist; + //very long value list are stored in a separate file(with large block) + // + //NOTICE: according to the summary result, 90% value lists are just below 100 bytes + //<10%: 5000000~100M bytes + FILE* valfp; + + //NOTICE: freemem's type is long long here, due to large memory in server. + //However, needmem in handler() and request() is ok to be int/unsigned. + //Because the bstr' size is controlled, so is the node. + unsigned long long freemem; //free memory to use, non-negative + //unsigned long long time; //QUERY(achieving an old-swap startegy?) 
+ long Address(unsigned _blocknum) const; + unsigned Blocknum(long address) const; + unsigned AllocBlock(); + void FreeBlock(unsigned _blocknum); + void ReadAlign(unsigned* _next); + void WriteAlign(unsigned* _next); + bool readBstr(char*& _bp, unsigned& _len, unsigned* _next); + bool writeBstr(const char* _str, unsigned _len, unsigned* _curnum); + +public: + VList(); + VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence + bool readValue(unsigned _block_num, char*& _str, unsigned& _len); + unsigned writeValue(const char* _str, unsigned _len); + bool removeValue(unsigned _block_num); + ~VList(); + + static bool isLongList(unsigned _len); +}; + +#endif + diff --git a/logs/.gitignore b/logs/.gitignore new file mode 100644 index 0000000..397b4a7 --- /dev/null +++ b/logs/.gitignore @@ -0,0 +1 @@ +*.log diff --git a/makefile b/makefile index 0cef5ed..0f34d10 100644 --- a/makefile +++ b/makefile @@ -43,11 +43,11 @@ CC = ccache g++ #NOTICE: -O2 is recommended, while -O3 is dangerous #when developing, not use -O because it will disturb the normal #routine. use it for test and release. 
-#CFLAGS = -c -Wall -g -pthread #-fprofile-arcs -ftest-coverage #-pg -#EXEFLAG = -g -pthread #-fprofile-arcs -ftest-coverage #-pg +CFLAGS = -c -Wall -g -pthread #-fprofile-arcs -ftest-coverage #-pg +EXEFLAG = -g -pthread #-fprofile-arcs -ftest-coverage #-pg #-coverage -CFLAGS = -c -Wall -O2 -pthread -EXEFLAG = -O2 -pthread +#CFLAGS = -c -Wall -O2 -pthread +#EXEFLAG = -O2 -pthread #add -lreadline -ltermcap if using readline or objs contain readline library = -ltermcap -lreadline -L./lib -lantlr -lgcov @@ -70,10 +70,11 @@ api_java = api/java/lib/GstoreJavaAPI.jar #sstreeobj = $(objdir)Tree.o $(objdir)Storage.o $(objdir)Node.o $(objdir)IntlNode.o $(objdir)LeafNode.o $(objdir)Heap.o sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SIIntlNode.o $(objdir)SILeafNode.o $(objdir)SIHeap.o istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o +ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o -kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj) +kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstreeobj) -utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o +utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \ $(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o @@ -217,6 +218,26 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $ $(CC) $(CFLAGS) KVstore/ISTree/heap/ISHeap.cpp -o $(objdir)ISHeap.o #objects in istree/ end +#objects in ivtree/ begin +$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o $(objdir)VList.o + $(CC) $(CFLAGS) 
KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o + +$(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/storage/IVStorage.cpp -o $(objdir)IVStorage.o $(def64IO) + +$(objdir)IVNode.o: KVstore/IVTree/node/IVNode.cpp KVstore/IVTree/node/IVNode.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/node/IVNode.cpp -o $(objdir)IVNode.o + +$(objdir)IVIntlNode.o: KVstore/IVTree/node/IVIntlNode.cpp KVstore/IVTree/node/IVIntlNode.h + $(CC) $(CFLAGS) KVstore/IVTree/node/IVIntlNode.cpp -o $(objdir)IVIntlNode.o + +$(objdir)IVLeafNode.o: KVstore/IVTree/node/IVLeafNode.cpp KVstore/IVTree/node/IVLeafNode.h + $(CC) $(CFLAGS) KVstore/IVTree/node/IVLeafNode.cpp -o $(objdir)IVLeafNode.o + +$(objdir)IVHeap.o: KVstore/IVTree/heap/IVHeap.cpp KVstore/IVTree/heap/IVHeap.h $(objdir)Util.o + $(CC) $(CFLAGS) KVstore/IVTree/heap/IVHeap.cpp -o $(objdir)IVHeap.o +#objects in ivtree/ end + $(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h KVstore/Tree.h $(CC) $(CFLAGS) KVstore/KVstore.cpp $(inc) -o $(objdir)KVstore.o @@ -302,6 +323,9 @@ $(objdir)Triple.o: Util/Triple.cpp Util/Triple.h $(objdir)Util.o $(objdir)BloomFilter.o: Util/BloomFilter.cpp Util/BloomFilter.h $(objdir)Util.o $(CC) $(CFLAGS) Util/BloomFilter.cpp -o $(objdir)BloomFilter.o +$(objdir)VList.o: Util/VList.cpp Util/VList.h + $(CC) $(CFLAGS) Util/VList.cpp -o $(objdir)VList.o + #objects in util/ end diff --git a/test/package.json b/test/package.json new file mode 100644 index 0000000..7ba7f79 --- /dev/null +++ b/test/package.json @@ -0,0 +1,15 @@ +{ + "config": { + "ghooks": { + //"pre-commit": "gulp lint", + "commit-msg": "validate-commit-msg", + //"pre-push": "make test", + //"post-merge": "npm install", + //"post-rewrite": "npm install", + } + } + "scripts": { + "changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0", + "changelog": "conventional-changelog -p angular -i CHANGELOG.md -w", + } +}