refactor: merge type and value branch;

TODO: bug exists with ISTree;

by zengli
This commit is contained in:
bookug 2017-05-17 20:46:02 +08:00
commit f3202ada40
45 changed files with 8608 additions and 4108 deletions

4
.gitignore vendored
View File

@ -91,6 +91,10 @@ tags
*.out
*.bak~
# queries
*.sql
*.sh
# modules
node_modules

View File

@ -743,6 +743,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
#ifdef DEBUG
cout<<"query success_num: "<<success_num<<endl;
#endif
//cout<<"to check: "<<this->kvstore->getEntityByID(0)<<endl;
return success_num;
}
@ -827,6 +828,7 @@ Database::build(const string& _rdf_file)
//sync();
//cout << "sync vstree" << endl;
//TODO: use fopen w+ to remove signature.binary file
//string cmd = "rm -rf " + _entry_file;
//system(cmd.c_str());
//cout << "signature file removed" << endl;
@ -1609,6 +1611,10 @@ Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file)
}
}
//NOTICE: we assume that there is no duplicates in the dataset
//if not, this->triple_num will be not right, and _p_id_tuples will save useless triples
//However, we can not use exist_triple to detect duplicates here, because it is too time-costly
// For id_tuples
//_p_id_tuples[_id_tuples_size] = new TYPE_ENTITY_LITERAL_ID[3];
//_p_id_tuples[_id_tuples_size][0] = _sub_id;

View File

@ -133,7 +133,8 @@ Join::judge(unsigned _smallest, unsigned _biggest)
//BETTER:how to guess the size of can_lists
double size = (_smallest + _biggest) / 2.0;
double ans = Join::PARAM_DENSE * dense - size / Join::PARAM_SIZE;
if (ans > Join::JUDGE_LIMIT)
double limit = 1.0 / (double)Join::JUDGE_LIMIT;
if (ans > limit)
return 0; //multi_join method
else
return 1; //index_join method
@ -984,6 +985,11 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* i
}
}
//TODO: multiple lists intersect, how about sort and intersect from small to big?
//but this needs to generate all lists first; I think sorting by pre2num is better!
//
//TODO: set the entity_literal border in kvstore, and intersect entity part and literal part respectively
//NOTICE: consider two directions according to table1 size and table2 size
//1. -> add ID mapping record for the first linking column, whole(offset, size) zengli
//2. <- join using inverted index for each column, offset and size for each column, hulin

View File

@ -55,7 +55,12 @@ private:
static const unsigned PARAM_SIZE = 1000000;
static const unsigned PARAM_PRE = 10000;
static const unsigned PARAM_DENSE = 1;
static const double JUDGE_LIMIT = 0.5;
static const unsigned JUDGE_LIMIT = 2;
//NOTICE+DEBUG: please use the constexpr declaration below instead of the declaration above (constexpr is supported since C++11)
//http://www.cnblogs.com/wanyuanchun/p/4041080.html
//constexpr static const double JUDGE_LIMIT = 0.5;
static const unsigned LIMIT_CANDIDATE_LIST_SIZE = 1000;
//BETTER?:predefine size to avoid copy cost
TableType current_table;

View File

@ -20,7 +20,7 @@ ISTree::ISTree()
TSM = NULL;
storepath = "";
filename = "";
transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
//transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
this->stream = NULL;
this->request = 0;
}
@ -37,10 +37,10 @@ ISTree::ISTree(string _storepath, string _filename, string _mode, unsigned long
this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
else
this->root = NULL;
this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
//this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
this->stream = NULL;
this->request = 0;
}
@ -51,30 +51,30 @@ ISTree::getFilePath()
return storepath + "/" + filename;
}
void //WARN: not check _str and _len
ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
{
if (_index > 2)
return;
/*
if(_str == NULL || _len == 0)
{
printf("error in CopyToTransfer: empty string\n");
return;
}
*/
//unsigned length = _bstr->getLen();
unsigned length = _len;
if (length + 1 > this->transfer_size[_index])
{
transfer[_index].release();
transfer[_index].setStr((char*)malloc(length + 1));
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
}
memcpy(this->transfer[_index].getStr(), _str, length);
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
this->transfer[_index].setLen(length);
}
//void //WARN: not check _str and _len
//ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
//{
//if (_index > 2)
//return;
//[>
//if(_str == NULL || _len == 0)
//{
//printf("error in CopyToTransfer: empty string\n");
//return;
//}
//*/
////unsigned length = _bstr->getLen();
//unsigned length = _len;
//if (length + 1 > this->transfer_size[_index])
//{
//transfer[_index].release();
//transfer[_index].setStr((char*)malloc(length + 1));
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
//}
//memcpy(this->transfer[_index].getStr(), _str, length);
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
//this->transfer[_index].setLen(length);
//}
unsigned
ISTree::getHeight() const
@ -116,21 +116,25 @@ ISTree::search(unsigned _key, char*& _str, unsigned& _len)
this->request = 0;
int store;
ISNode* ret = this->find(_key, &store, false);
//cout<<"to find the position: "<<store<<endl;
if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
{
return false;
}
const Bstr* val = ret->getValue(store);
this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
_str = this->transfer[0].getStr();
_len = this->transfer[0].getLen();
//this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
//_str = this->transfer[0].getStr();
//_len = this->transfer[0].getLen();
_str = val->getStr();
_len = val->getLen();
this->TSM->request(request);
return true;
}
bool
ISTree::insert(unsigned _key, const char* _str, unsigned _len)
ISTree::insert(unsigned _key, char* _str, unsigned _len)
{
//if (_key < 0)
//{
@ -138,8 +142,8 @@ ISTree::insert(unsigned _key, const char* _str, unsigned _len)
//return false;
//}
this->CopyToTransfer(_str, _len, 2);
const Bstr* val = &(this->transfer[2]);
//this->CopyToTransfer(_str, _len, 2);
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
ISNode* ret;
if (this->root == NULL) //tree is empty
@ -223,20 +227,24 @@ ISTree::insert(unsigned _key, const char* _str, unsigned _len)
else
{
p->addKey(_key, i);
p->addValue(val, i, true);
p->addValue(_str, _len, i, true);
p->addNum();
request += val->getLen();
request += _len;
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
//_key->clear();
//_value->clear();
}
this->TSM->request(request);
//if(_key == 0)
//{
//cout<<"the 0th element is: "<<_str[0]<<endl;
//}
return !ifexist; //QUERY(which case:return false)
}
bool
ISTree::modify(unsigned _key, const char* _str, unsigned _len)
ISTree::modify(unsigned _key, char* _str, unsigned _len)
{
//if (_key < 0)
//{
@ -244,8 +252,8 @@ ISTree::modify(unsigned _key, const char* _str, unsigned _len)
//return false;
//}
this->CopyToTransfer(_str, _len, 2); //not check value
const Bstr* val = &(this->transfer[2]);
//this->CopyToTransfer(_str, _len, 2); //not check value
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
int store;
ISNode* ret = this->find(_key, &store, true);
@ -256,16 +264,17 @@ ISTree::modify(unsigned _key, const char* _str, unsigned _len)
}
//cout<<"ISTree::modify() - key is found, now to remove"<<endl;
unsigned len = ret->getValue(store)->getLen();
ret->setValue(val, store, true);
ret->setValue(_str, _len, store, true);
//cout<<"value reset"<<endl;
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
//request += (val->getLen() - len);
this->request = val->getLen();
this->request = _len;
this->request -= len;
ret->setDirty();
//cout<<"to request"<<endl;
this->TSM->request(request);
//cout<<"memory requested"<<endl;
return true;
}
@ -301,6 +310,7 @@ ISTree::find(unsigned _key, int* _store, bool ifmodify)
*_store = -1; //Not Found
else
*_store = i;
return p;
}

View File

@ -3,7 +3,7 @@
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:44
# Description: struct and interface of the B+ tree
# Description: ID2string, including id2entity, id2literal and id2predicate
=============================================================================*/
#ifndef _KVSTORE_ISTREE_ISTREE_H
@ -36,8 +36,8 @@ protected:
//so lock is a must. Add lock to transfer is better than to add
//lock to every key/value. However, modify requires a lock for a
//key/value, and multiple search for different keys are ok!!!
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
unsigned transfer_size[3];
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
//unsigned transfer_size[3];
//tree's operations should be atom(if read nodes)
//sum the request and send to ISStorage at last
@ -49,7 +49,7 @@ protected:
std::string filename; //ok for user to change
/* some private functions */
std::string getFilePath(); //in UNIX system
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
void release(ISNode* _np) const;
public:
@ -61,8 +61,8 @@ public:
//void setRoot(Node* _root);
//insert, search, remove, set
bool search(unsigned _key, char*& _str, unsigned& _len);
bool insert(unsigned _key, const char* _str, unsigned _len);
bool modify(unsigned _key, const char* _str, unsigned _len);
bool insert(unsigned _key, char* _str, unsigned _len);
bool modify(unsigned _key, char* _str, unsigned _len);
ISNode* find(unsigned _key, int* store, bool ifmodify);
bool remove(unsigned _key);
const Bstr* getRangeValue();

View File

@ -125,6 +125,42 @@ ISLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
return true;
}
//Replace the value stored at position _index by the buffer (_str, _len).
//The previous value at that position is released first.
//The pointer itself is stored (no copy is made here); ifcopy is not consulted.
//return: false if _index is outside [0, num), true otherwise
bool
ISLeafNode::setValue(char* _str, unsigned _len, int _index, bool ifcopy)
{
	int total = this->getNum();
	if (!(_index >= 0 && _index < total))
	{
		print(string("error in setValue: Invalid index ") + Util::int2string(_index));
		return false;
	}

	//NOTICE: only used in modify -- drop the old value before taking the new one
	this->values[_index].release();
	this->values[_index].setStr(_str);
	this->values[_index].setLen(_len);
	return true;
}
//Insert the buffer (_str, _len) as a new value at position _index.
//Values from _index onwards are moved one slot towards the end to make room.
//The pointer itself is stored (no copy is made here); ifcopy is not consulted.
//The number of keys/values is not changed here.
//return: false if _index is outside [0, num], true otherwise
bool
ISLeafNode::addValue(char* _str, unsigned _len, int _index, bool ifcopy)
{
	int total = this->getNum();
	bool inRange = (0 <= _index) && (_index <= total);
	if (!inRange)
	{
		print(string("error in addValue: Invalid index ") + Util::int2string(_index));
		return false;
	}

	//open a gap at _index: shift the last value first so nothing is overwritten
	for (int pos = total; pos > _index; --pos)
		this->values[pos] = this->values[pos - 1];

	this->values[_index].setStr(_str);
	this->values[_index].setLen(_len);
	return true;
}
bool
ISLeafNode::subValue(int _index, bool ifdel)
{

View File

@ -27,6 +27,7 @@ public:
void Normal();
ISNode* getPrev() const;
ISNode* getNext() const;
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index, bool ifcopy = false);
bool addValue(const Bstr* _value, int _index, bool ifcopy = false);
@ -34,6 +35,10 @@ public:
void setPrev(ISNode* _prev);
void setNext(ISNode* _next);
unsigned getSize() const;
bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
ISNode* split(ISNode* _father, int _index);
ISNode* coalesce(ISNode* _father, int _index);
void release();

View File

@ -80,12 +80,18 @@ public:
virtual bool subChild(int _index) { return true; };
virtual ISNode* getPrev() const { return NULL; };
virtual ISNode* getNext() const { return NULL; };
virtual const Bstr* getValue(int _index) const { return NULL; };
virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
virtual bool subValue(int _index, bool ifdel = false) { return true; };
virtual void setPrev(ISNode* _prev) {};
virtual void setNext(ISNode* _next) {};
//Default (non-leaf) implementation: does nothing and reports success.
//NOTICE: the first parameter must be char* (not const char*), exactly as in
//ISLeafNode::setValue(char*, unsigned, int, bool). With const char* here the leaf version only
//hides this function instead of overriding it, and a call through an ISNode*
//(as done in ISTree::modify) ends in this no-op, so the value is never replaced.
virtual bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
//Default (non-leaf) implementation: does nothing and reports success.
//NOTICE: the first parameter must be char* (not const char*), exactly as in
//ISLeafNode::addValue(char*, unsigned, int, bool). With const char* here the leaf version only
//hides this function instead of overriding it, and a call through an ISNode*
//(as done in ISTree::insert) ends in this no-op, so the value is never stored.
virtual bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
//pure virtual function
virtual void Virtual() = 0;
virtual void Normal() = 0;
virtual unsigned getSize() const = 0; //return all memory owned

View File

@ -399,7 +399,13 @@ ISStorage::writeNode(ISNode* _np)
{
//to write all values
for (i = 0; i < num; ++i)
{
this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock);
if(_np->getKey(0) == 0)
{
cout<<"the 0th value: "<<_np->getValue(i)->getStr()[0]<<endl;
}
}
}
fseek(treefp, Address(blocknum), SEEK_SET);
if (SpecialBlock)
@ -422,7 +428,8 @@ ISStorage::readBstr(Bstr* _bp, unsigned* _next)
fread(&len, sizeof(unsigned), 1, this->treefp);
this->ReadAlign(_next);
//this->request(len);
char* s = (char*)malloc(len);
//char* s = (char*)malloc(len);
char* s = new char[len];
_bp->setLen(len);
for (i = 0; i + 4 < len; i += 4)
{

702
KVstore/IVTree/IVTree.cpp Normal file
View File

@ -0,0 +1,702 @@
/*=============================================================================
# Filename: IVTree.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:45
# Description: achieve functions in IVTree.h
=============================================================================*/
#include "IVTree.h"
using namespace std;
//Default constructor: an empty tree that is not bound to any file.
//No storage manager and no value list are created.
IVTree::IVTree()
{
	//tree shape
	this->height = 0;
	this->root = NULL;
	this->leaves_head = NULL;
	this->leaves_tail = NULL;

	//file binding
	this->mode = "";
	this->storepath = "";
	this->filename = "";

	//helpers
	this->TSM = NULL;
	this->value_list = NULL;
	this->stream = NULL;
	this->request = 0;
}
//Create a tree bound to the file <_storepath>/<_filename>.
//_mode: passed on to VList and IVStorage; when it is "open" the tree is pre-read from disk
//_buffer_size: passed on to IVStorage
//A VList is created on "<tree file>_vlist" and handed to the storage manager.
IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long long _buffer_size)
{
	storepath = _storepath;
	filename = _filename;
	this->height = 0;
	this->mode = string(_mode);

	//NOTICE: give all node pointers a defined value before anything can read them.
	//Previously leaves_head/leaves_tail were never set here and root was only set when the
	//mode is not "open", so e.g. print("leaves") on a tree without insertions walked garbage.
	this->root = NULL;
	this->leaves_head = NULL;
	this->leaves_tail = NULL;

	string filepath = this->getFilePath();
	string vlist_file = filepath + "_vlist";
	this->value_list = new VList(vlist_file, this->mode, 1<<30);
	TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list);

	if (this->mode == "open")
		this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);

	this->stream = NULL;
	this->request = 0;
}
//Path of the tree file: storepath and filename joined by '/'.
string
IVTree::getFilePath()
{
	string path = this->storepath;
	path += "/";
	path += this->filename;
	return path;
}
//void //WARN: not check _str and _len
//IVTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
//{
//if (_index > 2)
//return;
//[>
//if(_str == NULL || _len == 0)
//{
//printf("error in CopyToTransfer: empty string\n");
//return;
//}
//*/
////unsigned length = _bstr->getLen();
//unsigned length = _len;
//if (length + 1 > this->transfer_size[_index])
//{
//transfer[_index].release();
//transfer[_index].setStr((char*)malloc(length + 1));
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
//}
//memcpy(this->transfer[_index].getStr(), _str, length);
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
//this->transfer[_index].setLen(length);
//}
//Current height of the tree.
unsigned
IVTree::getHeight() const
{
	unsigned h = this->height;
	return h;
}
void
IVTree::setHeight(unsigned _h)
{
this->height = _h;
}
//Root node of the tree (NULL when the tree is empty).
IVNode*
IVTree::getRoot() const
{
	IVNode* top = this->root;
	return top;
}
//Make sure the node _np is loaded: if it is not in memory, ask the storage manager to read it.
//readNode receives the address of this->request (the original comment says it deals with request).
void
IVTree::prepare(IVNode* _np)
{
	if (_np->inMem())
		return;

	this->TSM->readNode(_np, &(this->request)); //readNode deal with request
}
bool
IVTree::search(unsigned _key, char*& _str, unsigned& _len)
{
//if (_key < 0)
//{
//printf("error in IVTree-search: empty string\n");
//return false;
//}
this->request = 0;
int store;
IVNode* ret = this->find(_key, &store, false);
if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
{
return false;
}
ret->getValue(this->value_list, store, _str, _len);
//const Bstr* val = ret->getValue(store);
//this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
//_str = this->transfer[0].getStr();
//_len = this->transfer[0].getLen();
this->TSM->request(request);
return true;
}
//Insert the pair (_key, value) into the tree, splitting every full node met on the way down.
//_key: the ID used as key
//_str/_len: the value bytes and their length, handed to the leaf together with this->value_list
//return: true if the key was added, false if the key already existed (no value is stored then,
//although nodes may still have been split on the way down)
//NOTE(review): when false is returned _str is not stored anywhere in this function --
//presumably the caller still owns it in that case; confirm against the callers
bool
IVTree::insert(unsigned _key, char* _str, unsigned _len)
{
//if (_key < 0)
//{
//printf("error in IVTree-insert: empty string\n");
//return false;
//}
//this->CopyToTransfer(_str, _len, 2);
//const Bstr* val = &(this->transfer[2]);
//request sums up the memory change of this operation and is sent to TSM once at the end
this->request = 0;
IVNode* ret;
if (this->root == NULL) //tree is empty
{
//the first leaf is root, head and tail of the leaf list at the same time
leaves_tail = leaves_head = root = new IVLeafNode;
request += IVNode::LEAF_SIZE;
this->height = 1;
root->setHeight(1); //add to heap later
}
//this->prepare(this->root); //root must be in-mem
//a full root is split first: a new internal node becomes the root and the tree grows by one level
if (root->getNum() == IVNode::MAX_KEY_NUM)
{
IVNode* father = new IVIntlNode;
request += IVNode::INTL_SIZE;
father->addChild(root, 0);
ret = root->split(father, 0);
//ret is the node produced by the split; if it is the last leaf it becomes the new tail
if (ret->isLeaf() && ret->getNext() == NULL)
this->leaves_tail = ret;
if (ret->isLeaf())
request += IVNode::LEAF_SIZE;
else
request += IVNode::INTL_SIZE;
this->height++; //height rises only when root splits
//WARN: height area in Node: 4 bit!
father->setHeight(this->height); //add to heap later
this->TSM->updateHeap(ret, ret->getRank(), false);
this->root = father;
}
//walk down from the root to the leaf that should hold _key; p is never full at this point
IVNode* p = this->root;
IVNode* q;
int i;
while (!p->isLeaf())
{
//j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
//NOTICE: using binary search is better here
i = p->searchKey_less(_key);
q = p->getChild(i);
this->prepare(q);
//split a full child before stepping into it, so that a later split below always finds room in its parent
if (q->getNum() == IVNode::MAX_KEY_NUM)
{
ret = q->split(p, i);
if (ret->isLeaf() && ret->getNext() == NULL)
this->leaves_tail = ret;
if (ret->isLeaf())
request += IVNode::LEAF_SIZE;
else
request += IVNode::INTL_SIZE;
//BETTER: in loop may update multiple times
this->TSM->updateHeap(ret, ret->getRank(), false);
this->TSM->updateHeap(q, q->getRank(), true);
this->TSM->updateHeap(p, p->getRank(), true);
//after the split, key i of p decides which of the two halves to continue in
if (_key < p->getKey(i))
p = q;
else
p = ret;
}
else
{
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
p = q;
}
}
//j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
//p is now a leaf; i is the position where _key would be inserted
i = p->searchKey_less(_key);
//insert existing key is ok, but not inserted in
//however, the tree-shape may change due to possible split in former code
bool ifexist = false;
//the key just before the insert position is compared to detect a duplicate
if (i > 0 && _key == p->getKey(i - 1))
ifexist = true;
else
{
p->addKey(_key, i);
p->addValue(this->value_list, i, _str, _len, true);
p->addNum();
//NOTICE: is this is a vlist, then it will be freed, and should not be included in the request memory
if(!VList::isLongList(_len))
{
request += _len;
}
//request += val->getLen();
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
//_key->clear();
//_value->clear();
}
this->TSM->request(request);
return !ifexist; //QUERY(which case:return false)
}
bool
IVTree::modify(unsigned _key, char* _str, unsigned _len)
{
//if (_key < 0)
//{
//printf("error in IVTree-modify: empty string\n");
//return false;
//}
//this->CopyToTransfer(_str, _len, 2); //not check value
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
int store;
IVNode* ret = this->find(_key, &store, true);
if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
{
cerr << "tree is empty or not found" << endl;
return false;
}
//cout<<"IVTree::modify() - key is found, now to remove"<<endl;
//NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr
unsigned len = ret->getValue(store)->getLen();
if(ret->getValue(store)->isBstrLongList())
{
len = 0;
}
ret->setValue(this->value_list, store, _str, _len, true);
//ret->setValue(val, store, true);
//cout<<"value reset"<<endl;
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
//request += (val->getLen() - len);
if(!VList::isLongList(_len))
{
this->request += _len;
}
//this->request = val->getLen();
this->request -= len;
ret->setDirty();
//cout<<"to request"<<endl;
this->TSM->request(request);
//cout<<"memory requested"<<endl;
return true;
}
//Descend from the root to the leaf responsible for _key (used by search, modify and range-query).
//_store: receives the position given by searchKey_lessEqual in that leaf, or -1 when that
//position equals the number of keys in the leaf (not found)
//ifmodify: when true, every internal node passed on the way down is marked dirty
//return: the leaf reached, or NULL if the tree is empty (then *_store is not written)
//NOTICE: not const, because prepare() may load nodes and change this->request
IVNode*
IVTree::find(unsigned _key, int* _store, bool ifmodify)
{
	if (this->root == NULL)
		return NULL; //IVTree Is Empty

	IVNode* cur = this->root;
	while (!cur->isLeaf())
	{
		if (ifmodify)
			cur->setDirty();
		int child = cur->searchKey_less(_key);
		cur = cur->getChild(child);
		this->prepare(cur);
	}

	int total = cur->getNum();
	int pos = cur->searchKey_lessEqual(_key);
	*_store = (pos == total) ? -1 : pos;
	return cur;
}
/*
Node*
IVTree::find(unsigned _len, const char* _str, int* store) const
{
}
*/
//Remove _key and its value from the tree.
//On the way down, every child that holds fewer than MIN_CHILD_NUM keys is first fixed with
//coalesce() before the descent continues.
//return: true if the key was found and removed, false if the tree is empty or the key is absent
bool
IVTree::remove(unsigned _key)
{
//if (_key < 0)
//{
//printf("error in IVTree-remove: empty string\n");
//return false;
//}
//request sums up the memory change of this operation and is sent to TSM once at the end
this->request = 0;
IVNode* ret;
if (this->root == NULL) //tree is empty
return false;
IVNode* p = this->root;
IVNode* q;
int i, j;
while (!p->isLeaf())
{
j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(_key);
q = p->getChild(i);
this->prepare(q);
if (q->getNum() < IVNode::MIN_CHILD_NUM) //==MIN_KEY_NUM
{
//load the left and right siblings of q (when they exist) before coalesce() is called
if (i > 0)
this->prepare(p->getChild(i - 1));
if (i < j)
this->prepare(p->getChild(i + 1));
ret = q->coalesce(p, i);
//a non-NULL ret is a node that is no longer needed; it is handed to the heap with rank 0
if (ret != NULL)
this->TSM->updateHeap(ret, 0, true);//non-sense node
this->TSM->updateHeap(q, q->getRank(), true);
//keep head/tail of the leaf list correct in case an end leaf went away
if (q->isLeaf())
{
if (q->getPrev() == NULL)
this->leaves_head = q;
if (q->getNext() == NULL)
this->leaves_tail = q;
}
if (p->getNum() == 0) //root shrinks
{
//this->leaves_head = q;
this->root = q;
this->TSM->updateHeap(p, 0, true); //instead of delete p
this->height--;
}
}
else
p->setDirty();
//NOTE(review): this also runs for a p that was just given rank 0 in the root-shrinks case above -- confirm that is intended
this->TSM->updateHeap(p, p->getRank(), true);
p = q;
}
bool flag = false;
//j = p->getNum(); //LeafNode(maybe root)
//for(i = 0; i < j; ++i)
// if(bstr == *(p->getKey(i)))
// {
// request -= p->getKey(i)->getLen();
// request -= p->getValue(i)->getLen();
// p->subKey(i, true); //to release
// p->subValue(i, true); //to release
// p->subNum();
// if(p->getNum() == 0) //root leaf 0 key
// {
// this->root = NULL;
// this->leaves_head = NULL;
// this->leaves_tail = NULL;
// this->height = 0;
// this->TSM->updateHeap(p, 0, true); //instead of delete p
// }
// p->setDirty();
// flag = true;
// break;
// }
//p is now a leaf; a result equal to getNum() is treated as "key not present"
i = p->searchKey_equal(_key);
//WARN+NOTICE:here must check, because the key to remove maybe not exist
if (i != (int)p->getNum())
{
//only a value that is not a long list is subtracted from the memory request
if(!p->getValue(i)->isBstrLongList())
{
request -= p->getValue(i)->getLen();
}
p->subKey(i); //to release
p->subValue(this->value_list, i, true); //to release
p->subNum();
if (p->getNum() == 0) //root leaf 0 key
{
//the last key is gone: the tree becomes empty again
this->root = NULL;
this->leaves_head = NULL;
this->leaves_tail = NULL;
this->height = 0;
this->TSM->updateHeap(p, 0, true); //instead of delete p
}
p->setDirty();
flag = true;
}
this->TSM->request(request);
return flag; //i == j, not found
}
//Read the next record of the current range-query result.
//return: NULL (with a message on stderr) if there is no result stream or it is exhausted,
//otherwise the record returned by the stream
//NOTICE: this is one record, and the caller must not free the memory
//NOTICE: Bstr[] with only one element, used as Bstr*
const Bstr*
IVTree::getRangeValue()
{
	if (this->stream != NULL)
	{
		if (!this->stream->isEnd())
			return this->stream->read();

		fprintf(stderr, "IVTree::getRangeValue(): read till end now!\n");
		return NULL;
	}

	fprintf(stderr, "IVTree::getRangeValue(): no results now!\n");
	return NULL;
}
//Call setEnd() on the result stream of the last range query.
//Writes a message to stderr and does nothing else when there is no stream.
void
IVTree::resetStream()
{
	if (this->stream != NULL)
	{
		this->stream->setEnd();
		return;
	}

	fprintf(stderr, "no results now!\n");
}
//Collect the values of all keys x with _key1 <= x < _key2 into this->stream;
//the records are read afterwards with getRangeValue().
//return: false if no start/end position can be found, true once the stream is filled
//TODO: change to using value list, getValue() maybe not get real long list
//NOTE(review): both keys are unsigned, so the tests "_key1 >= 0" and "_key2 >= 0" below are
//always true and the two else branches (the one-edge cases described in the old comments) can
//never run. What a caller should pass for "no bound" cannot be told from this function.
//NOTE(review): the two walks over the leaf list stop only when p == p2; if the leaf of _key1
//lies after the leaf of _key2, getNext() would eventually be used on the end of the list --
//confirm that callers always pass _key1 <= _key2.
bool //special case: not exist, one-edge-case
IVTree::range_query(unsigned _key1, unsigned _key2)
{ //the range is: *_key1 <= x < *_key2
//if(_key1 <0 && _key2 <0)
//return false;
//ok to search one-edge, requiring only one be negative
//find and write value
//(p1, store1) is the first position of the range, (p2, store2) is one past the last position
int store1, store2;
IVNode *p1, *p2;
if (_key1 >= 0)
{
request = 0;
p1 = this->find(_key1, &store1, false);
if (p1 == NULL || store1 == -1)
return false; //no element
this->TSM->request(request);
}
else
{
p1 = this->leaves_head;
store1 = 0;
}
if (_key2 >= 0)
{ //QUERY: another strategy is to getnext and compare every time to tell end
request = 0;
p2 = this->find(_key2, &store2, false);
if (p2 == NULL)
return false;
else if (store2 == -1)
//no key >= _key2 in this leaf: the whole leaf belongs to the range
store2 = p2->getNum();
else if (store2 == 0)
{
//the range ends before the first key of this leaf: finish at the end of the previous leaf
p2 = p2->getPrev();
if (p2 == NULL)
return false; //no element
store2 = p2->getNum();
}
this->TSM->request(request);
}
else
{
p2 = this->leaves_tail;
store2 = p2->getNum();
}
IVNode* p = p1;
unsigned i, l, r;
//get the num of answers first, not need to prepare the node
unsigned ansNum = 0;
while (true)
{
//request = 0;
//this->prepare(p);
//[l, r) is the part of leaf p that lies inside the range
if (p == p1)
l = store1;
else
l = 0;
if (p == p2)
r = store2;
else
r = p->getNum();
ansNum += (r - l);
//this->TSM->request(request);
if (p != p2)
p = p->getNext();
else
break;
}
//drop the result of a previous range query, if any
if (this->stream != NULL)
{
delete this->stream;
this->stream = NULL;
}
vector<unsigned> keys;
vector<bool> desc;
this->stream = new Stream(keys, desc, ansNum, 1, false);
//second pass: load each leaf and write its values in [l, r) to the stream
p = p1;
while (1)
{
request = 0;
this->prepare(p);
if (p == p1)
l = store1;
else
l = 0;
if (p == p2)
r = store2;
else
r = p->getNum();
for (i = l; i < r; ++i)
{
//NOTICE:Bstr* in an array, used as Bstr[]
//DEBUG+TODO: if long list?? clean
this->stream->write(p->getValue(i));
}
this->TSM->request(request);
if (p != p2)
p = p->getNext();
else
break;
}
this->stream->setEnd();
return true;
}
//Save the whole tree to disk through the storage manager.
//return: true if writeTree() reports success, false otherwise
bool
IVTree::save()
{
#ifdef DEBUG_KVSTORE
	printf("now to save tree!\n");
#endif
	if (!TSM->writeTree(this->root))
		return false;
	return true;
}
//Recursively delete the subtree rooted at _np (a NULL pointer is ignored).
//For an internal node the children at positions getNum() down to 0 are released first,
//then the node itself is deleted.
void
IVTree::release(IVNode* _np) const
{
	if (_np == NULL)
		return;

	if (!_np->isLeaf())
	{
		int idx = _np->getNum();
		while (idx >= 0)
		{
			release(_np->getChild(idx));
			--idx;
		}
	}
	delete _np;
}
//Destructor: frees the value list, the result stream, the storage manager and finally all nodes.
//NOTE(review): value_list is deleted before TSM although TSM was constructed with a pointer to
//it; this is only safe if the IVStorage destructor and the node destructors do not touch the
//value list -- confirm, or delete value_list last.
IVTree::~IVTree()
{
delete this->value_list;
delete this->stream; //maybe NULL
delete TSM;
#ifdef DEBUG_KVSTORE
printf("already empty the buffer, now to delete all nodes in tree!\n");
#endif
//recursively delete each Node
release(root);
}
//Debug output of the tree to Util::debug_kvstore; compiled to an empty function unless
//DEBUG_KVSTORE is defined.
//s selects what is printed after the header (time, class name, the message s, height):
//  "tree"/"TREE": every node of the tree ("TREE" prints full node information)
//  "leaves"/"LEAVES": every leaf, following the leaf list ("LEAVES" prints full node information)
//  "check tree": reserved, not implemented yet
//  anything else: only the header
void
IVTree::print(string s)
{
#ifdef DEBUG_KVSTORE
fputs(Util::showtime().c_str(), Util::debug_kvstore);
fputs("Class IVTree\n", Util::debug_kvstore);
fputs("Message: ", Util::debug_kvstore);
fputs(s.c_str(), Util::debug_kvstore);
fputs("\n", Util::debug_kvstore);
//NOTE(review): height is unsigned but is printed with %d
fprintf(Util::debug_kvstore, "Height: %d\n", this->height);
if (s == "tree" || s == "TREE")
{
if (this->root == NULL)
{
fputs("Null IVTree\n", Util::debug_kvstore);
return;
}
//explicit stack for a depth-first walk: ns[k] is the node at depth k of the current path,
//ni[k] is the index of the next child of ns[k] to visit (counting down from getNum() to 0)
IVNode** ns = new IVNode*[this->height];
int* ni = new int[this->height];
IVNode* np;
int i, pos = 0;
ns[pos] = this->root;
ni[pos] = this->root->getNum();
pos++;
while (pos > 0)
{
np = ns[pos - 1];
i = ni[pos - 1];
this->prepare(np);
//a node is printed when it is a leaf or when all of its children have been visited
if (np->isLeaf() || i < 0) //LeafNode or ready IntlNode
{ //child-num ranges: 0~num
if (s == "tree")
np->print("node");
else
np->print("NODE"); //print full node-information
pos--;
continue;
}
else
{
//push child i and remember that the next child of np to visit is i - 1
ns[pos] = np->getChild(i);
ni[pos - 1]--;
ni[pos] = ns[pos]->getNum();
pos++;
}
}
delete[] ns;
delete[] ni;
}
else if (s == "LEAVES" || s == "leaves")
{
IVNode* np;
for (np = this->leaves_head; np != NULL; np = np->getNext())
{
this->prepare(np);
if (s == "leaves")
np->print("node");
else
np->print("NODE");
}
}
else if (s == "check tree")
{
//check the tree, if satisfy B+ definition
//TODO
}
else;
#endif
}

98
KVstore/IVTree/IVTree.h Normal file
View File

@ -0,0 +1,98 @@
/*=============================================================================
# Filename: IVTree.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:44
# Description: ID2valueList, including s2po, p2so and o2ps
=============================================================================*/
#ifndef _KVSTORE_IVTREE_IVTREE_H
#define _KVSTORE_IVTREE_IVTREE_H
#include "../../Util/Util.h"
#include "../../Util/Stream.h"
#include "../../Util/VList.h"
#include "node/IVNode.h"
#include "node/IVIntlNode.h"
#include "node/IVLeafNode.h"
#include "storage/IVStorage.h"
//TODO: for long list, do not read in time, just on need
//the memory is kept with the node, updat ewith node
//NOTICE: to release the node, maybe the value list is NULL
//value bstr: unsigned=address, NULL
//BETTER?: build a new block store for long list??
//NOTICE: we do not need to use transfer bstr here, neithor for two directions
//when insert/query, we do not release the value in kvstore
//Tree that maps an unsigned ID to a value list (see the file header: s2po, p2so and o2ps).
//Nodes are loaded through an IVStorage (TSM); the tree also owns a VList for long values
//(see value_list below).
class IVTree
{
protected:
unsigned height; //0 indicates an empty tree
IVNode* root; //NULL when the tree is empty
IVNode* leaves_head; //the head of LeafNode-list
IVNode* leaves_tail; //the tail of LeafNode-list
std::string mode; //BETTER(to use enum)
IVStorage* TSM; //Tree-Storage-Manage
//result stream of the last range_query(), read with getRangeValue()
//BETTER:multiple stream maybe needed:)
Stream* stream;
//NOTICE: the transfer buffers described below are no longer used by this class (their
//declarations are commented out); the old notes are kept for reference:
//always alloc one more byte than length, then user can add a '\0'
//to get a real string, instead of new and copy
//other operations will be harmful to search, so store value in
//transfer temporarily, while length adjusted.
//TODO: in multi-user case, multiple-search will cause problem,
//so lock is a must. Add lock to transfer is better than to add
//lock to every key/value. However, modify requires a lock for a
//key/value, and multiple search for different keys are ok!!!
//Bstr transfer;
//unsigned transfer_size;
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
//unsigned transfer_size[3];
//tree's operations should be atomic (if read nodes)
//sum the request and send to IVStorage at last
//ensure that all nodes operated are in memory
long long request; //memory change accumulated during one operation (may be negative)
void prepare(IVNode* _np); //read the node through TSM if it is not in memory
std::string storepath;
std::string filename; //ok for user to change
/* some private functions */
std::string getFilePath(); //in UNIX system: storepath + "/" + filename
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
//void CopyToTransfer(const char* _str, unsigned _len);
void release(IVNode* _np) const; //recursively delete the subtree rooted at _np
//very long value list are stored in a separate file(with large block)
//
//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
//<10%: 5000000~100M bytes
VList* value_list;
public:
IVTree(); //empty tree, not bound to any file
IVTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
unsigned getHeight() const;
void setHeight(unsigned _h);
IVNode* getRoot() const;
//void setRoot(Node* _root);
//insert, search, remove, set
//all of these return false when the operation could not be done (e.g. key not found)
bool search(unsigned _key, char*& _str, unsigned& _len);
bool insert(unsigned _key, char* _str, unsigned _len);
bool modify(unsigned _key, char* _str, unsigned _len);
IVNode* find(unsigned _key, int* store, bool ifmodify);
bool remove(unsigned _key);
//range query: call range_query() first, then read the records one by one with getRangeValue()
const Bstr* getRangeValue();
void resetStream();
bool range_query(unsigned _key1, unsigned _key2);
bool save(); //write the whole tree to disk
~IVTree();
void print(std::string s); //DEBUG(print the tree)
};
//NOTICE: need to save tree manually before delete, otherwise will cause problem.
//(problem range between two extremes: not-modified, totally-modified)
//After saved, it's ok to continue operations on tree!
#endif

View File

@ -0,0 +1,186 @@
/*=============================================================================
# Filename: IVHeap.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:37
# Description: achieve functions in IVHeap.h
=============================================================================*/
#include "IVHeap.h"
using namespace std;
//Build an empty heap: no backing array, zero capacity, zero elements.
IVHeap::IVHeap()
{
	this->heap = NULL;
	this->size = 0;
	this->length = 0;
}
//Build an empty heap whose backing array can hold _size node pointers.
IVHeap::IVHeap(unsigned _size)
{
	this->size = _size;
	this->length = 0;
	this->heap = new IVNode*[this->size];
	//NOTICE: a plain new[] reports failure by throwing, so this check is only a safeguard
	if (NULL == this->heap)
	{
		this->print("error in IVHeap: Allocation fail!");
		exit(1);
	}
}
//Return the element stored at the top of the heap, or NULL when the heap is empty.
IVNode*
IVHeap::getTop() const
{
	if (this->length == 0)
		return NULL;
	return this->heap[0];
}
//Number of valid elements currently stored in the heap.
unsigned
IVHeap::getLen() const
{
	const unsigned count = this->length;
	return count;
}
//Capacity of the backing array (max elements before it has to grow).
unsigned
IVHeap::getSize() const
{
	const unsigned capacity = this->size;
	return capacity;
}
//True when the heap holds no element.
bool
IVHeap::isEmpty() const
{
	return 0 == this->length;
}
//Insert _np into the min-heap (ordered by IVNode::getRank()) and sift it up.
//When the backing array is full it is replaced by one of twice the capacity.
//Returns false (heap unchanged) if the larger array cannot be allocated, true otherwise.
bool
IVHeap::insert(IVNode* _np)
{
	if (this->length == this->size)	//when full, grow the array
	{
		//a heap built by the default constructor has size 0: doubling alone would never grow it
		unsigned new_size = (this->size == 0) ? 1 : 2 * this->size;
		//NOTICE: the array is created with new[] and freed with delete[], so it must not
		//be passed to realloc(); allocate a new array and copy the pointers instead
		IVNode** new_heap = NULL;
		try
		{
			new_heap = new IVNode*[new_size];
		}
		catch (...)
		{
			//the old array is untouched, so the heap is still valid
			print("error in isert: Reallocation fail!");
			return false;
		}
		for (unsigned t = 0; t < this->length; ++t)
			new_heap[t] = this->heap[t];
		delete[] this->heap;
		this->heap = new_heap;
		this->size = new_size;
	}
	//sift up: move parents down until the right slot for _np is found
	unsigned i = this->length, j;
	while (i != 0)
	{
		j = (i - 1) / 2;
		if (_np->getRank() >= this->heap[j]->getRank())
			break;
		this->heap[i] = this->heap[j];
		i = j;
	}
	this->heap[i] = _np;
	this->length++;
	return true;
}
bool
IVHeap::remove()
{
if (this->length == 0)
{
print("error in remove: remove from empty heap!");
return false;
}
//Node* tp = this->heap[0];
this->length--;
if (this->length == 0)
return true;
IVNode* xp = this->heap[this->length];
unsigned i = 0, j = 1;
while (j < this->length)
{
if (j < this->length - 1 && this->heap[j]->getRank() > this->heap[j + 1]->getRank())
j++;
if (xp->getRank() <= this->heap[j]->getRank())
break;
this->heap[i] = this->heap[j];
i = j;
j = 2 * i + 1;
}
this->heap[i] = xp;
return true;
}
//Re-establish the heap order after the rank of _np has changed.
//_flag == true: the rank became smaller, move the node up;
//_flag == false: the rank became larger, move the node down.
//Returns false when _np is not stored in this heap (nothing is changed then).
bool
IVHeap::modify(IVNode* _np, bool _flag)	//control direction
{
	//linear search for the position of _np
	unsigned i, j;
	for (i = 0; i < this->length; ++i)
		if (this->heap[i] == _np)
			break;
	if (i == this->length)
	{
		//not in the heap: adjusting from index length would touch an invalid slot
		print("error in modify: node not found in heap!");
		return false;
	}
	if (_flag == true)	//move up
	{
		while (i != 0)
		{
			j = (i - 1) / 2;
			if (_np->getRank() < heap[j]->getRank())
			{
				heap[i] = heap[j];
				heap[j] = _np;
				i = j;
			}
			else
				break;
		}
	}
	else	//move down
	{
		j = 2 * i + 1;
		while (j < this->length)
		{
			//pick the child with the smaller rank
			if (j < this->length - 1 && heap[j]->getRank() > heap[j + 1]->getRank())
				j++;
			if (heap[j]->getRank() < _np->getRank())
			{
				heap[i] = heap[j];
				heap[j] = _np;
				i = j;
				//continue with the childs of the new position
				j = 2 * i + 1;
			}
			else
				break;
		}
	}
	return true;
}
//Free the pointer array only; the nodes it points to are not deleted here.
IVHeap::~IVHeap()
{
	delete[] this->heap;
	this->heap = NULL;
	this->size = 0;
	this->length = 0;
}
//DEBUG hook: currently a no-op, the DEBUG_KVSTORE section below is empty so s is ignored
void
IVHeap::print(string s)
{
#ifdef DEBUG_KVSTORE
#endif
}

View File

@ -0,0 +1,41 @@
/*=============================================================================
# Filename: IVHeap.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:37
# Description: set and deal of IVNode*s in memory
=============================================================================*/
#ifndef _KVSTORE_IVTREE_HEAP_IVHEAP_H
#define _KVSTORE_IVTREE_HEAP_IVHEAP_H
#include "../../../Util/Util.h"
#include "../node/IVNode.h"
/* add, sub, modify: all can be done within O(logn) using adjust-function */
//QUERY: when modified, finding the right position consumes O(n). How about keeping only the smallest?
//(add O(1), sub O(2n), modify O(n))
//TODO: to solve this problem, use another hash: (pointer, pos), to find the right position of
//given p in O(lgn) time
//Array-based binary min-heap of IVNode pointers, ordered by IVNode::getRank().
//NOTE(review): the class owns the heap array through a raw pointer but declares no copy
//constructor or assignment operator, so copying an IVHeap looks unsafe - confirm it is never copied.
class IVHeap
{
private:
IVNode** heap; //dynamic array
unsigned length; //valid elements num
unsigned size; //max-size of heap
public:
IVHeap();
IVHeap(unsigned _size);
IVNode* getTop() const; //return the top element
unsigned getLen() const;
unsigned getSize() const;
bool isEmpty() const;
bool insert(IVNode* _np); //insert and adjust
bool remove(); //remove top and adjust
bool modify(IVNode* _np, bool _flag); //search modified element and adjust; _flag true: move up, false: move down
~IVHeap();
void print(std::string s); //DEBUG
};
#endif

View File

@ -0,0 +1,293 @@
/*=============================================================================
# Filename: IVIntlNode.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: achieve functions in IVIntlNode.h
=============================================================================*/
#include "IVIntlNode.h"
using namespace std;
/*
void
IVIntlNode::AllocChilds()
{
childs = (Node**)malloc(sizeof(Node*) * MAX_CHILD_NUM);
}
*/
//Build an in-memory internal node with no childs.
IVIntlNode::IVIntlNode()
{
	//NOTICE: childs has MAX_CHILD_NUM + 1 slots, clear all of them (not only MAX_CHILD_NUM)
	memset(childs, 0, sizeof(childs));
}
//Build an internal node with no childs.
//isVirtual is forwarded to IVNode(bool): a virtual node allocates no keys and is not
//marked as in memory. Without the explicit base initializer the default IVNode()
//would run, which always allocates keys and sets NF_IM.
IVIntlNode::IVIntlNode(bool isVirtual) : IVNode(isVirtual)
{
	//NOTICE: childs has MAX_CHILD_NUM + 1 slots, clear all of them (not only MAX_CHILD_NUM)
	memset(childs, 0, sizeof(childs));
}
/*
IVIntlNode::IntlNode(Storage* TSM) //QUERY
{
TSM->readNode(this, Storage::OVER);
}
*/
void
IVIntlNode::Virtual()
{
//this->FreeKeys();
this->release();
this->delMem();
}
void
IVIntlNode::Normal()
{
this->AllocKeys();
this->setMem();
}
//Return the child at _index, or NULL when _index is out of range.
IVNode*
IVIntlNode::getChild(int _index) const
{
	//num keys, num+1 childs: index num is still valid
	const int last = this->getNum();
	if (_index >= 0 && _index <= last)
		return this->childs[_index];
	return NULL;
}
//Overwrite the child pointer at _index (0..num); returns false on an invalid index.
bool
IVIntlNode::setChild(IVNode* _child, int _index)
{
	const int last = this->getNum();
	const bool valid = (_index >= 0) && (_index <= last);
	if (!valid)
	{
		print(string("error in setChild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	childs[_index] = _child;
	return true;
}
//Insert _child at _index (0..num+1), shifting the following childs one slot to the right.
//The key count is not changed here. Returns false on an invalid index.
bool
IVIntlNode::addChild(IVNode* _child, int _index)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index <= num + 1))
	{
		print(string("error in addChild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	//move childs[_index..num] to childs[_index+1..num+1], starting from the right end
	for (int pos = num + 1; pos > _index; --pos)
		childs[pos] = childs[pos - 1];
	childs[_index] = _child;
	return true;
}
//Drop the child at _index (0..num), shifting the following childs one slot to the left.
//The key count is not changed here. Returns false on an invalid index.
bool
IVIntlNode::subChild(int _index)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index <= num))
	{
		print(string("error in subchild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	for (int pos = _index + 1; pos <= num; ++pos)
		childs[pos - 1] = childs[pos];
	return true;
}
//Size accounted for an internal node: always the constant INTL_SIZE.
unsigned
IVIntlNode::getSize() const
{
	const unsigned bytes = INTL_SIZE;
	return bytes;
}
//Split this node: keys/childs from MIN_CHILD_NUM on move to a new right sibling,
//keys[MIN_KEY_NUM] moves up into _father at _index, and the new node becomes the
//child of _father at _index + 1. All three nodes are marked dirty.
//Returns the new right sibling.
IVNode*
IVIntlNode::split(IVNode* _father, int _index)
{
	int num = this->getNum();
	IVNode* p = new IVIntlNode;	//right child
	p->setHeight(this->getHeight());
	int i, k;
	for (i = MIN_CHILD_NUM, k = 0; i < num; ++i, ++k)
	{
		p->addKey(this->keys[i], k);
		p->addChild(this->childs[i], k);
		p->addNum();
	}
	//one more child than keys
	p->addChild(this->childs[i], k);
	//NOTICE: keys are unsigned, keep the separator unsigned instead of routing it through int
	unsigned tp = this->keys[MIN_KEY_NUM];
	this->setNum(MIN_KEY_NUM);
	_father->addKey(tp, _index);
	_father->addChild(p, _index + 1);	//DEBUG(check the index)
	_father->addNum();
	_father->setDirty();
	p->setDirty();
	this->setDirty();
	return p;
}
//Refill this node (child _index of _father) from a neighbor.
//case 1: merge the right neighbor into this;  case 2: borrow one key/child from the right;
//case 3: merge the left neighbor into this;   case 4: borrow one key/child from the left.
//The right neighbor is preferred when it can lend a key; otherwise a lending left neighbor.
//Returns the neighbor that was emptied by a merge (case 1/3), otherwise NULL.
//The emptied node is not deleted here.
IVNode*
IVIntlNode::coalesce(IVNode* _father, int _index)
{
	int i, j = _father->getNum(), k = 0;	//BETTER: unsigned?
	//NOTICE: p stays NULL when there is no neighbor at all (invalid case)
	IVNode* p = NULL;
	int ccase = 0;
	if (_index < j)	//the right neighbor
	{
		p = _father->getChild(_index + 1);
		k = p->getNum();
		if ((unsigned)k > MIN_KEY_NUM)
			ccase = 2;
		else	//==MIN_KEY_NUM
			ccase = 1;
	}
	if (_index > 0)	//the left neighbor
	{
		IVNode* tp = _father->getChild(_index - 1);
		unsigned tk = tp->getNum();
		if (ccase < 2)
		{
			if (ccase == 0)
				ccase = 3;
			if (tk > MIN_KEY_NUM)
				ccase = 4;
		}
		if (ccase > 2)
		{
			p = tp;
			k = tk;
		}
	}
	unsigned tmp = 0;
	switch (ccase)
	{
	case 1:	//union right to this
		this->addKey(_father->getKey(_index), this->getNum());
		this->addNum();
		for (i = 0; i < k; ++i)
		{
			this->addKey(p->getKey(i), this->getNum());
			this->addChild(p->getChild(i), this->getNum());
			this->addNum();
		}
		this->setChild(p->getChild(i), this->getNum());
		_father->subKey(_index);
		_father->subChild(_index + 1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 2:	//move one from right
		this->addKey(_father->getKey(_index), this->getNum());
		_father->setKey(p->getKey(0), _index);
		p->subKey(0);
		this->addChild(p->getChild(0), this->getNum() + 1);
		p->subChild(0);
		this->addNum();
		p->subNum();
		break;
	case 3:	//union left to this
		this->addKey(_father->getKey(_index - 1), 0);
		this->addNum();
		for (i = k; i > 0; --i)
		{
			int t = i - 1;
			this->addKey(p->getKey(t), 0);
			this->addChild(p->getChild(i), 0);
			this->addNum();
		}
		this->addChild(p->getChild(0), 0);
		_father->subKey(_index - 1);
		_father->subChild(_index - 1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 4:	//move one from left
		tmp = p->getKey(k - 1);
		p->subKey(k - 1);
		this->addKey(_father->getKey(_index - 1), 0);
		_father->setKey(tmp, _index - 1);
		this->addChild(p->getChild(k), 0);
		p->subChild(k);
		this->addNum();
		p->subNum();
		break;
	default:
		print("error in coalesce: Invalid case!");
		//printf("error in coalesce: Invalid case!");
	}
	_father->setDirty();
	//in the invalid case there is no neighbor to mark
	if (p != NULL)
		p->setDirty();
	this->setDirty();
	if (ccase == 1 || ccase == 3)
		return p;
	else
		return NULL;
}
//Free the key array; does nothing when the node is not in memory.
void
IVIntlNode::release()
{
	if (this->inMem())
	{
		delete[] this->keys;	//this will release all!!!
	}
}
//Free the in-memory data (if any); childs are not deleted here.
IVIntlNode::~IVIntlNode()
{
	this->release();
}
//DEBUG: write the message s to Util::debug_kvstore (only when DEBUG_KVSTORE is defined).
//s == "node"/"NODE" additionally dumps store, key num and flag; "check node" is not implemented.
void
IVIntlNode::print(string s)
{
#ifdef DEBUG_KVSTORE
	const unsigned num = this->getNum();
	fputs(Util::showtime().c_str(), Util::debug_kvstore);
	fputs("Class IVIntlNode\n", Util::debug_kvstore);
	fputs("Message: ", Util::debug_kvstore);
	fputs(s.c_str(), Util::debug_kvstore);
	fputs("\n", Util::debug_kvstore);
	const bool dump_node = (s == "node") || (s == "NODE");
	if (dump_node)
	{
		fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
	}
	else if (s == "check node")
	{
		//TODO(check node, if satisfy B+ definition)
	}
#endif
}

View File

@ -0,0 +1,48 @@
/*=============================================================================
# Filename: IVIntlNode.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: the internal-node of a B+ tree
=============================================================================*/
#ifndef _KVSTORE_IVTREE_NODE_IVINTLNODE_H
#define _KVSTORE_IVTREE_NODE_IVINTLNODE_H
#include "IVNode.h"
//Internal node of the B+ tree: the keys inherited from IVNode plus an array of child pointers.
class IVIntlNode : public IVNode
{
protected:
//a node with num keys uses childs[0..num]; one spare slot is kept beyond MAX_CHILD_NUM
IVNode* childs[MAX_CHILD_NUM + 1];
//Node** childs;
//void AllocChilds();
public:
IVIntlNode();
IVIntlNode(bool isVirtual);
//IntlNode(Storage* TSM);
void Virtual(); //release in-memory data and clear the in-memory flag
void Normal(); //allocate keys and set the in-memory flag
IVNode* getChild(int _index) const; //NULL if _index is not in 0..num
bool setChild(IVNode* _child, int _index);
bool addChild(IVNode* _child, int _index); //insert and shift right, key num unchanged
bool subChild(int _index); //remove and shift left, key num unchanged
unsigned getSize() const;
IVNode* split(IVNode* _father, int _index); //returns the new right sibling
IVNode* coalesce(IVNode* _father, int _index); //returns the merged-away neighbor or NULL
void release();
~IVIntlNode();
void print(std::string s); //DEBUG
/*non-sense functions: polymorphic
Node* getPrev() const;
Node* getNext() const;
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index);
bool addValue(const Bstr* _value, int _index);
bool subValue(int _index);
void setPrev(Node* _prev);
void setNext(Node* _next);
*/
};
#endif

View File

@ -0,0 +1,538 @@
/*=============================================================================
# Filename: IVLeafNode.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: ahieve functions in IVLeafNode.h
=============================================================================*/
#include "IVLeafNode.h"
using namespace std;
void
IVLeafNode::AllocValues()
{
values = new Bstr[MAX_KEY_NUM];
}
/*
void
IVLeafNode::FreeValues()
{
delete[] values;
}
*/
//Build an in-memory leaf that is not linked to any neighbor.
IVLeafNode::IVLeafNode()
{
	this->prev = NULL;
	this->next = NULL;
	this->flag |= NF_IL;	//leaf flag
	this->AllocValues();
}
//Build a leaf that is not linked to any neighbor.
//isVirtual is forwarded to IVNode(bool): a virtual leaf allocates neither keys nor values
//and is not marked as in memory. Without the explicit base initializer the default
//IVNode() would run, which always allocates keys and sets NF_IM.
IVLeafNode::IVLeafNode(bool isVirtual) : IVNode(isVirtual)
{
	flag |= NF_IL;
	prev = next = NULL;
	//never leave the pointer uninitialized, Normal() allocates the array later
	values = NULL;
	if (!isVirtual)
		AllocValues();
}
/*
IVLeafNode::LeafNode(Storage* TSM)
{
AllocValues();
TSM->readNode(this, Storage::OVER);
}
*/
void
IVLeafNode::Virtual()
{
//this->FreeKeys();
//this->FreeValues();
this->release();
this->delMem();
}
void
IVLeafNode::Normal()
{
this->AllocKeys();
this->AllocValues();
this->setMem();
}
//Previous leaf in the leaves-list (may be NULL).
IVNode*
IVLeafNode::getPrev() const
{
	return this->prev;
}
//Next leaf in the leaves-list (may be NULL).
IVNode*
IVLeafNode::getNext() const
{
	return this->next;
}
//Return a pointer to the value slot at _index, or NULL when _index is not in 0..num-1.
const Bstr*
IVLeafNode::getValue(int _index) const
{
	const int num = this->getNum();
	if (_index >= 0 && _index < num)
		return &this->values[_index];
	return NULL;
}
bool
IVLeafNode::setValue(const Bstr* _value, int _index, bool _ifcopy)
{
int num = this->getNum();
if (_index < 0 || _index >= num)
{
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
return false;
}
this->values[_index].release(); //NOTICE: only used in modify
if(_ifcopy)
{
this->values[_index].copy(_value);
}
else
{
this->values[_index] = *_value;
}
return true;
}
//Fetch the value at _index into _str/_len.
//A long-list slot stores a block number in its length field: the real value is read from _vlist.
//Otherwise _str/_len are the pointer and length held by the slot itself (no copy is made).
//Returns false when _index is not in 0..num-1.
bool
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
{
	int num = this->getNum();
	if (_index < 0 || _index >= num)
	{
		//print(string("error in getValue: Invalid index ") + Util::int2string(_index));
		//NOTICE: the return type is bool, so report failure with false (not NULL)
		return false;
	}
	//read long list
	if(this->values[_index].isBstrLongList())
	{
#ifdef DEBUG_VLIST
		cout<<"this is a vlist in get()"<<endl;
#endif
		unsigned block_num = this->values[_index].getLen();
		_vlist->readValue(block_num, _str, _len);
	}
	else
	{
		_str = this->values[_index].getStr();
		_len = this->values[_index].getLen();
	}
	return true;
}
//Replace the value at _index with (_str, _len).
//Old value: a long list is removed from _vlist, a normal one is released.
//New value: a long list is written to _vlist (the slot keeps the block number and _str is
//freed here), a normal one is stored by pointer without copying. ifcopy is not used.
//Returns false on an invalid index.
bool
IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index < num))
	{
		print(string("error in setValue: Invalid index ") + Util::int2string(_index));
		return false;
	}
	Bstr& slot = this->values[_index];
	if (!slot.isBstrLongList())
	{
		slot.release();	//NOTICE: only used in modify
	}
	else
	{
#ifdef DEBUG_VLIST
		cout<<"this is a vlist in set()"<<endl;
#endif
		unsigned old_block = slot.getLen();
		_vlist->removeValue(old_block);
	}
	//DEBUG: we do not need to copy here
	//we just need to ensure that the pointer's memory is not released
	if (!VList::isLongList(_len))
	{
		slot.setStr(_str);
		slot.setLen(_len);
		return true;
	}
	unsigned new_block = _vlist->writeValue(_str, _len);
	slot.setStr(NULL);
	slot.setLen(new_block);
	//NOTICE: we need to free the long list value
	delete[] _str;
	return true;
}
//Insert (_str, _len) as a new value at _index (0..num), shifting later values to the right.
//A long list is written to _vlist (the slot keeps the block number and _str is freed here),
//a normal value is stored by pointer without copying. ifcopy is not used.
//The key count is not changed here. Returns false on an invalid index.
bool
IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index <= num))
	{
		print(string("error in addValue: Invalid index ") + Util::int2string(_index));
		return false;
	}
	//open a gap at _index, starting from the right end
	for (int pos = num; pos > _index; --pos)
		this->values[pos] = this->values[pos - 1];
	Bstr& slot = this->values[_index];
	if (!VList::isLongList(_len))
	{
		slot.setStr(_str);
		slot.setLen(_len);
		return true;
	}
#ifdef DEBUG_VLIST
	cout<<"this is a vlist in add()"<<endl;
#endif
	unsigned block_num = _vlist->writeValue(_str, _len);
	slot.setStr(NULL);
	slot.setLen(block_num);
	//NOTICE: we need to free the long list value
	delete[] _str;
	return true;
}
//Remove the value at _index, shifting later values to the left.
//A long list is removed from _vlist; a normal value is released only when ifdel is set.
//The key count is not changed here. Returns false on an invalid index.
bool
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index < num))
	{
		print(string("error in subValue: Invalid index ") + Util::int2string(_index));
		return false;
	}
	Bstr& slot = this->values[_index];
	if (slot.isBstrLongList())
	{
		unsigned block_num = slot.getLen();
		_vlist->removeValue(block_num);
	}
	else if (ifdel)
	{
		slot.release();
	}
	for (int pos = _index + 1; pos < num; ++pos)
		this->values[pos - 1] = this->values[pos];
	return true;
}
//Insert *_value at _index (0..num), shifting later values to the right.
//ifcopy selects Bstr::copy instead of a plain assignment.
//The key count is not changed here. Returns false on an invalid index.
bool
IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index <= num))
	{
		print(string("error in addValue: Invalid index ") + Util::int2string(_index));
		return false;
	}
	for (int pos = num; pos > _index; --pos)
		this->values[pos] = this->values[pos - 1];
	if (!ifcopy)
		this->values[_index] = *_value;
	else
		this->values[_index].copy(_value);
	return true;
}
//Remove the value at _index, shifting later values to the left.
//The removed value is released only when ifdel is set.
//The key count is not changed here. Returns false on an invalid index.
bool
IVLeafNode::subValue(int _index, bool ifdel)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index < num))
	{
		print(string("error in subValue: Invalid index ") + Util::int2string(_index));
		return false;
	}
	if (ifdel)
	{
		this->values[_index].release();
	}
	for (int pos = _index + 1; pos < num; ++pos)
		this->values[pos - 1] = this->values[pos];
	return true;
}
//Link the previous leaf in the leaves-list.
void
IVLeafNode::setPrev(IVNode* _prev)
{
	prev = _prev;
}
//Link the next leaf in the leaves-list.
void
IVLeafNode::setNext(IVNode* _next)
{
	next = _next;
}
unsigned
IVLeafNode::getSize() const
{
unsigned sum = LEAF_SIZE, num = this->getNum(), i;
for (i = 0; i < num; ++i)
{
sum += values[i].getLen();
}
return sum;
}
//Split this leaf: keys/values from MIN_KEY_NUM on move to a new right sibling which is
//linked after this leaf. keys[MIN_KEY_NUM] is added to _father at _index and the
//new leaf becomes the child of _father at _index + 1. All three nodes are marked dirty.
//Returns the new right sibling.
IVNode*
IVLeafNode::split(IVNode* _father, int _index)
{
	int num = this->getNum();
	IVNode* p = new IVLeafNode;	//right child
	p->setHeight(this->getHeight());	//NOTICE: assign height for new node
	p->setNext(this->next);
	this->setNext(p);
	p->setPrev(this);
	int i, k;
	for (i = MIN_KEY_NUM, k = 0; i < num; ++i, ++k)
	{
		p->addKey(this->keys[i], k);
		//the value is assigned to the new leaf without copying (ifcopy defaults to false)
		p->addValue(this->values + i, k);
		p->addNum();
	}
	//NOTICE: keys are unsigned, keep the separator unsigned instead of routing it through int
	unsigned tp = this->keys[MIN_KEY_NUM];
	this->setNum(MIN_KEY_NUM);
	_father->addKey(tp, _index);
	_father->addChild(p, _index + 1);	//DEBUG(check the index)
	_father->addNum();
	_father->setDirty();
	p->setDirty();
	this->setDirty();
	return p;
}
//Refill this leaf (child _index of _father) from a neighbor.
//case 1: merge the right neighbor into this;  case 2: borrow one key/value from the right;
//case 3: merge the left neighbor into this;   case 4: borrow one key/value from the left.
//The right neighbor is preferred when it can lend a key; otherwise a lending left neighbor.
//Returns the neighbor that was emptied by a merge (case 1/3), otherwise NULL.
//The emptied node is not deleted here.
IVNode*
IVLeafNode::coalesce(IVNode* _father, int _index)
{	//add a key or coalesce a neighbor to this
	int i, j = _father->getNum(), k = 0;	//BETTER: unsigned?
	//NOTICE: p stays NULL when there is no neighbor at all (invalid case)
	IVNode* p = NULL;
	int ccase = 0;
	if (_index < j)	//the right neighbor
	{
		p = _father->getChild(_index + 1);
		k = p->getNum();
		if ((unsigned)k > MIN_KEY_NUM)
			ccase = 2;
		else	//==MIN_KEY_NUM
			ccase = 1;
	}
	if (_index > 0)	//the left neighbor
	{
		IVNode* tp = _father->getChild(_index - 1);
		unsigned tk = tp->getNum();
		if (ccase < 2)
		{
			if (ccase == 0)
				ccase = 3;
			if (tk > MIN_KEY_NUM)
				ccase = 4;
		}
		if (ccase > 2)
		{
			p = tp;
			k = tk;
		}
	}
	//NOTICE: keys are unsigned, do not hold one in a signed int
	unsigned tmp = 0;
	switch (ccase)
	{
	case 1:	//union right to this
		for (i = 0; i < k; ++i)
		{
			this->addKey(p->getKey(i), this->getNum());
			this->addValue(p->getValue(i), this->getNum());
			this->addNum();
		}
		_father->subKey(_index);
		_father->subChild(_index + 1);
		_father->subNum();
		this->next = p->getNext();
		if (this->next != NULL)
			this->next->setPrev(this);
		p->setNum(0);	//NOTICE: adjust num before delete!
		//delete p;
		break;
	case 2:	//move one from right
		this->addKey(p->getKey(0), this->getNum());
		_father->setKey(p->getKey(1), _index);
		p->subKey(0);
		this->addValue(p->getValue(0), this->getNum());
		p->subValue(0);
		this->addNum();
		p->subNum();
		break;
	case 3:	//union left to this
		//BETTER: move all keys/etc one time
		for (i = k; i > 0; --i)
		{
			int t = i - 1;
			this->addKey(p->getKey(t), 0);
			this->addValue(p->getValue(t), 0);
			this->addNum();
		}
		_father->subKey(_index - 1);
		_father->subChild(_index - 1);
		_father->subNum();
		this->prev = p->getPrev();
		if (this->prev != NULL)	//else: leaves-list
			this->prev->setNext(this);
		p->setNum(0);
		//delete p;
		break;
	case 4:	//move one from left
		tmp = p->getKey(k - 1);
		p->subKey(k - 1);
		this->addKey(tmp, 0);
		_father->setKey(tmp, _index - 1);
		this->addValue(p->getValue(k - 1), 0);
		p->subValue(k - 1);
		this->addNum();
		p->subNum();
		break;
	default:
		print("error in coalesce: Invalid case!");
		//printf("error in coalesce: Invalid case!");
	}
	_father->setDirty();
	//in the invalid case there is no neighbor to mark
	if (p != NULL)
		p->setDirty();
	this->setDirty();
	if (ccase == 1 || ccase == 3)
		return p;
	else
		return NULL;
}
//Free the key and value arrays; does nothing when the leaf is not in memory.
void
IVLeafNode::release()
{
	if (this->inMem())
	{
		//clear() the slots beyond num before the array is deleted
		//presumably this keeps stale slots from freeing memory they do not own - confirm in Bstr
		unsigned idx = this->getNum();
		while (idx < MAX_KEY_NUM)
		{
			this->values[idx].clear();
			++idx;
		}
		delete[] this->keys;
		delete[] this->values;
	}
}
//Free the in-memory keys and values (if any).
IVLeafNode::~IVLeafNode()
{
	this->release();
}
//DEBUG: write the message s to Util::debug_kvstore (only when DEBUG_KVSTORE is defined).
//"node": dump store/num/flag and the list links; "NODE": additionally dump every value;
//"check node": dump the node and report whether key num and key order are valid.
void
IVLeafNode::print(string s)
{
#ifdef DEBUG_KVSTORE
	const unsigned num = this->getNum();
	fputs(Util::showtime().c_str(), Util::debug_kvstore);
	fputs("Class IVLeafNode\n", Util::debug_kvstore);
	fputs("Message: ", Util::debug_kvstore);
	fputs(s.c_str(), Util::debug_kvstore);
	fputs("\n", Util::debug_kvstore);
	if (s == "NODE" || s == "node")
	{
		fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
		fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next);
		if (s == "NODE")
		{
			for (unsigned idx = 0; idx < num; ++idx)
				this->values[idx].print("BSTR");
		}
	}
	else if (s == "check node")
	{
		//check the node, if satisfy B+ definition
		bool good = (num >= MIN_KEY_NUM) && (num <= MAX_KEY_NUM);
		if (good)
		{
			//keys must be strictly increasing
			unsigned idx = 1;
			while (idx < num && keys[idx] > keys[idx - 1])
				++idx;
			if (idx < num)
				good = false;
		}
		this->print("node");
		if (good)
			fprintf(Util::debug_kvstore, "This node is good\n");
		else
			fprintf(Util::debug_kvstore, "This node is bad\n");
	}
#endif
}

View File

@ -0,0 +1,58 @@
/*=============================================================================
# Filename: IVLeafNode.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:39
# Description: the leaf-node of a B+ tree
=============================================================================*/
#ifndef _KVSTORE_IVTREE_NODE_IVLEAFNODE_H
#define _KVSTORE_IVTREE_NODE_IVLEAFNODE_H
#include "IVNode.h"
//Leaf node of the B+ tree: the keys inherited from IVNode, one Bstr value per key,
//and prev/next links forming the leaves-list.
class IVLeafNode : public IVNode
{
protected:
IVNode* prev; //LeafNode
IVNode* next;
Bstr* values; //MAX_KEY_NUM slots, allocated by AllocValues()
void AllocValues();
//void FreeValues();
public:
IVLeafNode();
IVLeafNode(bool isVirtual);
//LeafNode(Storage* TSM);
void Virtual(); //release in-memory data and clear the in-memory flag
void Normal(); //allocate keys/values and set the in-memory flag
IVNode* getPrev() const;
IVNode* getNext() const;
const Bstr* getValue(int _index) const; //NULL if _index is invalid
bool setValue(const Bstr* _value, int _index, bool _ifcopy=false);
//the VList versions keep long lists in _vlist and only a block number in the slot
bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const;
bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false);
bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false);
bool subValue(VList* _vlist, int _index, bool ifdel = false);
bool addValue(const Bstr* _val, int _index, bool ifcopy = false);
bool subValue(int _index, bool ifdel = false);
void setPrev(IVNode* _prev);
void setNext(IVNode* _next);
unsigned getSize() const;
IVNode* split(IVNode* _father, int _index); //returns the new right sibling
IVNode* coalesce(IVNode* _father, int _index); //returns the merged-away neighbor or NULL
void release();
~IVLeafNode();
void print(std::string s); //DEBUG
/*non-sense virtual function
Node* getChild(int _index) const;
bool addChild(Node* _child, int _index);
bool subChild(int _index);
*/
};
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
#endif

View File

@ -0,0 +1,320 @@
/*=============================================================================
# Filename: IVNode.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:39
# Description: achieve functions in IVNode.h
=============================================================================*/
#include "IVNode.h"
using namespace std;
void
IVNode::AllocKeys()
{
keys = new unsigned[MAX_KEY_NUM];
}
/*
void
IVNode::FreeKeys()
{
delete[] keys;
}
*/
//Build an in-memory node: store 0, only the in-memory flag set, keys allocated.
IVNode::IVNode()
{
	this->store = 0;
	this->flag = NF_IM;
	this->AllocKeys();
}
//Build a node with store 0 and no flag set.
//A non-virtual node is marked as in memory and gets its key array;
//a virtual node allocates nothing (Normal() does that later).
IVNode::IVNode(bool isVirtual)
{
	store = flag = 0;
	//never leave the pointer uninitialized for virtual nodes
	keys = NULL;
	if (!isVirtual)
	{
		flag |= NF_IM;
		AllocKeys();
	}
}
/*
IVNode::Node(Storage* TSM)
{
AllocKeys();
TSM->readIVNode(this, Storage::OVER);
}
*/
//True when the leaf bit (NF_IL) is set.
bool
IVNode::isLeaf() const
{
	return (this->flag & NF_IL) != 0;
}
//True when the dirty bit (NF_ID) is set.
bool
IVNode::isDirty() const
{
	return (this->flag & NF_ID) != 0;
}
//Set the dirty bit.
void
IVNode::setDirty()
{
	this->flag = this->flag | NF_ID;
}
//Clear the dirty bit.
void
IVNode::delDirty()
{
	this->flag = this->flag & ~NF_ID;
}
//True when the in-memory bit (NF_IM) is set, i.e. the node is not virtual.
bool
IVNode::inMem() const
{
	return (this->flag & NF_IM) != 0;
}
//Set the in-memory bit.
void
IVNode::setMem()
{
	this->flag = this->flag | NF_IM;
}
//Clear the in-memory bit.
void
IVNode::delMem()
{
	this->flag = this->flag & ~NF_IM;
}
/*
bool
IVNode::isVirtual() const
{
return this->flag & NF_IV;
}
void
IVNode::setVirtual()
{
this->flag |= NF_IV;
}
void
IVNode::delVirtual()
{
this->flag &= ~NF_IV;
}
*/
//Return the rank area of the flag (the bits selected by NF_RK).
unsigned
IVNode::getRank() const
{
	const unsigned rank = this->flag & NF_RK;
	return rank;
}
//Replace the rank area of the flag with _rank.
//NOTICE: NF_RK also covers the dirty, height and key-num bits, so they are replaced too;
//_rank is not masked before it is or-ed in.
void
IVNode::setRank(unsigned _rank)
{
	const unsigned cleared = this->flag & ~NF_RK;
	this->flag = cleared | _rank;
}
//Return the height stored in the NF_HT bits of the flag.
unsigned
IVNode::getHeight() const
{
	const unsigned bits = this->flag & NF_HT;
	return bits >> 20;
}
//Store _h in the NF_HT bits of the flag (_h is shifted but not masked).
void
IVNode::setHeight(unsigned _h)
{
	const unsigned cleared = this->flag & ~NF_HT;
	this->flag = cleared | (_h << 20);
}
//Return the key count stored in the NF_KN bits of the flag.
unsigned
IVNode::getNum() const
{
	const unsigned bits = this->flag & NF_KN;
	return bits >> 12;
}
//Store _num as the key count; returns false when _num is not in 0..MAX_KEY_NUM.
bool
IVNode::setNum(int _num)
{
	const bool valid = (_num >= 0) && ((unsigned)_num <= MAX_KEY_NUM);
	if (!valid)
	{
		print(string("error in setNum: Invalid num ") + Util::int2string(_num));
		return false;
	}
	this->flag = this->flag & ~NF_KN;
	this->flag = this->flag | (_num << 12);
	return true;
}
//Increase the key count by one; returns false when the node is already full.
bool
IVNode::addNum()
{
	if (this->getNum() >= MAX_KEY_NUM)
	{
		print("error in addNum: Invalid!");
		return false;
	}
	//the key count starts at bit 12 of the flag
	this->flag = this->flag + (1 << 12);
	return true;
}
//Decrease the key count by one; returns false when the node has no key.
bool
IVNode::subNum()
{
	if (this->getNum() == 0)
	{
		print("error in subNum: Invalid!");
		return false;
	}
	//the key count starts at bit 12 of the flag
	this->flag = this->flag - (1 << 12);
	return true;
}
unsigned
IVNode::getStore() const
{
return this->store;
}
void
IVNode::setStore(unsigned _store)
{
this->store = _store;
}
unsigned
IVNode::getFlag() const
{
return flag;
}
void
IVNode::setFlag(unsigned _flag)
{
this->flag = _flag;
}
//Return the key at _index.
//On an invalid index a message is printed and -1 converted to unsigned is returned.
unsigned
IVNode::getKey(int _index) const
{
	const int num = this->getNum();
	if (_index >= 0 && _index < num)
		return this->keys[_index];
	printf("error in getKey: Invalid index\n");
	return -1;
}
//Overwrite the key at _index (0..num-1); returns false on an invalid index.
bool
IVNode::setKey(unsigned _key, int _index)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index < num))
	{
		print(string("error in setKey: Invalid index ") + Util::int2string(_index));
		return false;
	}
	this->keys[_index] = _key;
	return true;
}
//Insert _key at _index (0..num), shifting later keys to the right.
//The key count is not changed here. Returns false on an invalid index.
bool
IVNode::addKey(unsigned _key, int _index)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index <= num))
	{
		print(string("error in addKey: Invalid index ") + Util::int2string(_index));
		return false;
	}
	//NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!!
	//however, tree operations ensure that: when node is full, not add but split first!
	for (int pos = num; pos > _index; --pos)
		keys[pos] = keys[pos - 1];
	keys[_index] = _key;
	return true;
}
//Remove the key at _index (0..num-1), shifting later keys to the left.
//The key count is not changed here. Returns false on an invalid index.
bool
IVNode::subKey(int _index)
{
	const int num = this->getNum();
	if (!(_index >= 0 && _index < num))
	{
		print(string("error in subKey: Invalid index ") + Util::int2string(_index));
		return false;
	}
	for (int pos = _index + 1; pos < num; ++pos)
		keys[pos - 1] = keys[pos];
	return true;
}
//Binary search: return the index of the first key greater than _key
//(num when no key is greater).
int
IVNode::searchKey_less(unsigned _key) const
{
	int low = 0;
	int high = (int)this->getNum() - 1;
	while (low <= high)
	{
		const int mid = (low + high) / 2;
		if (this->keys[mid] <= _key)
		{
			low = mid + 1;
			continue;
		}
		//keys[mid] > _key: the answer is mid or left of it
		if (mid == low)
			break;
		high = mid;
	}
	return low;
}
//Return the index of the key equal to _key, or num when there is none.
int
IVNode::searchKey_equal(unsigned _key) const
{
	const int num = this->getNum();
	//pos is the first index whose key is greater than _key, so a match sits right before it
	const int pos = this->searchKey_less(_key);
	const bool found = (pos > 0) && (this->keys[pos - 1] == _key);
	return found ? (pos - 1) : num;
}
//Return the index of the key equal to _key when there is one,
//otherwise the index of the first key greater than _key (num when none is greater).
int
IVNode::searchKey_lessEqual(unsigned _key) const
{
	const int pos = this->searchKey_less(_key);
	const bool found = (pos > 0) && (this->keys[pos - 1] == _key);
	return found ? (pos - 1) : pos;
}

View File

@ -0,0 +1,123 @@
/*=============================================================================
# Filename: IVNode.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:38
# Description: basic Node class, father of IVIntlNode and IVLeafNode
=============================================================================*/
#ifndef _KVSTORE_IVTREE_NODE_IVNODE_H
#define _KVSTORE_IVTREE_NODE_IVNODE_H
#include "../../../Util/Util.h"
#include "../../../Util/Bstr.h"
#include "../../../Util/VList.h"
//Abstract base class of the B+ tree nodes (IVIntlNode and IVLeafNode).
//A node keeps its block index (store), a packed flag word and an array of unsigned keys.
//Leaf-only and internal-only operations have do-nothing defaults here so that both
//node kinds can be used through IVNode*.
class IVNode	//abstract basic class
{
public:
	static const unsigned DEGREE = 2 * 63;	//the degree of B+ tree
	static const unsigned MAX_CHILD_NUM = DEGREE;
	static const unsigned MIN_CHILD_NUM = DEGREE >> 1;
	static const unsigned MAX_KEY_NUM = MAX_CHILD_NUM - 1;	//max key-num
	static const unsigned MIN_KEY_NUM = MIN_CHILD_NUM - 1;	//min key-num
	/* different flags for tree-nodes, 32-bit; putting rank in the low bits means no need to move */
	static const unsigned NF_IL = 0x80000000;	//is leaf
	static const unsigned NF_ID = 0x00080000;	//is dirty, in rank-area
	static const unsigned NF_IM = 0x20000000;	//in memory, not virtual
	//static const unsigned NF_IV = 0x10000000;	//is virtual
	static const unsigned NF_RK = 0x00ffffff;	//select-rank, in Storage
	static const unsigned NF_HT = 0xf00000;		//height area in rank (bits 20-23)
	static const unsigned NF_KN = 0x07f000;		//key-num area in rank (bits 12-18), NOTICE: decided by DEGREE
	static const unsigned INTL_SIZE = sizeof(int) * MAX_KEY_NUM;
	static const unsigned LEAF_SIZE = INTL_SIZE + sizeof(Bstr) * MAX_KEY_NUM;
protected:
	unsigned store;		//store address, the Block index
	unsigned flag;		//NF_RK, NF_IL, NF_ID, NF_IM, property
	//int num;			//total keys num
	//Node* father;		//point to father-node, which must be IntlNode
	unsigned* keys;
	void AllocKeys();
	//void FreeKeys();
public:
	IVNode();
	IVNode(bool isVirtual);
	bool isLeaf() const;
	bool isDirty() const;
	void setDirty();
	void delDirty();
	bool inMem() const;
	void setMem();
	void delMem();
	//bool isVirtual() const;
	//void setVirtual();
	//void delVirtual();
	unsigned getRank() const;
	void setRank(unsigned _rank);
	unsigned getHeight() const;
	void setHeight(unsigned _h);
	unsigned getNum() const;
	bool setNum(int _num);
	bool addNum();
	bool subNum();
	unsigned getStore() const;
	void setStore(unsigned _store);
	unsigned getFlag() const;
	void setFlag(unsigned _flag);
	unsigned getKey(int _index) const;	//need to check the index
	bool setKey(unsigned _key, int _index);
	bool addKey(unsigned _key, int _index);
	bool subKey(int _index);
	//several binary key search utilities
	int searchKey_less(unsigned _key) const;
	int searchKey_equal(unsigned _key) const;
	int searchKey_lessEqual(unsigned _key) const;
	//virtual functions: polymorphic, the defaults do nothing
	virtual IVNode* getChild(int _index) const { return NULL; }
	virtual bool setChild(IVNode* _child, int _index) { return true; }
	virtual bool addChild(IVNode* _child, int _index) { return true; }
	virtual bool subChild(int _index) { return true; }
	virtual IVNode* getPrev() const { return NULL; }
	virtual IVNode* getNext() const { return NULL; }
	virtual const Bstr* getValue(int _index) const { return NULL; }
	virtual bool setValue(const Bstr* _value, int _index, bool _ifcopy=false) { return true; }
	//NOTICE: the return type is bool; the default fetches nothing, so it reports false (was NULL)
	virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return false; }
	virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }
	virtual bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; }
	virtual bool subValue(VList* _vlist, int _index, bool ifdel = false) { return true; }
	virtual bool addValue(const Bstr* _val, int _index, bool ifcopy = false) { return true; }
	virtual bool subValue(int _index, bool ifdel = false) { return true; }
	virtual void setPrev(IVNode* _prev) {}
	virtual void setNext(IVNode* _next) {}
	//pure virtual functions
	virtual void Virtual() = 0;
	virtual void Normal() = 0;
	virtual unsigned getSize() const = 0;		//return all memory owned
	virtual IVNode* split(IVNode* _father, int _index) = 0;
	virtual IVNode* coalesce(IVNode* _father, int _index) = 0;
	virtual void release() = 0;		//release the node, only remain necessary information
	virtual ~IVNode() {}
	virtual void print(std::string s) = 0;		//DEBUG(print the Node)
};
/*NOTICE(operations in release())
*To save memory, we could keep only store and flag (plus childs for internal nodes).
*However, in this way the child pointers may change; using Node** or Node*& does not help
*either, because the pointer variable itself may die.
*Another way is to release only the dynamic memory and keep the node object as a whole, reading
*the Bstr only when rebuilding. In this way node pointers do not change and operations are
*simpler, while a bit more memory is consumed. Because Bstrs consume the most memory, and
*memory-disk swapping is the most time-consuming thing, this seems to be the better way.
*WARN: when a node is in memory and not deleted, its basic content always exists! If there are
*really too many nodes, this will cause disaster because we can't swap them out until the tree is closed!
*To solve this problem, there should be two types of release-function: one to release the Bstr, one
*to release the whole node (the pointer becomes invalid and the node has to be rebuilt)
*/
#endif

View File

@ -0,0 +1,738 @@
/*=============================================================================
# Filename: IVStorage.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:43
# Description: achieve functions in IVStorage.h
=============================================================================*/
#include "IVStorage.h"
using namespace std;
//Default constructor: builds a storage object that is not bound to any tree file.
//Every pointer member is set to NULL so that the object is in a defined state;
//the buffer budget defaults to Util::MAX_BUFFER_SIZE.
IVStorage::IVStorage()
{ //not use ../logs/, notice the location of program
	cur_block_num = SET_BLOCK_NUM;
	filepath = "";
	//NOTICE: treeheight must be given a value here as well, otherwise it is an
	//indeterminate pointer that preRead()/writeTree() would dereference
	treeheight = NULL;
	freelist = NULL;
	treefp = NULL;
	max_buffer_size = Util::MAX_BUFFER_SIZE;
	heap_size = max_buffer_size / IVNode::INTL_SIZE;
	freemem = max_buffer_size;
	minheap = NULL;
	this->value_list = NULL;
}
//Create a new tree file (_mode == "build") or open an existing one (_mode == "open"),
//and build the in-memory list of free blocks.
//_filepath: path of the tree file
//_mode: "build" or "open"; any other value is reported through print() and no file is opened
//_height: pointer to the tree height kept by the caller; overwritten from the file header in "open" mode
//_buffer_size: buffer budget in bytes, also used to derive the heap size
//_vlist: handler for long value lists, stored as-is
//In "open" mode treefp is left positioned at the root block (offset 0 when the stored root num is 0),
//which is what preRead() relies on.
IVStorage::IVStorage(string& _filepath, string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist)
{
	cur_block_num = SET_BLOCK_NUM; //initialize
	this->filepath = _filepath;
	//NOTICE: give every member a defined value before any early return below, because
	//the destructor walks freelist, deletes minheap and closes treefp unconditionally
	this->treefp = NULL;
	this->freelist = NULL;
	this->minheap = NULL;
	this->treeheight = _height; //originally set to 0
	this->value_list = _vlist;
	this->max_buffer_size = _buffer_size;
	this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE;
	this->freemem = this->max_buffer_size;
	if (_mode == string("build"))
		treefp = fopen(_filepath.c_str(), "w+b");
	else if (_mode == string("open"))
		treefp = fopen(_filepath.c_str(), "r+b");
	else
	{
		print(string("error in IVStorage: Invalid mode ") + _mode);
		return;
	}
	if (treefp == NULL)
	{
		print(string("error in IVStorage: Open error ") + _filepath);
		return;
	}
	this->freelist = new BlockInfo; //null-head
	unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE
	BlockInfo* bp;
	if (_mode == "build")
	{ //write basic information
		i = 0;
		fwrite(&i, sizeof(unsigned), 1, this->treefp); //height
		fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum
		fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num
		//the block-usage bitmap starts at the second block of the file
		fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
		bp = this->freelist;
		j = cur_block_num / 8;
		for (i = 0; i < j; ++i)
		{
			//all blocks are free in a new file: one zero byte covers 8 blocks
			fputc(0, this->treefp);
			for (k = 0; k < 8; ++k)
			{
				bp->next = new BlockInfo(i * 8 + k + 1, NULL);
				bp = bp->next;
			}
		}
	}
	else //_mode == "open"
	{
		//read basic information
		//rootnum defaults to 0 (null tree) so that a failed read does not seek to a garbage block
		unsigned rootnum = 0;
		char c;
		fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
		fread(&rootnum, sizeof(unsigned), 1, this->treefp);
		fread(&cur_block_num, sizeof(unsigned), 1, this->treefp);
		fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
		bp = this->freelist;
		j = cur_block_num / 8;
		for (i = 0; i < j; ++i)
		{
			c = fgetc(treefp);
			for (k = 0; k < 8; ++k)
			{
				//a cleared bit marks a free block; bit k of byte i stands for block i*8+(7-k)+1,
				//which mirrors the bit layout written by writeTree()
				if ((c & (1 << k)) == 0)
				{
					bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL);
					bp = bp->next;
				}
			}
		}
		fseek(treefp, Address(rootnum), SEEK_SET);
		//treefp is now ahead of root-block
	}
	this->minheap = new IVHeap(this->heap_size);
}
//Pre-read the tree file and build the skeleton of the whole tree.
//Every node is created through createNode() (flag and store block only, not in-mem);
//afterwards only the root gets its content loaded through readNode().
//_root: receives the root node, or NULL when the file holds no tree
//_leaves_head: receives the leaf reached by always following child 0 (NULL for an empty tree)
//_leaves_tail: receives the leaf reached by always following the last child (NULL for an empty tree)
//Returns true on every path visible here (see the TODO below about memory limits).
//Precondition: treefp is positioned at the root block, as the "open" branch of the constructor leaves it.
bool
IVStorage::preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail) //pre-read and build whole tree
{ //set root(in memory) and leaves_head
//TODO: false when exceed memory
_leaves_tail = _leaves_head = _root = NULL;
//Address(0) is 0, so a file position of 0 here means the stored root num was 0
if (ftell(this->treefp) == 0) //root is null
{
return true;
}
unsigned next, store, j, pos = 0;
unsigned h = *this->treeheight;
IVNode* p;
//read root node
this->createNode(p);
_root = p;
//the unsigned right after the flag is the num of the node's next block
fread(&next, sizeof(unsigned), 1, treefp);
//use stack to achieve
//NOTE(review): these are variable-length arrays sized by the tree height, which is a
//compiler extension in C++; they presume the depth of the stack never exceeds h - TODO confirm
long address[h]; //current address
unsigned used[h]; //used child num
unsigned total[h]; //total child num
unsigned block[h]; //next block num
IVNode* nodes[h];
address[pos] = ftell(treefp);
used[pos] = 0;
total[pos] = p->getNum() + 1;
block[pos] = next;
nodes[pos] = p;
pos++;
//prev is the most recently finished leaf, used to chain the leaves in visiting order
IVNode* prev = NULL;
while (pos > 0)
{
j = pos - 1;
if (nodes[j]->isLeaf() || used[j] == total[j]) //LeafNode or ready IntlNode
{
if (nodes[j]->isLeaf())
{
if (prev != NULL)
{
prev->setNext(nodes[j]);
nodes[j]->setPrev(prev);
}
prev = nodes[j];
}
pos--;
continue;
}
//go back to where the top node's child list was left and read the next child's block num
fseek(this->treefp, address[j], SEEK_SET);
fread(&store, sizeof(unsigned), 1, treefp);
//follow the block chain if the child list continues in another block
this->ReadAlign(block + j);
address[j] = ftell(treefp);
//jump to the child's first block and create it as a virtual node
fseek(treefp, Address(store), SEEK_SET);
this->createNode(p);
nodes[j]->setChild(p, used[j]);
used[j]++;
fread(&next, sizeof(unsigned), 1, treefp);
//push the child on the stack
address[pos] = ftell(treefp);
used[pos] = 0;
total[pos] = p->getNum() + 1;
block[pos] = next;
nodes[pos] = p;
pos++;
}
//set leaves and read root, which is always keeped in-mem
p = _root;
while (!p->isLeaf())
{
p = p->getChild(0);
}
_leaves_head = p;
p = _root;
while (!p->isLeaf())
{
p = p->getChild(p->getNum());
}
_leaves_tail = p;
//load the root's content and charge its size to the buffer budget
long long memory = 0;
this->readNode(_root, &memory);
this->request(memory);
return true;
}
//Translate a block num into the byte offset of that block inside the tree file.
//Block num 0 means "no block" and maps to offset 0; a num beyond cur_block_num yields -1.
//BETTER: inline function
long //8-byte in 64-bit machine
IVStorage::Address(unsigned _blocknum) const
{
	if (_blocknum == 0)
	{
		return 0;
	}
	const bool out_of_range = _blocknum > this->cur_block_num;
	if (out_of_range)
	{
		//print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum));
		return -1; //address should be non-negative
	}
	//NOTICE: the multiplication must be done explicitly in long
	const long block_index = (long)(this->SuperNum + _blocknum - 1);
	return block_index * (long)BLOCK_SIZE;
}
//Inverse of Address(): translate a byte offset in the tree file into the num of the block holding it.
unsigned
IVStorage::Blocknum(long address) const
{
	const long block_index = address / BLOCK_SIZE; //index of the block counted from the file start
	return block_index + 1 - this->SuperNum;
}
unsigned
IVStorage::AllocBlock()
{
BlockInfo* p = this->freelist->next;
if (p == NULL)
{
for (unsigned i = 0; i < SET_BLOCK_INC; ++i)
{
cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM
this->FreeBlock(cur_block_num);
}
p = this->freelist->next;
}
unsigned t = p->num;
this->freelist->next = p->next;
delete p;
return t;
}
void
IVStorage::FreeBlock(unsigned _blocknum)
{ //QUERY: head-sub and tail-add will be better?
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
this->freelist->next = bp;
}
//NOTICE: all reads are aligned to 4 bytes (including a string);
//a string may span several blocks.
//
//If the file position has reached a block boundary, continue in the block named by *_next:
//seek to it and load the num of the block after it into *_next.
void
IVStorage::ReadAlign(unsigned* _next)
{
	const bool at_boundary = (ftell(treefp) % BLOCK_SIZE == 0);
	if (!at_boundary)
	{
		return;
	}
	fseek(treefp, Address(*_next), SEEK_SET);
	fread(_next, sizeof(unsigned), 1, treefp);
}
//If the file position has reached a block boundary, allocate a new block, link it from the
//current block (*_curnum) and continue writing right after the new block's own link slot.
//_SpecialBlock: true while the current block is the first block of a node, whose link slot
//sits 4 bytes into the block (after the flag); it is reset to false once that block is left.
void
IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
{
	if (ftell(treefp) % BLOCK_SIZE != 0)
	{
		return; //still inside the current block
	}
	const unsigned fresh = this->AllocBlock();
	//store the new block's num in the link slot of the current block
	fseek(treefp, Address(*_curnum), SEEK_SET);
	if (_SpecialBlock)
	{
		fseek(treefp, 4, SEEK_CUR);
		_SpecialBlock = false;
	}
	fwrite(&fresh, sizeof(unsigned), 1, treefp);
	//skip the link slot at the head of the new block
	fseek(treefp, Address(fresh) + 4, SEEK_SET);
	*_curnum = fresh;
}
//Load the content (keys, and values for a leaf) of a node from the tree file.
//_np: node to load; nothing is done when it is NULL or already in memory
//_request: the memory taken by the loaded node is added to *_request (the caller passes it to request())
//Returns false when there is nothing to read, true otherwise.
bool
IVStorage::readNode(IVNode* _np, long long* _request)
{
if (_np == NULL || _np->inMem())
return false; //can't read or needn't
fseek(treefp, Address(_np->getStore()), SEEK_SET);
bool flag = _np->isLeaf();
unsigned next;
unsigned i, num = _np->getNum();
Bstr bstr;
//skip the flag, then read the num of the node's next block
fseek(treefp, 4, SEEK_CUR);
fread(&next, sizeof(unsigned), 1, treefp);
//read data, use readBstr...
//fread(treefp, "%u", &num);
//_np->setNum(num);
if (flag)
*_request += IVNode::LEAF_SIZE;
else
*_request += IVNode::INTL_SIZE;
_np->Normal();
//an internal node stores num+1 child block nums before its keys; they are skipped here
//NOTE(review): this is a plain seek without ReadAlign(), so it presumes all child slots
//lie in the node's first block - TODO confirm
if (!flag)
fseek(treefp, 4 * (num + 1), SEEK_CUR);
//to read all keys
//int tmp = -1;
unsigned tmp = INVALID;
for (i = 0; i < num; ++i)
{
fread(&tmp, sizeof(int), 1, treefp);
this->ReadAlign(&next);
_np->setKey(tmp, i);
}
if (flag)
{
//to read all values
for (i = 0; i < num; ++i)
{
this->readBstr(&bstr, &next);
//if not long list value
//(readBstr leaves a NULL string for a long list, so only in-node values are charged)
if(bstr.getStr() != NULL)
{
*_request += bstr.getLen();
}
_np->setValue(&bstr, i);
}
}
//_np->setFlag((_np->getFlag() & ~Node::NF_IV & ~Node::NF_ID) | Node::NF_IM);
//_np->delVirtual();
_np->delDirty();
//_np->setMem();
//the node is now in memory, so it is added to the heap
this->updateHeap(_np, _np->getRank(), false);
//NOTE(review): presumably detaches the last string, which was handed to the node by setValue(),
//so that the local Bstr does not free it - confirm against Bstr
bstr.clear();
return true;
}
//Create a virtual node (not in-mem) from the flag word at the current file position.
//The flag decides whether a leaf or an internal node is built; the node remembers the
//block it was read from. _np receives the new node; the function always returns true.
bool
IVStorage::createNode(IVNode*& _np)
{
	//QUERY: maybe next-flag... will be better-storage?
	unsigned stored_flag;
	fread(&stored_flag, sizeof(unsigned), 1, treefp);
	//WARN: according to setting
	const bool is_leaf = (stored_flag & IVNode::NF_IL) > 0;
	if (is_leaf)
	{
		_np = new IVLeafNode(true);
	}
	else
	{
		_np = new IVIntlNode(true);
	}
	_np->setFlag(stored_flag);
	_np->delDirty();
	_np->delMem();
	//the flag was just consumed, so the node starts 4 bytes before the current position
	const long node_start = ftell(treefp) - 4;
	_np->setStore(Blocknum(node_start));
	return true;
}
//BETTER: Does SpecialBlock really needed? why can't we place next before flag??
//
//NOTICE: root num begins from 1, if root num is 0, then it is invalid, i.e. the tree is NULL
//(and ftell(root address) will be 0 either)
//
//Write a node back to the tree file.
//The blocks the node occupied before are released first, then the node is written into
//freshly allocated blocks: flag, link slot, child block nums (internal node only), keys,
//values (leaf only). The last block's link slot is set to 0.
//Returns false when nothing needs to be written (NULL, not in memory, or clean with rank > 0).
bool
IVStorage::writeNode(IVNode* _np)
{
if (_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty()))
return false; //not need to write back
unsigned num = _np->getNum(), i;
//SpecialBlock stays true while writing the node's first block, whose link slot
//comes after the flag (offset 4) instead of at the head of the block
bool flag = _np->isLeaf(), SpecialBlock = true;
/*
if(!flag)
{
for(i = 0; i <= num; ++i)
if(_np->getChild(i)->isDirty())
return false; //NOTICE: all childs must be clean!
}
*/
//to release original blocks
unsigned store = _np->getStore(), next;
//if first store is 0, meaning a new node
//the first block keeps its link after the flag, hence the +4
fseek(this->treefp, Address(store) + 4, SEEK_SET);
fread(&next, sizeof(unsigned), 1, treefp);
while (store != 0)
{
this->FreeBlock(store);
store = next;
//following blocks keep their link at the head of the block
fseek(treefp, Address(store), SEEK_SET);
fread(&next, sizeof(unsigned), 1, treefp);
}
if (num == 0)
return true; //node is empty!
unsigned t;
//write Node information
unsigned blocknum = this->AllocBlock();
_np->setStore(blocknum);
long address = this->Address(blocknum);
fseek(this->treefp, address, SEEK_SET);
t = _np->getFlag();
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
//leave room for the link slot, filled in later by WriteAlign() or at the end of this function
fseek(treefp, 4, SEEK_CUR);
if (!flag)
{
//to write the block nums of all num+1 children
for (i = 0; i <= num; ++i)
{
t = _np->getChild(i)->getStore();
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
this->WriteAlign(&blocknum, SpecialBlock);
}
}
//int tmp = 0;
unsigned tmp = INVALID;
//to write all keys
for (i = 0; i < num; ++i)
{
tmp = _np->getKey(i);
fwrite(&tmp, sizeof(int), 1, treefp);
this->WriteAlign(&blocknum, SpecialBlock);
}
if (flag)
{
//to write all values
for (i = 0; i < num; ++i)
{
this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock);
}
}
//terminate the chain: write 0 into the link slot of the last block used
fseek(treefp, Address(blocknum), SEEK_SET);
if (SpecialBlock)
fseek(treefp, 4, SEEK_CUR);
t = 0;
fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block
//_np->setFlag(_np->getFlag() & ~Node::NF_ID);
//NOTICE:we may store the dirty bit into the tree file, but that is ok
//Each time we read the tree file to construct a node, we always set the dirty bit to 0
_np->delDirty();
return true;
}
//Read one value from the current file position into *_bp, following the block chain via _next.
//A stored length of 0 marks a long list: the next unsigned is then kept in the Bstr's length
//field and the string pointer is NULL. Otherwise the string is read into a buffer allocated
//with new[] and the file position is advanced past the padding to a 4-byte boundary.
//Always returns true.
bool
IVStorage::readBstr(Bstr* _bp, unsigned* _next)
{
	unsigned len;
	fread(&len, sizeof(unsigned), 1, this->treefp);
	this->ReadAlign(_next);
	//NOTICE: if this is a long list as value
	if (len == 0)
	{
		unsigned addr = 0;
		fread(&addr, sizeof(unsigned), 1, this->treefp);
#ifdef DEBUG_VLIST
		cout<<"read a vlist in IVStorage - addr: "<<addr<<endl;
#endif
		_bp->setLen(addr);
		_bp->setStr(NULL);
		this->ReadAlign(_next);
		return true;
	}
	//NOTICE: we use new for all, consistent with Bstr and KVstore
	char* buffer = new char[len];
	_bp->setLen(len);
	//whole 4-byte words first, each followed by a block-boundary check
	unsigned offset = 0;
	while (offset + 4 < len)
	{
		fread(buffer + offset, sizeof(char), 4, treefp);
		this->ReadAlign(_next);
		offset += 4;
	}
	//then the remaining bytes one by one
	for (; offset < len; ++offset)
	{
		fread(buffer + offset, sizeof(char), 1, treefp); //BETTER
	}
	//skip the padding up to the next 4-byte boundary
	const unsigned padding = (4 - len % 4) % 4;
	fseek(treefp, padding, SEEK_CUR);
	this->ReadAlign(_next);
	_bp->setStr(buffer);
	return true;
}
//Write one value at the current file position, extending the block chain via _curnum when a
//block boundary is reached. A Bstr with a NULL string is a long list: it is written as a
//0 length followed by the Bstr's length field. Otherwise the length and the string are
//written and the file position is advanced past the padding to a 4-byte boundary.
//Always returns true.
bool
IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
{
	unsigned len = _bp->getLen();
	//NOTICE: to write long list value
	if (_bp->getStr() == NULL)
	{
		unsigned marker = 0;
		fwrite(&marker, sizeof(unsigned), 1, treefp);
		this->WriteAlign(_curnum, _SpecialBlock);
		//then this is the real block num
		fwrite(&len, sizeof(unsigned), 1, treefp);
#ifdef DEBUG_VLIST
		cout<<"to write a vlist in IVStorage::writeBstr() - blocknum: "<<len<<endl;
#endif
		this->WriteAlign(_curnum, _SpecialBlock);
		return true;
	}
	fwrite(&len, sizeof(unsigned), 1, treefp);
	this->WriteAlign(_curnum, _SpecialBlock);
	char* data = _bp->getStr();
	//whole 4-byte words first, each followed by a block-boundary check
	unsigned offset = 0;
	while (offset + 4 < len)
	{
		fwrite(data + offset, sizeof(char), 4, treefp);
		this->WriteAlign(_curnum, _SpecialBlock);
		offset += 4;
	}
	//then the remaining bytes one by one
	for (; offset < len; ++offset)
	{
		fwrite(data + offset, sizeof(char), 1, treefp);
	}
	//skip the padding up to the next 4-byte boundary
	const unsigned padding = (4 - len % 4) % 4;
	fseek(treefp, padding, SEEK_CUR);
	this->WriteAlign(_curnum, _SpecialBlock);
	return true;
}
//Write the whole tree back to the tree file: the header (height, root num, current block
//num), every node that needs writing, and the block-usage bitmap.
//_root: root of the tree, NULL for an empty tree (root num 0 is stored then)
//Always returns true. The file is not closed here (the fclose below is commented out).
bool
IVStorage::writeTree(IVNode* _root) //write the whole tree back (treefp stays open)
{
fseek(this->treefp, 0, SEEK_SET);
fwrite(this->treeheight, sizeof(unsigned), 1, treefp);
//delete all nonsense-node in heap, otherwise will waste storage permanently
IVNode* p;
while (1)
{ //all non-sense nodes will be in-head-area, due to minimal rank
p = minheap->getTop();
if (p == NULL) //heap is empty, only when root==NULL
break;
if (p->getRank() == 0) //indicate non-sense node
{
this->minheap->remove();
this->writeNode(p);
delete p;
}
else
break;
}
unsigned i, j, t;
//QUERY: another way to write all nodes back is to print out all nodes in heap
//but this method will cause no node in heap any more, while operations may be
//after tree-saving. Which method is better?
//write nodes recursively using stack, including root-num
if (_root != NULL)
{
IVNode* p = _root;
unsigned h = *this->treeheight, pos = 0;
//NOTE(review): variable-length arrays sized by the tree height (a compiler extension in C++)
//ns is the stack of nodes on the current path, ni[k] the index of the next child of ns[k] to visit
IVNode* ns[h];
int ni[h];
ns[pos] = p;
ni[pos] = p->getNum();
pos++;
while (pos > 0)
{
j = pos - 1;
p = ns[j];
//a node is written only after all of its children, so the child store nums it records are final
if (p->isLeaf() || ni[j] < 0) //leaf or all childs are ready
{
this->writeNode(p);
pos--;
continue;
}
//children are visited from the last one down to child 0
ns[pos] = p->getChild(ni[j]);
ni[pos] = ns[pos]->getNum();
pos++;
ni[j]--;
}
t = _root->getStore();
}
else
t = 0;
fseek(this->treefp, 4, SEEK_SET);
fwrite(&t, sizeof(unsigned), 1, treefp); //write the root num
fwrite(&cur_block_num, sizeof(unsigned), 1, treefp);//write current blocks num
//the block-usage bitmap starts at the second block of the file
fseek(treefp, BLOCK_SIZE, SEEK_SET);
j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
//reset to 1 first
for (i = 0; i < j; ++i)
{
fputc(0xff, treefp);
}
char c;
BlockInfo* bp = this->freelist->next;
while (bp != NULL)
{
//if not-use then set 0, aligned to byte!
#ifdef DEBUG_KVSTORE
if (bp->num > cur_block_num)
{
printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
exit(1);
}
#endif
//block num n is kept in byte (n-1)/8 of the bitmap, at bit 7-(n-1)%8
j = bp->num - 1;
i = j / 8;
j = 7 - j % 8;
fseek(treefp, BLOCK_SIZE + i, SEEK_SET);
c = fgetc(treefp);
//step back over the byte just read so that it is overwritten in place
fseek(treefp, -1, SEEK_CUR);
fputc(c & ~(1 << j), treefp);
bp = bp->next;
}
//fclose(this->treefp);
return true;
}
//Give a node a new rank and keep the heap consistent with it.
//_inheap == false: the node is not in the heap yet and is inserted.
//_inheap == true: the node is already in the heap and is repositioned when its rank changed.
void
IVStorage::updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const
{
	if (!_inheap)
	{
		//not in heap, to add
		_np->setRank(_rank);
		this->minheap->insert(_np);
		return;
	}
	//already in heap, to modify
	const unsigned old_rank = _np->getRank();
	_np->setRank(_rank);
	if (old_rank == _rank)
	{
		return; //rank unchanged, position stays valid
	}
	//second argument: true when the rank got smaller, false when it got larger
	this->minheap->modify(_np, old_rank > _rank);
}
//Account for a memory request against the buffer budget (aligned to byte).
//_needmem > 0 asks for memory, _needmem < 0 gives memory back.
//When the free memory does not cover a request, handler() is asked to swap nodes out;
//if that fails, the error is logged and false is returned without touching freemem.
bool
IVStorage::request(long long _needmem)
{
	//cout<<"freemem: "<<this->freemem<<" needmem: "<<_needmem<<endl;
	const bool must_swap = _needmem > 0 && this->freemem < (unsigned long long)_needmem;
	if (must_swap)
	{
		const unsigned long long shortfall = _needmem - freemem;
		if (!this->handler(shortfall)) //disaster in buffer memory
		{
			print(string("error in request: out of buffer-mem, now to exit"));
			//exit(1);
			return false;
		}
	}
	this->freemem -= _needmem;
	return true;
}
bool
IVStorage::handler(unsigned long long _needmem) //>0
{
//cout<<"swap happen"<<endl;
IVNode* p;
unsigned long long size;
//if(_needmem < SET_BUFFER_SIZE) //to recover to SET_BUFFER_SIZE buffer
// _needmem = SET_BUFFER_SIZE;
//cout<<"IVStorage::handler() - now to loop to release nodes"<<endl;
while (1)
{
p = this->minheap->getTop();
//cout<<"get heap top"<<endl;
if (p == NULL)
{
cout << "the heap top is null" << endl;
return false; //can't satisfy or can't recover to SET_BUFFER_SIZE
}
this->minheap->remove();
//cout<<"node removed in heap"<<endl;
size = p->getSize();
this->freemem += size;
this->writeNode(p);
//cout<<"node write back"<<endl;
if (p->getNum() > 0)
p->Virtual();
else
delete p; //non-sense node
//cout<<"node memory released"<<endl;
if (_needmem > size)
{
//cout<<"reduce the request"<<endl;
_needmem -= size;
}
else
{
//cout<<"ok to break"<<endl;
break;
}
}
//cout<<"IVStorage::handler() -- finished"<<endl;
return true;
}
//Release the free-block list and the buffer heap, and close the tree file.
//The tree itself is not written back here; see writeTree().
IVStorage::~IVStorage()
{
	//release heap and freelist...
#ifdef DEBUG_KVSTORE
	printf("now to release the kvstore!\n");
#endif
	BlockInfo* bp = this->freelist;
	BlockInfo* next;
	while (bp != NULL)
	{
		next = bp->next;
		delete bp;
		bp = next;
	}
	this->freelist = NULL;
#ifdef DEBUG_KVSTORE
	printf("already empty the freelist!\n");
#endif
	delete this->minheap;
	this->minheap = NULL;
#ifdef DEBUG_KVSTORE
	printf("already empty the buffer heap!\n");
#endif
	//NOTICE: treefp is NULL for a default-constructed object or when the file could not
	//be opened, and passing NULL to fclose is undefined behaviour
	if (this->treefp != NULL)
	{
		fclose(this->treefp);
		this->treefp = NULL;
	}
	//#ifdef DEBUG_KVSTORE
	//NOTICE:there is more than one tree
	//fclose(Util::debug_kvstore); //NULL is ok!
	//Util::debug_kvstore = NULL;
	//#endif
}
//DEBUG helper: append a message to the kvstore debug log, preceded by the text returned
//by Util::showtime() and the class name. The body is empty unless DEBUG_KVSTORE is defined.
void
IVStorage::print(string s)
{
#ifdef DEBUG_KVSTORE
	FILE* log = Util::debug_kvstore;
	fputs(Util::showtime().c_str(), log);
	fputs("Class IVStorage\n", log);
	fputs("Message: ", log);
	fputs(s.c_str(), log);
	fputs("\n", log);
#endif
}

View File

@ -0,0 +1,84 @@
/*=============================================================================
# Filename: IVStorage.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:43
# Description: swap between memory and disk, achieving system-like method
=============================================================================*/
#ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
#define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
#include "../../../Util/VList.h"
#include "../node/IVIntlNode.h"
#include "../node/IVLeafNode.h"
#include "../heap/IVHeap.h"
//It controls read, write, swap
//IVStorage owns the tree file of one IVTree: it maps block nums to file offsets, keeps the
//list of free blocks, reads and writes nodes, and swaps nodes out when the buffer is full.
//File layout as used by IVStorage.cpp: the first block holds the header (height, root num,
//current block num), the block-usage bitmap starts at the second block, and data block n
//(n >= 1) starts at (SuperNum + n - 1) * BLOCK_SIZE.
class IVStorage
{
public:
static const unsigned BLOCK_SIZE = Util::STORAGE_BLOCK_SIZE; //fixed size of disk-block
//there are 18 B+Tree indexes and one vstree index, so set 3G buffer size
//static const unsigned long long MAX_BUFFER_SIZE = Util::MAX_BUFFER_SIZE; //max buffer size
//static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size
//static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE / IVNode::INTL_SIZE;
//DEBUG: maybe need to set larger, now the file size is 64G at most
static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num
//below two constants: must can be exactly divided by 8
static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
//num of blocks reserved at the head of the file: one bit per possible block for the bitmap, plus the header block
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
//static const unsigned TRANSFER_CAPACITY = BLOCK_SIZE;
//enum ReadType { OVER = 0, EXPAND, NORMAL };
private:
unsigned long long max_buffer_size; //buffer budget in bytes
unsigned heap_size; //capacity passed to the heap: max_buffer_size / IVNode::INTL_SIZE
unsigned cur_block_num; //num of blocks currently covered by the bitmap and the free list
std::string filepath; //path of the tree file
unsigned* treeheight; //points to the tree height handed in through the constructor
BlockInfo* freelist; //list of free blocks, with a null-head
FILE* treefp; //file: tree nodes
IVHeap* minheap; //heap of Nodes's pointer, sorted in NF_RK
//very long value list are stored in a separate file(with large block)
//
//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
//<10%: 5000000~100M bytes
VList* value_list;
//NOTICE: freemem's type is long long here, due to large memory in server.
//However, needmem in handler() and request() is ok to be int/unsigned.
//Because the bstr' size is controlled, so is the node.
unsigned long long freemem; //free memory to use, non-negative
//unsigned long long time; //QUERY(achieving an old-swap strategy?)
long Address(unsigned _blocknum) const; //block num -> byte offset in the file (0 for block 0, -1 if out of range)
unsigned Blocknum(long address) const; //byte offset in the file -> block num
unsigned AllocBlock(); //take a block off the free list, growing the block range when the list is empty
void FreeBlock(unsigned _blocknum); //put a block onto the free list
void ReadAlign(unsigned* _next); //at a block boundary, continue reading in block *_next
void WriteAlign(unsigned* _next, bool& _SpecialBlock); //at a block boundary, allocate and link a new block
public:
IVStorage();
IVStorage(std::string& _filepath, std::string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist); //create a fixed-size file or open an existence
bool preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail); //read and build all nodes, only root in memory
bool readNode(IVNode* _np, long long* _request); //read, if virtual
bool createNode(IVNode*& _np); //use fp to create a new node
//NOTICE(if children and child not exist, build children's Nodes)
bool writeNode(IVNode* _np);
bool readBstr(Bstr* _bp, unsigned* _next); //read one value, following the block chain through _next
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock); //write one value, extending the block chain
bool writeTree(IVNode* _np); //write header, nodes and block bitmap back to the file
void updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const; //set a node's rank and insert it into / reposition it in the heap
bool request(long long _needmem); //deal with memory request
bool handler(unsigned long long _needmem); //swap some nodes out
//bool update(); //update InMem Node's rank, with clock
~IVStorage();
void print(std::string s); //DEBUG
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,21 @@
#define _KVSTORE_KVSTORE_H
#include "../Util/Util.h"
#include "../Util/VList.h"
#include "Tree.h"
//TODO: is it needed to keep a length in Bstr?? especially for IVTree?
//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment)
//add a \0 in tail: only add 1 char
//QUERY: but to count the length each time maybe very costly?
//No, because triple num is stored in char* now!!!! we do not need to save it again
//TODO: entity_border in s2values list is not needed!!! not waste memory here
//
//QUERY: but to implement vlist, we need a unsigned flag
//What is more, we need to store the string in disk, how can we store it if without the length?
//unsigned type stored as chars, maybe will have '\0'
//In memory, we do not know when the oidlist ends if without the original length (but the triple num will answer this!)
class KVstore
{
public:
@ -164,9 +177,9 @@ private:
static unsigned short buffer_literal2id_query;
static unsigned short buffer_id2literal_query;
ISTree* subID2values;
ISTree* objID2values;
ISTree* preID2values;
IVTree* subID2values;
IVTree* objID2values;
IVTree* preID2values;
static std::string s_sID2values;
static std::string s_oID2values;
static std::string s_pID2values;
@ -181,23 +194,31 @@ private:
bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
bool open(IVTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
void flush(SITree* _p_btree);
void flush(ISTree* _p_btree);
void flush(IVTree* _p_btree);
bool addValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned _val);
bool addValueByKey(ISTree* _p_btree, unsigned _key, const char* _val, unsigned _vlen);
bool addValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val);
bool addValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool setValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned _val);
bool setValueByKey(ISTree* _p_btree, unsigned _key, const char* _val, unsigned _vlen);
bool setValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val);
bool setValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool getValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned* _val) const;
bool getValueByKey(ISTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const;
bool getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const;
TYPE_ENTITY_LITERAL_ID getIDByStr(SITree* _p_btree, const char* _key, unsigned _klen) const;
bool removeKey(SITree* _p_btree, const char* _key, unsigned _klen);
bool removeKey(ISTree* _p_btree, unsigned _key);
bool removeKey(IVTree* _p_btree, unsigned _key);
static std::vector<unsigned> intersect(const unsigned* _list1, const unsigned* _list2, unsigned _len1, unsigned _len2);
static unsigned binarySearch(unsigned key, const unsigned* _list, unsigned _list_len, int step = 1);

View File

@ -20,7 +20,7 @@ SITree::SITree()
TSM = NULL;
storepath = "";
filename = "";
transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
//transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
this->request = 0;
}
@ -36,10 +36,10 @@ SITree::SITree(string _storepath, string _filename, string _mode, unsigned long
this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
else
this->root = NULL;
this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
//this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
this->request = 0;
}
@ -49,30 +49,30 @@ SITree::getFilePath()
return storepath + "/" + filename;
}
void //WARN: not check _str and _len
SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
{
if (_index > 2)
return;
/*
if(_str == NULL || _len == 0)
{
printf("error in CopyToTransfer: empty string\n");
return;
}
*/
//unsigned length = _bstr->getLen();
unsigned length = _len;
if (length + 1 > this->transfer_size[_index])
{
transfer[_index].release();
transfer[_index].setStr((char*)malloc(length + 1));
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
}
memcpy(this->transfer[_index].getStr(), _str, length);
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
this->transfer[_index].setLen(length);
}
//void //WARN: not check _str and _len
//SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
//{
//if (_index > 2)
//return;
//[>
//if(_str == NULL || _len == 0)
//{
//printf("error in CopyToTransfer: empty string\n");
//return;
//}
//*/
////unsigned length = _bstr->getLen();
//unsigned length = _len;
//if (length + 1 > this->transfer_size[_index])
//{
//transfer[_index].release();
//transfer[_index].setStr((char*)malloc(length + 1));
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
//}
//memcpy(this->transfer[_index].getStr(), _str, length);
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
//this->transfer[_index].setLen(length);
//}
unsigned
SITree::getHeight() const
@ -110,33 +110,39 @@ SITree::search(const char* _str, unsigned _len, unsigned* _val)
//*_val = -1;
return false;
}
this->CopyToTransfer(_str, _len, 1);
//this->CopyToTransfer(_str, _len, 1);
request = 0;
Bstr bstr = this->transfer[1]; //not to modify its memory
//Bstr bstr = this->transfer[1]; //not to modify its memory
//Bstr bstr(_str, _len, true);
int store;
SINode* ret = this->find(&transfer[1], &store, false);
if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
SINode* ret = this->find(_str, _len, &store, false);
if (ret == NULL || store == -1) //tree is empty or not found
{
//bstr.clear();
return false;
}
const Bstr* tmp = ret->getKey(store);
if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found
{
bstr.clear();
return false;
}
*_val = ret->getValue(store);
this->TSM->request(request);
bstr.clear();
//bstr.clear();
return true;
}
bool
SITree::insert(const char* _str, unsigned _len, unsigned _val)
SITree::insert(char* _str, unsigned _len, unsigned _val)
{
if (_str == NULL || _len == 0)
{
printf("error in SITree-insert: empty string\n");
return false;
}
this->CopyToTransfer(_str, _len, 1);
//this->CopyToTransfer(_str, _len, 1);
this->request = 0;
SINode* ret;
@ -171,8 +177,8 @@ SITree::insert(const char* _str, unsigned _len, unsigned _val)
SINode* p = this->root;
SINode* q;
int i;
const Bstr* _key = &transfer[1];
Bstr bstr = *_key;
//const Bstr* _key = &transfer[1];
//Bstr bstr = *_key;
while (!p->isLeaf())
{
//j = p->getNum();
@ -180,7 +186,7 @@ SITree::insert(const char* _str, unsigned _len, unsigned _val)
//if(bstr < *(p->getKey(i)))
//break;
//NOTICE: using binary search is better here
i = p->searchKey_less(bstr);
i = p->searchKey_less(_str, _len);
q = p->getChild(i);
this->prepare(q);
@ -197,7 +203,10 @@ SITree::insert(const char* _str, unsigned _len, unsigned _val)
this->TSM->updateHeap(ret, ret->getRank(), false);
this->TSM->updateHeap(q, q->getRank(), true);
this->TSM->updateHeap(p, p->getRank(), true);
if (bstr < *(p->getKey(i)))
//if (bstr < *(p->getKey(i)))
const Bstr* tmp = p->getKey(i);
int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen());
if (cmp_res < 0)
p = q;
else
p = ret;
@ -213,24 +222,34 @@ SITree::insert(const char* _str, unsigned _len, unsigned _val)
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
i = p->searchKey_less(_str, _len);
//insert existing key is ok, but not inserted in
//however, the tree-shape may change due to possible split in former code
bool ifexist = false;
if (i > 0 && bstr == *(p->getKey(i - 1)))
ifexist = true;
else
//if (i > 0 && bstr == *(p->getKey(i - 1)))
if (i > 0)
{
p->addKey(_key, i, true);
const Bstr* tmp = p->getKey(i-1);
int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen());
if(cmp_res == 0)
{
ifexist = true;
}
}
if(!ifexist)
{
p->addKey(_str, _len, i, true);
p->addValue(_val, i);
p->addNum();
request += _key->getLen();
request += _len;
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
}
this->TSM->request(request);
bstr.clear(); //NOTICE: must be cleared!
//bstr.clear(); //NOTICE: must be cleared!
return !ifexist; //QUERY(which case:return false)
}
@ -243,35 +262,42 @@ SITree::modify(const char* _str, unsigned _len, unsigned _val)
printf("error in SITree-modify: empty string\n");
return false;
}
this->CopyToTransfer(_str, _len, 1);
//this->CopyToTransfer(_str, _len, 1);
this->request = 0;
const Bstr* _key = &transfer[1];
Bstr bstr = *_key;
//const Bstr* _key = &transfer[1];
//Bstr bstr = *_key;
int store;
SINode* ret = this->find(_key, &store, true);
if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
SINode* ret = this->find(_str, _len, &store, true);
if (ret == NULL || store == -1) //tree is empty or not found
{
bstr.clear();
//bstr.clear();
return false;
}
const Bstr* tmp = ret->getKey(store);
if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found
{
return false;
}
ret->setValue(_val, store);
ret->setDirty();
this->TSM->request(request);
bstr.clear();
//bstr.clear();
return true;
}
//this function is useful for search and modify, and range-query
SINode* //return the first key's position that >= *_key
SITree::find(const Bstr* _key, int* _store, bool ifmodify)
SITree::find(const char* _str, unsigned _len, int* _store, bool ifmodify)
{ //to assign value for this->bstr, function shouldn't be const!
if (this->root == NULL)
return NULL; //SITree Is Empty
SINode* p = root;
int i, j;
Bstr bstr = *_key; //local Bstr: multiple delete
//Bstr bstr = *_key; //local Bstr: multiple delete
while (!p->isLeaf())
{
if (ifmodify)
@ -280,7 +306,7 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify)
//for(i = 0; i < j; ++i) //BETTER(Binary-Search)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
i = p->searchKey_less(_str, _len);
p = p->getChild(i);
this->prepare(p);
@ -290,13 +316,14 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify)
//for(i = 0; i < j; ++i)
//if(bstr <= *(p->getKey(i)))
//break;
i = p->searchKey_lessEqual(bstr);
i = p->searchKey_lessEqual(_str, _len);
if (i == j)
*_store = -1; //Not Found
else
*_store = i;
bstr.clear();
//bstr.clear();
return p;
}
@ -316,24 +343,25 @@ SITree::remove(const char* _str, unsigned _len)
printf("error in SITree-remove: empty string\n");
return false;
}
this->CopyToTransfer(_str, _len, 1);
//this->CopyToTransfer(_str, _len, 1);
request = 0;
const Bstr* _key = &transfer[1];
//const Bstr* _key = &transfer[1];
SINode* ret;
if (this->root == NULL) //tree is empty
return false;
SINode* p = this->root;
SINode* q;
int i, j;
Bstr bstr = *_key;
//Bstr bstr = *_key;
while (!p->isLeaf())
{
j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
i = p->searchKey_less(_str, _len);
q = p->getChild(i);
this->prepare(q);
@ -347,6 +375,7 @@ SITree::remove(const char* _str, unsigned _len)
if (ret != NULL)
this->TSM->updateHeap(ret, 0, true);//non-sense node
this->TSM->updateHeap(q, q->getRank(), true);
if (q->isLeaf())
{
if (q->getPrev() == NULL)
@ -354,6 +383,7 @@ SITree::remove(const char* _str, unsigned _len)
if (q->getNext() == NULL)
this->leaves_tail = q;
}
if (p->getNum() == 0) //root shrinks
{
//this->leaves_head = q;
@ -369,7 +399,7 @@ SITree::remove(const char* _str, unsigned _len)
}
bool flag = false;
i = p->searchKey_equal(bstr);
i = p->searchKey_equal(_str, _len);
//WARN+NOTICE:here must check, because the key to remove maybe not exist
if (i != (int)p->getNum())
{
@ -390,7 +420,7 @@ SITree::remove(const char* _str, unsigned _len)
}
this->TSM->request(request);
bstr.clear();
//bstr.clear();
return flag; //i == j, not found
}

View File

@ -3,7 +3,7 @@
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:44
# Description: struct and interface of the B+ tree
# Description: string2ID, including entity2id, literal2id, predicate2id
=============================================================================*/
#ifndef _KVSTORE_SITREE_SITREE_H
@ -36,13 +36,19 @@ private:
//so lock is a must. Add lock to transfer is better than to add
//lock to every key/value. However, modify requires a lock for a
//key/value, and multiple search for different keys are ok!!!
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
unsigned transfer_size[3];
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
//unsigned transfer_size[3];
//TODO: in all B+ trees, update operations should lock the whole tree, while search operations should not
//However, the transfer bstr may cause parallelism errors!!!!
//Why do we need the transfer? It is ok to pass the original string pointer to return
//A problem is that before the caller ends, the tree can not be modified (so a read-write lock is required)
std::string storepath;
std::string filename; //ok for user to change
/* some private functions */
std::string getFilePath(); //in UNIX system
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
void release(SINode* _np) const;
//tree's operations should be atom(if read nodes)
@ -59,9 +65,10 @@ public:
SINode* getRoot() const;
//insert, search, remove, set
bool search(const char* _str, unsigned _len, unsigned* _val);
bool insert(const char* _str, unsigned _len, unsigned _val);
bool insert(char* _str, unsigned _len, unsigned _val);
bool modify(const char* _str, unsigned _len, unsigned _val);
SINode* find(const Bstr* _key, int* store, bool ifmodify);
SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify);
bool remove(const char* _str, unsigned _len);
bool save();
~SITree();

View File

@ -30,15 +30,21 @@ public:
unsigned getValue(int _index) const;
bool setValue(unsigned _val, int _index);
bool addValue(unsigned _val, int _index);
bool subValue(int _index);
void setPrev(SINode* _prev);
void setNext(SINode* _next);
unsigned getSize() const;
SINode* split(SINode* _father, int _index);
SINode* coalesce(SINode* _father, int _index);
void release();
~SILeafNode();
void print(std::string s); //DEBUG
/*non-sense virtual function
Node* getChild(int _index) const;
bool addChild(Node* _child, int _index);

View File

@ -255,6 +255,27 @@ SINode::addKey(const Bstr* _key, int _index, bool ifcopy)
return true;
}
//Insert a key made of the raw buffer (_str, _len) at position _index,
//shifting the keys at [_index, num) one slot to the right.
//The pointer is stored as-is through setStr (no copy is made here); ifcopy is not consulted.
//Returns false (after printing an error) when _index is outside [0, num].
//NOTE(review): the key count is not changed in this function - presumably the caller adjusts it; verify.
bool
SINode::addKey(char* _str, unsigned _len, int _index, bool ifcopy)
{
    const int total = this->getNum();
    const bool valid = (_index >= 0) && (_index <= total);
    if (!valid)
    {
        print(string("error in addKey: Invalid index ") + Util::int2string(_index));
        return false;
    }

    //NOTICE: with total == MAX_KEY_NUM this would touch keys[MAX_KEY_NUM], which is not legal;
    //tree operations ensure a full node is split before anything is added to it
    int slot = total;
    while (slot > _index)
    {
        keys[slot] = keys[slot - 1];
        --slot;
    }

    keys[_index].setStr(_str);
    keys[_index].setLen(_len);
    return true;
}
bool
SINode::subKey(int _index, bool ifdel)
{
@ -330,3 +351,55 @@ SINode::searchKey_lessEqual(const Bstr& _bstr) const
return ret;
}
//Binary search over this node's keys for the raw string (_str, _len).
//Returns the first index whose key compares greater than the string, or the
//key count when no key does (assuming keys are kept in ascending order).
//Util::compare(...) > 0 stands in for the former `keys[mid] > _bstr` test.
int
SINode::searchKey_less(const char* _str, unsigned _len) const
{
    int left = 0;
    int right = this->getNum() - 1;
    while (left <= right)
    {
        const int pivot = (left + right) / 2;
        const bool key_is_bigger =
            Util::compare(this->keys[pivot].getStr(), this->keys[pivot].getLen(), _str, _len) > 0;
        if (!key_is_bigger)
        {
            //everything up to pivot is <= the target, continue on the right side
            left = pivot + 1;
            continue;
        }
        //pivot is a candidate; when the window cannot shrink any further it is the answer
        if (pivot == left)
            break;
        right = pivot;
    }
    return left;
}
//Exact-match lookup for the raw string (_str, _len).
//Returns the index of the key that compares equal to the string, or the key
//count (getNum()) when there is no such key.
int
SINode::searchKey_equal(const char* _str, unsigned _len) const
{
    const int total = this->getNum();
    //upper is the position found by searchKey_less; a match, if any, sits right before it
    const int upper = this->searchKey_less(_str, _len);
    if (upper <= 0)
        return total;

    const int candidate = upper - 1;
    const bool same =
        Util::compare(this->keys[candidate].getStr(), this->keys[candidate].getLen(), _str, _len) == 0;
    return same ? candidate : total;
}
//Lookup for the raw string (_str, _len) that never reports "not found":
//returns the index of the key equal to the string when there is one,
//otherwise the position returned by searchKey_less.
int
SINode::searchKey_lessEqual(const char* _str, unsigned _len) const
{
    const int upper = this->searchKey_less(_str, _len);
    if (upper <= 0)
        return upper;

    const int candidate = upper - 1;
    const bool same =
        Util::compare(this->keys[candidate].getStr(), this->keys[candidate].getLen(), _str, _len) == 0;
    if (same)
        return candidate;
    return upper;
}

View File

@ -64,9 +64,11 @@ public:
void setStore(unsigned _store);
unsigned getFlag() const;
void setFlag(unsigned _flag);
const Bstr* getKey(int _index) const; //need to check the index
bool setKey(const Bstr* _key, int _index, bool ifcopy = false);
bool addKey(const Bstr* _key, int _index, bool ifcopy = false);
bool addKey(char* _str, unsigned _len, int _index, bool ifcopy = false);
bool subKey(int _index, bool ifdel = false);
//several binary key search utilities
@ -74,7 +76,12 @@ public:
int searchKey_equal(const Bstr& _bstr) const;
int searchKey_lessEqual(const Bstr& _bstr) const;
int searchKey_less(const char* _str, unsigned _len) const;
int searchKey_equal(const char* _str, unsigned _len) const;
int searchKey_lessEqual(const char* _str, unsigned _len) const;
//virtual functions: polymorphic
//NOTICE: not pure-virtual, not required to be implemented again, can be used now
virtual SINode* getChild(int _index) const { return NULL; };
virtual bool setChild(SINode* _child, int _index) { return true; };
virtual bool addChild(SINode* _child, int _index) { return true; };
@ -87,6 +94,8 @@ public:
virtual bool subValue(int _index) { return true; };
virtual void setPrev(SINode* _prev) {};
virtual void setNext(SINode* _next) {};
//NOTICE: pure-virtual, must to be implemented again in the sub-class
virtual void Virtual() = 0;
virtual void Normal() = 0;
virtual unsigned getSize() const = 0; //return all memory owned

View File

@ -427,7 +427,8 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next)
fread(&len, sizeof(unsigned), 1, this->treefp);
this->ReadAlign(_next);
//this->request(len);
char* s = (char*)malloc(len);
//char* s = (char*)malloc(len);
char* s = new char[len];
_bp->setLen(len);
for (i = 0; i + 4 < len; i += 4)
{

View File

@ -13,6 +13,14 @@
#include "../node/SILeafNode.h"
#include "../heap/SIHeap.h"
//TODO: whether to use heap or not, is a big question
//For single-query application, it seems that LRU list like VSTree is a better choice(no much cost in the buffer itself)
//But in multiple-queries case, things maybe different
//BETTER:
//add a heap position in node, to speed up the node-pointer searching
//lower the update times of heap, if the size is 128M, then each update is 27 at most
//if not update in time, then the heap maybe not be a heap, then why do we use heap? why not a simple array?
//It controls read, write, swap
class SIStorage
{

View File

@ -2,3 +2,4 @@
#include "ISTree/ISTree.h"
#include "SITree/SITree.h"
#include "IVTree/IVTree.h"

View File

@ -11,9 +11,9 @@
using namespace std;
#define GSERVER_PORT_FILE "bin/.gserver_port"
#define GSERVER_PORT_SWAP "bin/.gserver_port.swap"
#define GSERVER_LOG "logs/gserver.log"
//#define GSERVER_PORT_FILE "bin/.gserver_port"
//#define GSERVER_PORT_SWAP "bin/.gserver_port.swap"
//#define GSERVER_LOG "logs/gserver.log"
bool isOnlyProcess(const char* argv0);
void checkSwap();
@ -61,7 +61,7 @@ int main(int argc, char* argv[])
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
if (argc == 3) {
if (!Util::isValidPort(string(argv[2]))) {
cout << "Invalid port: " << argv[2] << endl;
cerr << "Invalid port: " << argv[2] << endl;
return -1;
}
else {
@ -70,9 +70,9 @@ int main(int argc, char* argv[])
}
}
if (!isOnlyProcess(argv[0])) {
ofstream out(GSERVER_PORT_SWAP, ios::out);
ofstream out(Util::gserver_port_swap.c_str());
if (!out) {
cout << "Failed to change port!" << endl;
cerr << "Failed to change port!" << endl;
return -1;
}
out << port;
@ -80,9 +80,9 @@ int main(int argc, char* argv[])
cout << "Port will be changed to " << port << " after the current server stops or restarts." << endl;
return 0;
}
ofstream out(GSERVER_PORT_FILE, ios::out);
ofstream out(Util::gserver_port_file.c_str());
if (!out) {
cout << "Failed to change port!" << endl;
cerr << "Failed to change port!" << endl;
return -1;
}
out << port;
@ -93,10 +93,15 @@ int main(int argc, char* argv[])
if (mode == "-s" || mode == "--start") {
if (!isOnlyProcess(argv[0])) {
cout << "gServer already running!" << endl;
cerr << "gServer already running!" << endl;
return -1;
}
if (startServer()) {
sleep(1);
if (isOnlyProcess(argv[0])) {
cerr << "Server stopped unexpectedly. Check for port conflicts!" << endl;
return -1;
}
return 0;
}
else {
@ -106,7 +111,7 @@ int main(int argc, char* argv[])
if (mode == "-t" || mode == "--stop") {
if (isOnlyProcess(argv[0])) {
cout << "gServer not running!" << endl;
cerr << "gServer not running!" << endl;
return -1;
}
if (stopServer()) {
@ -119,7 +124,7 @@ int main(int argc, char* argv[])
if (mode == "-r" || mode == "--restart") {
if (isOnlyProcess(argv[0])) {
cout << "gServer not running!" << endl;
cerr << "gServer not running!" << endl;
return -1;
}
if (!stopServer()) {
@ -133,14 +138,14 @@ int main(int argc, char* argv[])
if (mode == "-P" || mode == "--printport") {
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
ifstream in(GSERVER_PORT_FILE);
ifstream in(Util::gserver_port_file.c_str());
if (in) {
in >> port;
in.close();
}
cout << "Current connection port is " << port << '.' << endl;
unsigned short portSwap = 0;
ifstream inSwap(GSERVER_PORT_SWAP);
ifstream inSwap(Util::gserver_port_swap.c_str());
if (inSwap) {
inSwap >> portSwap;
inSwap.close();
@ -153,14 +158,14 @@ int main(int argc, char* argv[])
if (mode == "-k" || mode == "--kill") {
if (isOnlyProcess(argv[0])) {
cout << "No process to kill!" << endl;
cerr << "No process to kill!" << endl;
return -1;
}
execl("/usr/bin/killall", "killall", Util::getExactPath(argv[0]).c_str(), NULL);
return 0;
}
cout << "Invalid arguments! Input \"bin/gserver -h\" for help." << endl;
cerr << "Invalid arguments! Type \"bin/gserver -h\" for help." << endl;
return -1;
}
@ -169,38 +174,38 @@ bool isOnlyProcess(const char* argv0) {
}
void checkSwap() {
if (access(GSERVER_PORT_SWAP, 00) != 0) {
if (access(Util::gserver_port_swap.c_str(), 00) != 0) {
return;
}
ifstream in(GSERVER_PORT_SWAP, ios::in);
ifstream in(Util::gserver_port_swap.c_str());
if (!in) {
cout << "Failed in checkSwap(), port may not be changed." << endl;
cerr << "Failed in checkSwap(), port may not be changed." << endl;
return;
}
unsigned short port;
in >> port;
in.close();
ofstream out(GSERVER_PORT_FILE, ios::out);
ofstream out(Util::gserver_port_file.c_str());
if (!out) {
cout << "Failed in checkSwap(), port may not be changed." << endl;
cerr << "Failed in checkSwap(), port may not be changed." << endl;
return;
}
out << port;
out.close();
chmod(GSERVER_PORT_FILE, 0644);
string cmd = string("rm ") + GSERVER_PORT_SWAP;
chmod(Util::gserver_port_file.c_str(), 0644);
string cmd = string("rm ") + Util::gserver_port_swap;
system(cmd.c_str());
}
bool startServer() {
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
ifstream in(GSERVER_PORT_FILE, ios::in);
ifstream in(Util::gserver_port_file.c_str());
if (!in) {
ofstream out(GSERVER_PORT_FILE, ios::out);
ofstream out(Util::gserver_port_file.c_str());
if (out) {
out << port;
out.close();
chmod(GSERVER_PORT_FILE, 0644);
chmod(Util::gserver_port_file.c_str(), 0644);
}
}
else {
@ -215,11 +220,19 @@ bool startServer() {
if (!Util::dir_exist("logs")) {
Util::create_dir("logs");
}
freopen(GSERVER_LOG, "a", stdout);
freopen(GSERVER_LOG, "a", stderr);
freopen(Util::gserver_log.c_str(), "a", stdout);
freopen(Util::gserver_log.c_str(), "a", stderr);
int status;
while (true) {
fpid = fork();
// child, main process
if (fpid == 0) {
Server server(port);
if (!server.createConnection()) {
cout << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl;
cerr << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl;
return false;
}
cout << Util::getTimeString() << "Server started at port " << port << '.' << endl;
@ -227,35 +240,55 @@ bool startServer() {
exit(0);
return true;
}
// parent, daemon process
else if (fpid > 0) {
waitpid(fpid, &status, 0);
if (WIFEXITED(status)) {
exit(0);
return true;
}
cerr << Util::getTimeString() << "Server stopped abnormally, restarting server..." << endl;
}
// fork failure
else {
cerr << Util::getTimeString() << "Failed to start server: deamon fork failure." << endl;
return false;
}
}
}
// parent
else if (fpid > 0) {
cout << "Server started at port " << port << '.' << endl;
return true;
}
// fork failure
else {
cout << "Failed to start server at port " << port << '.' << endl;
cerr << "Failed to start server at port " << port << '.' << endl;
return false;
}
}
bool stopServer() {
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
ifstream in(GSERVER_PORT_FILE, ios::in);
ifstream in(Util::gserver_port_file.c_str());
if (in) {
in >> port;
in.close();
}
Socket socket;
if (!socket.create() || !socket.connect("127.0.0.1", port) || !socket.send("stop")) {
cout << "Failed to stop server at port " << port << '.' << endl;
cerr << "Failed to stop server at port " << port << '.' << endl;
return false;
}
string recv_msg;
socket.recv(recv_msg);
socket.close();
if (recv_msg != "server stopped.") {
cout << "Failed to stop server at port " << port << '.' << endl;
cerr << "Failed to stop server at port " << port << '.' << endl;
return false;
}
cout << "Server stopped at port " << port << '.' << endl;

View File

@ -7,6 +7,11 @@
在使用gserver时不能在数据库没有unload时再用gbuild或其他命令修改数据库仅限于C/S模式
将IRC聊天放到gstore文档上freenode #gStore
storage中大量使用long类型文件大小也可能达到64G最好在64位机器上运行。
# 推广
必须建立一个官方网站可以展示下团队、demo需要建立社区/论坛并维护
另外要有桌面应用或者网页应用以可视化的方式操作数据库类似virtuoso和neo4j那种
server 118.89.115.42 gstore-pku.com
@ -86,14 +91,18 @@ http://blog.csdn.net/infoworld/article/details/8670951
要在单机支持到10亿triple最坏情况下最多有20亿entity和20亿literal目前的编号方式是不行的(int扩展为unsigned)
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集就像jena和virtuoso一样慢不要紧
同时将ID的编码改为unsigned无效标志-1改为最大值的宏, triple数目的类型也要改为unsigned
注意pre的ID还可以为-2或者对于pre仍然用int或者改函数的返回值为long long (还有一些没有用-1而是>=0)
type分支中query过程可能还有问题需要修改Query/里面的类型另外stringindex中也要修改分界线已经是20亿且非法不再是-1
remove signature.binary, 合并两个分支type value
vstree在build和query时可以用不同大小的缓存来加速build过程
---
将B+tree中叶节点的大的value分离出来新建一套缓存使用block机制标记length为0表示未读取
类型bstr的length问题也需要解决(新建Istr类型)
如果把类型直接改成long long空间开销一下子就上升了一倍
解决方法对于ID2string仍然用char*和unsigned但对于s2xx p2xx o2xx应该用long long*和unsigned来表示这样最高可支持到40亿triple
注意在B+树中是以long long*的方式存但读出后应该全部换成unsigned*和unsigned搭配的方式(最长支持20亿个po对)
UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long空间开销一下子就上升了一倍
解决方法对于ID2string仍然用char*和unsigned但对于s2xx p2xx o2xx应该用unsigned long long*和unsigned来表示这样最高可支持到40亿triple
(其实这个不是特别必要很少会有这种情况我们处理的triple数目一般限制在20亿就算是type这种边po对数也就是跟entity数目持平很难达到5亿)
---
那么是否可以调整entity与literal的分界线如果entity数目一般都比literal数目多的话
直接把literal从大到小编号可在ID模块中指定顺序这样每个Datbase模块应该有自己独特的分界线其他模块用时也需要注意
@ -469,6 +478,8 @@ build db error if triple num > 500M
# BETTER
#### 添加数据访问层,数据范式和生成数据访问的源码
#### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询返回空值
#### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?)
@ -519,6 +530,8 @@ http://www.oschina.net/question/188977_58777
# ADVICE
#### 考虑利用hdfs或者hbase这样就可以利用各公司已有的数据库系统但这是否会和已有的内外存交换冲突
#### 数值型查询 实数域 [-bound, bound] 类型很难匹配,有必要单独编码么? 数据集中不应有范围 Query中编码过滤后还需验证
x>a, x<b, >=, <=, a<x<b, x=c
vstree中遇到"1237"^^<...integer>时不直接取字符串,而是转换为数值并编码
@ -608,3 +621,19 @@ http://www.hprd.org/download/
## GIT USAGE
https://git-scm.com/book/zh/v1/%E8%B5%B7%E6%AD%A5-%E5%88%9D%E6%AC%A1%E8%BF%90%E8%A1%8C-Git-%E5%89%8D%E7%9A%84%E9%85%8D%E7%BD%AE
#### how to commit a message
package.json
http://www.json.cn/
https://www.oschina.net/news/69705/git-commit-message-and-changelog-guide
https://sanwen8.cn/p/44eCof7.html
1. commit one by one, a commit just do one thing
2. place an empty line between head and body, body and footer
3. the first letter of header should be in uppercase, and the header should not be too long, just a wonderful summary
FIX: ... ADD:... REF:... 代码重构 SUB:...
4. each line should not be too long, add your real name and the influence in footer(maybe cause the code struct to change)

View File

@ -175,8 +175,8 @@ private:
map<int, int> selected_var_position;
public:
static const int MAX_VAR_NUM = 10;
static const int MAX_PRE_VAR_NUM = 10;
static const int MAX_VAR_NUM = 20;
static const int MAX_PRE_VAR_NUM = 20;
static const char NOT_JUST_SELECT = 'a';
static const char SELECT_VAR = 's';

View File

@ -17,13 +17,20 @@ Bstr::Bstr()
this->str = NULL;
}
Bstr::Bstr(const char* _str, unsigned _len)
Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy)
{
//WARN: if need a string .please add '\0' in your own!
this->length = _len;
//DEBUG:if copy memory?
//if(_nocopy)
//{
//this->str = _str; //not valid:const char* -> char*
this->str = (char*)malloc(_len);
//return;
//}
//NOTICE: we decide to use new/delete in global area
//this->str = (char*)malloc(_len);
this->str = new char[_len];
memcpy(this->str, _str, sizeof(char) * _len);
//this->str[_len]='\0';
}
@ -116,6 +123,15 @@ Bstr::operator != (const Bstr& _bstr)
unsigned
Bstr::getLen() const
{
//WARN: we should not include too complicate logic here!!!!
//NOTICE: this is for VList
//if(this->isBstrLongList())
////if(this->str == NULL)
//{
//return 0;
//}
return length;
}
@ -146,15 +162,18 @@ Bstr::copy(const Bstr* _bp)
this->length = _bp->getLen();
//DEBUG!!!
//cerr<<"bstr length: "<<this->length<<endl;
this->str = (char*)malloc(this->length);
memcpy(this->str, _bp->getStr(), this->length);
//this->str = (char*)malloc(this->length);
this->str = new char[this->length];
memcpy(this->str, _bp->getStr(), sizeof(char) * this->length);
}
//Replace this Bstr's content with a fresh copy of the _len bytes at _str.
//The buffer is allocated with new[] (and must therefore be freed with delete[]);
//no terminating '\0' is appended.
//NOTE(review): any buffer previously held in str is not freed here -
//presumably callers release() first; verify against callers.
void
Bstr::copy(const char* _str, unsigned _len)
{
    this->length = _len;
    //a single allocation only: the former malloc'ed buffer was overwritten right away and leaked
    this->str = new char[this->length];
    memcpy(this->str, _str, this->length);
}
@ -168,7 +187,8 @@ Bstr::clear()
//Free the owned buffer and reset this Bstr through clear().
//Buffers are allocated with new char[], so they must be freed with delete[] exactly once;
//calling free() first and delete[] afterwards would be a double free.
void
Bstr::release()
{
    delete[] this->str; //ok to be null, do nothing
    clear();
}
@ -203,3 +223,9 @@ Bstr::print(string s) const
//#endif
}
//A Bstr with no in-memory buffer (str == NULL) is treated as a long-list value.
bool
Bstr::isBstrLongList() const
{
    const bool no_buffer = (this->str == NULL);
    return no_buffer;
}

View File

@ -18,13 +18,14 @@ class Bstr
{
private:
char* str; //pointers consume 8 byte in 64-bit system
//TODO: the length maybe not needed
unsigned length;
public:
Bstr();
//if copy memory, then use const char*, but slow
//else, can not use const char* -> char*
Bstr(const char* _str, unsigned _len);
Bstr(const char* _str, unsigned _len, bool _nocopy = false);
//Bstr(char* _str, unsigned _len);
Bstr(const Bstr& _bstr);
//Bstr& operate = (const Bstr& _bstr);
@ -47,6 +48,9 @@ public:
//int write(FILE* _fp);
~Bstr();
void print(std::string s) const; //DEBUG
//check whether this Bstr represents a long-list value (str == NULL), which has to be read each time it is needed
bool isBstrLongList() const;
};
#endif // _UTIL_BSTR_H

View File

@ -58,7 +58,8 @@ Stream::Stream(std::vector<TYPE_ENTITY_LITERAL_ID>& _keys, std::vector<bool>& _d
this->record_size = new unsigned[this->colnum];
for(unsigned i = 0; i < this->colnum; ++i)
{
this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE));
char* tmptr = new char[Util::TRANSFER_SIZE];
this->record[i].setStr(tmptr);
this->record_size[i] = Util::TRANSFER_SIZE;
}
@ -148,7 +149,8 @@ Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx)
if(length + 1 > this->record_size[_idx])
{
this->record[_idx].release();
this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char)));
char* tmptr = new char[length+1];
this->record[_idx].setStr(tmptr);
this->record_size[_idx] = length + 1; //one more byte: convenient to add \0
}
@ -187,7 +189,8 @@ Stream::outputCache()
{
unsigned len;
fread(&len, sizeof(unsigned), 1, this->tempfp);
char* p = (char*)malloc(len * sizeof(char));
//char* p = (char*)malloc(len * sizeof(char));
char* p = new char[len];
fread(p, sizeof(char), len, this->tempfp);
bp[i].setLen(len);
bp[i].setStr(p);
@ -320,13 +323,16 @@ Stream::read()
//FILE* fp = (FILE*)(this->ans);
for(unsigned i = 0; i < this->colnum; ++i)
{
//BETTER:alloca and reuse the space in Bstr?
//BETTER:alloc and reuse the space in Bstr?
unsigned len;
fread(&len, sizeof(unsigned), 1, this->ansDisk);
char* s = (char*)calloc(len + 1, sizeof(char));
//char* s = (char*)calloc(len + 1, sizeof(char));
char* s = new char[len+1];
fread(s, sizeof(char), len, this->ansDisk);
s[len] = '\0';
this->copyToRecord(s, len, i);
free(s);
//free(s);
delete[] s;
}
}
this->xpos++;
@ -420,7 +426,9 @@ Stream::mergeSort()
#endif
break;
}
s = (char*)malloc(sizeof(char) * len);
//s = (char*)malloc(sizeof(char) * len);
s = new char[len];
fread(s, sizeof(char), len, tp);
bp[i].setLen(len);
bp[i].setStr(s);

View File

@ -48,6 +48,10 @@ map<string, string> Util::global_config;
//==================================================================================================================
string Util::gserver_port_file = "bin/.gserver_port";
string Util::gserver_port_swap = "bin/.gserver_port.swap";
string Util::gserver_log = "logs/gserver.log";
//NOTICE:used in Database, Join and Strategy
//int Util::triple_num = 0;
//int Util::pre_num = 0;
@ -446,6 +450,13 @@ Util::is_literal_ele(TYPE_ENTITY_LITERAL_ID _id)
return _id >= Util::LITERAL_FIRST_ID;
}
//An id denotes an entity when it lies below Util::LITERAL_FIRST_ID
//(the complement of is_literal_ele).
bool
Util::is_entity_ele(TYPE_ENTITY_LITERAL_ID id)
{
    const bool below_border = (id < Util::LITERAL_FIRST_ID);
    return below_border;
}
//NOTICE: require that the list is ordered
unsigned
Util::removeDuplicate(unsigned* _list, unsigned _len)
@ -672,7 +683,14 @@ Util::result_id_str(vector<unsigned*>& _v, int _var_num)
//Return true when _dir can be opened as a directory, false otherwise.
//The DIR handle obtained for the probe is always closed again, so repeated
//calls do not leak directory streams.
bool
Util::dir_exist(const string _dir)
{
    DIR* dirptr = opendir(_dir.c_str());
    if (dirptr == NULL)
    {
        return false;
    }
    closedir(dirptr);
    return true;
}
bool

View File

@ -37,6 +37,7 @@ in the sparql query can point to the same node in data graph)
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <netinet/in.h>
@ -86,10 +87,11 @@ in the sparql query can point to the same node in data graph)
//#define DEBUG_JOIN
//#define DEBUG_STREAM
//#define DEBUG_PRECISE 1 all information
//#define DEBUG_KVSTORE 1 //in KVstore
#define DEBUG_KVSTORE 1 //in KVstore
//#define DEBUG_VSTREE 1 //in Database
//#define DEBUG_LRUCACHE 1
//#define DEBUG_DATABASE 1 //in Database
//#define DEBUG_VLIST 1
//
//
@ -123,6 +125,12 @@ in the sparql query can point to the same node in data graph)
#endif
#endif
#ifdef DEBUG_VLIST
#ifndef DEBUG
#define DEBUG
#endif
#endif
#ifndef DEBUG
//#define DEBUG
#endif
@ -247,8 +255,11 @@ public:
static std::string getTimeString();
static std::string node2string(const char* _raw_str);
static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID);
static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID id);
static bool is_entity_ele(TYPE_ENTITY_LITERAL_ID id);
static unsigned removeDuplicate(unsigned*, unsigned);
static std::string getQueryFromFile(const char* _file_path);
static std::string getSystemOutput(std::string cmd);
static std::string getExactPath(const char* path);
@ -316,6 +327,10 @@ public:
static FILE* debug_database;
static FILE* debug_vstree;
static std::string gserver_port_file;
static std::string gserver_port_swap;
static std::string gserver_log;
private:
static bool isValidIPV4(std::string);

348
Util/VList.cpp Normal file
View File

@ -0,0 +1,348 @@
/*=============================================================================
# Filename: VList.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2017-03-27 15:47
# Description:
=============================================================================*/
#include "VList.h"
using namespace std;
//A value counts as a long list when its length is strictly above VList::LENGTH_BORDER.
bool
VList::isLongList(unsigned _len)
{
    const bool above_border = (_len > VList::LENGTH_BORDER);
    return above_border;
}
//Default constructor: no file is attached.
//Every member is given a defined value; in particular valfp is NULL, so it is
//never an indeterminate pointer.
VList::VList()
{ //not use ../logs/, notice the location of program
    cur_block_num = SET_BLOCK_NUM;
    filepath = "";
    valfp = NULL;
    freelist = NULL;
    max_buffer_size = Util::MAX_BUFFER_SIZE;
    freemem = max_buffer_size;
}
//Attach this VList to the file at _filepath and build the in-memory list of free blocks.
//_mode "build": create/truncate the file, write the block count and an all-zero bitmap,
//               and register blocks 1..cur_block_num as free.
//_mode "open":  read the block count and the bitmap from an existing file; a 0 bit marks a free block.
//_buffer_size:  stored in max_buffer_size and freemem.
//On an invalid mode or when the file cannot be opened, an error is printed and the
//object is left with freelist == NULL (and valfp == NULL).
VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size)
{
    cur_block_num = SET_BLOCK_NUM; //initialize
    this->filepath = _filepath;
    //give every member a defined value before any early return below,
    //so a failed construction never leaves indeterminate pointers behind
    this->valfp = NULL;
    this->freelist = NULL;
    this->max_buffer_size = _buffer_size;
    this->freemem = this->max_buffer_size;

    if (_mode == string("build"))
        valfp = fopen(_filepath.c_str(), "w+b");
    else if (_mode == string("open"))
        valfp = fopen(_filepath.c_str(), "r+b");
    else
    {
        cout<<string("error in VList: Invalid mode ") + _mode<<endl;
        return;
    }
    if (valfp == NULL)
    {
        cout<<string("error in VList: Open error ") + _filepath<<endl;
        return;
    }

    this->freelist = new BlockInfo; //null-head

    //TODO: read/write by char is too slow, how about read all and deal , then clear?
    //
    //BETTER: how about assigning IDs in a dynamic way?
    //limitID freelist
    //QUERY: can the free id list consume very large memory??
    unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE
    BlockInfo* bp;
    if (_mode == "build")
    { //write basic information
        i = 0;
        fwrite(&cur_block_num, sizeof(unsigned), 1, this->valfp); //current block num
        //NOTICE: use a 1M block for a unsigned?? not ok!
        fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
        bp = this->freelist;
        j = cur_block_num / 8;
        for (i = 0; i < j; ++i)
        {
            //one bitmap byte covers 8 blocks; 0 means all of them are free
            fputc(0, this->valfp);
            for (k = 0; k < 8; ++k)
            {
                bp->next = new BlockInfo(i * 8 + k + 1, NULL);
                bp = bp->next;
            }
        }
    }
    else //_mode == "open"
    {
        //read basic information
        char c;
        fread(&cur_block_num, sizeof(unsigned), 1, this->valfp);
        fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
        bp = this->freelist;
        j = cur_block_num / 8;
        for (i = 0; i < j; ++i)
        {
            c = fgetc(valfp);
            for (k = 0; k < 8; ++k)
            {
                //bit k of byte i stands for block i*8 + (7-k) + 1; a 0 bit marks it as free
                if ((c & (1 << k)) == 0)
                {
                    bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL);
                    bp = bp->next;
                }
            }
        }
    }

    //NOTICE: we do not need to alloc the blocks for free block id list, AllocBlock is only for later blocks
}
//Translate a block number into its byte offset in the value file.
//Block 0 maps to offset 0; a number above cur_block_num yields -1
//(a valid address is never negative).
long //8-byte in 64-bit machine
VList::Address(unsigned _blocknum) const //BETTER: inline function
{
    if (_blocknum == 0)
        return 0;
    if (_blocknum > cur_block_num)
    {
        //print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum));
        return -1; //address should be non-negative
    }

    //NOTICE: long must be used explicitly here
    const long block_index = (long)(this->SuperNum + _blocknum - 1);
    return block_index * (long)BLOCK_SIZE;
}
//Inverse of Address(): map a byte offset in the value file to its block number.
unsigned
VList::Blocknum(long address) const
{
    const long block_index = address / BLOCK_SIZE;
    return block_index + 1 - this->SuperNum;
}
unsigned
VList::AllocBlock()
{
BlockInfo* p = this->freelist->next;
if (p == NULL)
{
for (unsigned i = 0; i < SET_BLOCK_INC; ++i)
{
cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM
this->FreeBlock(cur_block_num);
}
p = this->freelist->next;
}
unsigned t = p->num;
this->freelist->next = p->next;
delete p;
return t;
}
void
VList::FreeBlock(unsigned _blocknum)
{ //QUERY: head-sub and tail-add will be better?
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
this->freelist->next = bp;
}
//NOTICE: all reads are aligned to 4 bytes(including a string)
//a string may span several blocks
//
//NOTICE: not use buffer, read/write on need, update at once, so no need to write back at last
//NOTICE: the next is placed at the begin of a block
//When the read position sits exactly on a block boundary, jump to block *_next
//and load that block's own next-pointer into *_next (it is stored at the start
//of the block). Otherwise nothing happens.
void
VList::ReadAlign(unsigned* _next)
{
    if (ftell(valfp) % BLOCK_SIZE != 0)
        return;

    fseek(valfp, Address(*_next), SEEK_SET);
    fread(_next, sizeof(unsigned), 1, valfp);
}
//When the write position sits exactly on a block boundary, allocate a new block,
//record its number as the next-pointer at the start of block *_curnum, move the
//write position just behind the new block's 4-byte next-pointer slot and make
//*_curnum refer to the new block. Otherwise nothing happens.
void
VList::WriteAlign(unsigned* _curnum)
{
    if (ftell(valfp) % BLOCK_SIZE != 0)
        return;

    unsigned fresh = this->AllocBlock();
    //link the current block to the new one
    fseek(valfp, Address(*_curnum), SEEK_SET);
    fwrite(&fresh, sizeof(unsigned), 1, valfp);
    //continue writing in the new block, skipping its next-pointer slot
    fseek(valfp, Address(fresh) + 4, SEEK_SET);
    *_curnum = fresh;
}
//Read the value whose first block is _block_num.
//_str receives a buffer allocated by readBstr (with new char[]) and _len its length.
//Always returns true.
bool
VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
{
#ifdef DEBUG_VLIST
    cout<<"to get value of block num: "<<_block_num<<endl;
#endif
    //the first 4 bytes of a block hold the number of the following block
    unsigned next_block;
    fseek(valfp, Address(_block_num), SEEK_SET);
    fread(&next_block, sizeof(unsigned), 1, valfp);

    this->readBstr(_str, _len, &next_block);
    return true;
}
//Store the _len bytes at _str in newly allocated block(s) and return the number
//of the first block.
unsigned
VList::writeValue(const char* _str, unsigned _len)
{
    const unsigned first_block = this->AllocBlock();
    unsigned current = first_block;
    //NOTICE: here we must skip the next position first
    fseek(valfp, Address(current) + 4, SEEK_SET);
    this->writeBstr(_str, _len, &current);

#ifdef DEBUG_VLIST
    cout<<"to write value - block num: "<<first_block<<endl;
#endif
    return first_block;
}
//Release every block of the value whose first block is _block_num.
//The chain is followed through the next-pointer stored at the start of each
//block; block number 0 terminates the chain.
//Returns true when the whole chain was released, false when a block number is
//out of range or its next-pointer cannot be read (the walk stops at that point).
bool
VList::removeValue(unsigned _block_num)
{
    unsigned store = _block_num;
    while (store != 0)
    {
        const long addr = Address(store);
        if (addr < 0)
        {
            //an out-of-range block number must never enter the free list
            return false;
        }

        //read the successor before the block is given back
        unsigned next = 0;
        const bool got_next = (fseek(valfp, addr, SEEK_SET) == 0)
            && (fread(&next, sizeof(unsigned), 1, valfp) == 1);

        this->FreeBlock(store);
        if (!got_next)
        {
            //without a reliable next-pointer the rest of the chain can not be followed
            return false;
        }
        store = next;
    }

    return true;
}
//Read one length-prefixed byte string starting at the current file position.
//_str receives a buffer allocated with new char[] and _len its length;
//*_next is the number of the block that follows the current one and is
//refreshed by ReadAlign whenever a block boundary is crossed.
//Always returns true.
bool
VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
{
    //long address;
    unsigned total;
    fread(&total, sizeof(unsigned), 1, this->valfp);
#ifdef DEBUG_VLIST
    cout<<"the length of value: "<<total<<endl;
#endif
    this->ReadAlign(_next);

    //char* s = (char*)malloc(len);
    char* buffer = new char[total];
    _len = total;

    //full 4-byte words first, checking for a block boundary after each of them
    unsigned pos = 0;
    while (pos + 4 < total)
    {
        fread(buffer + pos, sizeof(char), 4, valfp);
        this->ReadAlign(_next);
        pos += 4;
    }
    //the last (at most 4) bytes, one at a time
    for (; pos < total; ++pos)
    {
        fread(buffer + pos, sizeof(char), 1, valfp); //BETTER
    }

    //skip the padding that aligns the string to 4 bytes
    unsigned padding = total % 4;
    if (padding > 0)
        padding = 4 - padding;
    fseek(valfp, padding, SEEK_CUR);

    //NOTICE+DEBUG: I think no need to align here, later no data to read
    //(if need to read, then fseek again to find a new value)
    //this->ReadAlign(_next);

    _str = buffer;
    return true;
}
//Write one length-prefixed byte string (_len bytes at _str) at the current file position.
//*_curnum is the block currently being written and is advanced by WriteAlign
//whenever a block boundary is reached; at the end the last block's next-pointer
//is set to 0.
//Always returns true.
bool
VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
{
    unsigned total = _len;
    fwrite(&total, sizeof(unsigned), 1, valfp);
    this->WriteAlign(_curnum);
    //cout<<"to write bstr, length: "<<len<<endl;

    //BETTER: compute this need how many blocks first, then write a block a time
    const char* data = _str;
    //full 4-byte words first, checking for a block boundary after each of them
    unsigned pos = 0;
    while (pos + 4 < total)
    {
        fwrite(data + pos, sizeof(char), 4, valfp);
        this->WriteAlign(_curnum);
        pos += 4;
    }
    //the last (at most 4) bytes, one at a time
    for (; pos < total; ++pos)
    {
        fwrite(data + pos, sizeof(char), 1, valfp);
    }

    //skip the padding that aligns the string to 4 bytes
    unsigned padding = total % 4;
    if (padding > 0)
        padding = 4 - padding;
    fseek(valfp, padding, SEEK_CUR);

    //NOTICE+DEBUG: I think no need to align here, later no data to write
    //(if need to write, then fseek again to write a new value)
    //this->WriteAlign(_curnum);

    //terminate the chain: the last block has no successor
    fseek(valfp, Address(*_curnum), SEEK_SET);
    unsigned end_mark = 0;
    fwrite(&end_mark, sizeof(unsigned), 1, valfp);

    return true;
}
//Write the block count and the free-block bitmap back to the file, then free
//the in-memory free list and close the file.
//Bitmap layout: block num is bit (7 - (num-1) % 8) of byte (num-1) / 8, stored
//from offset BLOCK_SIZE on; 1 = in use, 0 = free.
//Nothing is written when there is no open file or no free list (valfp == NULL
//or freelist == NULL), and a NULL valfp is not passed to fclose.
VList::~VList()
{
    if (this->valfp != NULL && this->freelist != NULL)
    {
        //write the info back
        fseek(this->valfp, 0, SEEK_SET);
        fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num
        fseek(valfp, BLOCK_SIZE, SEEK_SET);
        int i, j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
        for (i = 0; i < j; ++i)
        {
            //reset to 1 first
            fputc(0xff, valfp);
        }

        char c;
        BlockInfo* bp = this->freelist->next;
        while (bp != NULL)
        {
            //if not-use then set 0, aligned to byte!
#ifdef DEBUG_KVSTORE
            if (bp->num > cur_block_num)
            {
                printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
                exit(1);
            }
#endif
            j = bp->num - 1;
            i = j / 8;
            j = 7 - j % 8;
            fseek(valfp, BLOCK_SIZE + i, SEEK_SET);
            c = fgetc(valfp);
            //step back onto the byte just read so that it can be rewritten
            fseek(valfp, -1, SEEK_CUR);
            fputc(c & ~(1 << j), valfp);
            bp = bp->next;
        }
    }

    //release the free list, including its null-head
    BlockInfo* node = this->freelist;
    while (node != NULL)
    {
        BlockInfo* following = node->next;
        delete node;
        node = following;
    }
    this->freelist = NULL;

    if (this->valfp != NULL)
    {
        fclose(this->valfp);
        this->valfp = NULL;
    }
}

84
Util/VList.h Normal file
View File

@ -0,0 +1,84 @@
/*=============================================================================
# Filename: VList.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2017-03-27 15:40
# Description:
=============================================================================*/
#ifndef _UTIL_VLIST_H
#define _UTIL_VLIST_H
#include "Util.h"
#include "Bstr.h"
//NOTICE: not keep long list in memory, read each time
//but when can you free the long list(kvstore should release it after parsing)
//
//CONSIDER: if to keep long list in memory, should adjust the bstr in memory:
//unsigned: 0 char*: an object (if in memory, if modified, length, content, block num)
//when reading a long list in a node, generate the object first, and the object will tell you whether
//the list is in memory or not
//BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts)
//STRUCT: a long list real-address is the block ID in file2(only for long value lists, a list across several 1M blocks)
//tree-value Bstr: unsigned=the real address char*=NULL
//in disk:
//file1 is tree file, the long list is represented as: 0 real-address
//NOTICE: a long list is not kept in memory as a cache; it is read/updated each time it is needed!
//TODO: use fread/fwrite here instead of fgetc/fputc
//including other trees
//Manager of the separate value file that stores very long value lists.
//The file is organized in fixed-size blocks of BLOCK_SIZE bytes; a stored value is
//identified by a block number (see writeValue/readValue/removeValue).
//An instance owns the open FILE* and the free-block list, so it must not be copied:
//the destructor closes the file and deletes the list.
class VList
{
public:
	//NOTICE:the border is 10^6, but the block is larger, 1M
	//(presumably the length threshold used by isLongList() -- confirm in VList.cpp)
	static const unsigned LENGTH_BORDER = 1000000;
	//static const unsigned LENGTH_BORDER = 1000;
	static const unsigned BLOCK_SIZE = 1 << 20;	//fixed size of disk-block
	static const unsigned MAX_BLOCK_NUM = 1 << 23;	//max block-num
	//below two constants: must be exactly divisible by 8
	static const unsigned SET_BLOCK_NUM = 1 << 3;	//initial blocks num
	static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM;	//base of blocks-num inc
	//one bit per block needs MAX_BLOCK_NUM / 8 bytes, i.e. MAX_BLOCK_NUM / (8 * BLOCK_SIZE) blocks;
	//the extra 1 is presumably the header block at the start of the file -- confirm
	static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;

private:
	unsigned long long max_buffer_size;
	unsigned cur_block_num;	//number of blocks currently in use, written back by the destructor
	std::string filepath;
	BlockInfo* freelist;	//list of free blocks; every node is deleted by the destructor

	//very long value lists are stored in a separate file(with large blocks)
	//
	//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
	//<10%: 5000000~100M bytes
	FILE* valfp;	//closed by the destructor

	//NOTICE: freemem's type is long long here, due to large memory in server.
	//However, needmem in handler() and request() is ok to be int/unsigned.
	//Because the bstr's size is controlled, so is the node.
	unsigned long long freemem;	//free memory to use, non-negative
	//unsigned long long time;	//QUERY(achieving an old-swap strategy?)

	//NOTICE: copying is forbidden. A copy would share valfp and freelist with the
	//original, and both destructors would then fclose/delete the same resources.
	//The two members below are declared but intentionally never defined.
	VList(const VList&);
	VList& operator=(const VList&);

	//conversion between a block number and its offset in the file
	long Address(unsigned _blocknum) const;
	unsigned Blocknum(long address) const;
	//management of blocks
	unsigned AllocBlock();
	void FreeBlock(unsigned _blocknum);
	//helpers used while reading/writing a value
	void ReadAlign(unsigned* _next);
	void WriteAlign(unsigned* _next);
	bool readBstr(char*& _bp, unsigned& _len, unsigned* _next);
	bool writeBstr(const char* _str, unsigned _len, unsigned* _curnum);

public:
	VList();
	//create a fixed-size file or open an existing one
	VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);
	//value-level interface, a value is addressed by a block number
	bool readValue(unsigned _block_num, char*& _str, unsigned& _len);
	unsigned writeValue(const char* _str, unsigned _len);
	bool removeValue(unsigned _block_num);
	//write the bookkeeping data back, release the free list and close the file
	~VList();

	static bool isLongList(unsigned _len);
};
#endif

1
logs/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.log

View File

@ -43,11 +43,11 @@ CC = ccache g++
#NOTICE: -O2 is recommended, while -O3 is dangerous
#when developing, do not use -O because it will disturb the normal
#routine; use it only for test and release.
#CFLAGS = -c -Wall -g -pthread #-fprofile-arcs -ftest-coverage #-pg
#EXEFLAG = -g -pthread #-fprofile-arcs -ftest-coverage #-pg
CFLAGS = -c -Wall -g -pthread #-fprofile-arcs -ftest-coverage #-pg
EXEFLAG = -g -pthread #-fprofile-arcs -ftest-coverage #-pg
#-coverage
CFLAGS = -c -Wall -O2 -pthread
EXEFLAG = -O2 -pthread
#CFLAGS = -c -Wall -O2 -pthread
#EXEFLAG = -O2 -pthread
#add -lreadline -ltermcap if using readline or objs contain readline
library = -ltermcap -lreadline -L./lib -lantlr -lgcov
@ -70,10 +70,11 @@ api_java = api/java/lib/GstoreJavaAPI.jar
#sstreeobj = $(objdir)Tree.o $(objdir)Storage.o $(objdir)Node.o $(objdir)IntlNode.o $(objdir)LeafNode.o $(objdir)Heap.o
sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SIIntlNode.o $(objdir)SILeafNode.o $(objdir)SIHeap.o
istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o
ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj)
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstreeobj)
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o
queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \
$(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o
@ -217,6 +218,26 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $
$(CC) $(CFLAGS) KVstore/ISTree/heap/ISHeap.cpp -o $(objdir)ISHeap.o
#objects in istree/ end
#objects in ivtree/ begin
$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o $(objdir)VList.o
$(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o
$(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o
$(CC) $(CFLAGS) KVstore/IVTree/storage/IVStorage.cpp -o $(objdir)IVStorage.o $(def64IO)
$(objdir)IVNode.o: KVstore/IVTree/node/IVNode.cpp KVstore/IVTree/node/IVNode.h $(objdir)Util.o
$(CC) $(CFLAGS) KVstore/IVTree/node/IVNode.cpp -o $(objdir)IVNode.o
$(objdir)IVIntlNode.o: KVstore/IVTree/node/IVIntlNode.cpp KVstore/IVTree/node/IVIntlNode.h
$(CC) $(CFLAGS) KVstore/IVTree/node/IVIntlNode.cpp -o $(objdir)IVIntlNode.o
$(objdir)IVLeafNode.o: KVstore/IVTree/node/IVLeafNode.cpp KVstore/IVTree/node/IVLeafNode.h
$(CC) $(CFLAGS) KVstore/IVTree/node/IVLeafNode.cpp -o $(objdir)IVLeafNode.o
$(objdir)IVHeap.o: KVstore/IVTree/heap/IVHeap.cpp KVstore/IVTree/heap/IVHeap.h $(objdir)Util.o
$(CC) $(CFLAGS) KVstore/IVTree/heap/IVHeap.cpp -o $(objdir)IVHeap.o
#objects in ivtree/ end
$(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h KVstore/Tree.h
$(CC) $(CFLAGS) KVstore/KVstore.cpp $(inc) -o $(objdir)KVstore.o
@ -302,6 +323,9 @@ $(objdir)Triple.o: Util/Triple.cpp Util/Triple.h $(objdir)Util.o
$(objdir)BloomFilter.o: Util/BloomFilter.cpp Util/BloomFilter.h $(objdir)Util.o
$(CC) $(CFLAGS) Util/BloomFilter.cpp -o $(objdir)BloomFilter.o
$(objdir)VList.o: Util/VList.cpp Util/VList.h
$(CC) $(CFLAGS) Util/VList.cpp -o $(objdir)VList.o
#objects in util/ end

15
test/package.json Normal file
View File

@ -0,0 +1,15 @@
{
"config": {
"ghooks": {
"//pre-commit": "gulp lint",
"commit-msg": "validate-commit-msg",
"//pre-push": "make test",
"//post-merge": "npm install",
"//post-rewrite": "npm install"
}
},
"scripts": {
"changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0",
"changelog": "conventional-changelog -p angular -i CHANGELOG.md -w"
}
}