refactor: to add long list value

TODO: vlist support and IVLeafNode

by zengli, long list need to be freed each time
This commit is contained in:
bookug 2017-03-29 23:57:09 +08:00
parent d64358b606
commit 939e84c8bf
22 changed files with 436 additions and 171 deletions

View File

@ -20,7 +20,7 @@ ISTree::ISTree()
TSM = NULL;
storepath = "";
filename = "";
transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
//transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
this->stream = NULL;
this->request = 0;
}
@ -37,10 +37,10 @@ ISTree::ISTree(string _storepath, string _filename, string _mode, unsigned long
this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
else
this->root = NULL;
this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
//this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
this->stream = NULL;
this->request = 0;
}
@ -51,30 +51,30 @@ ISTree::getFilePath()
return storepath + "/" + filename;
}
void //WARN: not check _str and _len
ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
{
if (_index > 2)
return;
/*
if(_str == NULL || _len == 0)
{
printf("error in CopyToTransfer: empty string\n");
return;
}
*/
//unsigned length = _bstr->getLen();
unsigned length = _len;
if (length + 1 > this->transfer_size[_index])
{
transfer[_index].release();
transfer[_index].setStr((char*)malloc(length + 1));
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
}
memcpy(this->transfer[_index].getStr(), _str, length);
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
this->transfer[_index].setLen(length);
}
//void //WARN: not check _str and _len
//ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
//{
//if (_index > 2)
//return;
//[>
//if(_str == NULL || _len == 0)
//{
//printf("error in CopyToTransfer: empty string\n");
//return;
//}
//*/
////unsigned length = _bstr->getLen();
//unsigned length = _len;
//if (length + 1 > this->transfer_size[_index])
//{
//transfer[_index].release();
//transfer[_index].setStr((char*)malloc(length + 1));
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
//}
//memcpy(this->transfer[_index].getStr(), _str, length);
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
//this->transfer[_index].setLen(length);
//}
unsigned
ISTree::getHeight() const
@ -121,15 +121,18 @@ ISTree::search(int _key, char*& _str, int& _len)
}
const Bstr* val = ret->getValue(store);
this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
_str = this->transfer[0].getStr();
_len = this->transfer[0].getLen();
//this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
//_str = this->transfer[0].getStr();
//_len = this->transfer[0].getLen();
_str = val->getStr();
_len = val->getLen();
this->TSM->request(request);
return true;
}
bool
ISTree::insert(int _key, const char* _str, unsigned _len)
ISTree::insert(int _key, char* _str, unsigned _len)
{
if (_key < 0)
{
@ -137,8 +140,8 @@ ISTree::insert(int _key, const char* _str, unsigned _len)
return false;
}
this->CopyToTransfer(_str, _len, 2);
const Bstr* val = &(this->transfer[2]);
//this->CopyToTransfer(_str, _len, 2);
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
ISNode* ret;
if (this->root == NULL) //tree is empty
@ -222,9 +225,9 @@ ISTree::insert(int _key, const char* _str, unsigned _len)
else
{
p->addKey(_key, i);
p->addValue(val, i, true);
p->addValue(_str, _len, i, true);
p->addNum();
request += val->getLen();
request += _len;
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
//_key->clear();
@ -235,7 +238,7 @@ ISTree::insert(int _key, const char* _str, unsigned _len)
}
bool
ISTree::modify(int _key, const char* _str, unsigned _len)
ISTree::modify(int _key, char* _str, unsigned _len)
{
if (_key < 0)
{
@ -243,8 +246,8 @@ ISTree::modify(int _key, const char* _str, unsigned _len)
return false;
}
this->CopyToTransfer(_str, _len, 2); //not check value
const Bstr* val = &(this->transfer[2]);
//this->CopyToTransfer(_str, _len, 2); //not check value
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
int store;
ISNode* ret = this->find(_key, &store, true);
@ -255,16 +258,17 @@ ISTree::modify(int _key, const char* _str, unsigned _len)
}
//cout<<"ISTree::modify() - key is found, now to remove"<<endl;
unsigned len = ret->getValue(store)->getLen();
ret->setValue(val, store, true);
ret->setValue(_str, _len, store, true);
//cout<<"value reset"<<endl;
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
//request += (val->getLen() - len);
this->request = val->getLen();
this->request = _len;
this->request -= len;
ret->setDirty();
//cout<<"to request"<<endl;
this->TSM->request(request);
//cout<<"memory requested"<<endl;
return true;
}
@ -300,6 +304,7 @@ ISTree::find(int _key, int* _store, bool ifmodify)
*_store = -1; //Not Found
else
*_store = i;
return p;
}

View File

@ -36,8 +36,8 @@ protected:
//so lock is a must. Add lock to transfer is better than to add
//lock to every key/value. However, modify requires a lock for a
//key/value, and multiple search for different keys are ok!!!
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
unsigned transfer_size[3];
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
//unsigned transfer_size[3];
//tree's operations should be atom(if read nodes)
//sum the request and send to ISStorage at last
@ -49,7 +49,7 @@ protected:
std::string filename; //ok for user to change
/* some private functions */
std::string getFilePath(); //in UNIX system
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
void release(ISNode* _np) const;
public:
@ -61,8 +61,8 @@ public:
//void setRoot(Node* _root);
//insert, search, remove, set
bool search(int _key, char*& _str, int& _len);
bool insert(int _key, const char* _str, unsigned _len);
bool modify(int _key, const char* _str, unsigned _len);
bool insert(int _key, char* _str, unsigned _len);
bool modify(int _key, char* _str, unsigned _len);
ISNode* find(int _key, int* store, bool ifmodify);
bool remove(int _key);
const Bstr* getRangeValue();

View File

@ -125,6 +125,42 @@ ISLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
return true;
}
// Replaces the value at position _index with the buffer (_str, _len).
// The previous value is released first, then the pointer and length are
// handed to the slot via setStr()/setLen().
// Returns false (after printing an error) when _index is outside [0, num).
// NOTE: ifcopy is not read by this overload.
// NOTE(review): presumably setStr() stores the pointer without copying, so the
// node takes over the caller's buffer -- confirm against Bstr.
bool
ISLeafNode::setValue(char* _str, unsigned _len, int _index, bool ifcopy)
{
    const int count = this->getNum();
    const bool inRange = (_index >= 0) && (_index < count);
    if (!inRange)
    {
        print(string("error in setValue: Invalid index ") + Util::int2string(_index));
        return false;
    }

    //NOTICE: only used in modify
    this->values[_index].release();
    this->values[_index].setStr(_str);
    this->values[_index].setLen(_len);
    return true;
}
// Inserts the buffer (_str, _len) as a new value at position _index, moving
// the values currently at [_index, num) one slot to the right.
// Returns false (after printing an error) when _index is outside [0, num].
// The value count itself is not changed here.
// NOTE: ifcopy is not read by this overload.
bool
ISLeafNode::addValue(char* _str, unsigned _len, int _index, bool ifcopy)
{
    const int count = this->getNum();
    if (_index < 0 || count < _index)
    {
        print(string("error in addValue: Invalid index ") + Util::int2string(_index));
        return false;
    }

    // open a gap at _index by shifting from the tail towards the front
    int pos = count;
    while (pos > _index)
    {
        this->values[pos] = this->values[pos - 1];
        --pos;
    }

    this->values[_index].setStr(_str);
    this->values[_index].setLen(_len);
    return true;
}
bool
ISLeafNode::subValue(int _index, bool ifdel)
{
@ -373,4 +409,4 @@ ISLeafNode::print(string s)
}
else;
#endif
}
}

View File

@ -27,6 +27,7 @@ public:
void Normal();
ISNode* getPrev() const;
ISNode* getNext() const;
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index, bool ifcopy = false);
bool addValue(const Bstr* _value, int _index, bool ifcopy = false);
@ -34,6 +35,10 @@ public:
void setPrev(ISNode* _prev);
void setNext(ISNode* _next);
unsigned getSize() const;
bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
ISNode* split(ISNode* _father, int _index);
ISNode* coalesce(ISNode* _father, int _index);
void release();
@ -47,4 +52,4 @@ public:
};
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
#endif
#endif

View File

@ -80,12 +80,18 @@ public:
virtual bool subChild(int _index) { return true; };
virtual ISNode* getPrev() const { return NULL; };
virtual ISNode* getNext() const { return NULL; };
virtual const Bstr* getValue(int _index) const { return NULL; };
virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
virtual bool subValue(int _index, bool ifdel = false) { return true; };
virtual void setPrev(ISNode* _prev) {};
virtual void setNext(ISNode* _next) {};
virtual bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
virtual bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
//pure virtual function
virtual void Virtual() = 0;
virtual void Normal() = 0;
virtual unsigned getSize() const = 0; //return all memory owned
@ -110,4 +116,4 @@ public:
*to release the whole(pointer is invalid and rebuild problem)
*/
#endif
#endif

View File

@ -36,7 +36,7 @@ IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long
string filepath = this->getFilePath();
string vlist_file = filepath + "_vlist";
this->value_list = new VList(vlist_file, 1<<30);
this->value_list = new VList(vlist_file, this->mode, 1<<30);
TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list);
if (this->mode == "open")
@ -142,7 +142,7 @@ IVTree::search(int _key, char*& _str, int& _len)
}
bool
IVTree::insert(int _key, const char* _str, unsigned _len)
IVTree::insert(int _key, char* _str, unsigned _len)
{
if (_key < 0)
{
@ -244,12 +244,13 @@ IVTree::insert(int _key, const char* _str, unsigned _len)
//_key->clear();
//_value->clear();
}
this->TSM->request(request);
return !ifexist; //QUERY(which case:return false)
}
bool
IVTree::modify(int _key, const char* _str, unsigned _len)
IVTree::modify(int _key, char* _str, unsigned _len)
{
if (_key < 0)
{
@ -268,11 +269,14 @@ IVTree::modify(int _key, const char* _str, unsigned _len)
return false;
}
//cout<<"IVTree::modify() - key is found, now to remove"<<endl;
//NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr
unsigned len = ret->getValue(store)->getLen();
ret->setValue(this->value_list, store, _str, _len, true);
//ret->setValue(val, store, true);
//cout<<"value reset"<<endl;
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
//request += (val->getLen() - len);
this->request = _len;
//this->request = val->getLen();
@ -386,6 +390,7 @@ IVTree::remove(int _key)
this->TSM->updateHeap(p, p->getRank(), true);
p = q;
}
bool flag = false;
//j = p->getNum(); //LeafNode(maybe root)
//for(i = 0; i < j; ++i)
@ -414,7 +419,7 @@ IVTree::remove(int _key)
{
request -= p->getValue(i)->getLen();
p->subKey(i); //to release
p->subValue(i, true); //to release
p->subValue(this->value_list, i, true); //to release
p->subNum();
if (p->getNum() == 0) //root leaf 0 key
{
@ -461,6 +466,7 @@ IVTree::resetStream()
this->stream->setEnd();
}
//TODO: change to using value list, getValue() maybe not get real long list
bool //special case: not exist, one-edge-case
IVTree::range_query(int _key1, int _key2)
{ //the range is: *_key1 <= x < *_key2
@ -555,6 +561,7 @@ IVTree::range_query(int _key1, int _key2)
for (i = l; i < r; ++i)
{
//NOTICE:Bstr* in an array, used as Bstr[]
//DEBUG+TODO: if long list?? clean
this->stream->write(p->getValue(i));
}
this->TSM->request(request);
@ -563,7 +570,9 @@ IVTree::range_query(int _key1, int _key2)
else
break;
}
this->stream->setEnd();
return true;
}

View File

@ -11,11 +11,11 @@
#include "../../Util/Util.h"
#include "../../Util/Stream.h"
#include "../../Util/VList.h"
#include "node/IVNode.h"
#include "node/IVIntlNode.h"
#include "node/IVLeafNode.h"
#include "storage/IVStorage.h"
#include "./vlist/VList.h"
//TODO: for a long list, do not read it eagerly; read it only when needed
//the memory is kept with the node and updated along with the node
@ -80,8 +80,8 @@ public:
//void setRoot(Node* _root);
//insert, search, remove, set
bool search(int _key, char*& _str, int& _len);
bool insert(int _key, const char* _str, unsigned _len);
bool modify(int _key, const char* _str, unsigned _len);
bool insert(int _key, char* _str, unsigned _len);
bool modify(int _key, char* _str, unsigned _len);
IVNode* find(int _key, int* store, bool ifmodify);
bool remove(int _key);
const Bstr* getRangeValue();

View File

@ -89,6 +89,7 @@ IVLeafNode::getValue(int _index) const
return this->values + _index;
}
//TODO!!!
bool
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
{
@ -148,6 +149,26 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
return true;
}
// Removes the value at position _index and closes the gap by moving the
// following values one slot to the left. When ifdel is true the removed
// value is released before being overwritten.
// Returns false (after printing an error) when _index is outside [0, num).
// The value count itself is not changed here; _vlist is not used yet.
bool
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
{
    //TODO: if is to sub long list
    const int count = this->getNum();
    const bool inRange = (_index >= 0) && (_index < count);
    if (!inRange)
    {
        print(string("error in subValue: Invalid index ") + Util::int2string(_index));
        return false;
    }

    if (ifdel)
    {
        values[_index].release();
    }

    // shift the tail left over the removed slot
    const int last = count - 1;
    int pos = _index;
    while (pos < last)
    {
        this->values[pos] = this->values[pos + 1];
        ++pos;
    }
    return true;
}
bool
IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
{
@ -169,26 +190,6 @@ IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
return true;
}
// Removes the value at position _index from this leaf.
// _vlist: not used by the current implementation (see TODO below).
// _index: position of the value to remove; must lie in [0, num).
// ifdel:  when true, the removed value is released before being overwritten.
// Returns true on success, false (after printing an error) on a bad index.
// The value count itself is not changed here.
bool
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
{
//TODO: if is to sub long list
int num = this->getNum();
if (_index < 0 || _index >= num)
{
print(string("error in subValue: Invalid index ") + Util::int2string(_index));
return false;
}
int i;
if (ifdel)
values[_index].release();
// close the gap: move every following value one slot to the left
for (i = _index; i < num - 1; ++i)
this->values[i] = this->values[i + 1];
return true;
}
bool
IVLeafNode::subValue(int _index, bool ifdel)
{

View File

@ -11,7 +11,7 @@
#include "../../../Util/Util.h"
#include "../../../Util/Bstr.h"
#include "../vlist/VList.h"
#include "../../../Util/VList.h"
class IVNode //abstract basic class
{
@ -81,6 +81,7 @@ public:
virtual bool subChild(int _index) { return true; };
virtual IVNode* getPrev() const { return NULL; };
virtual IVNode* getNext() const { return NULL; };
virtual const Bstr* getValue(int _index) const { return NULL; };
virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; };
virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; };
@ -92,6 +93,8 @@ public:
virtual void setPrev(IVNode* _prev) {};
virtual void setNext(IVNode* _next) {};
//pure virtual functions
virtual void Virtual() = 0;
virtual void Normal() = 0;
virtual unsigned getSize() const = 0; //return all memory owned

View File

@ -9,10 +9,10 @@
#ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
#define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
#include "../../../Util/VList.h"
#include "../node/IVIntlNode.h"
#include "../node/IVLeafNode.h"
#include "../heap/IVHeap.h"
#include "../vlist/VList.h"
//It controls read, write, swap
class IVStorage

View File

@ -1422,6 +1422,14 @@ KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool
_preidlist[i] = _tmp[2 * i + 3];
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1452,6 +1460,14 @@ KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool
_list_len = Util::removeDuplicate(_objidlist, _list_len);
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1493,6 +1509,14 @@ KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int&
_objidlist = new int[_list_len];
memcpy(_objidlist, _tmp + _offset, sizeof(int) * _list_len);
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1533,6 +1557,14 @@ KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list
}
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1664,6 +1696,14 @@ KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool
_preidlist[i] = _tmp[2 * i + 2];
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1688,6 +1728,14 @@ KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool
_list_len = Util::removeDuplicate(_subidlist, _list_len);
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1723,6 +1771,14 @@ KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int&
_subidlist = new int[_list_len];
memcpy(_subidlist, _tmp + _offset, sizeof(int) * _list_len);
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1756,6 +1812,14 @@ KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list
}
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1867,6 +1931,14 @@ KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool
_list_len = Util::removeDuplicate(_subidlist, _list_len);
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1891,6 +1963,14 @@ KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool
_list_len = Util::removeDuplicate(_objidlist, _list_len);
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1913,6 +1993,14 @@ KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list
_subid_objidlist[2 * i + 1] = _tmp[1 + _tmp[0] + i];
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return true;
}
@ -1977,6 +2065,14 @@ KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int&
}
}
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
if (_list_len == 0) {
_preidlist = NULL;
return false;
@ -2091,37 +2187,37 @@ KVstore::flush(IVTree* _p_btree)
}
bool
KVstore::addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val)
KVstore::addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val)
{
return _p_btree->insert(_key, _klen, _val);
}
bool
KVstore::addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen)
KVstore::addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen)
{
return _p_btree->insert(_key, _val, _vlen);
}
bool
KVstore::addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen)
KVstore::addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen)
{
return _p_btree->insert(_key, _val, _vlen);
}
bool
KVstore::setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val)
KVstore::setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val)
{
return _p_btree->modify(_key, _klen, _val);
}
bool
KVstore::setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen)
KVstore::setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen)
{
return _p_btree->modify(_key, _val, _vlen);
}
bool
KVstore::setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen)
KVstore::setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen)
{
return _p_btree->modify(_key, _val, _vlen);
}
@ -2218,6 +2314,7 @@ KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step)
}
//TODO: better to adjust these parameters according to memory usage and entity num
//need a memory manager first
string KVstore::s_entity2id = "s_entity2id";
string KVstore::s_id2entity = "s_id2entity";
unsigned short KVstore::buffer_entity2id_build = 8;

View File

@ -10,8 +10,16 @@
#define _KVSTORE_KVSTORE_H
#include "../Util/Util.h"
#include "../Util/VList.h"
#include "Tree.h"
//TODO: is it needed to keep a length in Bstr?? especially for IVTree?
//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment)
//add a \0 in tail: only add 1 char
//QUERY: but to count the length each time maybe very costly?
//No, because triple num is stored in char* now!!!! we do not need to save it again
//TODO: entity_border in s2values list is not needed!!! not waste memory here
class KVstore
{
public:
@ -187,13 +195,13 @@ private:
void flush(ISTree* _p_btree);
void flush(IVTree* _p_btree);
bool addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val);
bool addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen);
bool addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen);
bool addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val);
bool addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen);
bool addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen);
bool setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val);
bool setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen);
bool setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen);
bool setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val);
bool setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen);
bool setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen);
bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const;
bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const;

View File

@ -134,7 +134,7 @@ SITree::search(const char* _str, unsigned _len, int* _val)
}
bool
SITree::insert(const char* _str, unsigned _len, int _val)
SITree::insert(char* _str, unsigned _len, int _val)
{
if (_str == NULL || _len == 0)
{

View File

@ -65,7 +65,7 @@ public:
SINode* getRoot() const;
//insert, search, remove, set
bool search(const char* _str, unsigned _len, int* _val);
bool insert(const char* _str, unsigned _len, int _val);
bool insert(char* _str, unsigned _len, int _val);
bool modify(const char* _str, unsigned _len, int _val);
SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify);
bool remove(const char* _str, unsigned _len);

View File

@ -362,4 +362,4 @@ SILeafNode::print(string s)
}
else;
#endif
}
}

View File

@ -27,18 +27,24 @@ public:
void Normal();
SINode* getPrev() const;
SINode* getNext() const;
int getValue(int _index) const;
bool setValue(int _val, int _index);
bool addValue(int _val, int _index);
bool subValue(int _index);
void setPrev(SINode* _prev);
void setNext(SINode* _next);
unsigned getSize() const;
SINode* split(SINode* _father, int _index);
SINode* coalesce(SINode* _father, int _index);
void release();
~SILeafNode();
void print(std::string s); //DEBUG
/*non-sense virtual function
Node* getChild(int _index) const;
bool addChild(Node* _child, int _index);
@ -47,4 +53,4 @@ public:
};
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
#endif
#endif

View File

@ -254,6 +254,27 @@ SINode::addKey(const Bstr* _key, int _index, bool ifcopy)
return true;
}
// Inserts the buffer (_str, _len) as a new key at position _index, moving the
// keys currently at [_index, num) one slot to the right.
// Returns false (after printing an error) when _index is outside [0, num].
// The key count itself is not changed here.
// NOTE: ifcopy is not read by this overload.
bool
SINode::addKey(char* _str, unsigned _len, int _index, bool ifcopy)
{
    const int count = this->getNum();
    if (_index < 0 || count < _index)
    {
        print(string("error in addKey: Invalid index ") + Util::int2string(_index));
        return false;
    }

    //NOTICE: if count == MAX_KEY_NUM, this writes keys[MAX_KEY_NUM], which is not legal!!!
    //however, tree operations ensure that a full node is split first instead of being added to
    int pos = count;
    while (pos > _index)
    {
        keys[pos] = keys[pos - 1];
        --pos;
    }

    keys[_index].setStr(_str);
    keys[_index].setLen(_len);
    return true;
}
bool
SINode::subKey(int _index, bool ifdel)
{
@ -325,4 +346,57 @@ SINode::searchKey_lessEqual(const Bstr& _bstr) const
return ret - 1;
else
return ret;
}
}
// Binary search over this node's keys using Util::compare on raw buffers.
// Returns the position of the first key that compares greater than
// (_str, _len), or the number of keys when no key compares greater.
// NOTE(review): assumes the keys are kept in ascending order -- confirm.
int
SINode::searchKey_less(const char* _str, unsigned _len) const
{
    int left = 0;
    int right = this->getNum() - 1;
    while (left <= right)
    {
        const int pivot = (left + right) / 2;
        const bool keyIsGreater =
            Util::compare(this->keys[pivot].getStr(), this->keys[pivot].getLen(), _str, _len) > 0;
        if (!keyIsGreater)
        {
            // keys[pivot] <= target: the answer lies strictly to the right
            left = pivot + 1;
            continue;
        }
        // keys[pivot] > target: pivot is a candidate; stop once the range cannot shrink
        if (left == pivot)
        {
            break;
        }
        right = pivot;
    }
    return left;
}
// Looks for a key equal to (_str, _len).
// Returns the position of the matching key, or the number of keys in this
// node when the key just before the searchKey_less() position does not match.
int
SINode::searchKey_equal(const char* _str, unsigned _len) const
{
    const int count = this->getNum();
    const int upper = this->searchKey_less(_str, _len);
    if (upper <= 0)
    {
        return count;
    }

    // the only possible match is the key right before the first greater one
    const int prev = upper - 1;
    const bool matches =
        Util::compare(this->keys[prev].getStr(), this->keys[prev].getLen(), _str, _len) == 0;
    return matches ? prev : count;
}
// Returns the position of a key equal to (_str, _len) when the key just
// before the searchKey_less() position matches; otherwise returns the
// searchKey_less() position itself.
int
SINode::searchKey_lessEqual(const char* _str, unsigned _len) const
{
    const int upper = this->searchKey_less(_str, _len);
    if (upper <= 0)
    {
        return upper;
    }

    const int prev = upper - 1;
    const bool matches =
        Util::compare(this->keys[prev].getStr(), this->keys[prev].getLen(), _str, _len) == 0;
    return matches ? prev : upper;
}

View File

@ -64,9 +64,11 @@ public:
void setStore(unsigned _store);
unsigned getFlag() const;
void setFlag(unsigned _flag);
const Bstr* getKey(int _index) const; //need to check the index
bool setKey(const Bstr* _key, int _index, bool ifcopy = false);
bool addKey(const Bstr* _key, int _index, bool ifcopy = false);
bool addKey(char* _str, unsigned _len, int _index, bool ifcopy = false);
bool subKey(int _index, bool ifdel = false);
//several binary key search utilities
@ -74,7 +76,12 @@ public:
int searchKey_equal(const Bstr& _bstr) const;
int searchKey_lessEqual(const Bstr& _bstr) const;
int searchKey_less(const char* _str, unsigned _len) const;
int searchKey_equal(const char* _str, unsigned _len) const;
int searchKey_lessEqual(const char* _str, unsigned _len) const;
//virtual functions: polymorphic
//NOTICE: not pure-virtual, not required to be implemented again, can be used now
virtual SINode* getChild(int _index) const { return NULL; };
virtual bool setChild(SINode* _child, int _index) { return true; };
virtual bool addChild(SINode* _child, int _index) { return true; };
@ -87,6 +94,8 @@ public:
virtual bool subValue(int _index) { return true; };
virtual void setPrev(SINode* _prev) {};
virtual void setNext(SINode* _next) {};
//NOTICE: pure-virtual, must to be implemented again in the sub-class
virtual void Virtual() = 0;
virtual void Normal() = 0;
virtual unsigned getSize() const = 0; //return all memory owned
@ -111,4 +120,4 @@ public:
*to release the whole(pointer is invalid and rebuild problem)
*/
#endif
#endif

View File

@ -88,7 +88,7 @@ http://blog.csdn.net/infoworld/article/details/8670951
要在单机支持到10亿triple最坏情况下最多有20亿entity和20亿literal目前的编号方式是不行的(int扩展为unsigned)
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集就像jena和virtuoso一样慢不要紧
在type分支中sub2id_pre2id_obj2id函数中每次double增长可能无法充分利用unsigned空间只能利用到2560000000超过后最好直接设置为最大
type分支中query过程可能还有问题需要修改Query/里面的类型
去掉tree里面的复制另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现避免每次都重新new但这会影响多线程程序
而且在kvstore中往往需要对原始list做一些额外处理
---

View File

@ -18,6 +18,7 @@ class Bstr
{
private:
char* str; //pointers consume 8 byte in 64-bit system
//TODO: the length maybe not needed
unsigned length;
public:

View File

@ -10,55 +10,64 @@
using namespace std;
// Tells whether a value of _len bytes counts as a "long list", i.e. whether
// its length is strictly greater than VList::LENGTH_BORDER.
bool
VList::isLongList(unsigned _len)
{
    if (_len > VList::LENGTH_BORDER)
    {
        return true;
    }
    return false;
}
// Default constructor: sets the members to their initial values without
// opening any file (the file path is empty and the pointers are NULL).
// NOTE(review): this constructor sets treefp, while the path/mode constructor
// in this file works on valfp, which is not initialized here -- confirm which
// members VList actually declares.
VList::VList()
{ //not use ../logs/, notice the location of program
cur_block_num = SET_BLOCK_NUM;
filepath = "";
freelist = NULL;
treefp = NULL;
minheap = NULL;
max_buffer_size = Util::MAX_BUFFER_SIZE;
// heap capacity is the buffer size divided by IVNode::INTL_SIZE
heap_size = max_buffer_size / IVNode::INTL_SIZE;
// initially the whole buffer is counted as free memory
freemem = max_buffer_size;
}
VList::VList(string& _filepath, unsigned long long _buffer_size)
VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size)
{
cur_block_num = SET_BLOCK_NUM; //initialize
this->filepath = _filepath;
if (_mode == string("build"))
treefp = fopen(_filepath.c_str(), "w+b");
valfp = fopen(_filepath.c_str(), "w+b");
else if (_mode == string("open"))
treefp = fopen(_filepath.c_str(), "r+b");
valfp = fopen(_filepath.c_str(), "r+b");
else
{
print(string("error in IVStorage: Invalid mode ") + _mode);
cout<<string("error in VList: Invalid mode ") + _mode<<endl;
return;
}
if (treefp == NULL)
if (valfp == NULL)
{
print(string("error in IVStorage: Open error ") + _filepath);
cout<<string("error in VList: Open error ") + _filepath<<endl;
return;
}
this->treeheight = _height; //originally set to 0
this->max_buffer_size = _buffer_size;
this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE;
this->freemem = this->max_buffer_size;
this->freelist = new BlockInfo; //null-head
//TODO: read/write by char is too slow, how about read all and deal , then clear?
//
//BETTER: hwo about assign IDs in a dynamic way?
//limitID freelist
//QUETY: can free id list consume very large memory??
unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE
BlockInfo* bp;
if (_mode == "build")
{ //write basic information
i = 0;
fwrite(&i, sizeof(unsigned), 1, this->treefp); //height
fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum
fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num
fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
fwrite(&cur_block_num, sizeof(unsigned), 1, this->valfp); //current block num
//NOTICE: use a 1M block for a unsigned?? not ok!
fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
bp = this->freelist;
j = cur_block_num / 8;
for (i = 0; i < j; ++i)
{
fputc(0, this->treefp);
fputc(0, this->valfp);
for (k = 0; k < 8; ++k)
{
bp->next = new BlockInfo(i * 8 + k + 1, NULL);
@ -69,17 +78,14 @@ VList::VList(string& _filepath, unsigned long long _buffer_size)
else //_mode == "open"
{
//read basic information
int rootnum;
char c;
fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
fread(&rootnum, sizeof(unsigned), 1, this->treefp);
fread(&cur_block_num, sizeof(unsigned), 1, this->treefp);
fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
fread(&cur_block_num, sizeof(unsigned), 1, this->valfp);
fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
bp = this->freelist;
j = cur_block_num / 8;
for (i = 0; i < j; ++i)
{
c = fgetc(treefp);
c = fgetc(valfp);
for (k = 0; k < 8; ++k)
{
if ((c & (1 << k)) == 0)
@ -89,14 +95,13 @@ VList::VList(string& _filepath, unsigned long long _buffer_size)
}
}
}
fseek(treefp, Address(rootnum), SEEK_SET);
//treefp is now ahead of root-block
}
this->minheap = new IVHeap(this->heap_size);
//NOTICE: we do not need to alloc the blocks for free block id list, AllocBlock is only for later blocks
}
long //8-byte in 64-bit machine
IVStorage::Address(unsigned _blocknum) const //BETTER: inline function
VList::Address(unsigned _blocknum) const //BETTER: inline function
{
if (_blocknum == 0)
return 0;
@ -110,13 +115,13 @@ IVStorage::Address(unsigned _blocknum) const //BETTER: inline function
}
//Map a byte offset in the storage file to a block number.
//@param address  byte offset in the file
//@return (address / BLOCK_SIZE) + 1 - SuperNum, using integer division
//NOTE(review): presumably the inverse of Address(); the full body of Address() is not visible here - confirm.
//NOTE(review): both the IVStorage and the VList signature lines appear in this span.
unsigned
IVStorage::Blocknum(long address) const
VList::Blocknum(long address) const
{
return (address / BLOCK_SIZE) + 1 - this->SuperNum;
}
unsigned
IVStorage::AllocBlock()
VList::AllocBlock()
{
BlockInfo* p = this->freelist->next;
if (p == NULL)
@ -131,11 +136,12 @@ IVStorage::AllocBlock()
unsigned t = p->num;
this->freelist->next = p->next;
delete p;
return t;
}
void
IVStorage::FreeBlock(unsigned _blocknum)
VList::FreeBlock(unsigned _blocknum)
{ //QUERY: head-sub and tail-add will be better?
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
this->freelist->next = bp;
@ -145,93 +151,96 @@ IVStorage::FreeBlock(unsigned _blocknum)
//a string may span several blocks
//Follow the block chain while reading: if the current file position is a multiple of
//BLOCK_SIZE, seek to the block whose number is stored in *_next and read one unsigned
//from its start into *_next.
//@param _next  in: number of the block to continue in; out: the unsigned read from that block's first bytes
//If the position is not on a block boundary, nothing is done.
//The results of ftell/fseek/fread are not checked.
//NOTE(review): both the treefp (IVStorage) and the valfp (VList) variant of each statement
//appear in this span; the description applies to either stream.
void
IVStorage::ReadAlign(unsigned* _next)
VList::ReadAlign(unsigned* _next)
{
//position is exactly on a block boundary
if (ftell(treefp) % BLOCK_SIZE == 0)
if (ftell(valfp) % BLOCK_SIZE == 0)
{
//jump to the start of block *_next, then overwrite *_next with the unsigned stored there
fseek(treefp, Address(*_next), SEEK_SET);
fread(_next, sizeof(unsigned), 1, treefp);
fseek(valfp, Address(*_next), SEEK_SET);
fread(_next, sizeof(unsigned), 1, valfp);
}
}
//Extend the block chain while writing: if the current file position is a multiple of
//BLOCK_SIZE, allocate a new block with AllocBlock(), record its number inside block
//*_curnum, and move the file position to 4 bytes past the start of the new block.
//@param _curnum        in: number of the block being written; out: number of the newly allocated block
//@param _SpecialBlock  if true, the new block number is written 4 bytes after the start of
//                      block *_curnum instead of at its start, and the flag is reset to false
//If the position is not on a block boundary, nothing is done.
//The results of ftell/fseek/fwrite are not checked.
//NOTE(review): the literal 4 presumably equals sizeof(unsigned) - confirm.
//NOTE(review): both the treefp (IVStorage) and the valfp (VList) variant of each statement
//appear in this span; the description applies to either stream.
void
IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
VList::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
{
//position is exactly on a block boundary
if (ftell(treefp) % BLOCK_SIZE == 0)
if (ftell(valfp) % BLOCK_SIZE == 0)
{
unsigned blocknum = this->AllocBlock();
//go back to the start of the current block to store the link to the new block
fseek(treefp, Address(*_curnum), SEEK_SET);
fseek(valfp, Address(*_curnum), SEEK_SET);
if (_SpecialBlock)
{
//skip 4 more bytes before writing the link; only done once, then the flag is cleared
fseek(treefp, 4, SEEK_CUR);
fseek(valfp, 4, SEEK_CUR);
_SpecialBlock = false;
}
//write the link, then continue 4 bytes into the new block
fwrite(&blocknum, sizeof(unsigned), 1, treefp);
fseek(treefp, Address(blocknum) + 4, SEEK_SET);
fwrite(&blocknum, sizeof(unsigned), 1, valfp);
fseek(valfp, Address(blocknum) + 4, SEEK_SET);
*_curnum = blocknum;
}
}
//TODO: check , read/write a long list, across several blocks
//not use buffer, read/write on need, update at once, so no need to write back at last
//TODO: still use Bstr?? how can we get the next pointer?? use NULL to init
//NOTICE: the next is placed at the begin of a block
//Read one length-prefixed byte string from the current file position into *_bp,
//calling ReadAlign() along the way so the read can continue in another block.
//Sequence: read an unsigned length, read the payload, skip padding so the payload
//occupies a multiple of 4 bytes.
//@param _bp    receives the length (setLen) and a malloc'ed buffer holding the bytes (setStr)
//@param _next  block-chain cursor passed through to ReadAlign()
//@return always true; the results of malloc/fread/fseek are not checked
//NOTE(review): whether Bstr frees the buffer given to setStr() is not visible here - confirm ownership.
//NOTE(review): both the treefp (IVStorage) and the valfp (VList) variant of each statement
//appear in this span; the description applies to either stream.
bool
IVStorage::readBstr(Bstr* _bp, unsigned* _next)
VList::readBstr(Bstr* _bp, unsigned* _next)
{
//long address;
unsigned len, i, j;
//length prefix
fread(&len, sizeof(unsigned), 1, this->treefp);
fread(&len, sizeof(unsigned), 1, this->valfp);
this->ReadAlign(_next);
//this->request(len);
char* s = (char*)malloc(len);
_bp->setLen(len);
//read in 4-byte chunks, checking for a block boundary after each chunk;
//the condition i + 4 < len leaves the last 1..4 bytes to the loop below
for (i = 0; i + 4 < len; i += 4)
{
fread(s + i, sizeof(char), 4, treefp);
fread(s + i, sizeof(char), 4, valfp);
this->ReadAlign(_next);
}
//remaining bytes, one at a time, with no boundary check in between
while (i < len)
{
fread(s + i, sizeof(char), 1, treefp); //BETTER
fread(s + i, sizeof(char), 1, valfp); //BETTER
i++;
}
//j = number of padding bytes needed to round len up to a multiple of 4
j = len % 4;
if (j > 0)
j = 4 - j;
fseek(treefp, j, SEEK_CUR);
fseek(valfp, j, SEEK_CUR);
this->ReadAlign(_next);
_bp->setStr(s);
return true;
}
//Write one length-prefixed byte string at the current file position, calling
//WriteAlign() along the way so the write can continue in a newly allocated block.
//Sequence: write the unsigned length, write the payload, seek past padding so the
//payload occupies a multiple of 4 bytes.
//@param _bp            source of the length (getLen) and the bytes (getStr)
//@param _curnum        block-chain cursor passed through to WriteAlign()
//@param _SpecialBlock  flag passed through to WriteAlign()
//@return always true; the results of fwrite/fseek are not checked
//NOTE(review): both the treefp (IVStorage) and the valfp (VList) variant of each statement
//appear in this span; the description applies to either stream.
bool
IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
VList::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
{
unsigned i, j, len = _bp->getLen();
//length prefix
fwrite(&len, sizeof(unsigned), 1, treefp);
fwrite(&len, sizeof(unsigned), 1, valfp);
this->WriteAlign(_curnum, _SpecialBlock);
char* s = _bp->getStr();
//write in 4-byte chunks, checking for a block boundary after each chunk;
//the condition i + 4 < len leaves the last 1..4 bytes to the loop below
for (i = 0; i + 4 < len; i += 4)
{
fwrite(s + i, sizeof(char), 4, treefp);
fwrite(s + i, sizeof(char), 4, valfp);
this->WriteAlign(_curnum, _SpecialBlock);
}
//remaining bytes, one at a time, with no boundary check in between
while (i < len)
{
fwrite(s + i, sizeof(char), 1, treefp);
fwrite(s + i, sizeof(char), 1, valfp);
i++;
}
//j = number of padding bytes needed to round len up to a multiple of 4;
//the padding is skipped with fseek, no padding bytes are written
j = len % 4;
if (j > 0)
j = 4 - j;
fseek(treefp, j, SEEK_CUR);
fseek(valfp, j, SEEK_CUR);
this->WriteAlign(_curnum, _SpecialBlock);
return true;
}
VList::~VList()
{
//release heap and freelist...
#ifdef DEBUG_KVSTORE
printf("now to release the kvstore!\n");
#endif
BlockInfo* bp = this->freelist;
BlockInfo* next;
while (bp != NULL)
@ -240,18 +249,6 @@ VList::~VList()
delete bp;
bp = next;
}
#ifdef DEBUG_KVSTORE
printf("already empty the freelist!\n");
#endif
delete this->minheap;
#ifdef DEBUG_KVSTORE
printf("already empty the buffer heap!\n");
#endif
fclose(this->treefp);
//#ifdef DEBUG_KVSTORE
//NOTICE:there is more than one tree
//fclose(Util::debug_kvstore); //NULL is ok!
//Util::debug_kvstore = NULL;
//#endif
fclose(this->valfp);
}

View File

@ -6,11 +6,11 @@
# Description:
=============================================================================*/
#ifndef _KVSTORE_IVTREE_STORAGE_VLIST_H
#define _KVSTORE_IVTREE_STORAGE_VLIST_H
#ifndef _UTIL_VLIST_H
#define _UTIL_VLIST_H
#include "../../../Util/Util.h"
#include "../../../Util/Bstr.h"
#include "Util.h"
#include "Bstr.h"
//TODO: not keep long list in memory, read each time
//but when can you free the long list(kvstore should release it after parsing)
@ -22,15 +22,21 @@
//BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts)
//STRUCT: a long list real-address is the block ID in file2(only for long value lists, a list across several 1M blocks)
//tree-value Bstr: unsigned=the real address char*=NULL
//in disk:
//file1 is tree file, the long list is represented as: 0 real-address
//NOTICE: long list is not kept in memory for cache, it is read/updated each time on need!
class VList
{
public:
//NOTICE:the border is 10^6, but the block is larger, 1M
static const unsigned LENGTH_BORDER = 1000000;
static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block
static const unsigned MAX_BLOCK_NUM = 1 << 16; //max block-num
static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num
//the two constants below must be exactly divisible by 8
static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num
static const unsigned SET_BLOCK_NUM = 1 << 2; //initial blocks num
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
@ -59,12 +65,14 @@ private:
public:
VList();
VList(std::string& _filepath, unsigned long long _buffer_size);//create a fixed-size file or open an existence
VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence
bool readBstr(Bstr* _bp, unsigned* _next);
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock);
bool readValue(unsigned _block_num);
bool writeValue(const Bstr* _bp);
~VList();
static bool isLongList(unsigned _len);
};
#endif