refactor: add long list value support
TODO: vlist support and IVLeafNode by zengli; a long list needs to be freed each time
This commit is contained in:
parent
d64358b606
commit
939e84c8bf
|
@ -20,7 +20,7 @@ ISTree::ISTree()
|
|||
TSM = NULL;
|
||||
storepath = "";
|
||||
filename = "";
|
||||
transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
|
||||
//transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
|
||||
this->stream = NULL;
|
||||
this->request = 0;
|
||||
}
|
||||
|
@ -37,10 +37,10 @@ ISTree::ISTree(string _storepath, string _filename, string _mode, unsigned long
|
|||
this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
|
||||
else
|
||||
this->root = NULL;
|
||||
this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
|
||||
//this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
|
||||
this->stream = NULL;
|
||||
this->request = 0;
|
||||
}
|
||||
|
@ -51,30 +51,30 @@ ISTree::getFilePath()
|
|||
return storepath + "/" + filename;
|
||||
}
|
||||
|
||||
void //WARN: not check _str and _len
|
||||
ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
|
||||
{
|
||||
if (_index > 2)
|
||||
return;
|
||||
/*
|
||||
if(_str == NULL || _len == 0)
|
||||
{
|
||||
printf("error in CopyToTransfer: empty string\n");
|
||||
return;
|
||||
}
|
||||
*/
|
||||
//unsigned length = _bstr->getLen();
|
||||
unsigned length = _len;
|
||||
if (length + 1 > this->transfer_size[_index])
|
||||
{
|
||||
transfer[_index].release();
|
||||
transfer[_index].setStr((char*)malloc(length + 1));
|
||||
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
|
||||
}
|
||||
memcpy(this->transfer[_index].getStr(), _str, length);
|
||||
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
this->transfer[_index].setLen(length);
|
||||
}
|
||||
//void //WARN: not check _str and _len
|
||||
//ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
|
||||
//{
|
||||
//if (_index > 2)
|
||||
//return;
|
||||
//[>
|
||||
//if(_str == NULL || _len == 0)
|
||||
//{
|
||||
//printf("error in CopyToTransfer: empty string\n");
|
||||
//return;
|
||||
//}
|
||||
//*/
|
||||
////unsigned length = _bstr->getLen();
|
||||
//unsigned length = _len;
|
||||
//if (length + 1 > this->transfer_size[_index])
|
||||
//{
|
||||
//transfer[_index].release();
|
||||
//transfer[_index].setStr((char*)malloc(length + 1));
|
||||
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
|
||||
//}
|
||||
//memcpy(this->transfer[_index].getStr(), _str, length);
|
||||
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
//this->transfer[_index].setLen(length);
|
||||
//}
|
||||
|
||||
unsigned
|
||||
ISTree::getHeight() const
|
||||
|
@ -121,15 +121,18 @@ ISTree::search(int _key, char*& _str, int& _len)
|
|||
}
|
||||
|
||||
const Bstr* val = ret->getValue(store);
|
||||
this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
|
||||
_str = this->transfer[0].getStr();
|
||||
_len = this->transfer[0].getLen();
|
||||
//this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
|
||||
//_str = this->transfer[0].getStr();
|
||||
//_len = this->transfer[0].getLen();
|
||||
_str = val->getStr();
|
||||
_len = val->getLen();
|
||||
|
||||
this->TSM->request(request);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ISTree::insert(int _key, const char* _str, unsigned _len)
|
||||
ISTree::insert(int _key, char* _str, unsigned _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
|
@ -137,8 +140,8 @@ ISTree::insert(int _key, const char* _str, unsigned _len)
|
|||
return false;
|
||||
}
|
||||
|
||||
this->CopyToTransfer(_str, _len, 2);
|
||||
const Bstr* val = &(this->transfer[2]);
|
||||
//this->CopyToTransfer(_str, _len, 2);
|
||||
//const Bstr* val = &(this->transfer[2]);
|
||||
this->request = 0;
|
||||
ISNode* ret;
|
||||
if (this->root == NULL) //tree is empty
|
||||
|
@ -222,9 +225,9 @@ ISTree::insert(int _key, const char* _str, unsigned _len)
|
|||
else
|
||||
{
|
||||
p->addKey(_key, i);
|
||||
p->addValue(val, i, true);
|
||||
p->addValue(_str, _len, i, true);
|
||||
p->addNum();
|
||||
request += val->getLen();
|
||||
request += _len;
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
//_key->clear();
|
||||
|
@ -235,7 +238,7 @@ ISTree::insert(int _key, const char* _str, unsigned _len)
|
|||
}
|
||||
|
||||
bool
|
||||
ISTree::modify(int _key, const char* _str, unsigned _len)
|
||||
ISTree::modify(int _key, char* _str, unsigned _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
|
@ -243,8 +246,8 @@ ISTree::modify(int _key, const char* _str, unsigned _len)
|
|||
return false;
|
||||
}
|
||||
|
||||
this->CopyToTransfer(_str, _len, 2); //not check value
|
||||
const Bstr* val = &(this->transfer[2]);
|
||||
//this->CopyToTransfer(_str, _len, 2); //not check value
|
||||
//const Bstr* val = &(this->transfer[2]);
|
||||
this->request = 0;
|
||||
int store;
|
||||
ISNode* ret = this->find(_key, &store, true);
|
||||
|
@ -255,16 +258,17 @@ ISTree::modify(int _key, const char* _str, unsigned _len)
|
|||
}
|
||||
//cout<<"ISTree::modify() - key is found, now to remove"<<endl;
|
||||
unsigned len = ret->getValue(store)->getLen();
|
||||
ret->setValue(val, store, true);
|
||||
ret->setValue(_str, _len, store, true);
|
||||
//cout<<"value reset"<<endl;
|
||||
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
|
||||
//request += (val->getLen() - len);
|
||||
this->request = val->getLen();
|
||||
this->request = _len;
|
||||
this->request -= len;
|
||||
ret->setDirty();
|
||||
//cout<<"to request"<<endl;
|
||||
this->TSM->request(request);
|
||||
//cout<<"memory requested"<<endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -300,6 +304,7 @@ ISTree::find(int _key, int* _store, bool ifmodify)
|
|||
*_store = -1; //Not Found
|
||||
else
|
||||
*_store = i;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
|
|
@ -36,8 +36,8 @@ protected:
|
|||
//so lock is a must. Add lock to transfer is better than to add
|
||||
//lock to every key/value. However, modify requires a lock for a
|
||||
//key/value, and multiple search for different keys are ok!!!
|
||||
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
|
||||
unsigned transfer_size[3];
|
||||
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
|
||||
//unsigned transfer_size[3];
|
||||
|
||||
//tree's operations should be atom(if read nodes)
|
||||
//sum the request and send to ISStorage at last
|
||||
|
@ -49,7 +49,7 @@ protected:
|
|||
std::string filename; //ok for user to change
|
||||
/* some private functions */
|
||||
std::string getFilePath(); //in UNIX system
|
||||
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
|
||||
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
|
||||
void release(ISNode* _np) const;
|
||||
|
||||
public:
|
||||
|
@ -61,8 +61,8 @@ public:
|
|||
//void setRoot(Node* _root);
|
||||
//insert, search, remove, set
|
||||
bool search(int _key, char*& _str, int& _len);
|
||||
bool insert(int _key, const char* _str, unsigned _len);
|
||||
bool modify(int _key, const char* _str, unsigned _len);
|
||||
bool insert(int _key, char* _str, unsigned _len);
|
||||
bool modify(int _key, char* _str, unsigned _len);
|
||||
ISNode* find(int _key, int* store, bool ifmodify);
|
||||
bool remove(int _key);
|
||||
const Bstr* getRangeValue();
|
||||
|
|
|
@ -125,6 +125,42 @@ ISLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ISLeafNode::setValue(char* _str, unsigned _len, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
this->values[_index].release(); //NOTICE: only used in modify
|
||||
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ISLeafNode::addValue(char* _str, unsigned _len, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for (i = num - 1; i >= _index; --i)
|
||||
this->values[i + 1] = this->values[i];
|
||||
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ISLeafNode::subValue(int _index, bool ifdel)
|
||||
{
|
||||
|
@ -373,4 +409,4 @@ ISLeafNode::print(string s)
|
|||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ public:
|
|||
void Normal();
|
||||
ISNode* getPrev() const;
|
||||
ISNode* getNext() const;
|
||||
|
||||
const Bstr* getValue(int _index) const;
|
||||
bool setValue(const Bstr* _value, int _index, bool ifcopy = false);
|
||||
bool addValue(const Bstr* _value, int _index, bool ifcopy = false);
|
||||
|
@ -34,6 +35,10 @@ public:
|
|||
void setPrev(ISNode* _prev);
|
||||
void setNext(ISNode* _next);
|
||||
unsigned getSize() const;
|
||||
|
||||
bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
|
||||
bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
|
||||
|
||||
ISNode* split(ISNode* _father, int _index);
|
||||
ISNode* coalesce(ISNode* _father, int _index);
|
||||
void release();
|
||||
|
@ -47,4 +52,4 @@ public:
|
|||
};
|
||||
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -80,12 +80,18 @@ public:
|
|||
virtual bool subChild(int _index) { return true; };
|
||||
virtual ISNode* getPrev() const { return NULL; };
|
||||
virtual ISNode* getNext() const { return NULL; };
|
||||
|
||||
virtual const Bstr* getValue(int _index) const { return NULL; };
|
||||
virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool subValue(int _index, bool ifdel = false) { return true; };
|
||||
virtual void setPrev(ISNode* _prev) {};
|
||||
virtual void setNext(ISNode* _next) {};
|
||||
|
||||
virtual bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
|
||||
|
||||
//pure virtual function
|
||||
virtual void Virtual() = 0;
|
||||
virtual void Normal() = 0;
|
||||
virtual unsigned getSize() const = 0; //return all memory owned
|
||||
|
@ -110,4 +116,4 @@ public:
|
|||
*to release the whole(pointer is invalid and rebuild problem)
|
||||
*/
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -36,7 +36,7 @@ IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long
|
|||
string filepath = this->getFilePath();
|
||||
|
||||
string vlist_file = filepath + "_vlist";
|
||||
this->value_list = new VList(vlist_file, 1<<30);
|
||||
this->value_list = new VList(vlist_file, this->mode, 1<<30);
|
||||
|
||||
TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list);
|
||||
if (this->mode == "open")
|
||||
|
@ -142,7 +142,7 @@ IVTree::search(int _key, char*& _str, int& _len)
|
|||
}
|
||||
|
||||
bool
|
||||
IVTree::insert(int _key, const char* _str, unsigned _len)
|
||||
IVTree::insert(int _key, char* _str, unsigned _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
|
@ -244,12 +244,13 @@ IVTree::insert(int _key, const char* _str, unsigned _len)
|
|||
//_key->clear();
|
||||
//_value->clear();
|
||||
}
|
||||
|
||||
this->TSM->request(request);
|
||||
return !ifexist; //QUERY(which case:return false)
|
||||
}
|
||||
|
||||
bool
|
||||
IVTree::modify(int _key, const char* _str, unsigned _len)
|
||||
IVTree::modify(int _key, char* _str, unsigned _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
|
@ -268,11 +269,14 @@ IVTree::modify(int _key, const char* _str, unsigned _len)
|
|||
return false;
|
||||
}
|
||||
//cout<<"IVTree::modify() - key is found, now to remove"<<endl;
|
||||
|
||||
//NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr
|
||||
unsigned len = ret->getValue(store)->getLen();
|
||||
ret->setValue(this->value_list, store, _str, _len, true);
|
||||
//ret->setValue(val, store, true);
|
||||
//cout<<"value reset"<<endl;
|
||||
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
|
||||
|
||||
//request += (val->getLen() - len);
|
||||
this->request = _len;
|
||||
//this->request = val->getLen();
|
||||
|
@ -386,6 +390,7 @@ IVTree::remove(int _key)
|
|||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
p = q;
|
||||
}
|
||||
|
||||
bool flag = false;
|
||||
//j = p->getNum(); //LeafNode(maybe root)
|
||||
//for(i = 0; i < j; ++i)
|
||||
|
@ -414,7 +419,7 @@ IVTree::remove(int _key)
|
|||
{
|
||||
request -= p->getValue(i)->getLen();
|
||||
p->subKey(i); //to release
|
||||
p->subValue(i, true); //to release
|
||||
p->subValue(this->value_list, i, true); //to release
|
||||
p->subNum();
|
||||
if (p->getNum() == 0) //root leaf 0 key
|
||||
{
|
||||
|
@ -461,6 +466,7 @@ IVTree::resetStream()
|
|||
this->stream->setEnd();
|
||||
}
|
||||
|
||||
//TODO: change to using value list, getValue() maybe not get real long list
|
||||
bool //special case: not exist, one-edge-case
|
||||
IVTree::range_query(int _key1, int _key2)
|
||||
{ //the range is: *_key1 <= x < *_key2
|
||||
|
@ -555,6 +561,7 @@ IVTree::range_query(int _key1, int _key2)
|
|||
for (i = l; i < r; ++i)
|
||||
{
|
||||
//NOTICE:Bstr* in an array, used as Bstr[]
|
||||
//DEBUG+TODO: if long list?? clean
|
||||
this->stream->write(p->getValue(i));
|
||||
}
|
||||
this->TSM->request(request);
|
||||
|
@ -563,7 +570,9 @@ IVTree::range_query(int _key1, int _key2)
|
|||
else
|
||||
break;
|
||||
}
|
||||
|
||||
this->stream->setEnd();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -11,11 +11,11 @@
|
|||
|
||||
#include "../../Util/Util.h"
|
||||
#include "../../Util/Stream.h"
|
||||
#include "../../Util/VList.h"
|
||||
#include "node/IVNode.h"
|
||||
#include "node/IVIntlNode.h"
|
||||
#include "node/IVLeafNode.h"
|
||||
#include "storage/IVStorage.h"
|
||||
#include "./vlist/VList.h"
|
||||
|
||||
//TODO: for long list, do not read in time, just on need
|
||||
//the memory is kept with the node, updated with the node
|
||||
|
@ -80,8 +80,8 @@ public:
|
|||
//void setRoot(Node* _root);
|
||||
//insert, search, remove, set
|
||||
bool search(int _key, char*& _str, int& _len);
|
||||
bool insert(int _key, const char* _str, unsigned _len);
|
||||
bool modify(int _key, const char* _str, unsigned _len);
|
||||
bool insert(int _key, char* _str, unsigned _len);
|
||||
bool modify(int _key, char* _str, unsigned _len);
|
||||
IVNode* find(int _key, int* store, bool ifmodify);
|
||||
bool remove(int _key);
|
||||
const Bstr* getRangeValue();
|
||||
|
|
|
@ -89,6 +89,7 @@ IVLeafNode::getValue(int _index) const
|
|||
return this->values + _index;
|
||||
}
|
||||
|
||||
//TODO!!!
|
||||
bool
|
||||
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
|
||||
{
|
||||
|
@ -148,6 +149,26 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
|
||||
{
|
||||
//TODO: if is to sub long list
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in subValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
int i;
|
||||
if (ifdel)
|
||||
values[_index].release();
|
||||
for (i = _index; i < num - 1; ++i)
|
||||
this->values[i] = this->values[i + 1];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
|
||||
{
|
||||
|
@ -169,26 +190,6 @@ IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
|
||||
{
|
||||
//TODO: if is to sub long list
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in subValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
int i;
|
||||
if (ifdel)
|
||||
values[_index].release();
|
||||
for (i = _index; i < num - 1; ++i)
|
||||
this->values[i] = this->values[i + 1];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::subValue(int _index, bool ifdel)
|
||||
{
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
#include "../../../Util/Util.h"
|
||||
#include "../../../Util/Bstr.h"
|
||||
#include "../vlist/VList.h"
|
||||
#include "../../../Util/VList.h"
|
||||
|
||||
class IVNode //abstract basic class
|
||||
{
|
||||
|
@ -81,6 +81,7 @@ public:
|
|||
virtual bool subChild(int _index) { return true; };
|
||||
virtual IVNode* getPrev() const { return NULL; };
|
||||
virtual IVNode* getNext() const { return NULL; };
|
||||
|
||||
virtual const Bstr* getValue(int _index) const { return NULL; };
|
||||
//Base-class default: does nothing with its arguments and reports failure.
//Returns false explicitly; the return type is bool, so returning NULL relied on
//a null-pointer-constant-to-bool conversion that is ill-formed where NULL is nullptr.
virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return false; };
|
||||
virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; };
|
||||
|
@ -92,6 +93,8 @@ public:
|
|||
|
||||
virtual void setPrev(IVNode* _prev) {};
|
||||
virtual void setNext(IVNode* _next) {};
|
||||
|
||||
//pure virtual functions
|
||||
virtual void Virtual() = 0;
|
||||
virtual void Normal() = 0;
|
||||
virtual unsigned getSize() const = 0; //return all memory owned
|
||||
|
|
|
@ -9,10 +9,10 @@
|
|||
#ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
|
||||
#define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
|
||||
|
||||
#include "../../../Util/VList.h"
|
||||
#include "../node/IVIntlNode.h"
|
||||
#include "../node/IVLeafNode.h"
|
||||
#include "../heap/IVHeap.h"
|
||||
#include "../vlist/VList.h"
|
||||
|
||||
//It controls read, write, swap
|
||||
class IVStorage
|
||||
|
|
|
@ -1422,6 +1422,14 @@ KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool
|
|||
_preidlist[i] = _tmp[2 * i + 3];
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1452,6 +1460,14 @@ KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool
|
|||
_list_len = Util::removeDuplicate(_objidlist, _list_len);
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1493,6 +1509,14 @@ KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int&
|
|||
_objidlist = new int[_list_len];
|
||||
memcpy(_objidlist, _tmp + _offset, sizeof(int) * _list_len);
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1533,6 +1557,14 @@ KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list
|
|||
}
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1664,6 +1696,14 @@ KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool
|
|||
_preidlist[i] = _tmp[2 * i + 2];
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1688,6 +1728,14 @@ KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool
|
|||
_list_len = Util::removeDuplicate(_subidlist, _list_len);
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1723,6 +1771,14 @@ KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int&
|
|||
_subidlist = new int[_list_len];
|
||||
memcpy(_subidlist, _tmp + _offset, sizeof(int) * _list_len);
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1756,6 +1812,14 @@ KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list
|
|||
}
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1867,6 +1931,14 @@ KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool
|
|||
_list_len = Util::removeDuplicate(_subidlist, _list_len);
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1891,6 +1963,14 @@ KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool
|
|||
_list_len = Util::removeDuplicate(_objidlist, _list_len);
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1913,6 +1993,14 @@ KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list
|
|||
_subid_objidlist[2 * i + 1] = _tmp[1 + _tmp[0] + i];
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1977,6 +2065,14 @@ KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int&
|
|||
}
|
||||
}
|
||||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
}
|
||||
|
||||
if (_list_len == 0) {
|
||||
_preidlist = NULL;
|
||||
return false;
|
||||
|
@ -2091,37 +2187,37 @@ KVstore::flush(IVTree* _p_btree)
|
|||
}
|
||||
|
||||
//Thin wrapper: forwards (_key, _klen, _val) to _p_btree->insert() and returns
//its result unchanged.
//NOTE(review): two signature lines appear below (const char* _key and char* _key);
//presumably the before/after lines of a diff view - only one can be live. TODO confirm.
bool
|
||||
KVstore::addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val)
|
||||
KVstore::addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val)
|
||||
{
|
||||
return _p_btree->insert(_key, _klen, _val);
|
||||
}
|
||||
|
||||
//Thin wrapper: forwards (_key, _val, _vlen) to _p_btree->insert() on an ISTree
//and returns its result unchanged.
//NOTE(review): two signature lines appear below (const char* _val and char* _val);
//presumably the before/after lines of a diff view - only one can be live. TODO confirm.
bool
|
||||
KVstore::addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen)
|
||||
KVstore::addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen)
|
||||
{
|
||||
return _p_btree->insert(_key, _val, _vlen);
|
||||
}
|
||||
|
||||
//Thin wrapper: forwards (_key, _val, _vlen) to _p_btree->insert() on an IVTree
//and returns its result unchanged.
//NOTE(review): two signature lines appear below (const char* _val and char* _val);
//presumably the before/after lines of a diff view - only one can be live. TODO confirm.
bool
|
||||
KVstore::addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen)
|
||||
KVstore::addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen)
|
||||
{
|
||||
return _p_btree->insert(_key, _val, _vlen);
|
||||
}
|
||||
|
||||
//Thin wrapper: forwards (_key, _klen, _val) to _p_btree->modify() on an SITree
//and returns its result unchanged.
//NOTE(review): two signature lines appear below (const char* _key and char* _key);
//presumably the before/after lines of a diff view - only one can be live. TODO confirm.
bool
|
||||
KVstore::setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val)
|
||||
KVstore::setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val)
|
||||
{
|
||||
return _p_btree->modify(_key, _klen, _val);
|
||||
}
|
||||
|
||||
//Thin wrapper: forwards (_key, _val, _vlen) to _p_btree->modify() on an ISTree
//and returns its result unchanged.
//NOTE(review): two signature lines appear below (const char* _val and char* _val);
//presumably the before/after lines of a diff view - only one can be live. TODO confirm.
bool
|
||||
KVstore::setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen)
|
||||
KVstore::setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen)
|
||||
{
|
||||
return _p_btree->modify(_key, _val, _vlen);
|
||||
}
|
||||
|
||||
//Thin wrapper: forwards (_key, _val, _vlen) to _p_btree->modify() on an IVTree
//and returns its result unchanged.
//NOTE(review): two signature lines appear below (const char* _val and char* _val);
//presumably the before/after lines of a diff view - only one can be live. TODO confirm.
bool
|
||||
KVstore::setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen)
|
||||
KVstore::setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen)
|
||||
{
|
||||
return _p_btree->modify(_key, _val, _vlen);
|
||||
}
|
||||
|
@ -2218,6 +2314,7 @@ KVstore::binarySearch(int _key, const int* _list, int _list_len, int _step)
|
|||
}
|
||||
|
||||
//TODO: better to adjust these parameters according to memory usage and entity num
|
||||
//need a memory manager first
|
||||
string KVstore::s_entity2id = "s_entity2id";
|
||||
string KVstore::s_id2entity = "s_id2entity";
|
||||
unsigned short KVstore::buffer_entity2id_build = 8;
|
||||
|
|
|
@ -10,8 +10,16 @@
|
|||
#define _KVSTORE_KVSTORE_H
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Util/VList.h"
|
||||
#include "Tree.h"
|
||||
|
||||
//TODO: is it needed to keep a length in Bstr?? especially for IVTree?
|
||||
//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment)
|
||||
//add a \0 in tail: only add 1 char
|
||||
//QUERY: but to count the length each time maybe very costly?
|
||||
//No, because triple num is stored in char* now!!!! we do not need to save it again
|
||||
//TODO: entity_border in s2values list is not needed!!! not waste memory here
|
||||
|
||||
class KVstore
|
||||
{
|
||||
public:
|
||||
|
@ -187,13 +195,13 @@ private:
|
|||
void flush(ISTree* _p_btree);
|
||||
void flush(IVTree* _p_btree);
|
||||
|
||||
bool addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val);
|
||||
bool addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen);
|
||||
bool addValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen);
|
||||
bool addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val);
|
||||
bool addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen);
|
||||
bool addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen);
|
||||
|
||||
bool setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val);
|
||||
bool setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen);
|
||||
bool setValueByKey(IVTree* _p_btree, int _key, const char* _val, int _vlen);
|
||||
bool setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val);
|
||||
bool setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen);
|
||||
bool setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen);
|
||||
|
||||
bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const;
|
||||
bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const;
|
||||
|
|
|
@ -134,7 +134,7 @@ SITree::search(const char* _str, unsigned _len, int* _val)
|
|||
}
|
||||
|
||||
bool
|
||||
SITree::insert(const char* _str, unsigned _len, int _val)
|
||||
SITree::insert(char* _str, unsigned _len, int _val)
|
||||
{
|
||||
if (_str == NULL || _len == 0)
|
||||
{
|
||||
|
|
|
@ -65,7 +65,7 @@ public:
|
|||
SINode* getRoot() const;
|
||||
//insert, search, remove, set
|
||||
bool search(const char* _str, unsigned _len, int* _val);
|
||||
bool insert(const char* _str, unsigned _len, int _val);
|
||||
bool insert(char* _str, unsigned _len, int _val);
|
||||
bool modify(const char* _str, unsigned _len, int _val);
|
||||
SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify);
|
||||
bool remove(const char* _str, unsigned _len);
|
||||
|
|
|
@ -362,4 +362,4 @@ SILeafNode::print(string s)
|
|||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,18 +27,24 @@ public:
|
|||
void Normal();
|
||||
SINode* getPrev() const;
|
||||
SINode* getNext() const;
|
||||
|
||||
int getValue(int _index) const;
|
||||
bool setValue(int _val, int _index);
|
||||
bool addValue(int _val, int _index);
|
||||
bool subValue(int _index);
|
||||
|
||||
void setPrev(SINode* _prev);
|
||||
void setNext(SINode* _next);
|
||||
|
||||
unsigned getSize() const;
|
||||
|
||||
SINode* split(SINode* _father, int _index);
|
||||
SINode* coalesce(SINode* _father, int _index);
|
||||
|
||||
void release();
|
||||
~SILeafNode();
|
||||
void print(std::string s); //DEBUG
|
||||
|
||||
/*non-sense virtual function
|
||||
Node* getChild(int _index) const;
|
||||
bool addChild(Node* _child, int _index);
|
||||
|
@ -47,4 +53,4 @@ public:
|
|||
};
|
||||
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -254,6 +254,27 @@ SINode::addKey(const Bstr* _key, int _index, bool ifcopy)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
SINode::addKey(char* _str, unsigned _len, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addKey: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
//NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!!
|
||||
//however. tree operations ensure that: when node is full, not add but split first!
|
||||
for (i = num - 1; i >= _index; --i)
|
||||
keys[i + 1] = keys[i];
|
||||
|
||||
keys[_index].setStr(_str);
|
||||
keys[_index].setLen(_len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
SINode::subKey(int _index, bool ifdel)
|
||||
{
|
||||
|
@ -325,4 +346,57 @@ SINode::searchKey_lessEqual(const Bstr& _bstr) const
|
|||
return ret - 1;
|
||||
else
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
SINode::searchKey_less(const char* _str, unsigned _len) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
|
||||
int low = 0, high = num - 1, mid = -1;
|
||||
while (low <= high)
|
||||
{
|
||||
mid = (low + high) / 2;
|
||||
//if (this->keys[mid] > _bstr)
|
||||
if (Util::compare(this->keys[mid].getStr(), this->keys[mid].getLen(), _str, _len) > 0)
|
||||
{
|
||||
if (low == mid)
|
||||
break;
|
||||
high = mid;
|
||||
}
|
||||
else
|
||||
{
|
||||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return low;
|
||||
}
|
||||
|
||||
int
|
||||
SINode::searchKey_equal(const char* _str, unsigned _len) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
// if(bstr == *(p->getKey(i)))
|
||||
// {
|
||||
|
||||
int ret = this->searchKey_less(_str, _len);
|
||||
//if (ret > 0 && this->keys[ret - 1] == _bstr)
|
||||
if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0)
|
||||
return ret - 1;
|
||||
else
|
||||
return num;
|
||||
}
|
||||
|
||||
int
|
||||
SINode::searchKey_lessEqual(const char* _str, unsigned _len) const
|
||||
{
|
||||
int ret = this->searchKey_less(_str, _len);
|
||||
//if (ret > 0 && this->keys[ret - 1] == _bstr)
|
||||
if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0)
|
||||
return ret - 1;
|
||||
else
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -64,9 +64,11 @@ public:
|
|||
void setStore(unsigned _store);
|
||||
unsigned getFlag() const;
|
||||
void setFlag(unsigned _flag);
|
||||
|
||||
const Bstr* getKey(int _index) const; //need to check the index
|
||||
bool setKey(const Bstr* _key, int _index, bool ifcopy = false);
|
||||
bool addKey(const Bstr* _key, int _index, bool ifcopy = false);
|
||||
bool addKey(char* _str, unsigned _len, int _index, bool ifcopy = false);
|
||||
bool subKey(int _index, bool ifdel = false);
|
||||
|
||||
//several binary key search utilities
|
||||
|
@ -74,7 +76,12 @@ public:
|
|||
int searchKey_equal(const Bstr& _bstr) const;
|
||||
int searchKey_lessEqual(const Bstr& _bstr) const;
|
||||
|
||||
int searchKey_less(const char* _str, unsigned _len) const;
|
||||
int searchKey_equal(const char* _str, unsigned _len) const;
|
||||
int searchKey_lessEqual(const char* _str, unsigned _len) const;
|
||||
|
||||
//virtual functions: polymorphic
|
||||
//NOTICE: not pure-virtual, not required to be implemented again, can be used now
|
||||
virtual SINode* getChild(int _index) const { return NULL; };
|
||||
virtual bool setChild(SINode* _child, int _index) { return true; };
|
||||
virtual bool addChild(SINode* _child, int _index) { return true; };
|
||||
|
@ -87,6 +94,8 @@ public:
|
|||
virtual bool subValue(int _index) { return true; };
|
||||
virtual void setPrev(SINode* _prev) {};
|
||||
virtual void setNext(SINode* _next) {};
|
||||
|
||||
//NOTICE: pure-virtual, must be implemented in the sub-class
|
||||
virtual void Virtual() = 0;
|
||||
virtual void Normal() = 0;
|
||||
virtual unsigned getSize() const = 0; //return all memory owned
|
||||
|
@ -111,4 +120,4 @@ public:
|
|||
*to release the whole(pointer is invalid and rebuild problem)
|
||||
*/
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
2
NOTES.md
2
NOTES.md
|
@ -88,7 +88,7 @@ http://blog.csdn.net/infoworld/article/details/8670951
|
|||
要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned)
|
||||
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧
|
||||
|
||||
在type分支中,sub2id_pre2id_obj2id函数中,每次double增长可能无法充分利用unsigned空间,只能利用到2560000000,超过后最好直接设置为最大
|
||||
type分支中query过程可能还有问题,需要修改Query/里面的类型
|
||||
去掉tree里面的复制,另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现,避免每次都重新new,但这会影响多线程程序
|
||||
而且在kvstore中往往需要对原始list做一些额外处理
|
||||
---
|
||||
|
|
|
@ -18,6 +18,7 @@ class Bstr
|
|||
{
|
||||
private:
|
||||
char* str; //pointers consume 8 byte in 64-bit system
|
||||
//TODO: the length may not be needed
|
||||
unsigned length;
|
||||
|
||||
public:
|
||||
|
|
|
@ -10,55 +10,64 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
bool
|
||||
VList::isLongList(unsigned _len)
|
||||
{
|
||||
return _len > VList::LENGTH_BORDER;
|
||||
}
|
||||
|
||||
VList::VList()
|
||||
{ //not use ../logs/, notice the location of program
|
||||
cur_block_num = SET_BLOCK_NUM;
|
||||
filepath = "";
|
||||
freelist = NULL;
|
||||
treefp = NULL;
|
||||
minheap = NULL;
|
||||
max_buffer_size = Util::MAX_BUFFER_SIZE;
|
||||
heap_size = max_buffer_size / IVNode::INTL_SIZE;
|
||||
freemem = max_buffer_size;
|
||||
}
|
||||
|
||||
VList::VList(string& _filepath, unsigned long long _buffer_size)
|
||||
VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size)
|
||||
{
|
||||
cur_block_num = SET_BLOCK_NUM; //initialize
|
||||
this->filepath = _filepath;
|
||||
|
||||
if (_mode == string("build"))
|
||||
treefp = fopen(_filepath.c_str(), "w+b");
|
||||
valfp = fopen(_filepath.c_str(), "w+b");
|
||||
else if (_mode == string("open"))
|
||||
treefp = fopen(_filepath.c_str(), "r+b");
|
||||
valfp = fopen(_filepath.c_str(), "r+b");
|
||||
else
|
||||
{
|
||||
print(string("error in IVStorage: Invalid mode ") + _mode);
|
||||
cout<<string("error in VList: Invalid mode ") + _mode<<endl;
|
||||
return;
|
||||
}
|
||||
if (treefp == NULL)
|
||||
if (valfp == NULL)
|
||||
{
|
||||
print(string("error in IVStorage: Open error ") + _filepath);
|
||||
cout<<string("error in VList: Open error ") + _filepath<<endl;
|
||||
return;
|
||||
}
|
||||
this->treeheight = _height; //originally set to 0
|
||||
|
||||
this->max_buffer_size = _buffer_size;
|
||||
this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE;
|
||||
this->freemem = this->max_buffer_size;
|
||||
this->freelist = new BlockInfo; //null-head
|
||||
|
||||
//TODO: read/write by char is too slow, how about read all and deal , then clear?
|
||||
//
|
||||
//BETTER: how about assigning IDs in a dynamic way?
|
||||
//limitID freelist
|
||||
//QUERY: can the free id list consume very large memory?
|
||||
|
||||
unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE
|
||||
BlockInfo* bp;
|
||||
if (_mode == "build")
|
||||
{ //write basic information
|
||||
i = 0;
|
||||
fwrite(&i, sizeof(unsigned), 1, this->treefp); //height
|
||||
fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum
|
||||
fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num
|
||||
fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
|
||||
fwrite(&cur_block_num, sizeof(unsigned), 1, this->valfp); //current block num
|
||||
//NOTICE: use a 1M block for a unsigned?? not ok!
|
||||
fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
|
||||
bp = this->freelist;
|
||||
j = cur_block_num / 8;
|
||||
for (i = 0; i < j; ++i)
|
||||
{
|
||||
fputc(0, this->treefp);
|
||||
fputc(0, this->valfp);
|
||||
for (k = 0; k < 8; ++k)
|
||||
{
|
||||
bp->next = new BlockInfo(i * 8 + k + 1, NULL);
|
||||
|
@ -69,17 +78,14 @@ VList::VList(string& _filepath, unsigned long long _buffer_size)
|
|||
else //_mode == "open"
|
||||
{
|
||||
//read basic information
|
||||
int rootnum;
|
||||
char c;
|
||||
fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
|
||||
fread(&rootnum, sizeof(unsigned), 1, this->treefp);
|
||||
fread(&cur_block_num, sizeof(unsigned), 1, this->treefp);
|
||||
fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
|
||||
fread(&cur_block_num, sizeof(unsigned), 1, this->valfp);
|
||||
fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
|
||||
bp = this->freelist;
|
||||
j = cur_block_num / 8;
|
||||
for (i = 0; i < j; ++i)
|
||||
{
|
||||
c = fgetc(treefp);
|
||||
c = fgetc(valfp);
|
||||
for (k = 0; k < 8; ++k)
|
||||
{
|
||||
if ((c & (1 << k)) == 0)
|
||||
|
@ -89,14 +95,13 @@ VList::VList(string& _filepath, unsigned long long _buffer_size)
|
|||
}
|
||||
}
|
||||
}
|
||||
fseek(treefp, Address(rootnum), SEEK_SET);
|
||||
//treefp is now ahead of root-block
|
||||
}
|
||||
this->minheap = new IVHeap(this->heap_size);
|
||||
|
||||
//NOTICE: we do not need to alloc the blocks for free block id list, AllocBlock is only for later blocks
|
||||
}
|
||||
|
||||
long //8-byte in 64-bit machine
|
||||
IVStorage::Address(unsigned _blocknum) const //BETTER: inline function
|
||||
VList::Address(unsigned _blocknum) const //BETTER: inline function
|
||||
{
|
||||
if (_blocknum == 0)
|
||||
return 0;
|
||||
|
@ -110,13 +115,13 @@ IVStorage::Address(unsigned _blocknum) const //BETTER: inline function
|
|||
}
|
||||
|
||||
unsigned
|
||||
IVStorage::Blocknum(long address) const
|
||||
VList::Blocknum(long address) const
|
||||
{
|
||||
return (address / BLOCK_SIZE) + 1 - this->SuperNum;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVStorage::AllocBlock()
|
||||
VList::AllocBlock()
|
||||
{
|
||||
BlockInfo* p = this->freelist->next;
|
||||
if (p == NULL)
|
||||
|
@ -131,11 +136,12 @@ IVStorage::AllocBlock()
|
|||
unsigned t = p->num;
|
||||
this->freelist->next = p->next;
|
||||
delete p;
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
void
|
||||
IVStorage::FreeBlock(unsigned _blocknum)
|
||||
VList::FreeBlock(unsigned _blocknum)
|
||||
{ //QUERY: head-sub and tail-add will be better?
|
||||
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
|
||||
this->freelist->next = bp;
|
||||
|
@ -145,93 +151,96 @@ IVStorage::FreeBlock(unsigned _blocknum)
|
|||
//a string may span several blocks
|
||||
|
||||
void
|
||||
IVStorage::ReadAlign(unsigned* _next)
|
||||
VList::ReadAlign(unsigned* _next)
|
||||
{
|
||||
if (ftell(treefp) % BLOCK_SIZE == 0)
|
||||
if (ftell(valfp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
fseek(treefp, Address(*_next), SEEK_SET);
|
||||
fread(_next, sizeof(unsigned), 1, treefp);
|
||||
fseek(valfp, Address(*_next), SEEK_SET);
|
||||
fread(_next, sizeof(unsigned), 1, valfp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
|
||||
VList::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
|
||||
{
|
||||
if (ftell(treefp) % BLOCK_SIZE == 0)
|
||||
if (ftell(valfp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
fseek(treefp, Address(*_curnum), SEEK_SET);
|
||||
fseek(valfp, Address(*_curnum), SEEK_SET);
|
||||
if (_SpecialBlock)
|
||||
{
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
fseek(valfp, 4, SEEK_CUR);
|
||||
_SpecialBlock = false;
|
||||
}
|
||||
fwrite(&blocknum, sizeof(unsigned), 1, treefp);
|
||||
fseek(treefp, Address(blocknum) + 4, SEEK_SET);
|
||||
fwrite(&blocknum, sizeof(unsigned), 1, valfp);
|
||||
fseek(valfp, Address(blocknum) + 4, SEEK_SET);
|
||||
*_curnum = blocknum;
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: check , read/write a long list, across several blocks
|
||||
//not use buffer, read/write on need, update at once, so no need to write back at last
|
||||
|
||||
//TODO: still use Bstr?? how can we get the next pointer?? use NULL to init
|
||||
//NOTICE: the next is placed at the begin of a block
|
||||
bool
|
||||
IVStorage::readBstr(Bstr* _bp, unsigned* _next)
|
||||
VList::readBstr(Bstr* _bp, unsigned* _next)
|
||||
{
|
||||
//long address;
|
||||
unsigned len, i, j;
|
||||
fread(&len, sizeof(unsigned), 1, this->treefp);
|
||||
fread(&len, sizeof(unsigned), 1, this->valfp);
|
||||
this->ReadAlign(_next);
|
||||
//this->request(len);
|
||||
char* s = (char*)malloc(len);
|
||||
_bp->setLen(len);
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fread(s + i, sizeof(char), 4, treefp);
|
||||
fread(s + i, sizeof(char), 4, valfp);
|
||||
this->ReadAlign(_next);
|
||||
}
|
||||
while (i < len)
|
||||
{
|
||||
fread(s + i, sizeof(char), 1, treefp); //BETTER
|
||||
fread(s + i, sizeof(char), 1, valfp); //BETTER
|
||||
i++;
|
||||
}
|
||||
j = len % 4;
|
||||
if (j > 0)
|
||||
j = 4 - j;
|
||||
fseek(treefp, j, SEEK_CUR);
|
||||
fseek(valfp, j, SEEK_CUR);
|
||||
this->ReadAlign(_next);
|
||||
_bp->setStr(s);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
|
||||
VList::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
|
||||
{
|
||||
unsigned i, j, len = _bp->getLen();
|
||||
fwrite(&len, sizeof(unsigned), 1, treefp);
|
||||
fwrite(&len, sizeof(unsigned), 1, valfp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
char* s = _bp->getStr();
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fwrite(s + i, sizeof(char), 4, treefp);
|
||||
fwrite(s + i, sizeof(char), 4, valfp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
}
|
||||
while (i < len)
|
||||
{
|
||||
fwrite(s + i, sizeof(char), 1, treefp);
|
||||
fwrite(s + i, sizeof(char), 1, valfp);
|
||||
i++;
|
||||
}
|
||||
j = len % 4;
|
||||
if (j > 0)
|
||||
j = 4 - j;
|
||||
fseek(treefp, j, SEEK_CUR);
|
||||
fseek(valfp, j, SEEK_CUR);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
VList::~VList()
|
||||
{
|
||||
//release heap and freelist...
|
||||
#ifdef DEBUG_KVSTORE
|
||||
printf("now to release the kvstore!\n");
|
||||
#endif
|
||||
BlockInfo* bp = this->freelist;
|
||||
BlockInfo* next;
|
||||
while (bp != NULL)
|
||||
|
@ -240,18 +249,6 @@ VList::~VList()
|
|||
delete bp;
|
||||
bp = next;
|
||||
}
|
||||
#ifdef DEBUG_KVSTORE
|
||||
printf("already empty the freelist!\n");
|
||||
#endif
|
||||
delete this->minheap;
|
||||
#ifdef DEBUG_KVSTORE
|
||||
printf("already empty the buffer heap!\n");
|
||||
#endif
|
||||
fclose(this->treefp);
|
||||
//#ifdef DEBUG_KVSTORE
|
||||
//NOTICE:there is more than one tree
|
||||
//fclose(Util::debug_kvstore); //NULL is ok!
|
||||
//Util::debug_kvstore = NULL;
|
||||
//#endif
|
||||
fclose(this->valfp);
|
||||
}
|
||||
|
|
@ -6,11 +6,11 @@
|
|||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_IVTREE_STORAGE_VLIST_H
|
||||
#define _KVSTORE_IVTREE_STORAGE_VLIST_H
|
||||
#ifndef _UTIL_VLIST_H
|
||||
#define _UTIL_VLIST_H
|
||||
|
||||
#include "../../../Util/Util.h"
|
||||
#include "../../../Util/Bstr.h"
|
||||
#include "Util.h"
|
||||
#include "Bstr.h"
|
||||
|
||||
//TODO: not keep long list in memory, read each time
|
||||
//but when can you free the long list(kvstore should release it after parsing)
|
||||
|
@ -22,15 +22,21 @@
|
|||
|
||||
//BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts)
|
||||
|
||||
//STRUCT: a long list real-address is the block ID in file2(only for long value lists, a list across several 1M blocks)
|
||||
//tree-value Bstr: unsigned=the real address char*=NULL
|
||||
//in disk:
|
||||
//file1 is tree file, the long list is represented as: 0 real-address
|
||||
//NOTICE: long list is not kept in memory for cache, it is read/updated each time on need!
|
||||
|
||||
class VList
|
||||
{
|
||||
public:
|
||||
//NOTICE:the border is 10^6, but the block is larger, 1M
|
||||
static const unsigned LENGTH_BORDER = 1000000;
|
||||
static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block
|
||||
static const unsigned MAX_BLOCK_NUM = 1 << 16; //max block-num
|
||||
static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num
|
||||
//the two constants below must be exactly divisible by 8
|
||||
static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num
|
||||
static const unsigned SET_BLOCK_NUM = 1 << 2; //initial blocks num
|
||||
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
|
||||
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
|
||||
|
||||
|
@ -59,12 +65,14 @@ private:
|
|||
|
||||
public:
|
||||
VList();
|
||||
VList(std::string& _filepath, unsigned long long _buffer_size);//create a fixed-size file or open an existence
|
||||
VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence
|
||||
bool readBstr(Bstr* _bp, unsigned* _next);
|
||||
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock);
|
||||
bool readValue(unsigned _block_num);
|
||||
bool writeValue(const Bstr* _bp);
|
||||
~VList();
|
||||
|
||||
static bool isLongList(unsigned _len);
|
||||
};
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue