refactor: add value list for IVTree; lower the copy cost of string

not done, waiting to be debugged

by zengli, long list must be cleared after got, no changes besides KVstore
This commit is contained in:
bookug 2017-03-31 00:23:16 +08:00
parent 939e84c8bf
commit ceff3544ae
15 changed files with 306 additions and 124 deletions

View File

@ -115,13 +115,13 @@ IVTree::prepare(IVNode* _np)
}
bool
IVTree::search(int _key, char*& _str, int& _len)
IVTree::search(unsigned _key, char*& _str, unsigned& _len)
{
if (_key < 0)
{
printf("error in IVTree-search: empty string\n");
return false;
}
//if (_key < 0)
//{
//printf("error in IVTree-search: empty string\n");
//return false;
//}
this->request = 0;
int store;
@ -142,13 +142,13 @@ IVTree::search(int _key, char*& _str, int& _len)
}
bool
IVTree::insert(int _key, char* _str, unsigned _len)
IVTree::insert(unsigned _key, char* _str, unsigned _len)
{
if (_key < 0)
{
printf("error in IVTree-insert: empty string\n");
return false;
}
//if (_key < 0)
//{
//printf("error in IVTree-insert: empty string\n");
//return false;
//}
//this->CopyToTransfer(_str, _len, 2);
//const Bstr* val = &(this->transfer[2]);
@ -250,13 +250,13 @@ IVTree::insert(int _key, char* _str, unsigned _len)
}
bool
IVTree::modify(int _key, char* _str, unsigned _len)
IVTree::modify(unsigned _key, char* _str, unsigned _len)
{
if (_key < 0)
{
printf("error in IVTree-modify: empty string\n");
return false;
}
//if (_key < 0)
//{
//printf("error in IVTree-modify: empty string\n");
//return false;
//}
//this->CopyToTransfer(_str, _len, 2); //not check value
//const Bstr* val = &(this->transfer[2]);
@ -291,7 +291,7 @@ IVTree::modify(int _key, char* _str, unsigned _len)
//this function is useful for search and modify, and range-query
IVNode* //return the first key's position that >= *_key
IVTree::find(int _key, int* _store, bool ifmodify)
IVTree::find(unsigned _key, int* _store, bool ifmodify)
{ //to assign value for this->bstr, function shouldn't be const!
if (this->root == NULL)
return NULL; //IVTree Is Empty
@ -334,13 +334,13 @@ IVTree::find(unsigned _len, const char* _str, int* store) const
*/
bool
IVTree::remove(int _key)
IVTree::remove(unsigned _key)
{
if (_key < 0)
{
printf("error in IVTree-remove: empty string\n");
return false;
}
//if (_key < 0)
//{
//printf("error in IVTree-remove: empty string\n");
//return false;
//}
this->request = 0;
IVNode* ret;
@ -468,7 +468,7 @@ IVTree::resetStream()
//TODO: change to using value list, getValue() maybe not get real long list
bool //special case: not exist, one-edge-case
IVTree::range_query(int _key1, int _key2)
IVTree::range_query(unsigned _key1, unsigned _key2)
{ //the range is: *_key1 <= x < *_key2
//if(_key1 <0 && _key2 <0)
//return false;

View File

@ -29,7 +29,7 @@
class IVTree
{
protected:
unsigned int height; //0 indicates an empty tree
unsigned height; //0 indicates an empty tree
IVNode* root;
IVNode* leaves_head; //the head of LeafNode-list
IVNode* leaves_tail; //the tail of LeafNode-list
@ -74,19 +74,19 @@ protected:
public:
IVTree(); //always need to initial transfer
IVTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
unsigned int getHeight() const;
unsigned getHeight() const;
void setHeight(unsigned _h);
IVNode* getRoot() const;
//void setRoot(Node* _root);
//insert, search, remove, set
bool search(int _key, char*& _str, int& _len);
bool insert(int _key, char* _str, unsigned _len);
bool modify(int _key, char* _str, unsigned _len);
IVNode* find(int _key, int* store, bool ifmodify);
bool remove(int _key);
bool search(unsigned _key, char*& _str, unsigned& _len);
bool insert(unsigned _key, char* _str, unsigned _len);
bool modify(unsigned _key, char* _str, unsigned _len);
IVNode* find(unsigned _key, int* store, bool ifmodify);
bool remove(unsigned _key);
const Bstr* getRangeValue();
void resetStream();
bool range_query(int _key1, int _key2);
bool range_query(unsigned _key1, unsigned _key2);
bool save();
~IVTree();
void print(std::string s); //DEBUG(print the tree)

View File

@ -89,25 +89,74 @@ IVLeafNode::getValue(int _index) const
return this->values + _index;
}
//TODO!!!
bool
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
IVLeafNode::setValue(const Bstr* _value, int _index, bool _ifcopy)
{
//TODO: read long list
return true;
}
bool
IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
{
//TODO: consider the long list, how to cancel and reset
int num = this->getNum();
if (_index < 0 || _index >= num)
{
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
return false;
}
this->values[_index].release(); //NOTICE: only used in modify
if(_ifcopy)
{
this->values[_index].copy(_value);
}
else
{
this->values[_index] = *_value;
}
return true;
}
//Fetch the value stored in slot _index of this leaf.
//
//_vlist : value-list store consulted only when the slot is a long-list placeholder
//_index : slot position, valid range is [0, getNum())
//_str   : out, receives the value bytes
//_len   : out, receives the value length
//return : false if _index is out of range, if a long list is requested without
//         a VList, or if the VList read reports failure; true otherwise
//
//For an ordinary slot, _str aliases the buffer held by the slot (no copy is made).
//For a long list, _str is whatever VList::readValue hands back.
//NOTE(review): presumably the caller must release the long-list buffer after use
//(the commit notes say long lists must be cleared after being fetched) -- confirm.
bool
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
{
	int num = this->getNum();
	if (_index < 0 || _index >= num)
	{
		//print(string("error in getValue: Invalid index ") + Util::int2string(_index));
		return false;
	}

	//ordinary value: hand out the in-memory string directly
	if (!this->values[_index].isBstrLongList())
	{
		_str = this->values[_index].getStr();
		_len = this->values[_index].getLen();
		return true;
	}

	//long list: the slot only records where the data lives in the VList file
	if (_vlist == NULL)
	{
		return false;
	}
	//NOTE(review): Bstr::getLen() appears to return 0 whenever isBstrLongList() is true,
	//which would make block_num always 0 here -- confirm how the block number
	//written by setLen(block_num) is supposed to be read back.
	unsigned block_num = this->values[_index].getLen();
	return _vlist->readValue(block_num, _str, _len);
}
bool
IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
{
int num = this->getNum();
if (_index < 0 || _index >= num)
{
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
return false;
}
if(this->values[_index].isBstrLongList())
{
unsigned block_num = this->values[_index].getLen();
_vlist->removeValue(block_num);
}
else
{
this->values[_index].release(); //NOTICE: only used in modify
}
//DEBUG: we do not need to copy here
//we just need to ensure that the pointer's memory is not released
@ -119,8 +168,17 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
//else
//{
//this->values[_index] = *_value;
if(VList::isLongList(_len))
{
unsigned block_num = _vlist->writeValue(_str, _len);
this->values[_index].setStr(NULL);
this->values[_index].setLen(block_num);
}
else
{
this->values[_index].setStr(_str);
this->values[_index].setLen(_len);
}
//}
return true;
}
@ -128,23 +186,34 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
bool
IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
{
//TODO:if the list is too large
int num = this->getNum();
if (_index < 0 || _index > num)
{
print(string("error in addValue: Invalid index ") + Util::int2string(_index));
return false;
}
int i;
for (i = num - 1; i >= _index; --i)
for (int i = num - 1; i >= _index; --i)
this->values[i + 1] = this->values[i];
//if (ifcopy)
//this->values[_index].copy(_value);
//else
//this->values[_index] = *_value;
this->values[_index].setStr(_str);
this->values[_index].setLen(_len);
if(VList::isLongList(_len))
{
unsigned block_num = _vlist->writeValue(_str, _len);
this->values[_index].setStr(NULL);
this->values[_index].setLen(block_num);
}
else
{
this->values[_index].setStr(_str);
this->values[_index].setLen(_len);
}
//this->values[_index].setStr(_str);
//this->values[_index].setLen(_len);
return true;
}
@ -152,7 +221,6 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
bool
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
{
//TODO: if is to sub long list
int num = this->getNum();
if (_index < 0 || _index >= num)
{
@ -160,10 +228,20 @@ IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
return false;
}
int i;
if (ifdel)
values[_index].release();
for (i = _index; i < num - 1; ++i)
if(this->values[_index].isBstrLongList())
{
unsigned block_num = this->values[_index].getLen();
_vlist->removeValue(block_num);
}
else
{
if (ifdel)
{
values[_index].release();
}
}
for (int i = _index; i < num - 1; ++i)
this->values[i] = this->values[i + 1];
return true;

View File

@ -28,6 +28,8 @@ public:
IVNode* getPrev() const;
IVNode* getNext() const;
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index, bool _ifcopy=false);
bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const;
bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false);

View File

@ -83,6 +83,7 @@ public:
virtual IVNode* getNext() const { return NULL; };
virtual const Bstr* getValue(int _index) const { return NULL; };
virtual bool setValue(const Bstr* _value, int _index, bool _ifcopy=false) { return true; };
virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; };
virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; };

View File

@ -348,6 +348,11 @@ IVStorage::createNode(IVNode*& _np) //cretae virtual nodes, not in-mem
return true;
}
//BETTER: Is SpecialBlock really needed? Why can't we place next before flag?
//
//NOTICE: root num begins from 1; if root num is 0, it is invalid, i.e. the tree is NULL
//(and ftell(root address) will be 0 as well)
bool
IVStorage::writeNode(IVNode* _np)
{
@ -446,7 +451,10 @@ IVStorage::readBstr(Bstr* _bp, unsigned* _next)
}
//this->request(len);
char* s = (char*)malloc(len);
//NOTICE: we use new for all, consistent with Bstr and KVstore
//char* s = (char*)malloc(len);
char* s = new char[len];
_bp->setLen(len);
for (i = 0; i + 4 < len; i += 4)
{

View File

@ -99,8 +99,8 @@ int KVstore::getEntityDegree(int _entity_id) const {
int KVstore::getEntityInDegree(int _entity_id) const {
//cout << "In getEntityInDegree " << _entity_id << endl;
int* _tmp = NULL;
int _len = 0;
unsigned* _tmp = NULL;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len);
if (!_get) {
return 0;
@ -110,8 +110,8 @@ int KVstore::getEntityInDegree(int _entity_id) const {
int KVstore::getEntityOutDegree(int _entity_id) const {
//cout << "In getEntityOutDegree " << _entity_id << endl;
int* _tmp = NULL;
int _len = 0;
unsigned* _tmp = NULL;
unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len);
if (!_get) {
return 0;
@ -121,8 +121,8 @@ int KVstore::getEntityOutDegree(int _entity_id) const {
int KVstore::getLiteralDegree(int _literal_id) const {
//cout << "In getLiteralDegree " << _literal_id << endl;
int* _tmp = NULL;
int _len = 0;
unsigned* _tmp = NULL;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len);
if (!_get) {
return 0;
@ -132,8 +132,8 @@ int KVstore::getLiteralDegree(int _literal_id) const {
int KVstore::getPredicateDegree(int _predicate_id) const {
//cout << "In getPredicate Degree " << _predicate_id << endl;
int* _tmp = NULL;
int _len = 0;
unsigned* _tmp = NULL;
unsigned _len = 0;
bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len);
if (!_get) {
return 0;
@ -143,8 +143,10 @@ int KVstore::getPredicateDegree(int _predicate_id) const {
int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const {
//cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
if (!_get) {
return 0;
@ -166,8 +168,10 @@ int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const {
int KVstore::getObjectPredicateDegree(int _objid, int _preid) const {
//cout << "In getObjectPredicateDegree " << _objid << _preid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
if (!_get) {
return 0;
@ -352,8 +356,9 @@ bool KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id) {
}
bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) {
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len);
bool _is_entity = Util::is_entity_ele(_obj_id);
@ -453,8 +458,9 @@ bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) {
}
bool KVstore::updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id) {
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len);
bool _is_entity = Util::is_entity_ele(_obj_id);
@ -564,8 +570,9 @@ bool KVstore::updateRemove_s2values(int _subid, const std::vector<int>& _pidoidl
}
bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) {
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len);
//objID doesn't exist
@ -659,8 +666,9 @@ bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) {
}
bool KVstore::updateRemove_o2values(int _sub_id, int _pre_id, int _obj_id) {
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len);
if (!_get) {
@ -763,8 +771,9 @@ bool KVstore::updateRemove_o2values(int _objid, const std::vector<int>& _pidsidl
}
bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) {
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len);
//preid doesn't exist
@ -804,8 +813,9 @@ bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) {
}
bool KVstore::updateRemove_p2values(int _sub_id, int _pre_id, int _obj_id) {
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len);
if (!_get) {
@ -1405,8 +1415,9 @@ KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool
_list_len = 0;
return false;
}
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
if (!_get)
@ -1442,8 +1453,9 @@ KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool
_list_len = 0;
return false;
}
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
if (!_get)
{
@ -1481,8 +1493,9 @@ KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int&
return false;
}
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
if (!_get) {
_objidlist = NULL;
@ -1531,8 +1544,9 @@ KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list
return false;
}
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
if (!_get) {
_preid_objidlist = NULL;
@ -1682,8 +1696,10 @@ bool
KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const
{
//cout << "In getpreIDlistByobjID " << _objid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
if (!_get) {
_preidlist = NULL;
@ -1711,8 +1727,10 @@ bool
KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _no_duplicate) const
{
//cout << "In getsubIDlistByobjID " << _objid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
if (!_get) {
_subidlist = NULL;
@ -1743,8 +1761,9 @@ bool
KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const
{
//cout << "In getsubIDlistByobjIDpreID " << _objid << ' ' << _preid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
if (!_get) {
_subidlist = NULL;
@ -1786,8 +1805,9 @@ bool
KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate) const
{
//cout << "In getpreIDsubIDlistByobjID " << _objid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
if (!_get) {
_preid_subidlist = NULL;
@ -1915,8 +1935,9 @@ bool
KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const
{
//cout << "In getsubIDlistBypreID " << _preid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len);
if (!_get) {
_subidlist = NULL;
@ -1946,8 +1967,9 @@ bool
KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const
{
//cout << "In getobjIDlistBypreID " << _preid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len);
if (!_get) {
_objidlist = NULL;
@ -1978,8 +2000,9 @@ bool
KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate) const
{
//cout << "In getsubIDobjIDlistBypreID " << _preid << endl;
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len);
if (!_get) {
_subid_objidlist = NULL;
@ -2032,8 +2055,9 @@ KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int&
return false;
}
//TODO: use unsigned
int* _tmp = NULL;
int _len = 0;
unsigned _len = 0;
this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
_list_len = len;
int _result = 0;
@ -2199,7 +2223,7 @@ KVstore::addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen)
}
bool
KVstore::addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen)
KVstore::addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen)
{
return _p_btree->insert(_key, _val, _vlen);
}
@ -2217,7 +2241,7 @@ KVstore::setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen)
}
bool
KVstore::setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen)
KVstore::setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen)
{
return _p_btree->modify(_key, _val, _vlen);
}
@ -2235,7 +2259,7 @@ KVstore::getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) cons
}
bool
KVstore::getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const
KVstore::getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const
{
return _p_btree->search(_key, _val, _vlen);
}

View File

@ -19,6 +19,11 @@
//QUERY: but to count the length each time maybe very costly?
//No, because triple num is stored in char* now!!!! we do not need to save it again
//TODO: entity_border in s2values list is not needed!!! not waste memory here
//
//QUERY: but to implement vlist, we need an unsigned flag
//What is more, we need to store the string on disk; how can we store it without the length?
//An unsigned value stored as chars may contain '\0'
//In memory, we do not know where the oidlist ends without the original length (but the triple num will answer this!)
class KVstore
{
@ -197,15 +202,15 @@ private:
bool addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val);
bool addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen);
bool addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen);
bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val);
bool setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen);
bool setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen);
bool setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const;
bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const;
bool getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const;
bool getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const;
int getIDByStr(SITree* _p_btree, const char* _key, int _klen) const;

View File

@ -1,5 +1,5 @@
//headers wrapper for all kinds of BPlusTree
#include "IVTree/IVTree.h"
#include "ISTree/ISTree.h"
#include "SITree/SITree.h"
#include "IVTree/IVTree.h"

View File

@ -88,9 +88,8 @@ http://blog.csdn.net/infoworld/article/details/8670951
要在单机支持到10亿triple最坏情况下最多有20亿entity和20亿literal目前的编号方式是不行的(int扩展为unsigned)
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集就像jena和virtuoso一样慢不要紧
type分支中query过程可能还有问题需要修改Query/里面的类型
去掉tree里面的复制另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现避免每次都重新new但这会影响多线程程序
而且在kvstore中往往需要对原始list做一些额外处理
type分支中query过程可能还有问题需要修改Query/里面的类型另外stringindex中也要修改分界线已经是20亿且非法不再是-1
vstree在build和query时可以用不同大小的缓存来加速build过程
---
UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long空间开销一下子就上升了一倍
解决方法对于ID2string仍然用char*和unsigned但对于s2xx p2xx o2xx应该用unsigned long long*和unsigned来表示这样最高可支持到40亿triple

View File

@ -122,7 +122,8 @@ unsigned
Bstr::getLen() const
{
//NOTICE: this is for VList
if(this->str == NULL)
if(this->isBstrLongList())
//if(this->str == NULL)
{
return 0;
}
@ -214,3 +215,9 @@ Bstr::print(string s) const
//#endif
}
//Tell whether this Bstr stands for a long-list value.
//The test is purely "str is NULL": such a Bstr carries no in-memory buffer.
bool
Bstr::isBstrLongList() const
{
	const bool no_inline_buffer = (NULL == this->str);
	return no_inline_buffer;
}

View File

@ -48,6 +48,9 @@ public:
//int write(FILE* _fp);
~Bstr();
void print(std::string s) const; //DEBUG
//judge whether this Bstr represents a long list value, which is not kept in memory but read each time it is needed
bool isBstrLongList() const;
};
#endif // _UTIL_BSTR_H

View File

@ -149,6 +149,10 @@ VList::FreeBlock(unsigned _blocknum)
//NOTICE: all reads are aligned to 4 bytes (including a string)
//a string may span several blocks
//
//NOTICE: not use buffer, read/write on need, update at once, so no need to write back at last
//NOTICE: the next is placed at the begin of a block
void
VList::ReadAlign(unsigned* _next)
@ -161,38 +165,68 @@ VList::ReadAlign(unsigned* _next)
}
void
VList::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
VList::WriteAlign(unsigned* _curnum)
{
if (ftell(valfp) % BLOCK_SIZE == 0)
{
unsigned blocknum = this->AllocBlock();
fseek(valfp, Address(*_curnum), SEEK_SET);
if (_SpecialBlock)
{
fseek(valfp, 4, SEEK_CUR);
_SpecialBlock = false;
}
fwrite(&blocknum, sizeof(unsigned), 1, valfp);
fseek(valfp, Address(blocknum) + 4, SEEK_SET);
*_curnum = blocknum;
}
}
//TODO: check , read/write a long list, across several blocks
//not use buffer, read/write on need, update at once, so no need to write back at last
//Read the long-list value whose first block is _block_num.
//
//_block_num : number of the first block of the value
//_str       : out, receives the buffer filled by readBstr
//_len       : out, receives the value length
//return     : false if positioning or reading the block header fails,
//             otherwise the result of readBstr
//
//Block layout relied on here: the first unsigned of a block is the number of
//the next block, and the value (length, then bytes) starts right after it.
bool
VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
{
	if (fseek(valfp, Address(_block_num), SEEK_SET) != 0)
	{
		return false;
	}

	//next-block pointer sits at the very beginning of the block
	unsigned next = 0;
	if (fread(&next, sizeof(unsigned), 1, valfp) != 1)
	{
		//do not walk the chain with an indeterminate 'next'
		return false;
	}

	return this->readBstr(_str, _len, &next);
}
unsigned
VList::writeValue(const char* _str, unsigned _len)
{
unsigned blocknum = this->AllocBlock();
unsigned curnum = blocknum;
this->writeBstr(_str, _len, &curnum);
return blocknum;
}
//Release every block of the long-list value that starts at _block_num.
//
//_block_num : number of the first block of the value; 0 means "no block"
//return     : true when the whole chain was walked and freed,
//             false if a block header could not be read (blocks freed so far stay freed)
//
//Each block stores the number of its successor in its first unsigned; a
//successor of 0 terminates the chain.
bool
VList::removeValue(unsigned _block_num)
{
	unsigned cur = _block_num;
	while (cur != 0)
	{
		//fetch the successor BEFORE the block is handed back to the allocator
		unsigned next = 0;
		if (fseek(this->valfp, Address(cur), SEEK_SET) != 0
			|| fread(&next, sizeof(unsigned), 1, this->valfp) != 1)
		{
			//stop instead of following an unknown block number
			return false;
		}

		this->FreeBlock(cur);
		cur = next;
	}
	return true;
}
//TODO: still use Bstr?? how can we get the next pointer?? use NULL to init
//NOTICE: the next is placed at the begin of a block
bool
VList::readBstr(Bstr* _bp, unsigned* _next)
VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
{
//long address;
unsigned len, i, j;
fread(&len, sizeof(unsigned), 1, this->valfp);
this->ReadAlign(_next);
//this->request(len);
char* s = (char*)malloc(len);
_bp->setLen(len);
_len = len;
for (i = 0; i + 4 < len; i += 4)
{
fread(s + i, sizeof(char), 4, valfp);
@ -203,38 +237,52 @@ VList::readBstr(Bstr* _bp, unsigned* _next)
fread(s + i, sizeof(char), 1, valfp); //BETTER
i++;
}
j = len % 4;
if (j > 0)
j = 4 - j;
fseek(valfp, j, SEEK_CUR);
this->ReadAlign(_next);
_bp->setStr(s);
//NOTICE+DEBUG: I think no need to align here, later no data to read
//(if need to read, then fseek again to find a new value)
//this->ReadAlign(_next);
_str = s;
return true;
}
bool
VList::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
{
unsigned i, j, len = _bp->getLen();
unsigned i, j, len = _len;
fwrite(&len, sizeof(unsigned), 1, valfp);
this->WriteAlign(_curnum, _SpecialBlock);
char* s = _bp->getStr();
this->WriteAlign(_curnum);
//BETTER: compute this need how many blocks first, then write a block a time
const char* s = _str;
for (i = 0; i + 4 < len; i += 4)
{
fwrite(s + i, sizeof(char), 4, valfp);
this->WriteAlign(_curnum, _SpecialBlock);
this->WriteAlign(_curnum);
}
while (i < len)
{
fwrite(s + i, sizeof(char), 1, valfp);
i++;
}
j = len % 4;
if (j > 0)
j = 4 - j;
fseek(valfp, j, SEEK_CUR);
this->WriteAlign(_curnum, _SpecialBlock);
//NOTICE+DEBUG: I think no need to align here, later no data to write
//(if need to write, then fseek again to write a new value)
//this->WriteAlign(_curnum);
fseek(valfp, Address(*_curnum), SEEK_SET);
unsigned t = 0;
fwrite(&t, sizeof(unsigned), 1, valfp);
return true;
}

View File

@ -12,7 +12,10 @@
#include "Util.h"
#include "Bstr.h"
//TODO: not keep long list in memory, read each time
//TODO: all use new/delete for Bstr, KVstore and trees, including Stream
//then give a full test, including valgrind
//NOTICE: not keep long list in memory, read each time
//but when can you free the long list(kvstore should release it after parsing)
//
//CONSIDER: if to keep long list in memory, should adjust the bstr in memory:
@ -61,15 +64,16 @@ private:
unsigned AllocBlock();
void FreeBlock(unsigned _blocknum);
void ReadAlign(unsigned* _next);
void WriteAlign(unsigned* _next, bool& _SpecialBlock);
void WriteAlign(unsigned* _next);
bool readBstr(char*& _bp, unsigned& _len, unsigned* _next);
bool writeBstr(const char* _str, unsigned _len, unsigned* _curnum);
public:
VList();
VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence
bool readBstr(Bstr* _bp, unsigned* _next);
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock);
bool readValue(unsigned _block_num);
bool writeValue(const Bstr* _bp);
bool readValue(unsigned _block_num, char*& _str, unsigned& _len);
unsigned writeValue(const char* _str, unsigned _len);
bool removeValue(unsigned _block_num);
~VList();
static bool isLongList(unsigned _len);

View File

@ -72,9 +72,9 @@ sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SI
istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o
ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj)
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstreeobj)
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o
queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \
$(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o
@ -219,7 +219,7 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $
#objects in istree/ end
#objects in ivtree/ begin
$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o
$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o $(objdir)VList.o
$(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o
$(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o
@ -323,6 +323,9 @@ $(objdir)Triple.o: Util/Triple.cpp Util/Triple.h $(objdir)Util.o
$(objdir)BloomFilter.o: Util/BloomFilter.cpp Util/BloomFilter.h $(objdir)Util.o
$(CC) $(CFLAGS) Util/BloomFilter.cpp -o $(objdir)BloomFilter.o
# VList.h includes Util.h and Bstr.h, so rebuild VList.o after those objects,
# in the same way the sibling util rules depend on $(objdir)Util.o.
$(objdir)VList.o: Util/VList.cpp Util/VList.h $(objdir)Util.o $(objdir)Bstr.o
	$(CC) $(CFLAGS) Util/VList.cpp -o $(objdir)VList.o
#objects in util/ end