refactor: add value list for IVTree; lower the copy cost of string
Not finished yet; waiting to be debugged by zengli. A long list must be released after it has been fetched. No changes besides KVstore.
This commit is contained in:
parent
939e84c8bf
commit
ceff3544ae
|
@ -115,13 +115,13 @@ IVTree::prepare(IVNode* _np)
|
|||
}
|
||||
|
||||
bool
|
||||
IVTree::search(int _key, char*& _str, int& _len)
|
||||
IVTree::search(unsigned _key, char*& _str, unsigned& _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
printf("error in IVTree-search: empty string\n");
|
||||
return false;
|
||||
}
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in IVTree-search: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
this->request = 0;
|
||||
int store;
|
||||
|
@ -142,13 +142,13 @@ IVTree::search(int _key, char*& _str, int& _len)
|
|||
}
|
||||
|
||||
bool
|
||||
IVTree::insert(int _key, char* _str, unsigned _len)
|
||||
IVTree::insert(unsigned _key, char* _str, unsigned _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
printf("error in IVTree-insert: empty string\n");
|
||||
return false;
|
||||
}
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in IVTree-insert: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
//this->CopyToTransfer(_str, _len, 2);
|
||||
//const Bstr* val = &(this->transfer[2]);
|
||||
|
@ -250,13 +250,13 @@ IVTree::insert(int _key, char* _str, unsigned _len)
|
|||
}
|
||||
|
||||
bool
|
||||
IVTree::modify(int _key, char* _str, unsigned _len)
|
||||
IVTree::modify(unsigned _key, char* _str, unsigned _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
printf("error in IVTree-modify: empty string\n");
|
||||
return false;
|
||||
}
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in IVTree-modify: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
//this->CopyToTransfer(_str, _len, 2); //not check value
|
||||
//const Bstr* val = &(this->transfer[2]);
|
||||
|
@ -291,7 +291,7 @@ IVTree::modify(int _key, char* _str, unsigned _len)
|
|||
|
||||
//this function is useful for search and modify, and range-query
|
||||
IVNode* //return the first key's position that >= *_key
|
||||
IVTree::find(int _key, int* _store, bool ifmodify)
|
||||
IVTree::find(unsigned _key, int* _store, bool ifmodify)
|
||||
{ //to assign value for this->bstr, function shouldn't be const!
|
||||
if (this->root == NULL)
|
||||
return NULL; //IVTree Is Empty
|
||||
|
@ -334,13 +334,13 @@ IVTree::find(unsigned _len, const char* _str, int* store) const
|
|||
*/
|
||||
|
||||
bool
|
||||
IVTree::remove(int _key)
|
||||
IVTree::remove(unsigned _key)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
printf("error in IVTree-remove: empty string\n");
|
||||
return false;
|
||||
}
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in IVTree-remove: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
this->request = 0;
|
||||
IVNode* ret;
|
||||
|
@ -468,7 +468,7 @@ IVTree::resetStream()
|
|||
|
||||
//TODO: change to using value list, getValue() maybe not get real long list
|
||||
bool //special case: not exist, one-edge-case
|
||||
IVTree::range_query(int _key1, int _key2)
|
||||
IVTree::range_query(unsigned _key1, unsigned _key2)
|
||||
{ //the range is: *_key1 <= x < *_key2
|
||||
//if(_key1 <0 && _key2 <0)
|
||||
//return false;
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
class IVTree
|
||||
{
|
||||
protected:
|
||||
unsigned int height; //0 indicates an empty tree
|
||||
unsigned height; //0 indicates an empty tree
|
||||
IVNode* root;
|
||||
IVNode* leaves_head; //the head of LeafNode-list
|
||||
IVNode* leaves_tail; //the tail of LeafNode-list
|
||||
|
@ -74,19 +74,19 @@ protected:
|
|||
public:
|
||||
IVTree(); //always need to initial transfer
|
||||
IVTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
|
||||
unsigned int getHeight() const;
|
||||
unsigned getHeight() const;
|
||||
void setHeight(unsigned _h);
|
||||
IVNode* getRoot() const;
|
||||
//void setRoot(Node* _root);
|
||||
//insert, search, remove, set
|
||||
bool search(int _key, char*& _str, int& _len);
|
||||
bool insert(int _key, char* _str, unsigned _len);
|
||||
bool modify(int _key, char* _str, unsigned _len);
|
||||
IVNode* find(int _key, int* store, bool ifmodify);
|
||||
bool remove(int _key);
|
||||
bool search(unsigned _key, char*& _str, unsigned& _len);
|
||||
bool insert(unsigned _key, char* _str, unsigned _len);
|
||||
bool modify(unsigned _key, char* _str, unsigned _len);
|
||||
IVNode* find(unsigned _key, int* store, bool ifmodify);
|
||||
bool remove(unsigned _key);
|
||||
const Bstr* getRangeValue();
|
||||
void resetStream();
|
||||
bool range_query(int _key1, int _key2);
|
||||
bool range_query(unsigned _key1, unsigned _key2);
|
||||
bool save();
|
||||
~IVTree();
|
||||
void print(std::string s); //DEBUG(print the tree)
|
||||
|
|
|
@ -89,25 +89,74 @@ IVLeafNode::getValue(int _index) const
|
|||
return this->values + _index;
|
||||
}
|
||||
|
||||
//TODO!!!
|
||||
bool
|
||||
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
|
||||
IVLeafNode::setValue(const Bstr* _value, int _index, bool _ifcopy)
|
||||
{
|
||||
//TODO: read long list
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
|
||||
{
|
||||
//TODO: consider the long list, how to cancel and reset
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
this->values[_index].release(); //NOTICE: only used in modify
|
||||
|
||||
if(_ifcopy)
|
||||
{
|
||||
this->values[_index].copy(_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->values[_index] = *_value;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
//print(string("error in getValue: Invalid index ") + Util::int2string(_index));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//read long list
|
||||
if(this->values[_index].isBstrLongList())
|
||||
{
|
||||
unsigned block_num = this->values[_index].getLen();
|
||||
_vlist->readValue(block_num, _str, _len);
|
||||
}
|
||||
else
|
||||
{
|
||||
_str = this->values[_index].getStr();
|
||||
_len = this->values[_index].getLen();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
if(this->values[_index].isBstrLongList())
|
||||
{
|
||||
unsigned block_num = this->values[_index].getLen();
|
||||
_vlist->removeValue(block_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->values[_index].release(); //NOTICE: only used in modify
|
||||
}
|
||||
|
||||
//DEBUG: we do not need to copy here
|
||||
//we just need to ensure that the pointer's memory is not released
|
||||
|
@ -119,8 +168,17 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
|
|||
//else
|
||||
//{
|
||||
//this->values[_index] = *_value;
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
unsigned block_num = _vlist->writeValue(_str, _len);
|
||||
this->values[_index].setStr(NULL);
|
||||
this->values[_index].setLen(block_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
}
|
||||
//}
|
||||
return true;
|
||||
}
|
||||
|
@ -128,23 +186,34 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
|
|||
bool
|
||||
IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
|
||||
{
|
||||
//TODO:if the list is too large
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for (i = num - 1; i >= _index; --i)
|
||||
|
||||
for (int i = num - 1; i >= _index; --i)
|
||||
this->values[i + 1] = this->values[i];
|
||||
|
||||
//if (ifcopy)
|
||||
//this->values[_index].copy(_value);
|
||||
//else
|
||||
//this->values[_index] = *_value;
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
unsigned block_num = _vlist->writeValue(_str, _len);
|
||||
this->values[_index].setStr(NULL);
|
||||
this->values[_index].setLen(block_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
}
|
||||
//this->values[_index].setStr(_str);
|
||||
//this->values[_index].setLen(_len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -152,7 +221,6 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
|
|||
bool
|
||||
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
|
||||
{
|
||||
//TODO: if is to sub long list
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
|
@ -160,10 +228,20 @@ IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
|
|||
return false;
|
||||
}
|
||||
|
||||
int i;
|
||||
if (ifdel)
|
||||
values[_index].release();
|
||||
for (i = _index; i < num - 1; ++i)
|
||||
if(this->values[_index].isBstrLongList())
|
||||
{
|
||||
unsigned block_num = this->values[_index].getLen();
|
||||
_vlist->removeValue(block_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ifdel)
|
||||
{
|
||||
values[_index].release();
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = _index; i < num - 1; ++i)
|
||||
this->values[i] = this->values[i + 1];
|
||||
|
||||
return true;
|
||||
|
|
|
@ -28,6 +28,8 @@ public:
|
|||
IVNode* getPrev() const;
|
||||
IVNode* getNext() const;
|
||||
const Bstr* getValue(int _index) const;
|
||||
bool setValue(const Bstr* _value, int _index, bool _ifcopy=false);
|
||||
|
||||
bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const;
|
||||
bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false);
|
||||
|
||||
|
|
|
@ -83,6 +83,7 @@ public:
|
|||
virtual IVNode* getNext() const { return NULL; };
|
||||
|
||||
virtual const Bstr* getValue(int _index) const { return NULL; };
|
||||
virtual bool setValue(const Bstr* _value, int _index, bool _ifcopy=false) { return true; };
|
||||
virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; };
|
||||
virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; };
|
||||
|
||||
|
|
|
@ -348,6 +348,11 @@ IVStorage::createNode(IVNode*& _np) //cretae virtual nodes, not in-mem
|
|||
return true;
|
||||
}
|
||||
|
||||
//BETTER: Is SpecialBlock really needed? Why can't we place next before flag?
|
||||
//
|
||||
//NOTICE: root num begins from 1, if root num is 0, then it is invalid, i.e. the tree is NULL
|
||||
//(and ftell(root address) will be 0 either)
|
||||
|
||||
bool
|
||||
IVStorage::writeNode(IVNode* _np)
|
||||
{
|
||||
|
@ -446,7 +451,10 @@ IVStorage::readBstr(Bstr* _bp, unsigned* _next)
|
|||
}
|
||||
|
||||
//this->request(len);
|
||||
char* s = (char*)malloc(len);
|
||||
|
||||
//NOTICE: we use new for all, consistent with Bstr and KVstore
|
||||
//char* s = (char*)malloc(len);
|
||||
char* s = new char[len];
|
||||
_bp->setLen(len);
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
|
|
|
@ -99,8 +99,8 @@ int KVstore::getEntityDegree(int _entity_id) const {
|
|||
|
||||
int KVstore::getEntityInDegree(int _entity_id) const {
|
||||
//cout << "In getEntityInDegree " << _entity_id << endl;
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned* _tmp = NULL;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
return 0;
|
||||
|
@ -110,8 +110,8 @@ int KVstore::getEntityInDegree(int _entity_id) const {
|
|||
|
||||
int KVstore::getEntityOutDegree(int _entity_id) const {
|
||||
//cout << "In getEntityOutDegree " << _entity_id << endl;
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned* _tmp = NULL;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
return 0;
|
||||
|
@ -121,8 +121,8 @@ int KVstore::getEntityOutDegree(int _entity_id) const {
|
|||
|
||||
int KVstore::getLiteralDegree(int _literal_id) const {
|
||||
//cout << "In getLiteralDegree " << _literal_id << endl;
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned* _tmp = NULL;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
return 0;
|
||||
|
@ -132,8 +132,8 @@ int KVstore::getLiteralDegree(int _literal_id) const {
|
|||
|
||||
int KVstore::getPredicateDegree(int _predicate_id) const {
|
||||
//cout << "In getPredicate Degree " << _predicate_id << endl;
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned* _tmp = NULL;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
return 0;
|
||||
|
@ -143,8 +143,10 @@ int KVstore::getPredicateDegree(int _predicate_id) const {
|
|||
|
||||
int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const {
|
||||
//cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl;
|
||||
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
return 0;
|
||||
|
@ -166,8 +168,10 @@ int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const {
|
|||
|
||||
int KVstore::getObjectPredicateDegree(int _objid, int _preid) const {
|
||||
//cout << "In getObjectPredicateDegree " << _objid << _preid << endl;
|
||||
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
return 0;
|
||||
|
@ -352,8 +356,9 @@ bool KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id) {
|
|||
}
|
||||
|
||||
bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) {
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len);
|
||||
bool _is_entity = Util::is_entity_ele(_obj_id);
|
||||
|
||||
|
@ -453,8 +458,9 @@ bool KVstore::updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id) {
|
|||
}
|
||||
|
||||
bool KVstore::updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id) {
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->subID2values, _sub_id, (char*&)_tmp, _len);
|
||||
bool _is_entity = Util::is_entity_ele(_obj_id);
|
||||
|
||||
|
@ -564,8 +570,9 @@ bool KVstore::updateRemove_s2values(int _subid, const std::vector<int>& _pidoidl
|
|||
}
|
||||
|
||||
bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) {
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len);
|
||||
|
||||
//objID doesn't exist
|
||||
|
@ -659,8 +666,9 @@ bool KVstore::updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id) {
|
|||
}
|
||||
|
||||
bool KVstore::updateRemove_o2values(int _sub_id, int _pre_id, int _obj_id) {
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _obj_id, (char*&)_tmp, _len);
|
||||
|
||||
if (!_get) {
|
||||
|
@ -763,8 +771,9 @@ bool KVstore::updateRemove_o2values(int _objid, const std::vector<int>& _pidsidl
|
|||
}
|
||||
|
||||
bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) {
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len);
|
||||
|
||||
//preid doesn't exist
|
||||
|
@ -804,8 +813,9 @@ bool KVstore::updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id) {
|
|||
}
|
||||
|
||||
bool KVstore::updateRemove_p2values(int _sub_id, int _pre_id, int _obj_id) {
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->preID2values, _pre_id, (char*&)_tmp, _len);
|
||||
|
||||
if (!_get) {
|
||||
|
@ -1405,8 +1415,9 @@ KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool
|
|||
_list_len = 0;
|
||||
return false;
|
||||
}
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
|
||||
|
||||
if (!_get)
|
||||
|
@ -1442,8 +1453,9 @@ KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool
|
|||
_list_len = 0;
|
||||
return false;
|
||||
}
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
|
||||
if (!_get)
|
||||
{
|
||||
|
@ -1481,8 +1493,9 @@ KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int&
|
|||
return false;
|
||||
}
|
||||
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_objidlist = NULL;
|
||||
|
@ -1531,8 +1544,9 @@ KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list
|
|||
return false;
|
||||
}
|
||||
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_preid_objidlist = NULL;
|
||||
|
@ -1682,8 +1696,10 @@ bool
|
|||
KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate) const
|
||||
{
|
||||
//cout << "In getpreIDlistByobjID " << _objid << endl;
|
||||
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_preidlist = NULL;
|
||||
|
@ -1711,8 +1727,10 @@ bool
|
|||
KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _no_duplicate) const
|
||||
{
|
||||
//cout << "In getsubIDlistByobjID " << _objid << endl;
|
||||
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_subidlist = NULL;
|
||||
|
@ -1743,8 +1761,9 @@ bool
|
|||
KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const
|
||||
{
|
||||
//cout << "In getsubIDlistByobjIDpreID " << _objid << ' ' << _preid << endl;
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_subidlist = NULL;
|
||||
|
@ -1786,8 +1805,9 @@ bool
|
|||
KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate) const
|
||||
{
|
||||
//cout << "In getpreIDsubIDlistByobjID " << _objid << endl;
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_preid_subidlist = NULL;
|
||||
|
@ -1915,8 +1935,9 @@ bool
|
|||
KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate) const
|
||||
{
|
||||
//cout << "In getsubIDlistBypreID " << _preid << endl;
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_subidlist = NULL;
|
||||
|
@ -1946,8 +1967,9 @@ bool
|
|||
KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate) const
|
||||
{
|
||||
//cout << "In getobjIDlistBypreID " << _preid << endl;
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_objidlist = NULL;
|
||||
|
@ -1978,8 +2000,9 @@ bool
|
|||
KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate) const
|
||||
{
|
||||
//cout << "In getsubIDobjIDlistBypreID " << _preid << endl;
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
bool _get = this->getValueByKey(this->preID2values, _preid, (char*&)_tmp, _len);
|
||||
if (!_get) {
|
||||
_subid_objidlist = NULL;
|
||||
|
@ -2032,8 +2055,9 @@ KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int&
|
|||
return false;
|
||||
}
|
||||
|
||||
//TODO: use unsigned
|
||||
int* _tmp = NULL;
|
||||
int _len = 0;
|
||||
unsigned _len = 0;
|
||||
this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
|
||||
_list_len = len;
|
||||
int _result = 0;
|
||||
|
@ -2199,7 +2223,7 @@ KVstore::addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen)
|
|||
}
|
||||
|
||||
bool
|
||||
KVstore::addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen)
|
||||
KVstore::addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen)
|
||||
{
|
||||
return _p_btree->insert(_key, _val, _vlen);
|
||||
}
|
||||
|
@ -2217,7 +2241,7 @@ KVstore::setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen)
|
|||
}
|
||||
|
||||
bool
|
||||
KVstore::setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen)
|
||||
KVstore::setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen)
|
||||
{
|
||||
return _p_btree->modify(_key, _val, _vlen);
|
||||
}
|
||||
|
@ -2235,7 +2259,7 @@ KVstore::getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) cons
|
|||
}
|
||||
|
||||
bool
|
||||
KVstore::getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const
|
||||
KVstore::getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const
|
||||
{
|
||||
return _p_btree->search(_key, _val, _vlen);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,11 @@
|
|||
//QUERY: but to count the length each time maybe very costly?
|
||||
//No, because triple num is stored in char* now!!!! we do not need to save it again
|
||||
//TODO: entity_border in s2values list is not needed!!! not waste memory here
|
||||
//
|
||||
//QUERY: but to implement vlist, we need a unsigned flag
|
||||
//What is more, we need to store the string in disk, how can we store it if without the length?
|
||||
//unsigned type stored as chars, maybe will have '\0'
|
||||
//In memory, we do not know where the oidlist ends without the original length (but the triple num will answer this!)
|
||||
|
||||
class KVstore
|
||||
{
|
||||
|
@ -197,15 +202,15 @@ private:
|
|||
|
||||
bool addValueByKey(SITree* _p_btree, char* _key, int _klen, int _val);
|
||||
bool addValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen);
|
||||
bool addValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen);
|
||||
bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
|
||||
|
||||
bool setValueByKey(SITree* _p_btree, char* _key, int _klen, int _val);
|
||||
bool setValueByKey(ISTree* _p_btree, int _key, char* _val, int _vlen);
|
||||
bool setValueByKey(IVTree* _p_btree, int _key, char* _val, int _vlen);
|
||||
bool setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
|
||||
|
||||
bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const;
|
||||
bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const;
|
||||
bool getValueByKey(IVTree* _p_btree, int _key, char*& _val, int& _vlen) const;
|
||||
bool getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const;
|
||||
|
||||
int getIDByStr(SITree* _p_btree, const char* _key, int _klen) const;
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
//headers wrapper for all kinds of BPlusTree
|
||||
|
||||
#include "IVTree/IVTree.h"
|
||||
#include "ISTree/ISTree.h"
|
||||
#include "SITree/SITree.h"
|
||||
#include "IVTree/IVTree.h"
|
||||
|
|
5
NOTES.md
5
NOTES.md
|
@ -88,9 +88,8 @@ http://blog.csdn.net/infoworld/article/details/8670951
|
|||
要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned)
|
||||
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧
|
||||
|
||||
type分支中query过程可能还有问题,需要修改Query/里面的类型
|
||||
去掉tree里面的复制,另外kvstore里面的复制可以考虑通过一个或若干个bstr buffer来实现,避免每次都重新new,但这会影响多线程程序
|
||||
而且在kvstore中往往需要对原始list做一些额外处理
|
||||
type分支中query过程可能还有问题,需要修改Query/里面的类型,另外stringindex中也要修改,分界线已经是20亿且非法不再是-1
|
||||
vstree在build和query时可以用不同大小的缓存,来加速build过程
|
||||
---
|
||||
UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍
|
||||
解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用unsigned long long*和unsigned来表示,这样最高可支持到40亿triple
|
||||
|
|
|
@ -122,7 +122,8 @@ unsigned
|
|||
Bstr::getLen() const
|
||||
{
|
||||
//NOTICE: this is for VList
|
||||
if(this->str == NULL)
|
||||
if(this->isBstrLongList())
|
||||
//if(this->str == NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -214,3 +215,9 @@ Bstr::print(string s) const
|
|||
//#endif
|
||||
}
|
||||
|
||||
bool
|
||||
Bstr::isBstrLongList() const
|
||||
{
|
||||
return this->str == NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -48,6 +48,9 @@ public:
|
|||
//int write(FILE* _fp);
|
||||
~Bstr();
|
||||
void print(std::string s) const; //DEBUG
|
||||
|
||||
//judge if this Bstr represents a long list value, which is not kept in memory and is read each time it is needed
|
||||
bool isBstrLongList() const;
|
||||
};
|
||||
|
||||
#endif // _UTIL_BSTR_H
|
||||
|
|
|
@ -149,6 +149,10 @@ VList::FreeBlock(unsigned _blocknum)
|
|||
|
||||
//NOTICE: all reads are aligned to 4 bytes(including a string)
|
||||
//a string may span several blocks
|
||||
//
|
||||
//NOTICE: not use buffer, read/write on need, update at once, so no need to write back at last
|
||||
//NOTICE: the next is placed at the begin of a block
|
||||
|
||||
|
||||
void
|
||||
VList::ReadAlign(unsigned* _next)
|
||||
|
@ -161,38 +165,68 @@ VList::ReadAlign(unsigned* _next)
|
|||
}
|
||||
|
||||
void
|
||||
VList::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
|
||||
VList::WriteAlign(unsigned* _curnum)
|
||||
{
|
||||
if (ftell(valfp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
fseek(valfp, Address(*_curnum), SEEK_SET);
|
||||
if (_SpecialBlock)
|
||||
{
|
||||
fseek(valfp, 4, SEEK_CUR);
|
||||
_SpecialBlock = false;
|
||||
}
|
||||
fwrite(&blocknum, sizeof(unsigned), 1, valfp);
|
||||
fseek(valfp, Address(blocknum) + 4, SEEK_SET);
|
||||
*_curnum = blocknum;
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: check , read/write a long list, across several blocks
|
||||
//not use buffer, read/write on need, update at once, so no need to write back at last
|
||||
bool
|
||||
VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
|
||||
{
|
||||
fseek(valfp, Address(_block_num), SEEK_SET);
|
||||
unsigned next;
|
||||
fread(&next, sizeof(unsigned), 1, valfp);
|
||||
this->readBstr(_str, _len, &next);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
VList::writeValue(const char* _str, unsigned _len)
|
||||
{
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
unsigned curnum = blocknum;
|
||||
this->writeBstr(_str, _len, &curnum);
|
||||
|
||||
return blocknum;
|
||||
}
|
||||
|
||||
bool
|
||||
VList::removeValue(unsigned _block_num)
|
||||
{
|
||||
unsigned store = _block_num, next;
|
||||
fseek(this->valfp, Address(store), SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, valfp);
|
||||
|
||||
while (store != 0)
|
||||
{
|
||||
this->FreeBlock(store);
|
||||
store = next;
|
||||
fseek(valfp, Address(store), SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, valfp);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//TODO: still use Bstr?? how can we get the next pointer?? use NULL to init
|
||||
//NOTICE: the next is placed at the begin of a block
|
||||
bool
|
||||
VList::readBstr(Bstr* _bp, unsigned* _next)
|
||||
VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
|
||||
{
|
||||
//long address;
|
||||
unsigned len, i, j;
|
||||
fread(&len, sizeof(unsigned), 1, this->valfp);
|
||||
this->ReadAlign(_next);
|
||||
//this->request(len);
|
||||
|
||||
char* s = (char*)malloc(len);
|
||||
_bp->setLen(len);
|
||||
_len = len;
|
||||
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fread(s + i, sizeof(char), 4, valfp);
|
||||
|
@ -203,38 +237,52 @@ VList::readBstr(Bstr* _bp, unsigned* _next)
|
|||
fread(s + i, sizeof(char), 1, valfp); //BETTER
|
||||
i++;
|
||||
}
|
||||
|
||||
j = len % 4;
|
||||
if (j > 0)
|
||||
j = 4 - j;
|
||||
fseek(valfp, j, SEEK_CUR);
|
||||
this->ReadAlign(_next);
|
||||
_bp->setStr(s);
|
||||
|
||||
//NOTICE+DEBUG: I think no need to align here, later no data to read
|
||||
//(if need to read, then fseek again to find a new value)
|
||||
//this->ReadAlign(_next);
|
||||
|
||||
_str = s;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
VList::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
|
||||
VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
|
||||
{
|
||||
unsigned i, j, len = _bp->getLen();
|
||||
unsigned i, j, len = _len;
|
||||
fwrite(&len, sizeof(unsigned), 1, valfp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
char* s = _bp->getStr();
|
||||
this->WriteAlign(_curnum);
|
||||
|
||||
//BETTER: compute this need how many blocks first, then write a block a time
|
||||
|
||||
const char* s = _str;
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fwrite(s + i, sizeof(char), 4, valfp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
this->WriteAlign(_curnum);
|
||||
}
|
||||
while (i < len)
|
||||
{
|
||||
fwrite(s + i, sizeof(char), 1, valfp);
|
||||
i++;
|
||||
}
|
||||
|
||||
j = len % 4;
|
||||
if (j > 0)
|
||||
j = 4 - j;
|
||||
fseek(valfp, j, SEEK_CUR);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
|
||||
//NOTICE+DEBUG: I think no need to align here, later no data to write
|
||||
//(if need to write, then fseek again to write a new value)
|
||||
//this->WriteAlign(_curnum);
|
||||
fseek(valfp, Address(*_curnum), SEEK_SET);
|
||||
unsigned t = 0;
|
||||
fwrite(&t, sizeof(unsigned), 1, valfp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
16
Util/VList.h
16
Util/VList.h
|
@ -12,7 +12,10 @@
|
|||
#include "Util.h"
|
||||
#include "Bstr.h"
|
||||
|
||||
//TODO: not keep long list in memory, read each time
|
||||
//TODO: all use new/delete for Bstr, KVstore and trees, including Stream
|
||||
//then give a full test, including valgrind
|
||||
|
||||
//NOTICE: not keep long list in memory, read each time
|
||||
//but when can you free the long list(kvstore should release it after parsing)
|
||||
//
|
||||
//CONSIDER: if to keep long list in memory, should adjust the bstr in memory:
|
||||
|
@ -61,15 +64,16 @@ private:
|
|||
unsigned AllocBlock();
|
||||
void FreeBlock(unsigned _blocknum);
|
||||
void ReadAlign(unsigned* _next);
|
||||
void WriteAlign(unsigned* _next, bool& _SpecialBlock);
|
||||
void WriteAlign(unsigned* _next);
|
||||
bool readBstr(char*& _bp, unsigned& _len, unsigned* _next);
|
||||
bool writeBstr(const char* _str, unsigned _len, unsigned* _curnum);
|
||||
|
||||
public:
|
||||
VList();
|
||||
VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence
|
||||
bool readBstr(Bstr* _bp, unsigned* _next);
|
||||
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock);
|
||||
bool readValue(unsigned _block_num);
|
||||
bool writeValue(const Bstr* _bp);
|
||||
bool readValue(unsigned _block_num, char*& _str, unsigned& _len);
|
||||
unsigned writeValue(const char* _str, unsigned _len);
|
||||
bool removeValue(unsigned _block_num);
|
||||
~VList();
|
||||
|
||||
static bool isLongList(unsigned _len);
|
||||
|
|
9
makefile
9
makefile
|
@ -72,9 +72,9 @@ sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SI
|
|||
istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o
|
||||
ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o
|
||||
|
||||
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj)
|
||||
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstreeobj)
|
||||
|
||||
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o
|
||||
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o
|
||||
|
||||
queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \
|
||||
$(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o
|
||||
|
@ -219,7 +219,7 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $
|
|||
#objects in istree/ end
|
||||
|
||||
#objects in ivtree/ begin
|
||||
$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o
|
||||
$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o $(objdir)VList.o
|
||||
$(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o
|
||||
|
||||
$(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o
|
||||
|
@ -323,6 +323,9 @@ $(objdir)Triple.o: Util/Triple.cpp Util/Triple.h $(objdir)Util.o
|
|||
$(objdir)BloomFilter.o: Util/BloomFilter.cpp Util/BloomFilter.h $(objdir)Util.o
|
||||
$(CC) $(CFLAGS) Util/BloomFilter.cpp -o $(objdir)BloomFilter.o
|
||||
|
||||
$(objdir)VList.o: Util/VList.cpp Util/VList.h
|
||||
$(CC) $(CFLAGS) Util/VList.cpp -o $(objdir)VList.o
|
||||
|
||||
#objects in util/ end
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue