refactor: add VList for IVTree
also, lower the copy cost in KVstore by zengli, all changes closed in KVstore, using new/delete for all instead of malloc/free
This commit is contained in:
parent
ceff3544ae
commit
80080d1bca
|
@ -800,6 +800,7 @@ Database::build(const string& _rdf_file)
|
||||||
//sync();
|
//sync();
|
||||||
//cout << "sync vstree" << endl;
|
//cout << "sync vstree" << endl;
|
||||||
|
|
||||||
|
//TODO: use fopen w+ to remove signature.binary file
|
||||||
//string cmd = "rm -rf " + _entry_file;
|
//string cmd = "rm -rf " + _entry_file;
|
||||||
//system(cmd.c_str());
|
//system(cmd.c_str());
|
||||||
//cout << "signature file removed" << endl;
|
//cout << "signature file removed" << endl;
|
||||||
|
|
|
@ -419,7 +419,8 @@ ISStorage::readBstr(Bstr* _bp, unsigned* _next)
|
||||||
fread(&len, sizeof(unsigned), 1, this->treefp);
|
fread(&len, sizeof(unsigned), 1, this->treefp);
|
||||||
this->ReadAlign(_next);
|
this->ReadAlign(_next);
|
||||||
//this->request(len);
|
//this->request(len);
|
||||||
char* s = (char*)malloc(len);
|
//char* s = (char*)malloc(len);
|
||||||
|
char* s = new char[len];
|
||||||
_bp->setLen(len);
|
_bp->setLen(len);
|
||||||
for (i = 0; i + 4 < len; i += 4)
|
for (i = 0; i + 4 < len; i += 4)
|
||||||
{
|
{
|
||||||
|
|
|
@ -237,7 +237,11 @@ IVTree::insert(unsigned _key, char* _str, unsigned _len)
|
||||||
p->addKey(_key, i);
|
p->addKey(_key, i);
|
||||||
p->addValue(this->value_list, i, _str, _len, true);
|
p->addValue(this->value_list, i, _str, _len, true);
|
||||||
p->addNum();
|
p->addNum();
|
||||||
request += _len;
|
//NOTICE: if this is a vlist, then it will be freed, and should not be included in the request memory
|
||||||
|
if(!VList::isLongList(_len))
|
||||||
|
{
|
||||||
|
request += _len;
|
||||||
|
}
|
||||||
//request += val->getLen();
|
//request += val->getLen();
|
||||||
p->setDirty();
|
p->setDirty();
|
||||||
this->TSM->updateHeap(p, p->getRank(), true);
|
this->TSM->updateHeap(p, p->getRank(), true);
|
||||||
|
@ -272,13 +276,20 @@ IVTree::modify(unsigned _key, char* _str, unsigned _len)
|
||||||
|
|
||||||
//NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr
|
//NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr
|
||||||
unsigned len = ret->getValue(store)->getLen();
|
unsigned len = ret->getValue(store)->getLen();
|
||||||
|
if(ret->getValue(store)->isBstrLongList())
|
||||||
|
{
|
||||||
|
len = 0;
|
||||||
|
}
|
||||||
ret->setValue(this->value_list, store, _str, _len, true);
|
ret->setValue(this->value_list, store, _str, _len, true);
|
||||||
//ret->setValue(val, store, true);
|
//ret->setValue(val, store, true);
|
||||||
//cout<<"value reset"<<endl;
|
//cout<<"value reset"<<endl;
|
||||||
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
|
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
|
||||||
|
|
||||||
//request += (val->getLen() - len);
|
//request += (val->getLen() - len);
|
||||||
this->request = _len;
|
if(!VList::isLongList(_len))
|
||||||
|
{
|
||||||
|
this->request += _len;
|
||||||
|
}
|
||||||
//this->request = val->getLen();
|
//this->request = val->getLen();
|
||||||
this->request -= len;
|
this->request -= len;
|
||||||
ret->setDirty();
|
ret->setDirty();
|
||||||
|
@ -417,7 +428,10 @@ IVTree::remove(unsigned _key)
|
||||||
//WARN+NOTICE:here must check, because the key to remove maybe not exist
|
//WARN+NOTICE:here must check, because the key to remove maybe not exist
|
||||||
if (i != (int)p->getNum())
|
if (i != (int)p->getNum())
|
||||||
{
|
{
|
||||||
request -= p->getValue(i)->getLen();
|
if(!p->getValue(i)->isBstrLongList())
|
||||||
|
{
|
||||||
|
request -= p->getValue(i)->getLen();
|
||||||
|
}
|
||||||
p->subKey(i); //to release
|
p->subKey(i); //to release
|
||||||
p->subValue(this->value_list, i, true); //to release
|
p->subValue(this->value_list, i, true); //to release
|
||||||
p->subNum();
|
p->subNum();
|
||||||
|
@ -605,6 +619,8 @@ IVTree::release(IVNode* _np) const
|
||||||
|
|
||||||
IVTree::~IVTree()
|
IVTree::~IVTree()
|
||||||
{
|
{
|
||||||
|
delete this->value_list;
|
||||||
|
|
||||||
delete this->stream; //maybe NULL
|
delete this->stream; //maybe NULL
|
||||||
delete TSM;
|
delete TSM;
|
||||||
#ifdef DEBUG_KVSTORE
|
#ifdef DEBUG_KVSTORE
|
||||||
|
|
|
@ -126,6 +126,9 @@ IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) con
|
||||||
//read long list
|
//read long list
|
||||||
if(this->values[_index].isBstrLongList())
|
if(this->values[_index].isBstrLongList())
|
||||||
{
|
{
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
cout<<"this is a vlist in get()"<<endl;
|
||||||
|
#endif
|
||||||
unsigned block_num = this->values[_index].getLen();
|
unsigned block_num = this->values[_index].getLen();
|
||||||
_vlist->readValue(block_num, _str, _len);
|
_vlist->readValue(block_num, _str, _len);
|
||||||
}
|
}
|
||||||
|
@ -150,6 +153,9 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
|
||||||
|
|
||||||
if(this->values[_index].isBstrLongList())
|
if(this->values[_index].isBstrLongList())
|
||||||
{
|
{
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
cout<<"this is a vlist in set()"<<endl;
|
||||||
|
#endif
|
||||||
unsigned block_num = this->values[_index].getLen();
|
unsigned block_num = this->values[_index].getLen();
|
||||||
_vlist->removeValue(block_num);
|
_vlist->removeValue(block_num);
|
||||||
}
|
}
|
||||||
|
@ -173,6 +179,8 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
|
||||||
unsigned block_num = _vlist->writeValue(_str, _len);
|
unsigned block_num = _vlist->writeValue(_str, _len);
|
||||||
this->values[_index].setStr(NULL);
|
this->values[_index].setStr(NULL);
|
||||||
this->values[_index].setLen(block_num);
|
this->values[_index].setLen(block_num);
|
||||||
|
//NOTICE: we need to free the long list value
|
||||||
|
delete[] _str;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -203,9 +211,17 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
|
||||||
|
|
||||||
if(VList::isLongList(_len))
|
if(VList::isLongList(_len))
|
||||||
{
|
{
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
cout<<"this is a vlist in add()"<<endl;
|
||||||
|
#endif
|
||||||
unsigned block_num = _vlist->writeValue(_str, _len);
|
unsigned block_num = _vlist->writeValue(_str, _len);
|
||||||
this->values[_index].setStr(NULL);
|
this->values[_index].setStr(NULL);
|
||||||
this->values[_index].setLen(block_num);
|
this->values[_index].setLen(block_num);
|
||||||
|
//NOTICE: we need to free the long list value
|
||||||
|
delete[] _str;
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
//cout<<"to check vlist: "<<this->values[_index].getLen()<<endl;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -444,6 +444,9 @@ IVStorage::readBstr(Bstr* _bp, unsigned* _next)
|
||||||
{
|
{
|
||||||
unsigned addr = 0;
|
unsigned addr = 0;
|
||||||
fread(&addr, sizeof(unsigned), 1, this->treefp);
|
fread(&addr, sizeof(unsigned), 1, this->treefp);
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
cout<<"read a vlist in IVStorage - addr: "<<addr<<endl;
|
||||||
|
#endif
|
||||||
_bp->setLen(addr);
|
_bp->setLen(addr);
|
||||||
_bp->setStr(NULL);
|
_bp->setStr(NULL);
|
||||||
this->ReadAlign(_next);
|
this->ReadAlign(_next);
|
||||||
|
@ -489,6 +492,9 @@ IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
|
||||||
this->WriteAlign(_curnum, _SpecialBlock);
|
this->WriteAlign(_curnum, _SpecialBlock);
|
||||||
//then this is the real block num
|
//then this is the real block num
|
||||||
fwrite(&len, sizeof(unsigned), 1, treefp);
|
fwrite(&len, sizeof(unsigned), 1, treefp);
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
cout<<"to write a vlist in IVStorage::writeBstr() - blocknum: "<<len<<endl;
|
||||||
|
#endif
|
||||||
this->WriteAlign(_curnum, _SpecialBlock);
|
this->WriteAlign(_curnum, _SpecialBlock);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,7 +12,8 @@ using namespace std;
|
||||||
|
|
||||||
//sets store_path as the root dir of this KVstore
|
//sets store_path as the root dir of this KVstore
|
||||||
//initial all Tree pointers as NULL
|
//initial all Tree pointers as NULL
|
||||||
KVstore::KVstore(string _store_path) {
|
KVstore::KVstore(string _store_path)
|
||||||
|
{
|
||||||
this->store_path = _store_path;
|
this->store_path = _store_path;
|
||||||
|
|
||||||
this->entity2id = NULL;
|
this->entity2id = NULL;
|
||||||
|
@ -30,14 +31,17 @@ KVstore::KVstore(string _store_path) {
|
||||||
}
|
}
|
||||||
|
|
||||||
//Release all the memory used in this KVstore before destruction
|
//Release all the memory used in this KVstore before destruction
|
||||||
KVstore::~KVstore() {
|
KVstore::~KVstore()
|
||||||
|
{
|
||||||
this->flush();
|
this->flush();
|
||||||
this->release();
|
this->release();
|
||||||
}
|
}
|
||||||
|
|
||||||
//Flush all modified parts into the disk, which will not release any memory
|
//Flush all modified parts into the disk, which will not release any memory
|
||||||
//Does nothing to null tree pointers or parts that has not been modified
|
//Does nothing to null tree pointers or parts that has not been modified
|
||||||
void KVstore::flush() {
|
void
|
||||||
|
KVstore::flush()
|
||||||
|
{
|
||||||
this->flush(this->entity2id);
|
this->flush(this->entity2id);
|
||||||
this->flush(this->id2entity);
|
this->flush(this->id2entity);
|
||||||
|
|
||||||
|
@ -52,7 +56,9 @@ void KVstore::flush() {
|
||||||
this->flush(this->objID2values);
|
this->flush(this->objID2values);
|
||||||
}
|
}
|
||||||
|
|
||||||
void KVstore::release() {
|
void
|
||||||
|
KVstore::release()
|
||||||
|
{
|
||||||
delete this->entity2id;
|
delete this->entity2id;
|
||||||
this->entity2id = NULL;
|
this->entity2id = NULL;
|
||||||
delete this->id2entity;
|
delete this->id2entity;
|
||||||
|
@ -76,7 +82,9 @@ void KVstore::release() {
|
||||||
this->objID2values = NULL;
|
this->objID2values = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void KVstore::open() {
|
void
|
||||||
|
KVstore::open()
|
||||||
|
{
|
||||||
cout << "open KVstore" << endl;
|
cout << "open KVstore" << endl;
|
||||||
|
|
||||||
this->open_entity2id(KVstore::READ_WRITE_MODE);
|
this->open_entity2id(KVstore::READ_WRITE_MODE);
|
||||||
|
@ -93,102 +101,192 @@ void KVstore::open() {
|
||||||
this->open_preID2values(KVstore::READ_WRITE_MODE);
|
this->open_preID2values(KVstore::READ_WRITE_MODE);
|
||||||
}
|
}
|
||||||
|
|
||||||
int KVstore::getEntityDegree(int _entity_id) const {
|
int
|
||||||
|
KVstore::getEntityDegree(int _entity_id) const
|
||||||
|
{
|
||||||
return this->getEntityInDegree(_entity_id) + this->getEntityOutDegree(_entity_id);
|
return this->getEntityInDegree(_entity_id) + this->getEntityOutDegree(_entity_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
int KVstore::getEntityInDegree(int _entity_id) const {
|
int
|
||||||
|
KVstore::getEntityInDegree(int _entity_id) const
|
||||||
|
{
|
||||||
//cout << "In getEntityInDegree " << _entity_id << endl;
|
//cout << "In getEntityInDegree " << _entity_id << endl;
|
||||||
unsigned* _tmp = NULL;
|
unsigned* _tmp = NULL;
|
||||||
unsigned _len = 0;
|
unsigned _len = 0;
|
||||||
bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len);
|
bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len);
|
||||||
if (!_get) {
|
|
||||||
return 0;
|
int ret = 0;
|
||||||
|
if (_get)
|
||||||
|
{
|
||||||
|
ret = _tmp[0];
|
||||||
}
|
}
|
||||||
return _tmp[0];
|
|
||||||
|
//if this is a long list, then we should remove itself after copying
|
||||||
|
//otherwise, we should not free the list memory
|
||||||
|
if(VList::isLongList(_len))
|
||||||
|
{
|
||||||
|
delete[] _tmp;
|
||||||
|
//_tmp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int KVstore::getEntityOutDegree(int _entity_id) const {
|
int
|
||||||
|
KVstore::getEntityOutDegree(int _entity_id) const
|
||||||
|
{
|
||||||
//cout << "In getEntityOutDegree " << _entity_id << endl;
|
//cout << "In getEntityOutDegree " << _entity_id << endl;
|
||||||
unsigned* _tmp = NULL;
|
unsigned* _tmp = NULL;
|
||||||
unsigned _len = 0;
|
unsigned _len = 0;
|
||||||
bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len);
|
bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len);
|
||||||
if (!_get) {
|
|
||||||
return 0;
|
int ret = 0;
|
||||||
|
if (_get)
|
||||||
|
{
|
||||||
|
ret = _tmp[0];
|
||||||
}
|
}
|
||||||
return _tmp[0];
|
|
||||||
|
//if this is a long list, then we should remove itself after copying
|
||||||
|
//otherwise, we should not free the list memory
|
||||||
|
if(VList::isLongList(_len))
|
||||||
|
{
|
||||||
|
delete[] _tmp;
|
||||||
|
//_tmp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int KVstore::getLiteralDegree(int _literal_id) const {
|
int
|
||||||
|
KVstore::getLiteralDegree(int _literal_id) const
|
||||||
|
{
|
||||||
//cout << "In getLiteralDegree " << _literal_id << endl;
|
//cout << "In getLiteralDegree " << _literal_id << endl;
|
||||||
unsigned* _tmp = NULL;
|
unsigned* _tmp = NULL;
|
||||||
unsigned _len = 0;
|
unsigned _len = 0;
|
||||||
bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len);
|
bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len);
|
||||||
if (!_get) {
|
|
||||||
return 0;
|
int ret = 0;
|
||||||
|
if (_get)
|
||||||
|
{
|
||||||
|
ret = _tmp[0];
|
||||||
}
|
}
|
||||||
return _tmp[0];
|
|
||||||
|
//if this is a long list, then we should remove itself after copying
|
||||||
|
//otherwise, we should not free the list memory
|
||||||
|
if(VList::isLongList(_len))
|
||||||
|
{
|
||||||
|
delete[] _tmp;
|
||||||
|
//_tmp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int KVstore::getPredicateDegree(int _predicate_id) const {
|
int
|
||||||
|
KVstore::getPredicateDegree(int _predicate_id) const
|
||||||
|
{
|
||||||
//cout << "In getPredicate Degree " << _predicate_id << endl;
|
//cout << "In getPredicate Degree " << _predicate_id << endl;
|
||||||
unsigned* _tmp = NULL;
|
unsigned* _tmp = NULL;
|
||||||
unsigned _len = 0;
|
unsigned _len = 0;
|
||||||
bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len);
|
bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len);
|
||||||
if (!_get) {
|
|
||||||
return 0;
|
int ret = 0;
|
||||||
|
if (_get)
|
||||||
|
{
|
||||||
|
ret = _tmp[0];
|
||||||
}
|
}
|
||||||
return _tmp[0];
|
|
||||||
|
//if this is a long list, then we should remove itself after copying
|
||||||
|
//otherwise, we should not free the list memory
|
||||||
|
if(VList::isLongList(_len))
|
||||||
|
{
|
||||||
|
delete[] _tmp;
|
||||||
|
//_tmp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const {
|
int
|
||||||
|
KVstore::getSubjectPredicateDegree(int _subid, int _preid) const
|
||||||
|
{
|
||||||
//cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl;
|
//cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl;
|
||||||
|
|
||||||
//TODO: use unsigned
|
//TODO: use unsigned
|
||||||
int* _tmp = NULL;
|
int* _tmp = NULL;
|
||||||
unsigned _len = 0;
|
unsigned _len = 0;
|
||||||
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
|
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
|
||||||
if (!_get) {
|
|
||||||
return 0;
|
int ret = 0;
|
||||||
|
if(_get)
|
||||||
|
{
|
||||||
|
int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2);
|
||||||
|
if (_result != -1)
|
||||||
|
{
|
||||||
|
int _offset = _tmp[4 + 2 * _result];
|
||||||
|
int _offset_next;
|
||||||
|
if (_result == _tmp[1] - 1)
|
||||||
|
{
|
||||||
|
_offset_next = 3 + 2 * _tmp[1] + _tmp[0];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_offset_next = _tmp[6 + 2 * _result];
|
||||||
|
}
|
||||||
|
ret = _offset_next - _offset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2);
|
|
||||||
if (_result == -1) {
|
//if this is a long list, then we should remove itself after copying
|
||||||
return 0;
|
//otherwise, we should not free the list memory
|
||||||
|
if(VList::isLongList(_len))
|
||||||
|
{
|
||||||
|
delete[] _tmp;
|
||||||
|
//_tmp = NULL;
|
||||||
}
|
}
|
||||||
int _offset = _tmp[4 + 2 * _result];
|
|
||||||
int _offset_next;
|
return ret;
|
||||||
if (_result == _tmp[1] - 1) {
|
|
||||||
_offset_next = 3 + 2 * _tmp[1] + _tmp[0];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
_offset_next = _tmp[6 + 2 * _result];
|
|
||||||
}
|
|
||||||
return _offset_next - _offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int KVstore::getObjectPredicateDegree(int _objid, int _preid) const {
|
int
|
||||||
|
KVstore::getObjectPredicateDegree(int _objid, int _preid) const
|
||||||
|
{
|
||||||
//cout << "In getObjectPredicateDegree " << _objid << _preid << endl;
|
//cout << "In getObjectPredicateDegree " << _objid << _preid << endl;
|
||||||
|
|
||||||
//TODO: use unsigned
|
//TODO: use unsigned
|
||||||
int* _tmp = NULL;
|
int* _tmp = NULL;
|
||||||
unsigned _len = 0;
|
unsigned _len = 0;
|
||||||
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
|
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
|
||||||
if (!_get) {
|
|
||||||
return 0;
|
int ret = 0;
|
||||||
|
if (_get)
|
||||||
|
{
|
||||||
|
int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2);
|
||||||
|
if (_result != -1)
|
||||||
|
{
|
||||||
|
int _offset = _tmp[3 + 2 * _result];
|
||||||
|
int _offset_next;
|
||||||
|
if (_result == _tmp[1] - 1)
|
||||||
|
{
|
||||||
|
_offset_next = 2 + 2 * _tmp[1] + _tmp[0];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_offset_next = _tmp[5 + 2 * _result];
|
||||||
|
}
|
||||||
|
ret = _offset_next - _offset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2);
|
|
||||||
if (_result == -1) {
|
//if this is a long list, then we should remove itself after copying
|
||||||
return 0;
|
//otherwise, we should not free the list memory
|
||||||
|
if(VList::isLongList(_len))
|
||||||
|
{
|
||||||
|
delete[] _tmp;
|
||||||
|
//_tmp = NULL;
|
||||||
}
|
}
|
||||||
int _offset = _tmp[3 + 2 * _result];
|
|
||||||
int _offset_next;
|
return ret;
|
||||||
if (_result == _tmp[1] - 1) {
|
|
||||||
_offset_next = 2 + 2 * _tmp[1] + _tmp[0];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
_offset_next = _tmp[5 + 2 * _result];
|
|
||||||
}
|
|
||||||
return _offset_next - _offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id) {
|
bool KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id) {
|
||||||
|
|
|
@ -419,7 +419,8 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next)
|
||||||
fread(&len, sizeof(unsigned), 1, this->treefp);
|
fread(&len, sizeof(unsigned), 1, this->treefp);
|
||||||
this->ReadAlign(_next);
|
this->ReadAlign(_next);
|
||||||
//this->request(len);
|
//this->request(len);
|
||||||
char* s = (char*)malloc(len);
|
//char* s = (char*)malloc(len);
|
||||||
|
char* s = new char[len];
|
||||||
_bp->setLen(len);
|
_bp->setLen(len);
|
||||||
for (i = 0; i + 4 < len; i += 4)
|
for (i = 0; i + 4 < len; i += 4)
|
||||||
{
|
{
|
||||||
|
|
3
NOTES.md
3
NOTES.md
|
@ -89,6 +89,7 @@ http://blog.csdn.net/infoworld/article/details/8670951
|
||||||
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧
|
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧
|
||||||
|
|
||||||
type分支中query过程可能还有问题,需要修改Query/里面的类型,另外stringindex中也要修改,分界线已经是20亿且非法不再是-1
|
type分支中query过程可能还有问题,需要修改Query/里面的类型,另外stringindex中也要修改,分界线已经是20亿且非法不再是-1
|
||||||
|
remove signature.binary, 合并两个分支type value
|
||||||
vstree在build和query时可以用不同大小的缓存,来加速build过程
|
vstree在build和query时可以用不同大小的缓存,来加速build过程
|
||||||
---
|
---
|
||||||
UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍
|
UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍
|
||||||
|
@ -469,6 +470,8 @@ build db error if triple num > 500M
|
||||||
|
|
||||||
# BETTER
|
# BETTER
|
||||||
|
|
||||||
|
#### 添加数据访问层,数据范式和生成数据访问的源码
|
||||||
|
|
||||||
#### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询,返回空值!
|
#### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询,返回空值!
|
||||||
|
|
||||||
#### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?)
|
#### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?)
|
||||||
|
|
|
@ -28,7 +28,9 @@ Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy)
|
||||||
//return;
|
//return;
|
||||||
//}
|
//}
|
||||||
|
|
||||||
this->str = (char*)malloc(_len);
|
//NOTICE: we decide to use new/delete in global area
|
||||||
|
//this->str = (char*)malloc(_len);
|
||||||
|
this->str = new char[_len];
|
||||||
memcpy(this->str, _str, sizeof(char) * _len);
|
memcpy(this->str, _str, sizeof(char) * _len);
|
||||||
//this->str[_len]='\0';
|
//this->str[_len]='\0';
|
||||||
}
|
}
|
||||||
|
@ -121,12 +123,14 @@ Bstr::operator != (const Bstr& _bstr)
|
||||||
unsigned
|
unsigned
|
||||||
Bstr::getLen() const
|
Bstr::getLen() const
|
||||||
{
|
{
|
||||||
|
//WARN: we should not include overly complicated logic here!!!!
|
||||||
|
|
||||||
//NOTICE: this is for VList
|
//NOTICE: this is for VList
|
||||||
if(this->isBstrLongList())
|
//if(this->isBstrLongList())
|
||||||
//if(this->str == NULL)
|
////if(this->str == NULL)
|
||||||
{
|
//{
|
||||||
return 0;
|
//return 0;
|
||||||
}
|
//}
|
||||||
|
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
@ -158,15 +162,18 @@ Bstr::copy(const Bstr* _bp)
|
||||||
this->length = _bp->getLen();
|
this->length = _bp->getLen();
|
||||||
//DEBUG!!!
|
//DEBUG!!!
|
||||||
//cerr<<"bstr length: "<<this->length<<endl;
|
//cerr<<"bstr length: "<<this->length<<endl;
|
||||||
this->str = (char*)malloc(this->length);
|
|
||||||
memcpy(this->str, _bp->getStr(), this->length);
|
//this->str = (char*)malloc(this->length);
|
||||||
|
this->str = new char[this->length];
|
||||||
|
memcpy(this->str, _bp->getStr(), sizeof(char) * this->length);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Bstr::copy(const char* _str, unsigned _len)
|
Bstr::copy(const char* _str, unsigned _len)
|
||||||
{
|
{
|
||||||
this->length = _len;
|
this->length = _len;
|
||||||
this->str = (char*)malloc(this->length);
|
//this->str = (char*)malloc(this->length);
|
||||||
|
this->str = new char[this->length];
|
||||||
memcpy(this->str, _str, this->length);
|
memcpy(this->str, _str, this->length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,7 +187,8 @@ Bstr::clear()
|
||||||
void
|
void
|
||||||
Bstr::release()
|
Bstr::release()
|
||||||
{
|
{
|
||||||
free(this->str); //ok to be null, do nothing
|
//free(this->str); //ok to be null, do nothing
|
||||||
|
delete[] this->str;
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,8 @@ Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rown
|
||||||
this->record_size = new unsigned[this->colnum];
|
this->record_size = new unsigned[this->colnum];
|
||||||
for(unsigned i = 0; i < this->colnum; ++i)
|
for(unsigned i = 0; i < this->colnum; ++i)
|
||||||
{
|
{
|
||||||
this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
char* tmptr = new char[Util::TRANSFER_SIZE];
|
||||||
|
this->record[i].setStr(tmptr);
|
||||||
this->record_size[i] = Util::TRANSFER_SIZE;
|
this->record_size[i] = Util::TRANSFER_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,7 +149,8 @@ Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx)
|
||||||
if(length + 1 > this->record_size[_idx])
|
if(length + 1 > this->record_size[_idx])
|
||||||
{
|
{
|
||||||
this->record[_idx].release();
|
this->record[_idx].release();
|
||||||
this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char)));
|
char* tmptr = new char[length+1];
|
||||||
|
this->record[_idx].setStr(tmptr);
|
||||||
this->record_size[_idx] = length + 1; //one more byte: convenient to add \0
|
this->record_size[_idx] = length + 1; //one more byte: convenient to add \0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,7 +189,8 @@ Stream::outputCache()
|
||||||
{
|
{
|
||||||
unsigned len;
|
unsigned len;
|
||||||
fread(&len, sizeof(unsigned), 1, this->tempfp);
|
fread(&len, sizeof(unsigned), 1, this->tempfp);
|
||||||
char* p = (char*)malloc(len * sizeof(char));
|
//char* p = (char*)malloc(len * sizeof(char));
|
||||||
|
char* p = new char[len];
|
||||||
fread(p, sizeof(char), len, this->tempfp);
|
fread(p, sizeof(char), len, this->tempfp);
|
||||||
bp[i].setLen(len);
|
bp[i].setLen(len);
|
||||||
bp[i].setStr(p);
|
bp[i].setStr(p);
|
||||||
|
@ -320,13 +323,16 @@ Stream::read()
|
||||||
//FILE* fp = (FILE*)(this->ans);
|
//FILE* fp = (FILE*)(this->ans);
|
||||||
for(unsigned i = 0; i < this->colnum; ++i)
|
for(unsigned i = 0; i < this->colnum; ++i)
|
||||||
{
|
{
|
||||||
//BETTER:alloca and reuse the space in Bstr?
|
//BETTER:alloc and reuse the space in Bstr?
|
||||||
unsigned len;
|
unsigned len;
|
||||||
fread(&len, sizeof(unsigned), 1, this->ansDisk);
|
fread(&len, sizeof(unsigned), 1, this->ansDisk);
|
||||||
char* s = (char*)calloc(len + 1, sizeof(char));
|
//char* s = (char*)calloc(len + 1, sizeof(char));
|
||||||
|
char* s = new char[len+1];
|
||||||
fread(s, sizeof(char), len, this->ansDisk);
|
fread(s, sizeof(char), len, this->ansDisk);
|
||||||
|
s[len] = '\0';
|
||||||
this->copyToRecord(s, len, i);
|
this->copyToRecord(s, len, i);
|
||||||
free(s);
|
//free(s);
|
||||||
|
delete[] s;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this->xpos++;
|
this->xpos++;
|
||||||
|
@ -420,7 +426,9 @@ Stream::mergeSort()
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
s = (char*)malloc(sizeof(char) * len);
|
|
||||||
|
//s = (char*)malloc(sizeof(char) * len);
|
||||||
|
s = new char[len];
|
||||||
fread(s, sizeof(char), len, tp);
|
fread(s, sizeof(char), len, tp);
|
||||||
bp[i].setLen(len);
|
bp[i].setLen(len);
|
||||||
bp[i].setStr(s);
|
bp[i].setStr(s);
|
||||||
|
|
|
@ -651,7 +651,14 @@ Util::result_id_str(vector<int*>& _v, int _var_num)
|
||||||
bool
|
bool
|
||||||
Util::dir_exist(const string _dir)
|
Util::dir_exist(const string _dir)
|
||||||
{
|
{
|
||||||
return (opendir(_dir.c_str()) != NULL);
|
DIR* dirptr = opendir(_dir.c_str());
|
||||||
|
if(dirptr != NULL)
|
||||||
|
{
|
||||||
|
closedir(dirptr);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
|
@ -91,6 +91,7 @@ in the sparql query can point to the same node in data graph)
|
||||||
//#define DEBUG_VSTREE 1 //in Database
|
//#define DEBUG_VSTREE 1 //in Database
|
||||||
//#define DEBUG_LRUCACHE 1
|
//#define DEBUG_LRUCACHE 1
|
||||||
//#define DEBUG_DATABASE 1 //in Database
|
//#define DEBUG_DATABASE 1 //in Database
|
||||||
|
//#define DEBUG_VLIST 1
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
|
|
||||||
|
@ -124,6 +125,12 @@ in the sparql query can point to the same node in data graph)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
#ifndef DEBUG
|
||||||
|
#define DEBUG
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef DEBUG
|
#ifndef DEBUG
|
||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -180,6 +180,9 @@ VList::WriteAlign(unsigned* _curnum)
|
||||||
bool
|
bool
|
||||||
VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
|
VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
|
||||||
{
|
{
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
cout<<"to get value of block num: "<<_block_num<<endl;
|
||||||
|
#endif
|
||||||
fseek(valfp, Address(_block_num), SEEK_SET);
|
fseek(valfp, Address(_block_num), SEEK_SET);
|
||||||
unsigned next;
|
unsigned next;
|
||||||
fread(&next, sizeof(unsigned), 1, valfp);
|
fread(&next, sizeof(unsigned), 1, valfp);
|
||||||
|
@ -193,8 +196,14 @@ VList::writeValue(const char* _str, unsigned _len)
|
||||||
{
|
{
|
||||||
unsigned blocknum = this->AllocBlock();
|
unsigned blocknum = this->AllocBlock();
|
||||||
unsigned curnum = blocknum;
|
unsigned curnum = blocknum;
|
||||||
|
|
||||||
|
//NOTICE: here we must skip the next position first
|
||||||
|
fseek(valfp, Address(curnum) + 4, SEEK_SET);
|
||||||
this->writeBstr(_str, _len, &curnum);
|
this->writeBstr(_str, _len, &curnum);
|
||||||
|
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
cout<<"to write value - block num: "<<blocknum<<endl;
|
||||||
|
#endif
|
||||||
return blocknum;
|
return blocknum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,9 +231,13 @@ VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
|
||||||
//long address;
|
//long address;
|
||||||
unsigned len, i, j;
|
unsigned len, i, j;
|
||||||
fread(&len, sizeof(unsigned), 1, this->valfp);
|
fread(&len, sizeof(unsigned), 1, this->valfp);
|
||||||
|
#ifdef DEBUG_VLIST
|
||||||
|
cout<<"the length of value: "<<len<<endl;
|
||||||
|
#endif
|
||||||
this->ReadAlign(_next);
|
this->ReadAlign(_next);
|
||||||
|
|
||||||
char* s = (char*)malloc(len);
|
//char* s = (char*)malloc(len);
|
||||||
|
char* s = new char[len];
|
||||||
_len = len;
|
_len = len;
|
||||||
|
|
||||||
for (i = 0; i + 4 < len; i += 4)
|
for (i = 0; i + 4 < len; i += 4)
|
||||||
|
@ -257,6 +270,7 @@ VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
|
||||||
unsigned i, j, len = _len;
|
unsigned i, j, len = _len;
|
||||||
fwrite(&len, sizeof(unsigned), 1, valfp);
|
fwrite(&len, sizeof(unsigned), 1, valfp);
|
||||||
this->WriteAlign(_curnum);
|
this->WriteAlign(_curnum);
|
||||||
|
//cout<<"to write bstr, length: "<<len<<endl;
|
||||||
|
|
||||||
//BETTER: first compute how many blocks this needs, then write one block at a time
|
//BETTER: first compute how many blocks this needs, then write one block at a time
|
||||||
|
|
||||||
|
@ -289,7 +303,39 @@ VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
|
||||||
|
|
||||||
VList::~VList()
|
VList::~VList()
|
||||||
{
|
{
|
||||||
BlockInfo* bp = this->freelist;
|
//write the info back
|
||||||
|
fseek(this->valfp, 0, SEEK_SET);
|
||||||
|
fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num
|
||||||
|
fseek(valfp, BLOCK_SIZE, SEEK_SET);
|
||||||
|
int i, j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
|
||||||
|
for (i = 0; i < j; ++i)
|
||||||
|
{
|
||||||
|
//reset to 1 first
|
||||||
|
fputc(0xff, valfp);
|
||||||
|
}
|
||||||
|
char c;
|
||||||
|
BlockInfo* bp = this->freelist->next;
|
||||||
|
while (bp != NULL)
|
||||||
|
{
|
||||||
|
//if not-use then set 0, aligned to byte!
|
||||||
|
#ifdef DEBUG_KVSTORE
|
||||||
|
if (bp->num > cur_block_num)
|
||||||
|
{
|
||||||
|
printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
j = bp->num - 1;
|
||||||
|
i = j / 8;
|
||||||
|
j = 7 - j % 8;
|
||||||
|
fseek(valfp, BLOCK_SIZE + i, SEEK_SET);
|
||||||
|
c = fgetc(valfp);
|
||||||
|
fseek(valfp, -1, SEEK_CUR);
|
||||||
|
fputc(c & ~(1 << j), valfp);
|
||||||
|
bp = bp->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
bp = this->freelist;
|
||||||
BlockInfo* next;
|
BlockInfo* next;
|
||||||
while (bp != NULL)
|
while (bp != NULL)
|
||||||
{
|
{
|
||||||
|
|
11
Util/VList.h
11
Util/VList.h
|
@ -12,9 +12,6 @@
|
||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
#include "Bstr.h"
|
#include "Bstr.h"
|
||||||
|
|
||||||
//TODO: all use new/delete for Bstr, KVstore and trees, including Stream
|
|
||||||
//then give a full test, including valgrind
|
|
||||||
|
|
||||||
//NOTICE: not keep long list in memory, read each time
|
//NOTICE: not keep long list in memory, read each time
|
||||||
//but when can you free the long list(kvstore should release it after parsing)
|
//but when can you free the long list(kvstore should release it after parsing)
|
||||||
//
|
//
|
||||||
|
@ -31,15 +28,19 @@
|
||||||
//file1 is tree file, the long list is represented as: 0 real-address
|
//file1 is tree file, the long list is represented as: 0 real-address
|
||||||
//NOTICE: long list is not kept in memory for cache, it is read/updated each time it is needed!
|
//NOTICE: long list is not kept in memory for cache, it is read/updated each time it is needed!
|
||||||
|
|
||||||
|
//TODO: use fread/fwrite here instead of fgetc/fputc
|
||||||
|
//including other trees
|
||||||
|
|
||||||
class VList
|
class VList
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
//NOTICE:the border is 10^6, but the block is larger, 1M
|
//NOTICE:the border is 10^6, but the block is larger, 1M
|
||||||
static const unsigned LENGTH_BORDER = 1000000;
|
//static const unsigned LENGTH_BORDER = 1000000;
|
||||||
|
static const unsigned LENGTH_BORDER = 1000;
|
||||||
static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block
|
static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block
|
||||||
static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num
|
static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num
|
||||||
//below two constants: must be exactly divisible by 8
|
//below two constants: must be exactly divisible by 8
|
||||||
static const unsigned SET_BLOCK_NUM = 1 << 2; //initial blocks num
|
static const unsigned SET_BLOCK_NUM = 1 << 3; //initial blocks num
|
||||||
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
|
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
|
||||||
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
|
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
INSERT DATA
|
||||||
|
{
|
||||||
|
<http://www.founder/102> <http://www.founder.20.link:52> <http://www.founder/106> .
|
||||||
|
<http://www.founder/102> <http://www.founder> <http://www.founder/73> .
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
DELETE DATA { <http://www.founder/102> <http://www.founder.20.link:52> <http://www.founder/106> . }
|
|
@ -0,0 +1 @@
|
||||||
|
select ?subject ?predict ?object WHERE { ?subject <http://www.founder.20.link:52> ?object; ?predict ?object . }
|
|
@ -0,0 +1 @@
|
||||||
|
DELETE WHERE { <http://www.founder/101> ?predict ?object . }
|
|
@ -0,0 +1 @@
|
||||||
|
select ?predict where {<http://www.founder/102> ?predict <http://www.founder/73> .}
|
|
@ -0,0 +1,5 @@
|
||||||
|
select ?subject ?predict ?object where
|
||||||
|
{
|
||||||
|
<http://www.founder/102> <http://www.founder.20.link:52> ?object.
|
||||||
|
?subject ?predict ?object.
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
select ?subject ?predict ?object where {?subject <http://www.founder.20.link:52> <http://www.founder/106>; ?predict ?object . }
|
|
@ -0,0 +1 @@
|
||||||
|
DELETE WHERE { ?subject <http://www.founder.20.link:52> ?objcet. }
|
Loading…
Reference in New Issue