refactor: add VList for IVTree

also, lower the copy cost in KVstore

by zengli: all changes are confined to KVstore, using new/delete everywhere instead of malloc/free
This commit is contained in:
bookug 2017-04-01 16:03:05 +08:00
parent ceff3544ae
commit 80080d1bca
22 changed files with 317 additions and 82 deletions

View File

@ -800,6 +800,7 @@ Database::build(const string& _rdf_file)
//sync(); //sync();
//cout << "sync vstree" << endl; //cout << "sync vstree" << endl;
//TODO: use fopen w+ to remove signature.binary file
//string cmd = "rm -rf " + _entry_file; //string cmd = "rm -rf " + _entry_file;
//system(cmd.c_str()); //system(cmd.c_str());
//cout << "signature file removed" << endl; //cout << "signature file removed" << endl;

View File

@ -419,7 +419,8 @@ ISStorage::readBstr(Bstr* _bp, unsigned* _next)
fread(&len, sizeof(unsigned), 1, this->treefp); fread(&len, sizeof(unsigned), 1, this->treefp);
this->ReadAlign(_next); this->ReadAlign(_next);
//this->request(len); //this->request(len);
char* s = (char*)malloc(len); //char* s = (char*)malloc(len);
char* s = new char[len];
_bp->setLen(len); _bp->setLen(len);
for (i = 0; i + 4 < len; i += 4) for (i = 0; i + 4 < len; i += 4)
{ {

View File

@ -237,7 +237,11 @@ IVTree::insert(unsigned _key, char* _str, unsigned _len)
p->addKey(_key, i); p->addKey(_key, i);
p->addValue(this->value_list, i, _str, _len, true); p->addValue(this->value_list, i, _str, _len, true);
p->addNum(); p->addNum();
request += _len; //NOTICE: if this is a vlist, then it will be freed, and should not be included in the request memory
if(!VList::isLongList(_len))
{
request += _len;
}
//request += val->getLen(); //request += val->getLen();
p->setDirty(); p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true); this->TSM->updateHeap(p, p->getRank(), true);
@ -272,13 +276,20 @@ IVTree::modify(unsigned _key, char* _str, unsigned _len)
//NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr //NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr
unsigned len = ret->getValue(store)->getLen(); unsigned len = ret->getValue(store)->getLen();
if(ret->getValue(store)->isBstrLongList())
{
len = 0;
}
ret->setValue(this->value_list, store, _str, _len, true); ret->setValue(this->value_list, store, _str, _len, true);
//ret->setValue(val, store, true); //ret->setValue(val, store, true);
//cout<<"value reset"<<endl; //cout<<"value reset"<<endl;
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl; //cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
//request += (val->getLen() - len); //request += (val->getLen() - len);
this->request = _len; if(!VList::isLongList(_len))
{
this->request += _len;
}
//this->request = val->getLen(); //this->request = val->getLen();
this->request -= len; this->request -= len;
ret->setDirty(); ret->setDirty();
@ -417,7 +428,10 @@ IVTree::remove(unsigned _key)
//WARN+NOTICE: a check is required here, because the key to remove may not exist //WARN+NOTICE: a check is required here, because the key to remove may not exist
if (i != (int)p->getNum()) if (i != (int)p->getNum())
{ {
request -= p->getValue(i)->getLen(); if(!p->getValue(i)->isBstrLongList())
{
request -= p->getValue(i)->getLen();
}
p->subKey(i); //to release p->subKey(i); //to release
p->subValue(this->value_list, i, true); //to release p->subValue(this->value_list, i, true); //to release
p->subNum(); p->subNum();
@ -605,6 +619,8 @@ IVTree::release(IVNode* _np) const
IVTree::~IVTree() IVTree::~IVTree()
{ {
delete this->value_list;
delete this->stream; //maybe NULL delete this->stream; //maybe NULL
delete TSM; delete TSM;
#ifdef DEBUG_KVSTORE #ifdef DEBUG_KVSTORE

View File

@ -126,6 +126,9 @@ IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) con
//read long list //read long list
if(this->values[_index].isBstrLongList()) if(this->values[_index].isBstrLongList())
{ {
#ifdef DEBUG_VLIST
cout<<"this is a vlist in get()"<<endl;
#endif
unsigned block_num = this->values[_index].getLen(); unsigned block_num = this->values[_index].getLen();
_vlist->readValue(block_num, _str, _len); _vlist->readValue(block_num, _str, _len);
} }
@ -150,6 +153,9 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
if(this->values[_index].isBstrLongList()) if(this->values[_index].isBstrLongList())
{ {
#ifdef DEBUG_VLIST
cout<<"this is a vlist in set()"<<endl;
#endif
unsigned block_num = this->values[_index].getLen(); unsigned block_num = this->values[_index].getLen();
_vlist->removeValue(block_num); _vlist->removeValue(block_num);
} }
@ -173,6 +179,8 @@ IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
unsigned block_num = _vlist->writeValue(_str, _len); unsigned block_num = _vlist->writeValue(_str, _len);
this->values[_index].setStr(NULL); this->values[_index].setStr(NULL);
this->values[_index].setLen(block_num); this->values[_index].setLen(block_num);
//NOTICE: we need to free the long list value
delete[] _str;
} }
else else
{ {
@ -203,9 +211,17 @@ IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool
if(VList::isLongList(_len)) if(VList::isLongList(_len))
{ {
#ifdef DEBUG_VLIST
cout<<"this is a vlist in add()"<<endl;
#endif
unsigned block_num = _vlist->writeValue(_str, _len); unsigned block_num = _vlist->writeValue(_str, _len);
this->values[_index].setStr(NULL); this->values[_index].setStr(NULL);
this->values[_index].setLen(block_num); this->values[_index].setLen(block_num);
//NOTICE: we need to free the long list value
delete[] _str;
#ifdef DEBUG_VLIST
//cout<<"to check vlist: "<<this->values[_index].getLen()<<endl;
#endif
} }
else else
{ {

View File

@ -444,6 +444,9 @@ IVStorage::readBstr(Bstr* _bp, unsigned* _next)
{ {
unsigned addr = 0; unsigned addr = 0;
fread(&addr, sizeof(unsigned), 1, this->treefp); fread(&addr, sizeof(unsigned), 1, this->treefp);
#ifdef DEBUG_VLIST
cout<<"read a vlist in IVStorage - addr: "<<addr<<endl;
#endif
_bp->setLen(addr); _bp->setLen(addr);
_bp->setStr(NULL); _bp->setStr(NULL);
this->ReadAlign(_next); this->ReadAlign(_next);
@ -489,6 +492,9 @@ IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
this->WriteAlign(_curnum, _SpecialBlock); this->WriteAlign(_curnum, _SpecialBlock);
//then this is the real block num //then this is the real block num
fwrite(&len, sizeof(unsigned), 1, treefp); fwrite(&len, sizeof(unsigned), 1, treefp);
#ifdef DEBUG_VLIST
cout<<"to write a vlist in IVStorage::writeBstr() - blocknum: "<<len<<endl;
#endif
this->WriteAlign(_curnum, _SpecialBlock); this->WriteAlign(_curnum, _SpecialBlock);
return true; return true;
} }

View File

@ -12,7 +12,8 @@ using namespace std;
//sets store_path as the root dir of this KVstore //sets store_path as the root dir of this KVstore
//initial all Tree pointers as NULL //initial all Tree pointers as NULL
KVstore::KVstore(string _store_path) { KVstore::KVstore(string _store_path)
{
this->store_path = _store_path; this->store_path = _store_path;
this->entity2id = NULL; this->entity2id = NULL;
@ -30,14 +31,17 @@ KVstore::KVstore(string _store_path) {
} }
//Release all the memory used in this KVstore before destruction //Release all the memory used in this KVstore before destruction
KVstore::~KVstore() { KVstore::~KVstore()
{
this->flush(); this->flush();
this->release(); this->release();
} }
//Flush all modified parts into the disk, which will not release any memory //Flush all modified parts into the disk, which will not release any memory
//Does nothing to null tree pointers or parts that have not been modified //Does nothing to null tree pointers or parts that have not been modified
void KVstore::flush() { void
KVstore::flush()
{
this->flush(this->entity2id); this->flush(this->entity2id);
this->flush(this->id2entity); this->flush(this->id2entity);
@ -52,7 +56,9 @@ void KVstore::flush() {
this->flush(this->objID2values); this->flush(this->objID2values);
} }
void KVstore::release() { void
KVstore::release()
{
delete this->entity2id; delete this->entity2id;
this->entity2id = NULL; this->entity2id = NULL;
delete this->id2entity; delete this->id2entity;
@ -76,7 +82,9 @@ void KVstore::release() {
this->objID2values = NULL; this->objID2values = NULL;
} }
void KVstore::open() { void
KVstore::open()
{
cout << "open KVstore" << endl; cout << "open KVstore" << endl;
this->open_entity2id(KVstore::READ_WRITE_MODE); this->open_entity2id(KVstore::READ_WRITE_MODE);
@ -93,102 +101,192 @@ void KVstore::open() {
this->open_preID2values(KVstore::READ_WRITE_MODE); this->open_preID2values(KVstore::READ_WRITE_MODE);
} }
int KVstore::getEntityDegree(int _entity_id) const { int
KVstore::getEntityDegree(int _entity_id) const
{
return this->getEntityInDegree(_entity_id) + this->getEntityOutDegree(_entity_id); return this->getEntityInDegree(_entity_id) + this->getEntityOutDegree(_entity_id);
} }
int KVstore::getEntityInDegree(int _entity_id) const { int
KVstore::getEntityInDegree(int _entity_id) const
{
//cout << "In getEntityInDegree " << _entity_id << endl; //cout << "In getEntityInDegree " << _entity_id << endl;
unsigned* _tmp = NULL; unsigned* _tmp = NULL;
unsigned _len = 0; unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len); bool _get = this->getValueByKey(this->objID2values, _entity_id, (char*&)_tmp, _len);
if (!_get) {
return 0; int ret = 0;
if (_get)
{
ret = _tmp[0];
} }
return _tmp[0];
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return ret;
} }
int KVstore::getEntityOutDegree(int _entity_id) const { int
KVstore::getEntityOutDegree(int _entity_id) const
{
//cout << "In getEntityOutDegree " << _entity_id << endl; //cout << "In getEntityOutDegree " << _entity_id << endl;
unsigned* _tmp = NULL; unsigned* _tmp = NULL;
unsigned _len = 0; unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len); bool _get = this->getValueByKey(this->subID2values, _entity_id, (char*&)_tmp, _len);
if (!_get) {
return 0; int ret = 0;
if (_get)
{
ret = _tmp[0];
} }
return _tmp[0];
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return ret;
} }
int KVstore::getLiteralDegree(int _literal_id) const { int
KVstore::getLiteralDegree(int _literal_id) const
{
//cout << "In getLiteralDegree " << _literal_id << endl; //cout << "In getLiteralDegree " << _literal_id << endl;
unsigned* _tmp = NULL; unsigned* _tmp = NULL;
unsigned _len = 0; unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len); bool _get = this->getValueByKey(this->objID2values, _literal_id, (char*&)_tmp, _len);
if (!_get) {
return 0; int ret = 0;
if (_get)
{
ret = _tmp[0];
} }
return _tmp[0];
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return ret;
} }
int KVstore::getPredicateDegree(int _predicate_id) const { int
KVstore::getPredicateDegree(int _predicate_id) const
{
//cout << "In getPredicate Degree " << _predicate_id << endl; //cout << "In getPredicate Degree " << _predicate_id << endl;
unsigned* _tmp = NULL; unsigned* _tmp = NULL;
unsigned _len = 0; unsigned _len = 0;
bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len); bool _get = this->getValueByKey(this->preID2values, _predicate_id, (char*&)_tmp, _len);
if (!_get) {
return 0; int ret = 0;
if (_get)
{
ret = _tmp[0];
} }
return _tmp[0];
//if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
}
return ret;
} }
int KVstore::getSubjectPredicateDegree(int _subid, int _preid) const { int
KVstore::getSubjectPredicateDegree(int _subid, int _preid) const
{
//cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl; //cout << "In getSubjectPredicateDegree " << _subid << ' ' << _preid << endl;
//TODO: use unsigned //TODO: use unsigned
int* _tmp = NULL; int* _tmp = NULL;
unsigned _len = 0; unsigned _len = 0;
bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len); bool _get = this->getValueByKey(this->subID2values, _subid, (char*&)_tmp, _len);
if (!_get) {
return 0; int ret = 0;
if(_get)
{
int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2);
if (_result != -1)
{
int _offset = _tmp[4 + 2 * _result];
int _offset_next;
if (_result == _tmp[1] - 1)
{
_offset_next = 3 + 2 * _tmp[1] + _tmp[0];
}
else
{
_offset_next = _tmp[6 + 2 * _result];
}
ret = _offset_next - _offset;
}
} }
int _result = KVstore::binarySearch(_preid, _tmp + 3, _tmp[1], 2);
if (_result == -1) { //if this is a long list, then we should remove itself after copying
return 0; //otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
} }
int _offset = _tmp[4 + 2 * _result];
int _offset_next; return ret;
if (_result == _tmp[1] - 1) {
_offset_next = 3 + 2 * _tmp[1] + _tmp[0];
}
else {
_offset_next = _tmp[6 + 2 * _result];
}
return _offset_next - _offset;
} }
int KVstore::getObjectPredicateDegree(int _objid, int _preid) const { int
KVstore::getObjectPredicateDegree(int _objid, int _preid) const
{
//cout << "In getObjectPredicateDegree " << _objid << _preid << endl; //cout << "In getObjectPredicateDegree " << _objid << _preid << endl;
//TODO: use unsigned //TODO: use unsigned
int* _tmp = NULL; int* _tmp = NULL;
unsigned _len = 0; unsigned _len = 0;
bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len); bool _get = this->getValueByKey(this->objID2values, _objid, (char*&)_tmp, _len);
if (!_get) {
return 0; int ret = 0;
if (_get)
{
int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2);
if (_result != -1)
{
int _offset = _tmp[3 + 2 * _result];
int _offset_next;
if (_result == _tmp[1] - 1)
{
_offset_next = 2 + 2 * _tmp[1] + _tmp[0];
}
else
{
_offset_next = _tmp[5 + 2 * _result];
}
ret = _offset_next - _offset;
}
} }
int _result = KVstore::binarySearch(_preid, _tmp + 2, _tmp[1], 2);
if (_result == -1) { //if this is a long list, then we should remove itself after copying
return 0; //otherwise, we should not free the list memory
if(VList::isLongList(_len))
{
delete[] _tmp;
//_tmp = NULL;
} }
int _offset = _tmp[3 + 2 * _result];
int _offset_next; return ret;
if (_result == _tmp[1] - 1) {
_offset_next = 2 + 2 * _tmp[1] + _tmp[0];
}
else {
_offset_next = _tmp[5 + 2 * _result];
}
return _offset_next - _offset;
} }
bool KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id) { bool KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id) {

View File

@ -419,7 +419,8 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next)
fread(&len, sizeof(unsigned), 1, this->treefp); fread(&len, sizeof(unsigned), 1, this->treefp);
this->ReadAlign(_next); this->ReadAlign(_next);
//this->request(len); //this->request(len);
char* s = (char*)malloc(len); //char* s = (char*)malloc(len);
char* s = new char[len];
_bp->setLen(len); _bp->setLen(len);
for (i = 0; i + 4 < len; i += 4) for (i = 0; i + 4 < len; i += 4)
{ {

View File

@ -89,6 +89,7 @@ http://blog.csdn.net/infoworld/article/details/8670951
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集就像jena和virtuoso一样慢不要紧 最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集就像jena和virtuoso一样慢不要紧
type分支中query过程可能还有问题需要修改Query/里面的类型另外stringindex中也要修改分界线已经是20亿且非法不再是-1 type分支中query过程可能还有问题需要修改Query/里面的类型另外stringindex中也要修改分界线已经是20亿且非法不再是-1
remove signature.binary, 合并两个分支type value
vstree在build和query时可以用不同大小的缓存来加速build过程 vstree在build和query时可以用不同大小的缓存来加速build过程
--- ---
UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long空间开销一下子就上升了一倍 UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long空间开销一下子就上升了一倍
@ -469,6 +470,8 @@ build db error if triple num > 500M
# BETTER # BETTER
#### 添加数据访问层,数据范式和生成数据访问的源码
#### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询返回空值 #### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询返回空值
#### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?) #### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?)

View File

@ -28,7 +28,9 @@ Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy)
//return; //return;
//} //}
this->str = (char*)malloc(_len); //NOTICE: we decide to use new/delete in global area
//this->str = (char*)malloc(_len);
this->str = new char[_len];
memcpy(this->str, _str, sizeof(char) * _len); memcpy(this->str, _str, sizeof(char) * _len);
//this->str[_len]='\0'; //this->str[_len]='\0';
} }
@ -121,12 +123,14 @@ Bstr::operator != (const Bstr& _bstr)
unsigned unsigned
Bstr::getLen() const Bstr::getLen() const
{ {
//WARN: we should not include overly complicated logic here!!!!
//NOTICE: this is for VList //NOTICE: this is for VList
if(this->isBstrLongList()) //if(this->isBstrLongList())
//if(this->str == NULL) ////if(this->str == NULL)
{ //{
return 0; //return 0;
} //}
return length; return length;
} }
@ -158,15 +162,18 @@ Bstr::copy(const Bstr* _bp)
this->length = _bp->getLen(); this->length = _bp->getLen();
//DEBUG!!! //DEBUG!!!
//cerr<<"bstr length: "<<this->length<<endl; //cerr<<"bstr length: "<<this->length<<endl;
this->str = (char*)malloc(this->length);
memcpy(this->str, _bp->getStr(), this->length); //this->str = (char*)malloc(this->length);
this->str = new char[this->length];
memcpy(this->str, _bp->getStr(), sizeof(char) * this->length);
} }
void void
Bstr::copy(const char* _str, unsigned _len) Bstr::copy(const char* _str, unsigned _len)
{ {
this->length = _len; this->length = _len;
this->str = (char*)malloc(this->length); //this->str = (char*)malloc(this->length);
this->str = new char[this->length];
memcpy(this->str, _str, this->length); memcpy(this->str, _str, this->length);
} }
@ -180,7 +187,8 @@ Bstr::clear()
void void
Bstr::release() Bstr::release()
{ {
free(this->str); //ok to be null, do nothing //free(this->str); //ok to be null, do nothing
delete[] this->str;
clear(); clear();
} }

View File

@ -58,7 +58,8 @@ Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rown
this->record_size = new unsigned[this->colnum]; this->record_size = new unsigned[this->colnum];
for(unsigned i = 0; i < this->colnum; ++i) for(unsigned i = 0; i < this->colnum; ++i)
{ {
this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE)); char* tmptr = new char[Util::TRANSFER_SIZE];
this->record[i].setStr(tmptr);
this->record_size[i] = Util::TRANSFER_SIZE; this->record_size[i] = Util::TRANSFER_SIZE;
} }
@ -148,7 +149,8 @@ Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx)
if(length + 1 > this->record_size[_idx]) if(length + 1 > this->record_size[_idx])
{ {
this->record[_idx].release(); this->record[_idx].release();
this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char))); char* tmptr = new char[length+1];
this->record[_idx].setStr(tmptr);
this->record_size[_idx] = length + 1; //one more byte: convenient to add \0 this->record_size[_idx] = length + 1; //one more byte: convenient to add \0
} }
@ -187,7 +189,8 @@ Stream::outputCache()
{ {
unsigned len; unsigned len;
fread(&len, sizeof(unsigned), 1, this->tempfp); fread(&len, sizeof(unsigned), 1, this->tempfp);
char* p = (char*)malloc(len * sizeof(char)); //char* p = (char*)malloc(len * sizeof(char));
char* p = new char[len];
fread(p, sizeof(char), len, this->tempfp); fread(p, sizeof(char), len, this->tempfp);
bp[i].setLen(len); bp[i].setLen(len);
bp[i].setStr(p); bp[i].setStr(p);
@ -320,13 +323,16 @@ Stream::read()
//FILE* fp = (FILE*)(this->ans); //FILE* fp = (FILE*)(this->ans);
for(unsigned i = 0; i < this->colnum; ++i) for(unsigned i = 0; i < this->colnum; ++i)
{ {
//BETTER:alloca and reuse the space in Bstr? //BETTER:alloc and reuse the space in Bstr?
unsigned len; unsigned len;
fread(&len, sizeof(unsigned), 1, this->ansDisk); fread(&len, sizeof(unsigned), 1, this->ansDisk);
char* s = (char*)calloc(len + 1, sizeof(char)); //char* s = (char*)calloc(len + 1, sizeof(char));
char* s = new char[len+1];
fread(s, sizeof(char), len, this->ansDisk); fread(s, sizeof(char), len, this->ansDisk);
s[len] = '\0';
this->copyToRecord(s, len, i); this->copyToRecord(s, len, i);
free(s); //free(s);
delete[] s;
} }
} }
this->xpos++; this->xpos++;
@ -420,7 +426,9 @@ Stream::mergeSort()
#endif #endif
break; break;
} }
s = (char*)malloc(sizeof(char) * len);
//s = (char*)malloc(sizeof(char) * len);
s = new char[len];
fread(s, sizeof(char), len, tp); fread(s, sizeof(char), len, tp);
bp[i].setLen(len); bp[i].setLen(len);
bp[i].setStr(s); bp[i].setStr(s);

View File

@ -651,7 +651,14 @@ Util::result_id_str(vector<int*>& _v, int _var_num)
bool bool
Util::dir_exist(const string _dir) Util::dir_exist(const string _dir)
{ {
return (opendir(_dir.c_str()) != NULL); DIR* dirptr = opendir(_dir.c_str());
if(dirptr != NULL)
{
closedir(dirptr);
return true;
}
return false;
} }
bool bool

View File

@ -91,6 +91,7 @@ in the sparql query can point to the same node in data graph)
//#define DEBUG_VSTREE 1 //in Database //#define DEBUG_VSTREE 1 //in Database
//#define DEBUG_LRUCACHE 1 //#define DEBUG_LRUCACHE 1
//#define DEBUG_DATABASE 1 //in Database //#define DEBUG_DATABASE 1 //in Database
//#define DEBUG_VLIST 1
// //
// //
@ -124,6 +125,12 @@ in the sparql query can point to the same node in data graph)
#endif #endif
#endif #endif
#ifdef DEBUG_VLIST
#ifndef DEBUG
#define DEBUG
#endif
#endif
#ifndef DEBUG #ifndef DEBUG
//#define DEBUG //#define DEBUG
#endif #endif

View File

@ -180,6 +180,9 @@ VList::WriteAlign(unsigned* _curnum)
bool bool
VList::readValue(unsigned _block_num, char*& _str, unsigned& _len) VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
{ {
#ifdef DEBUG_VLIST
cout<<"to get value of block num: "<<_block_num<<endl;
#endif
fseek(valfp, Address(_block_num), SEEK_SET); fseek(valfp, Address(_block_num), SEEK_SET);
unsigned next; unsigned next;
fread(&next, sizeof(unsigned), 1, valfp); fread(&next, sizeof(unsigned), 1, valfp);
@ -193,8 +196,14 @@ VList::writeValue(const char* _str, unsigned _len)
{ {
unsigned blocknum = this->AllocBlock(); unsigned blocknum = this->AllocBlock();
unsigned curnum = blocknum; unsigned curnum = blocknum;
//NOTICE: here we must skip the next position first
fseek(valfp, Address(curnum) + 4, SEEK_SET);
this->writeBstr(_str, _len, &curnum); this->writeBstr(_str, _len, &curnum);
#ifdef DEBUG_VLIST
cout<<"to write value - block num: "<<blocknum<<endl;
#endif
return blocknum; return blocknum;
} }
@ -222,9 +231,13 @@ VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
//long address; //long address;
unsigned len, i, j; unsigned len, i, j;
fread(&len, sizeof(unsigned), 1, this->valfp); fread(&len, sizeof(unsigned), 1, this->valfp);
#ifdef DEBUG_VLIST
cout<<"the length of value: "<<len<<endl;
#endif
this->ReadAlign(_next); this->ReadAlign(_next);
char* s = (char*)malloc(len); //char* s = (char*)malloc(len);
char* s = new char[len];
_len = len; _len = len;
for (i = 0; i + 4 < len; i += 4) for (i = 0; i + 4 < len; i += 4)
@ -257,6 +270,7 @@ VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
unsigned i, j, len = _len; unsigned i, j, len = _len;
fwrite(&len, sizeof(unsigned), 1, valfp); fwrite(&len, sizeof(unsigned), 1, valfp);
this->WriteAlign(_curnum); this->WriteAlign(_curnum);
//cout<<"to write bstr, length: "<<len<<endl;
//BETTER: first compute how many blocks this needs, then write one block at a time
@ -289,7 +303,39 @@ VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
VList::~VList() VList::~VList()
{ {
BlockInfo* bp = this->freelist; //write the info back
fseek(this->valfp, 0, SEEK_SET);
fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num
fseek(valfp, BLOCK_SIZE, SEEK_SET);
int i, j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
for (i = 0; i < j; ++i)
{
//reset to 1 first
fputc(0xff, valfp);
}
char c;
BlockInfo* bp = this->freelist->next;
while (bp != NULL)
{
//if not-use then set 0, aligned to byte!
#ifdef DEBUG_KVSTORE
if (bp->num > cur_block_num)
{
printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
exit(1);
}
#endif
j = bp->num - 1;
i = j / 8;
j = 7 - j % 8;
fseek(valfp, BLOCK_SIZE + i, SEEK_SET);
c = fgetc(valfp);
fseek(valfp, -1, SEEK_CUR);
fputc(c & ~(1 << j), valfp);
bp = bp->next;
}
bp = this->freelist;
BlockInfo* next; BlockInfo* next;
while (bp != NULL) while (bp != NULL)
{ {

View File

@ -12,9 +12,6 @@
#include "Util.h" #include "Util.h"
#include "Bstr.h" #include "Bstr.h"
//TODO: all use new/delete for Bstr, KVstore and trees, including Stream
//then give a full test, including valgrind
//NOTICE: not keep long list in memory, read each time //NOTICE: not keep long list in memory, read each time
//but when can you free the long list(kvstore should release it after parsing) //but when can you free the long list(kvstore should release it after parsing)
// //
@ -31,15 +28,19 @@
//file1 is tree file, the long list is represented as: 0 real-address //file1 is tree file, the long list is represented as: 0 real-address
//NOTICE: long list is not kept in memory for cache, it is read/updated each time on need! //NOTICE: long list is not kept in memory for cache, it is read/updated each time on need!
//TODO: use fread/fwrite here instead of fgetc/fputc
//including other trees
class VList class VList
{ {
public: public:
//NOTICE:the border is 10^6, but the block is larger, 1M //NOTICE:the border is 10^6, but the block is larger, 1M
static const unsigned LENGTH_BORDER = 1000000; //static const unsigned LENGTH_BORDER = 1000000;
static const unsigned LENGTH_BORDER = 1000;
static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block
static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num
//the two constants below must be exactly divisible by 8 //the two constants below must be exactly divisible by 8
static const unsigned SET_BLOCK_NUM = 1 << 2; //initial blocks num static const unsigned SET_BLOCK_NUM = 1 << 3; //initial blocks num
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;

5
data/bbug0.sql Normal file
View File

@ -0,0 +1,5 @@
INSERT DATA
{
<http://www.founder/102> <http://www.founder.20.link:52> <http://www.founder/106> .
<http://www.founder/102> <http://www.founder> <http://www.founder/73> .
}

1
data/bbug0d.sql Normal file
View File

@ -0,0 +1 @@
DELETE DATA { <http://www.founder/102> <http://www.founder.20.link:52> <http://www.founder/106> . }

1
data/bbug1.sql Normal file
View File

@ -0,0 +1 @@
select ?subject ?predict ?object WHERE { ?subject <http://www.founder.20.link:52> ?object; ?predict ?object . }

1
data/bbug2.sql Normal file
View File

@ -0,0 +1 @@
DELETE WHERE { <http://www.founder/101> ?predict ?object . }

1
data/bbug3.sql Normal file
View File

@ -0,0 +1 @@
select ?predict where {<http://www.founder/102> ?predict <http://www.founder/73> .}

5
data/bbug4.sql Normal file
View File

@ -0,0 +1,5 @@
select ?subject ?predict ?object where
{
<http://www.founder/102> <http://www.founder.20.link:52> ?object.
?subject ?predict ?object.
}

1
data/bbug5.sql Normal file
View File

@ -0,0 +1 @@
select ?subject ?predict ?object where {?subject <http://www.founder.20.link:52> <http://www.founder/106>; ?predict ?object . }

1
data/bbug6.sql Normal file
View File

@ -0,0 +1 @@
DELETE WHERE { ?subject <http://www.founder.20.link:52> ?objcet. }