refactor: adjust vlist and merge signature;

add cache for VList;
use binding in Signature;

by zengli and qinzongyue
This commit is contained in:
bookug 2017-05-19 23:05:38 +08:00
parent c11088ee3e
commit 9903c3d5cb
11 changed files with 239 additions and 59 deletions

View File

@ -989,19 +989,20 @@ Database::calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet
//when as subject //when as subject
unsigned* _polist = NULL; unsigned* _polist = NULL;
(this->kvstore)->getpreIDobjIDlistBysubID(_entity_id, _polist, _list_len); (this->kvstore)->getpreIDobjIDlistBysubID(_entity_id, _polist, _list_len);
Triple _triple; //Triple _triple;
_triple.subject = (this->kvstore)->getEntityByID(_entity_id); //_triple.subject = (this->kvstore)->getEntityByID(_entity_id);
for (unsigned i = 0; i < _list_len; i += 2) for (unsigned i = 0; i < _list_len; i += 2)
{ {
TYPE_PREDICATE_ID _pre_id = _polist[i]; TYPE_PREDICATE_ID _pre_id = _polist[i];
TYPE_ENTITY_LITERAL_ID _obj_id = _polist[i + 1]; TYPE_ENTITY_LITERAL_ID _obj_id = _polist[i + 1];
_triple.object = (this->kvstore)->getEntityByID(_obj_id); //_triple.object = (this->kvstore)->getEntityByID(_obj_id);
if (_triple.object == "") //if (_triple.object == "")
{ //{
_triple.object = (this->kvstore)->getLiteralByID(_obj_id); //_triple.object = (this->kvstore)->getLiteralByID(_obj_id);
} //}
_triple.predicate = (this->kvstore)->getPredicateByID(_pre_id); //_triple.predicate = (this->kvstore)->getPredicateByID(_pre_id);
this->encodeTriple2SubEntityBitSet(_bitset, &_triple); //this->encodeTriple2SubEntityBitSet(_bitset, &_triple);
this->encodeTriple2SubEntityBitSet(_bitset, _pre_id, _obj_id);
} }
delete[] _polist; delete[] _polist;
@ -1009,14 +1010,15 @@ Database::calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet
unsigned* _pslist = NULL; unsigned* _pslist = NULL;
_list_len = 0; _list_len = 0;
(this->kvstore)->getpreIDsubIDlistByobjID(_entity_id, _pslist, _list_len); (this->kvstore)->getpreIDsubIDlistByobjID(_entity_id, _pslist, _list_len);
_triple.object = (this->kvstore)->getEntityByID(_entity_id); //_triple.object = (this->kvstore)->getEntityByID(_entity_id);
for (unsigned i = 0; i < _list_len; i += 2) for (unsigned i = 0; i < _list_len; i += 2)
{ {
TYPE_PREDICATE_ID _pre_id = _pslist[i]; TYPE_PREDICATE_ID _pre_id = _pslist[i];
TYPE_ENTITY_LITERAL_ID _sub_id = _pslist[i + 1]; TYPE_ENTITY_LITERAL_ID _sub_id = _pslist[i + 1];
_triple.subject = (this->kvstore)->getEntityByID(_sub_id); //_triple.subject = (this->kvstore)->getEntityByID(_sub_id);
_triple.predicate = (this->kvstore)->getPredicateByID(_pre_id); //_triple.predicate = (this->kvstore)->getPredicateByID(_pre_id);
this->encodeTriple2ObjEntityBitSet(_bitset, &_triple); //this->encodeTriple2ObjEntityBitSet(_bitset, &_triple);
this->encodeTriple2ObjEntityBitSet(_bitset, _pre_id, _sub_id);
} }
delete[] _pslist; delete[] _pslist;
@ -1054,6 +1056,14 @@ Database::encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, const Triple* _p_t
return true; return true;
} }
//encode one outgoing edge, given as (predicate id, object id), into the subject's EntityBitSet
//the work is forwarded to Signature::encodeEdge2Entity with Util::EDGE_OUT; this always returns true
bool Database::encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id)
{
    Signature::encodeEdge2Entity(_bitset, _pre_id, _obj_id, Util::EDGE_OUT);
    return true;
}
//encode Triple into object SigEntry //encode Triple into object SigEntry
bool bool
Database::encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple) Database::encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple)
@ -1082,6 +1092,14 @@ Database::encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_t
return true; return true;
} }
//encode one incoming edge, given as (predicate id, subject id), into the object's EntityBitSet
//the work is forwarded to Signature::encodeEdge2Entity with Util::EDGE_IN; this always returns true
bool Database::encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _sub_id)
{
    Signature::encodeEdge2Entity(_bitset, _pre_id, _sub_id, Util::EDGE_IN);
    return true;
}
//check whether the relative 3-tuples exist usually, through sp2olist //check whether the relative 3-tuples exist usually, through sp2olist
bool bool
Database::exist_triple(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id) Database::exist_triple(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id)
@ -1961,7 +1979,8 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
EntityBitSet _sub_entity_bitset; EntityBitSet _sub_entity_bitset;
_sub_entity_bitset.reset(); _sub_entity_bitset.reset();
this->encodeTriple2SubEntityBitSet(_sub_entity_bitset, &_triple); //this->encodeTriple2SubEntityBitSet(_sub_entity_bitset, &_triple);
this->encodeTriple2SubEntityBitSet(_sub_entity_bitset, _pre_id, _obj_id);
//if new entity then insert it, else update it. //if new entity then insert it, else update it.
if (_is_new_sub) if (_is_new_sub)
@ -1982,7 +2001,8 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
EntityBitSet _obj_entity_bitset; EntityBitSet _obj_entity_bitset;
_obj_entity_bitset.reset(); _obj_entity_bitset.reset();
this->encodeTriple2ObjEntityBitSet(_obj_entity_bitset, &_triple); //this->encodeTriple2ObjEntityBitSet(_obj_entity_bitset, &_triple);
this->encodeTriple2ObjEntityBitSet(_obj_entity_bitset, _pre_id, _sub_id);
if (_is_new_obj) if (_is_new_obj)
{ {
@ -2016,14 +2036,22 @@ Database::removeTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
TYPE_ENTITY_LITERAL_ID _sub_id = (this->kvstore)->getIDByEntity(_triple.subject); TYPE_ENTITY_LITERAL_ID _sub_id = (this->kvstore)->getIDByEntity(_triple.subject);
TYPE_PREDICATE_ID _pre_id = (this->kvstore)->getIDByPredicate(_triple.predicate); TYPE_PREDICATE_ID _pre_id = (this->kvstore)->getIDByPredicate(_triple.predicate);
TYPE_ENTITY_LITERAL_ID _obj_id = (this->kvstore)->getIDByEntity(_triple.object); TYPE_ENTITY_LITERAL_ID _obj_id = INVALID_ENTITY_LITERAL_ID;
if(_triple.isObjEntity())
//if (_obj_id == -1) {
if (_obj_id == INVALID_ENTITY_LITERAL_ID) _obj_id = (this->kvstore)->getIDByEntity(_triple.object);
}
else
{ {
_obj_id = (this->kvstore)->getIDByLiteral(_triple.object); _obj_id = (this->kvstore)->getIDByLiteral(_triple.object);
} }
//if (_obj_id == -1)
//if (_obj_id == INVALID_ENTITY_LITERAL_ID)
//{
//_obj_id = (this->kvstore)->getIDByLiteral(_triple.object);
//}
//if (_sub_id == -1 || _pre_id == -1 || _obj_id == -1) //if (_sub_id == -1 || _pre_id == -1 || _obj_id == -1)
if (_sub_id == INVALID_ENTITY_LITERAL_ID || _pre_id == INVALID_PREDICATE_ID || _obj_id == INVALID_ENTITY_LITERAL_ID) if (_sub_id == INVALID_ENTITY_LITERAL_ID || _pre_id == INVALID_PREDICATE_ID || _obj_id == INVALID_ENTITY_LITERAL_ID)
{ {

View File

@ -174,8 +174,11 @@ private:
//encode Triple into Subject EntityBitSet //encode Triple into Subject EntityBitSet
bool encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple); bool encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
//NOTICE: the encodeTriple overloads taking Triple* are invalid now (they do not encode the linkage of neighbor-predicate)
bool encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
//encode Triple into Object EntityBitSet //encode Triple into Object EntityBitSet
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple); bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _sub_id);
bool calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet & _bitset); bool calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet & _bitset);

View File

@ -123,7 +123,7 @@ KVstore::getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -148,7 +148,7 @@ KVstore::getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -173,7 +173,7 @@ KVstore::getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -198,7 +198,7 @@ KVstore::getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -238,7 +238,7 @@ KVstore::getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -278,7 +278,7 @@ KVstore::getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -1570,7 +1570,7 @@ KVstore::getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlis
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -1608,7 +1608,7 @@ KVstore::getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlis
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -1658,7 +1658,7 @@ KVstore::getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -1706,7 +1706,7 @@ KVstore::getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _pre
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -1846,7 +1846,7 @@ KVstore::getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlis
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -1878,7 +1878,7 @@ KVstore::getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlis
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -1922,7 +1922,7 @@ KVstore::getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -1963,7 +1963,7 @@ KVstore::getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _pre
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -2083,7 +2083,7 @@ KVstore::getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, un
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -2114,7 +2114,7 @@ KVstore::getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, un
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;
@ -2154,7 +2154,7 @@ KVstore::getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_ob
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
#ifdef DEBUG_KVSTORE #ifdef DEBUG_KVSTORE
cout<<"this is a vlist"<<endl; cout<<"this is a vlist"<<endl;
@ -2228,7 +2228,7 @@ KVstore::getpreIDlistBysubIDobjID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_ENTITY_LIT
//if this is a long list, then we should remove itself after copying //if this is a long list, then we should remove itself after copying
//otherwise, we should not free the list memory //otherwise, we should not free the list memory
if(VList::isLongList(_len)) if(VList::listNeedDelete(_len))
{ {
delete[] _tmp; delete[] _tmp;
//_tmp = NULL; //_tmp = NULL;

View File

@ -8,6 +8,8 @@
将IRC聊天放到gstore文档上freenode #gStore 将IRC聊天放到gstore文档上freenode #gStore
storage中大量使用long类型文件大小也可能达到64G最好在64位机器上运行。 storage中大量使用long类型文件大小也可能达到64G最好在64位机器上运行。
在将unsigned转换为long long或者unsigned long long的时候，要注意补全问题（long类型不知是否类似情况）。
也许可以考虑Bstr中不存length，只存str（内存中还有对齐的开销），但对于特别长的串来说，可能strlen过于耗时。
# 推广 # 推广

View File

@ -350,11 +350,18 @@ BasicQuery::updateSubSig(int _sub_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY
//if(_obj_id >= 0) //if(_obj_id >= 0)
{ {
//Signature::encodeStr2Entity(_obj.c_str(), this->var_sig[_sub_id]); //Signature::encodeStr2Entity(_obj.c_str(), this->var_sig[_sub_id]);
//Signature::encodeStr2Entity(this->var_sig[_sub_var_id], _obj_id, Util::EDGE_OUT);
if(_pre_id >= 0)
{
Signature::encodeEdge2Entity(this->var_sig[_sub_var_id], _pre_id, _obj_id, Util::EDGE_OUT);
}
else
{
Signature::encodeStr2Entity(this->var_sig[_sub_var_id], _obj_id, Util::EDGE_OUT); Signature::encodeStr2Entity(this->var_sig[_sub_var_id], _obj_id, Util::EDGE_OUT);
} }
}
//DEBUG: if type of pre id is changed to usnigned, this will cause error //DEBUG: if type of pre id is changed to usnigned, this will cause error
if(_pre_id >= 0) else if(_pre_id >= 0)
{ {
Signature::encodePredicate2Entity(this->var_sig[_sub_var_id], _pre_id, Util::EDGE_OUT); Signature::encodePredicate2Entity(this->var_sig[_sub_var_id], _pre_id, Util::EDGE_OUT);
} }
@ -380,10 +387,17 @@ BasicQuery::updateObjSig(int _obj_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY
//if(_sub_id >= 0) //if(_sub_id >= 0)
{ {
//cout << "str2entity" << endl; //cout << "str2entity" << endl;
//Signature::encodeStr2Entity(this->var_sig[_obj_var_id], _sub_id, Util::EDGE_IN);
if(_pre_id >= 0)
{
Signature::encodeEdge2Entity(this->var_sig[_obj_var_id], _pre_id, _sub_id, Util::EDGE_IN);
}
else
{
Signature::encodeStr2Entity(this->var_sig[_obj_var_id], _sub_id, Util::EDGE_IN); Signature::encodeStr2Entity(this->var_sig[_obj_var_id], _sub_id, Util::EDGE_IN);
} }
}
if(_pre_id >= 0) else if(_pre_id >= 0)
{ {
//cout << "pre2entity" << endl; //cout << "pre2entity" << endl;
Signature::encodePredicate2Entity(this->var_sig[_obj_var_id], _pre_id, Util::EDGE_IN); Signature::encodePredicate2Entity(this->var_sig[_obj_var_id], _pre_id, Util::EDGE_IN);

View File

@ -44,6 +44,29 @@ Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id
#endif #endif
Signature::encodeStr2Entity(_entity_bs, _neighbor_id, _type); Signature::encodeStr2Entity(_entity_bs, _neighbor_id, _type);
// for(int i = 800; i < _entity_bs.size(); i++){
// _entity_bs.set(i);
// }
//encode predicate and entity together
int x = _pre_id % Signature::STR_AND_EDGE_INTERVAL_BASE;
int y = _neighbor_id % Signature::STR_AND_EDGE_INTERVAL_BASE;
int seed = x + (x + y + 1) * (x + y) / 2;
seed %= Signature::STR_AND_EDGE_INTERVAL_BASE;
seed = seed + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_LENGTH;
if(Util::is_literal_ele(_neighbor_id))
{
seed += (Signature::STR_AND_EDGE_INTERVAL_BASE * 2);
}
else //entity part
{
//entity can be in edge or out edge
if (_type == Util::EDGE_OUT)
{
seed += Signature::STR_AND_EDGE_INTERVAL_BASE;
}
}
_entity_bs.set(seed);
} }
void void
@ -87,9 +110,13 @@ Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _p
//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num; //unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//_entity_bs.set(pos); //_entity_bs.set(pos);
//} //}
unsigned seed = id * 5003 % 49957; //unsigned seed = id * 5003 % 49957;
unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num; //unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
_entity_bs.set(pos); //_entity_bs.set(pos);
long long seed = id * 5003 % 49957;
seed = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
_entity_bs.set(seed);
} }
} }
@ -132,9 +159,11 @@ Signature::encodeStr2Entity(EntityBitSet& _entity_bs, TYPE_ENTITY_LITERAL_ID _ne
//NOTICE: we assume the parameter is always valid(invalid args should not be passed here) //NOTICE: we assume the parameter is always valid(invalid args should not be passed here)
long long id = _neighbor_id; long long id = _neighbor_id;
//NOTICE: in * maybe the unsigned will overflow //NOTICE: in * maybe the unsigned will overflow
long long seed = id * 5003 % 49957; //long long seed = id * 5003 % 49957;
seed = seed % Signature::STR_SIG_INTERVAL_BASE; //seed = seed % Signature::STR_SIG_INTERVAL_BASE;
seed = seed + (id % Signature::STR_SIG_INTERVAL_NUM) * Signature::STR_SIG_INTERVAL_BASE; //seed = seed + (id % Signature::STR_SIG_INTERVAL_NUM) * Signature::STR_SIG_INTERVAL_BASE;
int seed = _neighbor_id % Signature::STR_SIG_LITERAL;
if(Util::is_literal_ele(_neighbor_id)) if(Util::is_literal_ele(_neighbor_id))
{ {

View File

@ -24,10 +24,11 @@ public:
//static HashFunction hash[HashNum]; //static HashFunction hash[HashNum];
//must make sure: ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH //must make sure: ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH
static const unsigned STR_SIG_INTERVAL_NUM = 20; //static const unsigned STR_SIG_INTERVAL_NUM = 20;
//static const unsigned STR_SIG_INTERVAL_NUM = 16; //static const unsigned STR_SIG_INTERVAL_NUM = 16;
static const unsigned STR_SIG_INTERVAL_BASE = 10; //static const unsigned STR_SIG_INTERVAL_BASE = 10;
static const unsigned STR_SIG_LITERAL = STR_SIG_INTERVAL_NUM * STR_SIG_INTERVAL_BASE; //static const unsigned STR_SIG_LITERAL = STR_SIG_INTERVAL_NUM * STR_SIG_INTERVAL_BASE;
static const int STR_SIG_LITERAL = 200;
static const unsigned STR_SIG_ENTITY = STR_SIG_LITERAL * 2; static const unsigned STR_SIG_ENTITY = STR_SIG_LITERAL * 2;
//here we divide as entity neighbors and literal neighbors: ENTITY(in and out), LITERAL(only for out edges) //here we divide as entity neighbors and literal neighbors: ENTITY(in and out), LITERAL(only for out edges)
static const unsigned STR_SIG_LENGTH = STR_SIG_ENTITY + STR_SIG_LITERAL; //600 static const unsigned STR_SIG_LENGTH = STR_SIG_ENTITY + STR_SIG_LITERAL; //600
@ -46,14 +47,20 @@ public:
static const unsigned EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //200 static const unsigned EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //200
//static const unsigned EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE; //static const unsigned EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE;
static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH; //1000 static const unsigned STR_AND_EDGE_INTERVAL_BASE = 48;
static const unsigned STR_AND_EDGE_INTERVAL_NUM = 3;
static const unsigned STR_AND_EDGE_SIG_LENGTH = STR_AND_EDGE_INTERVAL_BASE * STR_AND_EDGE_INTERVAL_NUM;//144
static const unsigned SEPARATE_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH;
static const unsigned ENTITY_SIG_LENGTH = SEPARATE_SIG_LENGTH + STR_AND_EDGE_SIG_LENGTH; //944
//static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH; //1000
//static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH; //static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
//QUERY: the num of bitset must be based on 16, i.e. unsigned short? 1000 is not allowed //QUERY: the num of bitset must be based on 16, i.e. unsigned short? 1000 is not allowed
//but 800, 500 is ok //but 800, 500 is ok
//typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet; //typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet;
typedef std::bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet; typedef std::bitset<ENTITY_SIG_LENGTH> EntityBitSet;
static std::string BitSet2str(const EntityBitSet& _bitset); static std::string BitSet2str(const EntityBitSet& _bitset);

View File

@ -458,6 +458,32 @@ Util::is_entity_ele(TYPE_ENTITY_LITERAL_ID id)
return id < Util::LITERAL_FIRST_ID; return id < Util::LITERAL_FIRST_ID;
} }
//report whether _str starts with '<' (the character this helper uses to recognize an entity)
//an empty string yields false
bool Util::isEntity(const std::string& _str)
{
    return !_str.empty() && _str[0] == '<';
}
//report whether _str starts with '"' (the character this helper uses to recognize a literal)
//an empty string yields false
bool Util::isLiteral(const std::string& _str)
{
    return !_str.empty() && _str[0] == '"';
}
//NOTICE: require that the list is ordered //NOTICE: require that the list is ordered
unsigned unsigned

View File

@ -270,6 +270,8 @@ public:
static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID id); static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID id);
static bool is_entity_ele(TYPE_ENTITY_LITERAL_ID id); static bool is_entity_ele(TYPE_ENTITY_LITERAL_ID id);
static bool isEntity(const std::string& _str);
static bool isLiteral(const std::string& _str);
static unsigned removeDuplicate(unsigned*, unsigned); static unsigned removeDuplicate(unsigned*, unsigned);

View File

@ -16,8 +16,15 @@ VList::isLongList(unsigned _len)
return _len > VList::LENGTH_BORDER; return _len > VList::LENGTH_BORDER;
} }
//tell whether a list of length _len is above CACHE_LIMIT
//readValue() only caches lists for which this is false, so a true result means the
//buffer handed to the caller is not held by the cache
bool VList::listNeedDelete(unsigned _len)
{
    const bool exceeds_cache_limit = VList::CACHE_LIMIT < _len;
    return exceeds_cache_limit;
}
VList::VList() VList::VList()
{ //not use ../logs/, notice the location of program { //not use ../logs/, notice the location of program
vlist_cache_left = CACHE_CAPACITY;
cur_block_num = SET_BLOCK_NUM; cur_block_num = SET_BLOCK_NUM;
filepath = ""; filepath = "";
freelist = NULL; freelist = NULL;
@ -27,6 +34,7 @@ VList::VList()
VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size) VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size)
{ {
vlist_cache_left = CACHE_CAPACITY;
cur_block_num = SET_BLOCK_NUM; //initialize cur_block_num = SET_BLOCK_NUM; //initialize
this->filepath = _filepath; this->filepath = _filepath;
@ -183,16 +191,42 @@ VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
#ifdef DEBUG_VLIST #ifdef DEBUG_VLIST
cout<<"to get value of block num: "<<_block_num<<endl; cout<<"to get value of block num: "<<_block_num<<endl;
#endif #endif
//if in cache, get directly(and this pointer shouldn't be clear in upper layer)
CACHE_ITERATOR it = this->vlist_cache.find(_block_num);
if(it != this->vlist_cache.end())
{
_str = it->second;
_len = strlen(_str);
return true;
}
//if not in cache, read from disk(add a random seek time), the pointer should be clear in upper layer
fseek(valfp, Address(_block_num), SEEK_SET); fseek(valfp, Address(_block_num), SEEK_SET);
unsigned next; unsigned next;
fread(&next, sizeof(unsigned), 1, valfp); fread(&next, sizeof(unsigned), 1, valfp);
this->readBstr(_str, _len, &next); this->readBstr(_str, _len, &next);
//add this to cache if the list is not too long
if(!this->listNeedDelete(_len))
{
//TODO: swap the oldest when overflow detected
//DEBUG: if simple stop adding here, then listNeedDelete will be invalid!
if(this->vlist_cache_left < _len)
{
cout<<"WARN in VList::readValue() -- cache overflow"<<endl;
}
else
{
this->vlist_cache_left -= _len;
}
this->vlist_cache.insert(CACHE_TYPE::value_type(_block_num, _str));
}
return true; return true;
} }
unsigned unsigned
VList::writeValue(const char* _str, unsigned _len) VList::writeValue(char* _str, unsigned _len)
{ {
unsigned blocknum = this->AllocBlock(); unsigned blocknum = this->AllocBlock();
unsigned curnum = blocknum; unsigned curnum = blocknum;
@ -210,6 +244,15 @@ VList::writeValue(const char* _str, unsigned _len)
bool bool
VList::removeValue(unsigned _block_num) VList::removeValue(unsigned _block_num)
{ {
CACHE_ITERATOR it = this->vlist_cache.find(_block_num);
if(it != this->vlist_cache.end())
{
this->vlist_cache_left += strlen(it->second);
delete[] it->second;
this->vlist_cache.erase(it);
}
//this->vlist_cache.erase(_block_num);
unsigned store = _block_num, next; unsigned store = _block_num, next;
fseek(this->valfp, Address(store), SEEK_SET); fseek(this->valfp, Address(store), SEEK_SET);
fread(&next, sizeof(unsigned), 1, valfp); fread(&next, sizeof(unsigned), 1, valfp);
@ -237,7 +280,7 @@ VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
this->ReadAlign(_next); this->ReadAlign(_next);
//char* s = (char*)malloc(len); //char* s = (char*)malloc(len);
char* s = new char[len]; char* s = new char[len+1];
_len = len; _len = len;
for (i = 0; i + 4 < len; i += 4) for (i = 0; i + 4 < len; i += 4)
@ -260,6 +303,7 @@ VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
//(if need to read, then fseek again to find a new value) //(if need to read, then fseek again to find a new value)
//this->ReadAlign(_next); //this->ReadAlign(_next);
s[len] = '\0';
_str = s; _str = s;
return true; return true;
} }
@ -303,6 +347,13 @@ VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
VList::~VList() VList::~VList()
{ {
//clear the cache
for(CACHE_ITERATOR it = this->vlist_cache.begin(); it != this->vlist_cache.end(); ++it)
{
delete[] it->second;
}
this->vlist_cache.clear();
//write the info back //write the info back
fseek(this->valfp, 0, SEEK_SET); fseek(this->valfp, 0, SEEK_SET);
fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num

View File

@ -31,6 +31,10 @@
//TODO: use fread/fwrite here instead of fgetc/fputc //TODO: use fread/fwrite here instead of fgetc/fputc
//including other trees //including other trees
//types used by the VList value cache
//CACHE_VALUE: raw buffer of one cached value; no length is stored alongside it
//NOTE(review): readValue()/removeValue() recover the length with strlen(), which stops at the first zero byte -- confirm cached values never contain '\0'
typedef char* CACHE_VALUE;
//CACHE_TYPE: block number of a value -> its cached buffer
typedef std::map<unsigned, CACHE_VALUE> CACHE_TYPE;
//CACHE_ITERATOR: iterator used to look up and walk the cache
typedef CACHE_TYPE::iterator CACHE_ITERATOR;
class VList class VList
{ {
public: public:
@ -48,6 +52,19 @@ public:
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1; static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
private: private:
//NOTICE: we need to set a buffer for vlist
static const unsigned CACHE_LIMIT = 1 << 26; //not cache too long list
//TODO+BETTER: get this parameter from MemoryManager
static const unsigned CACHE_CAPACITY = UINT_MAX;
//BETTER+TODO: swap the buffer in and out according to access frequency
//Here we simply add and give a warning if the cache overflows, but do not swap/lock
//TODO: swap if full, check if one is locked(being used by some query)
std::map<unsigned, char*> vlist_cache;
unsigned vlist_cache_left; //size of cache left
//QUERY: maybe using an array instead of a map would be better - char*[NULL]
//NOTICE: check if the cache consumes too much memory.
//In addition, for different trees, maybe different sizes of caches should be used, i.e. p2values can have longer lists!
unsigned long long max_buffer_size; unsigned long long max_buffer_size;
unsigned cur_block_num; unsigned cur_block_num;
std::string filepath; std::string filepath;
@ -76,11 +93,12 @@ public:
VList(); VList();
VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence
bool readValue(unsigned _block_num, char*& _str, unsigned& _len); bool readValue(unsigned _block_num, char*& _str, unsigned& _len);
unsigned writeValue(const char* _str, unsigned _len); unsigned writeValue(char* _str, unsigned _len);
bool removeValue(unsigned _block_num); bool removeValue(unsigned _block_num);
~VList(); ~VList();
static bool isLongList(unsigned _len); static bool isLongList(unsigned _len);
static bool listNeedDelete(unsigned _len);
}; };
#endif #endif