refactor: adjust vlist and merge signature;
add cache for VList; use binding in Signature; by zengli and qinzongyue
This commit is contained in:
parent
c11088ee3e
commit
9903c3d5cb
|
@ -989,19 +989,20 @@ Database::calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet
|
|||
//when as subject
|
||||
unsigned* _polist = NULL;
|
||||
(this->kvstore)->getpreIDobjIDlistBysubID(_entity_id, _polist, _list_len);
|
||||
Triple _triple;
|
||||
_triple.subject = (this->kvstore)->getEntityByID(_entity_id);
|
||||
//Triple _triple;
|
||||
//_triple.subject = (this->kvstore)->getEntityByID(_entity_id);
|
||||
for (unsigned i = 0; i < _list_len; i += 2)
|
||||
{
|
||||
TYPE_PREDICATE_ID _pre_id = _polist[i];
|
||||
TYPE_ENTITY_LITERAL_ID _obj_id = _polist[i + 1];
|
||||
_triple.object = (this->kvstore)->getEntityByID(_obj_id);
|
||||
if (_triple.object == "")
|
||||
{
|
||||
_triple.object = (this->kvstore)->getLiteralByID(_obj_id);
|
||||
}
|
||||
_triple.predicate = (this->kvstore)->getPredicateByID(_pre_id);
|
||||
this->encodeTriple2SubEntityBitSet(_bitset, &_triple);
|
||||
//_triple.object = (this->kvstore)->getEntityByID(_obj_id);
|
||||
//if (_triple.object == "")
|
||||
//{
|
||||
//_triple.object = (this->kvstore)->getLiteralByID(_obj_id);
|
||||
//}
|
||||
//_triple.predicate = (this->kvstore)->getPredicateByID(_pre_id);
|
||||
//this->encodeTriple2SubEntityBitSet(_bitset, &_triple);
|
||||
this->encodeTriple2SubEntityBitSet(_bitset, _pre_id, _obj_id);
|
||||
}
|
||||
delete[] _polist;
|
||||
|
||||
|
@ -1009,14 +1010,15 @@ Database::calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet
|
|||
unsigned* _pslist = NULL;
|
||||
_list_len = 0;
|
||||
(this->kvstore)->getpreIDsubIDlistByobjID(_entity_id, _pslist, _list_len);
|
||||
_triple.object = (this->kvstore)->getEntityByID(_entity_id);
|
||||
//_triple.object = (this->kvstore)->getEntityByID(_entity_id);
|
||||
for (unsigned i = 0; i < _list_len; i += 2)
|
||||
{
|
||||
TYPE_PREDICATE_ID _pre_id = _pslist[i];
|
||||
TYPE_ENTITY_LITERAL_ID _sub_id = _pslist[i + 1];
|
||||
_triple.subject = (this->kvstore)->getEntityByID(_sub_id);
|
||||
_triple.predicate = (this->kvstore)->getPredicateByID(_pre_id);
|
||||
this->encodeTriple2ObjEntityBitSet(_bitset, &_triple);
|
||||
//_triple.subject = (this->kvstore)->getEntityByID(_sub_id);
|
||||
//_triple.predicate = (this->kvstore)->getPredicateByID(_pre_id);
|
||||
//this->encodeTriple2ObjEntityBitSet(_bitset, &_triple);
|
||||
this->encodeTriple2ObjEntityBitSet(_bitset, _pre_id, _sub_id);
|
||||
}
|
||||
delete[] _pslist;
|
||||
|
||||
|
@ -1054,6 +1056,14 @@ Database::encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, const Triple* _p_t
|
|||
return true;
|
||||
}
|
||||
|
||||
//Encode one outgoing edge of a subject into its EntityBitSet, working on IDs instead of a Triple*.
//This overload just forwards to Signature::encodeEdge2Entity with Util::EDGE_OUT.
//_bitset: the signature to update
//_pre_id: id of the edge's predicate
//_obj_id: id of the neighbor on the object side (passed on as the neighbor id)
//Always returns true.
bool
|
||||
Database::encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id)
|
||||
{
|
||||
Signature::encodeEdge2Entity(_bitset, _pre_id, _obj_id, Util::EDGE_OUT);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//encode Triple into object SigEntry
|
||||
bool
|
||||
Database::encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple)
|
||||
|
@ -1082,6 +1092,14 @@ Database::encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_t
|
|||
return true;
|
||||
}
|
||||
|
||||
//Encode one incoming edge of an object into its EntityBitSet, working on IDs instead of a Triple*.
//This overload just forwards to Signature::encodeEdge2Entity with Util::EDGE_IN.
//_bitset: the signature to update
//_pre_id: id of the edge's predicate
//_sub_id: id of the neighbor on the subject side (passed on as the neighbor id)
//Always returns true.
bool
|
||||
Database::encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _sub_id)
|
||||
{
|
||||
Signature::encodeEdge2Entity(_bitset, _pre_id, _sub_id, Util::EDGE_IN);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//check whether the relative 3-tuples exist usually, through sp2olist
|
||||
bool
|
||||
Database::exist_triple(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id)
|
||||
|
@ -1961,7 +1979,8 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
|
|||
EntityBitSet _sub_entity_bitset;
|
||||
_sub_entity_bitset.reset();
|
||||
|
||||
this->encodeTriple2SubEntityBitSet(_sub_entity_bitset, &_triple);
|
||||
//this->encodeTriple2SubEntityBitSet(_sub_entity_bitset, &_triple);
|
||||
this->encodeTriple2SubEntityBitSet(_sub_entity_bitset, _pre_id, _obj_id);
|
||||
|
||||
//if new entity then insert it, else update it.
|
||||
if (_is_new_sub)
|
||||
|
@ -1982,7 +2001,8 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
|
|||
EntityBitSet _obj_entity_bitset;
|
||||
_obj_entity_bitset.reset();
|
||||
|
||||
this->encodeTriple2ObjEntityBitSet(_obj_entity_bitset, &_triple);
|
||||
//this->encodeTriple2ObjEntityBitSet(_obj_entity_bitset, &_triple);
|
||||
this->encodeTriple2ObjEntityBitSet(_obj_entity_bitset, _pre_id, _sub_id);
|
||||
|
||||
if (_is_new_obj)
|
||||
{
|
||||
|
@ -2016,14 +2036,22 @@ Database::removeTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
|
|||
|
||||
TYPE_ENTITY_LITERAL_ID _sub_id = (this->kvstore)->getIDByEntity(_triple.subject);
|
||||
TYPE_PREDICATE_ID _pre_id = (this->kvstore)->getIDByPredicate(_triple.predicate);
|
||||
TYPE_ENTITY_LITERAL_ID _obj_id = (this->kvstore)->getIDByEntity(_triple.object);
|
||||
|
||||
//if (_obj_id == -1)
|
||||
if (_obj_id == INVALID_ENTITY_LITERAL_ID)
|
||||
TYPE_ENTITY_LITERAL_ID _obj_id = INVALID_ENTITY_LITERAL_ID;
|
||||
if(_triple.isObjEntity())
|
||||
{
|
||||
_obj_id = (this->kvstore)->getIDByEntity(_triple.object);
|
||||
}
|
||||
else
|
||||
{
|
||||
_obj_id = (this->kvstore)->getIDByLiteral(_triple.object);
|
||||
}
|
||||
|
||||
//if (_obj_id == -1)
|
||||
//if (_obj_id == INVALID_ENTITY_LITERAL_ID)
|
||||
//{
|
||||
//_obj_id = (this->kvstore)->getIDByLiteral(_triple.object);
|
||||
//}
|
||||
|
||||
//if (_sub_id == -1 || _pre_id == -1 || _obj_id == -1)
|
||||
if (_sub_id == INVALID_ENTITY_LITERAL_ID || _pre_id == INVALID_PREDICATE_ID || _obj_id == INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
|
|
|
@ -174,8 +174,11 @@ private:
|
|||
|
||||
//encode Triple into Subject EntityBitSet
|
||||
bool encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
|
||||
//NOTICE: the encodeTriple overload taking Triple* is invalid now (it does not encode the linkage of neighbor-predicate)
|
||||
bool encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
//encode Triple into Object EntityBitSet
|
||||
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
|
||||
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _sub_id);
|
||||
|
||||
bool calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet & _bitset);
|
||||
|
||||
|
|
|
@ -123,7 +123,7 @@ KVstore::getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -148,7 +148,7 @@ KVstore::getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -173,7 +173,7 @@ KVstore::getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -198,7 +198,7 @@ KVstore::getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -238,7 +238,7 @@ KVstore::getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -278,7 +278,7 @@ KVstore::getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -1570,7 +1570,7 @@ KVstore::getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlis
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -1608,7 +1608,7 @@ KVstore::getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlis
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -1658,7 +1658,7 @@ KVstore::getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -1706,7 +1706,7 @@ KVstore::getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _pre
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -1846,7 +1846,7 @@ KVstore::getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlis
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -1878,7 +1878,7 @@ KVstore::getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlis
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -1922,7 +1922,7 @@ KVstore::getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -1963,7 +1963,7 @@ KVstore::getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _pre
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -2083,7 +2083,7 @@ KVstore::getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, un
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -2114,7 +2114,7 @@ KVstore::getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, un
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
@ -2154,7 +2154,7 @@ KVstore::getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_ob
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
cout<<"this is a vlist"<<endl;
|
||||
|
@ -2228,7 +2228,7 @@ KVstore::getpreIDlistBysubIDobjID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_ENTITY_LIT
|
|||
|
||||
//if this is a long list, then we should remove itself after copying
|
||||
//otherwise, we should not free the list memory
|
||||
if(VList::isLongList(_len))
|
||||
if(VList::listNeedDelete(_len))
|
||||
{
|
||||
delete[] _tmp;
|
||||
//_tmp = NULL;
|
||||
|
|
2
NOTES.md
2
NOTES.md
|
@ -8,6 +8,8 @@
|
|||
将IRC聊天放到gstore文档上,freenode #gStore
|
||||
|
||||
storage中大量使用long类型,文件大小也可能达到64G,最好在64位机器上运行。
|
||||
在将unsigned转换为long long或者unsigned long long的时候要注意补全问题,long类型不知是否类似情况
|
||||
也许可以考虑Bstr中不存length只存str(内存中还有对齐的开销),但对于特别长的串来说可能strlen过于耗时
|
||||
|
||||
# 推广
|
||||
|
||||
|
|
|
@ -350,11 +350,18 @@ BasicQuery::updateSubSig(int _sub_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY
|
|||
//if(_obj_id >= 0)
|
||||
{
|
||||
//Signature::encodeStr2Entity(_obj.c_str(), this->var_sig[_sub_id]);
|
||||
//Signature::encodeStr2Entity(this->var_sig[_sub_var_id], _obj_id, Util::EDGE_OUT);
|
||||
if(_pre_id >= 0)
|
||||
{
|
||||
Signature::encodeEdge2Entity(this->var_sig[_sub_var_id], _pre_id, _obj_id, Util::EDGE_OUT);
|
||||
}
|
||||
else
|
||||
{
|
||||
Signature::encodeStr2Entity(this->var_sig[_sub_var_id], _obj_id, Util::EDGE_OUT);
|
||||
}
|
||||
|
||||
}
|
||||
//DEBUG: if the type of pre id is changed to unsigned, this will cause an error
|
||||
if(_pre_id >= 0)
|
||||
else if(_pre_id >= 0)
|
||||
{
|
||||
Signature::encodePredicate2Entity(this->var_sig[_sub_var_id], _pre_id, Util::EDGE_OUT);
|
||||
}
|
||||
|
@ -380,10 +387,17 @@ BasicQuery::updateObjSig(int _obj_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY
|
|||
//if(_sub_id >= 0)
|
||||
{
|
||||
//cout << "str2entity" << endl;
|
||||
//Signature::encodeStr2Entity(this->var_sig[_obj_var_id], _sub_id, Util::EDGE_IN);
|
||||
if(_pre_id >= 0)
|
||||
{
|
||||
Signature::encodeEdge2Entity(this->var_sig[_obj_var_id], _pre_id, _sub_id, Util::EDGE_IN);
|
||||
}
|
||||
else
|
||||
{
|
||||
Signature::encodeStr2Entity(this->var_sig[_obj_var_id], _sub_id, Util::EDGE_IN);
|
||||
}
|
||||
|
||||
if(_pre_id >= 0)
|
||||
}
|
||||
else if(_pre_id >= 0)
|
||||
{
|
||||
//cout << "pre2entity" << endl;
|
||||
Signature::encodePredicate2Entity(this->var_sig[_obj_var_id], _pre_id, Util::EDGE_IN);
|
||||
|
|
|
@ -44,6 +44,29 @@ Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id
|
|||
#endif
|
||||
|
||||
Signature::encodeStr2Entity(_entity_bs, _neighbor_id, _type);
|
||||
|
||||
// for(int i = 800; i < _entity_bs.size(); i++){
|
||||
// _entity_bs.set(i);
|
||||
// }
|
||||
//encode predicate and entity together
|
||||
int x = _pre_id % Signature::STR_AND_EDGE_INTERVAL_BASE;
|
||||
int y = _neighbor_id % Signature::STR_AND_EDGE_INTERVAL_BASE;
|
||||
int seed = x + (x + y + 1) * (x + y) / 2;
|
||||
seed %= Signature::STR_AND_EDGE_INTERVAL_BASE;
|
||||
seed = seed + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_LENGTH;
|
||||
if(Util::is_literal_ele(_neighbor_id))
|
||||
{
|
||||
seed += (Signature::STR_AND_EDGE_INTERVAL_BASE * 2);
|
||||
}
|
||||
else //entity part
|
||||
{
|
||||
//entity can be in edge or out edge
|
||||
if (_type == Util::EDGE_OUT)
|
||||
{
|
||||
seed += Signature::STR_AND_EDGE_INTERVAL_BASE;
|
||||
}
|
||||
}
|
||||
_entity_bs.set(seed);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -87,9 +110,13 @@ Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _p
|
|||
//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
//_entity_bs.set(pos);
|
||||
//}
|
||||
unsigned seed = id * 5003 % 49957;
|
||||
unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
_entity_bs.set(pos);
|
||||
//unsigned seed = id * 5003 % 49957;
|
||||
//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
//_entity_bs.set(pos);
|
||||
|
||||
long long seed = id * 5003 % 49957;
|
||||
seed = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
_entity_bs.set(seed);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -132,9 +159,11 @@ Signature::encodeStr2Entity(EntityBitSet& _entity_bs, TYPE_ENTITY_LITERAL_ID _ne
|
|||
//NOTICE: we assume the parameter is always valid(invalid args should not be passed here)
|
||||
long long id = _neighbor_id;
|
||||
//NOTICE: in * maybe the unsigned will overflow
|
||||
long long seed = id * 5003 % 49957;
|
||||
seed = seed % Signature::STR_SIG_INTERVAL_BASE;
|
||||
seed = seed + (id % Signature::STR_SIG_INTERVAL_NUM) * Signature::STR_SIG_INTERVAL_BASE;
|
||||
//long long seed = id * 5003 % 49957;
|
||||
//seed = seed % Signature::STR_SIG_INTERVAL_BASE;
|
||||
//seed = seed + (id % Signature::STR_SIG_INTERVAL_NUM) * Signature::STR_SIG_INTERVAL_BASE;
|
||||
|
||||
int seed = _neighbor_id % Signature::STR_SIG_LITERAL;
|
||||
|
||||
if(Util::is_literal_ele(_neighbor_id))
|
||||
{
|
||||
|
|
|
@ -24,10 +24,11 @@ public:
|
|||
|
||||
//static HashFunction hash[HashNum];
|
||||
//must make sure: ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH
|
||||
static const unsigned STR_SIG_INTERVAL_NUM = 20;
|
||||
//static const unsigned STR_SIG_INTERVAL_NUM = 20;
|
||||
//static const unsigned STR_SIG_INTERVAL_NUM = 16;
|
||||
static const unsigned STR_SIG_INTERVAL_BASE = 10;
|
||||
static const unsigned STR_SIG_LITERAL = STR_SIG_INTERVAL_NUM * STR_SIG_INTERVAL_BASE;
|
||||
//static const unsigned STR_SIG_INTERVAL_BASE = 10;
|
||||
//static const unsigned STR_SIG_LITERAL = STR_SIG_INTERVAL_NUM * STR_SIG_INTERVAL_BASE;
|
||||
static const int STR_SIG_LITERAL = 200;
|
||||
static const unsigned STR_SIG_ENTITY = STR_SIG_LITERAL * 2;
|
||||
//here we divide as entity neighbors and literal neighbors: ENTITY(in and out), LITERAL(only for out edges)
|
||||
static const unsigned STR_SIG_LENGTH = STR_SIG_ENTITY + STR_SIG_LITERAL; //600
|
||||
|
@ -46,14 +47,20 @@ public:
|
|||
static const unsigned EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //200
|
||||
//static const unsigned EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE;
|
||||
|
||||
static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH; //1000
|
||||
static const unsigned STR_AND_EDGE_INTERVAL_BASE = 48;
|
||||
static const unsigned STR_AND_EDGE_INTERVAL_NUM = 3;
|
||||
static const unsigned STR_AND_EDGE_SIG_LENGTH = STR_AND_EDGE_INTERVAL_BASE * STR_AND_EDGE_INTERVAL_NUM;//144
|
||||
static const unsigned SEPARATE_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH;
|
||||
static const unsigned ENTITY_SIG_LENGTH = SEPARATE_SIG_LENGTH + STR_AND_EDGE_SIG_LENGTH; //944
|
||||
|
||||
//static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH; //1000
|
||||
//static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
|
||||
|
||||
//QUERY: the num of bitset must be based on 16, i.e. unsigned short? 1000 is not allowed
|
||||
//but 800, 500 is ok
|
||||
|
||||
//typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet;
|
||||
typedef std::bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
|
||||
typedef std::bitset<ENTITY_SIG_LENGTH> EntityBitSet;
|
||||
|
||||
static std::string BitSet2str(const EntityBitSet& _bitset);
|
||||
|
||||
|
|
|
@ -458,6 +458,32 @@ Util::is_entity_ele(TYPE_ENTITY_LITERAL_ID id)
|
|||
return id < Util::LITERAL_FIRST_ID;
|
||||
}
|
||||
|
||||
//Tell whether a string is written as an entity, judged only by its first character.
//_str: the string to inspect; nothing beyond _str[0] is looked at or validated
//Returns true if _str[0] is '<', false otherwise.
//NOTE(review): for an empty string, _str[0] on a const std::string reads the null terminator, so the result should be false
bool
|
||||
Util::isEntity(const std::string& _str)
|
||||
{
|
||||
if(_str[0] == '<')
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
//Tell whether a string is written as a literal, judged only by its first character.
//_str: the string to inspect; nothing beyond _str[0] is looked at or validated
//Returns true if _str[0] is a double quote, false otherwise.
//NOTE(review): for an empty string, _str[0] on a const std::string reads the null terminator, so the result should be false
bool
|
||||
Util::isLiteral(const std::string& _str)
|
||||
{
|
||||
if(_str[0] == '"')
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//NOTICE: require that the list is ordered
|
||||
unsigned
|
||||
|
|
|
@ -270,6 +270,8 @@ public:
|
|||
|
||||
static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID id);
|
||||
static bool is_entity_ele(TYPE_ENTITY_LITERAL_ID id);
|
||||
static bool isEntity(const std::string& _str);
|
||||
static bool isLiteral(const std::string& _str);
|
||||
|
||||
static unsigned removeDuplicate(unsigned*, unsigned);
|
||||
|
||||
|
|
|
@ -16,8 +16,15 @@ VList::isLongList(unsigned _len)
|
|||
return _len > VList::LENGTH_BORDER;
|
||||
}
|
||||
|
||||
//Tell whether a value of length _len exceeds VList::CACHE_LIMIT.
//readValue() only puts a value into vlist_cache when this returns false; a value for which
//this returns true is never cached there.
//NOTE(review): callers appear to use the result to decide whether to delete[] the buffer
//they got from readValue() -- confirm against the KVstore call sites
//_len: length of the value
//Returns true when _len > CACHE_LIMIT.
bool
|
||||
VList::listNeedDelete(unsigned _len)
|
||||
{
|
||||
return _len > VList::CACHE_LIMIT;
|
||||
}
|
||||
|
||||
VList::VList()
|
||||
{ //not use ../logs/, notice the location of program
|
||||
vlist_cache_left = CACHE_CAPACITY;
|
||||
cur_block_num = SET_BLOCK_NUM;
|
||||
filepath = "";
|
||||
freelist = NULL;
|
||||
|
@ -27,6 +34,7 @@ VList::VList()
|
|||
|
||||
VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size)
|
||||
{
|
||||
vlist_cache_left = CACHE_CAPACITY;
|
||||
cur_block_num = SET_BLOCK_NUM; //initialize
|
||||
this->filepath = _filepath;
|
||||
|
||||
|
@ -183,16 +191,42 @@ VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
|
|||
#ifdef DEBUG_VLIST
|
||||
cout<<"to get value of block num: "<<_block_num<<endl;
|
||||
#endif
|
||||
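//if in cache, return the cached pointer directly (the upper layer must not free this pointer)
//NOTE(review): strlen() stops at the first zero byte; if cached values are binary ID lists, _len may come out too short -- confirm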
|
||||
CACHE_ITERATOR it = this->vlist_cache.find(_block_num);
|
||||
if(it != this->vlist_cache.end())
|
||||
{
|
||||
_str = it->second;
|
||||
_len = strlen(_str);
|
||||
return true;
|
||||
}
|
||||
|
||||
//if not in cache, read from disk(add a random seek time), the pointer should be clear in upper layer
|
||||
fseek(valfp, Address(_block_num), SEEK_SET);
|
||||
unsigned next;
|
||||
fread(&next, sizeof(unsigned), 1, valfp);
|
||||
this->readBstr(_str, _len, &next);
|
||||
|
||||
//add this to cache if the list is not too long
|
||||
if(!this->listNeedDelete(_len))
|
||||
{
|
||||
//TODO: swap the oldest when overflow detected
|
||||
//DEBUG: if we simply stop adding here, then listNeedDelete will be invalid!
|
||||
if(this->vlist_cache_left < _len)
|
||||
{
|
||||
cout<<"WARN in VList::readValue() -- cache overflow"<<endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->vlist_cache_left -= _len;
|
||||
}
|
||||
this->vlist_cache.insert(CACHE_TYPE::value_type(_block_num, _str));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
VList::writeValue(const char* _str, unsigned _len)
|
||||
VList::writeValue(char* _str, unsigned _len)
|
||||
{
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
unsigned curnum = blocknum;
|
||||
|
@ -210,6 +244,15 @@ VList::writeValue(const char* _str, unsigned _len)
|
|||
bool
|
||||
VList::removeValue(unsigned _block_num)
|
||||
{
|
||||
CACHE_ITERATOR it = this->vlist_cache.find(_block_num);
|
||||
if(it != this->vlist_cache.end())
|
||||
{
|
||||
this->vlist_cache_left += strlen(it->second);
|
||||
delete[] it->second;
|
||||
this->vlist_cache.erase(it);
|
||||
}
|
||||
//this->vlist_cache.erase(_block_num);
|
||||
|
||||
unsigned store = _block_num, next;
|
||||
fseek(this->valfp, Address(store), SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, valfp);
|
||||
|
@ -237,7 +280,7 @@ VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
|
|||
this->ReadAlign(_next);
|
||||
|
||||
//char* s = (char*)malloc(len);
|
||||
char* s = new char[len];
|
||||
char* s = new char[len+1];
|
||||
_len = len;
|
||||
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
|
@ -260,6 +303,7 @@ VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
|
|||
//(if need to read, then fseek again to find a new value)
|
||||
//this->ReadAlign(_next);
|
||||
|
||||
s[len] = '\0';
|
||||
_str = s;
|
||||
return true;
|
||||
}
|
||||
|
@ -303,6 +347,13 @@ VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
|
|||
|
||||
VList::~VList()
|
||||
{
|
||||
//clear the cache
|
||||
for(CACHE_ITERATOR it = this->vlist_cache.begin(); it != this->vlist_cache.end(); ++it)
|
||||
{
|
||||
delete[] it->second;
|
||||
}
|
||||
this->vlist_cache.clear();
|
||||
|
||||
//write the info back
|
||||
fseek(this->valfp, 0, SEEK_SET);
|
||||
fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num
|
||||
|
|
20
Util/VList.h
20
Util/VList.h
|
@ -31,6 +31,10 @@
|
|||
//TODO: use fread/fwrite here instead of fgetc/fputc
|
||||
//including other trees
|
||||
|
||||
typedef char* CACHE_VALUE;
|
||||
typedef std::map<unsigned, CACHE_VALUE> CACHE_TYPE;
|
||||
typedef CACHE_TYPE::iterator CACHE_ITERATOR;
|
||||
|
||||
class VList
|
||||
{
|
||||
public:
|
||||
|
@ -48,6 +52,19 @@ public:
|
|||
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
|
||||
|
||||
private:
|
||||
//NOTICE: we need to set a buffer for vlist
|
||||
static const unsigned CACHE_LIMIT = 1 << 26; //not cache too long list
|
||||
//TODO+BETTER: get this parameter from MemoryManager
|
||||
static const unsigned CACHE_CAPACITY = UINT_MAX;
|
||||
//BETTER+TODO: swap the buffer in and out according to access frequency
|
||||
//Here we simply add and give a warning if the cache overflows, but do not swap/lock
|
||||
//TODO: swap if full, check if one is locked(being used by some query)
|
||||
std::map<unsigned, char*> vlist_cache;
|
||||
unsigned vlist_cache_left; //size of cache left
|
||||
//QUERY: maybe using an array instead of a map would be better - char*[NULL]
|
||||
//NOTICE: check if the cache consumes too much memory.
|
||||
//In addition, for different trees, maybe different size of caches should be used, i.e. p2values can have longer list!
|
||||
|
||||
unsigned long long max_buffer_size;
|
||||
unsigned cur_block_num;
|
||||
std::string filepath;
|
||||
|
@ -76,11 +93,12 @@ public:
|
|||
VList();
|
||||
VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence
|
||||
bool readValue(unsigned _block_num, char*& _str, unsigned& _len);
|
||||
unsigned writeValue(const char* _str, unsigned _len);
|
||||
unsigned writeValue(char* _str, unsigned _len);
|
||||
bool removeValue(unsigned _block_num);
|
||||
~VList();
|
||||
|
||||
static bool isLongList(unsigned _len);
|
||||
static bool listNeedDelete(unsigned _len);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue