feat: merge type branch;
value branch has already been merged into type; by zengli
This commit is contained in:
commit
25e1c3ed10
|
@ -91,3 +91,10 @@ tags
|
|||
*.out
|
||||
*.bak~
|
||||
|
||||
# queries
|
||||
*.sql
|
||||
*.sh
|
||||
|
||||
# modules
|
||||
node_modules
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,13 +28,14 @@
|
|||
class Database
|
||||
{
|
||||
public:
|
||||
static const bool only_sub2idpre2id = true;
|
||||
static const int internal = 100 * 1000;
|
||||
void test();
|
||||
void test_build_sig();
|
||||
void test_join();
|
||||
void printIDlist(int _i, int* _list, int _len, std::string _log);
|
||||
void printPairList(int _i, int* _list, int _len, std::string _log);
|
||||
//static const bool only_sub2idpre2id = true;
|
||||
//static const int internal = 100 * 1000;
|
||||
|
||||
//void test();
|
||||
//void test_build_sig();
|
||||
//void test_join();
|
||||
//void printIDlist(int _i, int* _list, int _len, std::string _log);
|
||||
//void printPairList(int _i, int* _list, int _len, std::string _log);
|
||||
|
||||
//when encode EntitySig, one way uses STRING-hash, the other one uses ID-hash
|
||||
//depending on this->encode_mode
|
||||
|
@ -59,26 +60,30 @@ public:
|
|||
bool insert(std::string _rdf_file);
|
||||
bool remove(std::string _rdf_file);
|
||||
|
||||
/* name of this DB*/
|
||||
//name of this DB
|
||||
string getName();
|
||||
/* root Path of this DB + sixTuplesFile */
|
||||
|
||||
//root Path of this DB + sixTuplesFile
|
||||
string getSixTuplesFile();
|
||||
|
||||
/* root Path of this DB + signatureBFile */
|
||||
//root Path of this DB + signatureBFile
|
||||
string getSignatureBFile();
|
||||
|
||||
/* root Path of this DB + DBInfoFile */
|
||||
//root Path of this DB + DBInfoFile
|
||||
string getDBInfoFile();
|
||||
|
||||
//id tuples file
|
||||
string getIDTuplesFile();
|
||||
|
||||
private:
|
||||
string name;
|
||||
string store_path;
|
||||
bool is_active;
|
||||
int triples_num;
|
||||
int entity_num;
|
||||
int sub_num;
|
||||
int pre_num;
|
||||
int literal_num;
|
||||
TYPE_TRIPLE_NUM triples_num;
|
||||
TYPE_ENTITY_LITERAL_ID entity_num;
|
||||
TYPE_ENTITY_LITERAL_ID sub_num;
|
||||
TYPE_PREDICATE_ID pre_num;
|
||||
TYPE_ENTITY_LITERAL_ID literal_num;
|
||||
|
||||
int encode_mode;
|
||||
|
||||
|
@ -94,51 +99,59 @@ private:
|
|||
|
||||
//six tuples: <sub pre obj sid pid oid>
|
||||
string six_tuples_file;
|
||||
|
||||
//B means binary
|
||||
string signature_binary_file;
|
||||
|
||||
//id tuples file
|
||||
string id_tuples_file;
|
||||
|
||||
//pre2num mapping
|
||||
TNUM* pre2num;
|
||||
TYPE_TRIPLE_NUM* pre2num;
|
||||
//valid: check from minNumPID to maxNumPID
|
||||
int maxNumPID, minNumPID;
|
||||
TYPE_PREDICATE_ID maxNumPID, minNumPID;
|
||||
void setPreMap();
|
||||
|
||||
//TODO: set the buffer capacity as dynamic according to the current memory usage
|
||||
//string buffer
|
||||
Buffer* entity_buffer;
|
||||
//unsigned offset; //maybe let id start from an offset
|
||||
unsigned entity_buffer_size;
|
||||
Buffer* literal_buffer;
|
||||
unsigned literal_buffer_size;
|
||||
|
||||
void setStringBuffer();
|
||||
void warmUp();
|
||||
//BETTER+TODO:add a predicate buffer for ?p query
|
||||
//BETTER:add a predicate buffer for ?p query
|
||||
//However, I think this is not necessary because ?p is rare and the p2xx tree is small enough
|
||||
|
||||
//triple num per group for insert/delete
|
||||
//cannot be too high, otherwise the heap will overflow
|
||||
static const int GROUP_SIZE = 1000;
|
||||
//manage the ID allocate and garbage
|
||||
static const int START_ID_NUM = 0;
|
||||
static const TYPE_ENTITY_LITERAL_ID START_ID_NUM = 0;
|
||||
//static const int START_ID_NUM = 1000;
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
//NOTICE:error if >= LITERAL_FIRST_ID
|
||||
string free_id_file_entity; //the first is limitID, then free id list
|
||||
int limitID_entity; //the current maximum ID num(maybe not used so much)
|
||||
TYPE_ENTITY_LITERAL_ID limitID_entity; //the current maximum ID num(maybe not used so much)
|
||||
BlockInfo* freelist_entity; //free id list, reuse BlockInfo for Storage class
|
||||
int allocEntityID();
|
||||
void freeEntityID(int _id);
|
||||
TYPE_ENTITY_LITERAL_ID allocEntityID();
|
||||
void freeEntityID(TYPE_ENTITY_LITERAL_ID _id);
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
//NOTICE:error if >= 2*LITERAL_FIRST_ID
|
||||
string free_id_file_literal;
|
||||
int limitID_literal;
|
||||
TYPE_ENTITY_LITERAL_ID limitID_literal;
|
||||
BlockInfo* freelist_literal;
|
||||
int allocLiteralID();
|
||||
void freeLiteralID(int _id);
|
||||
TYPE_ENTITY_LITERAL_ID allocLiteralID();
|
||||
void freeLiteralID(TYPE_ENTITY_LITERAL_ID _id);
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
//NOTICE:error if >= 2*LITERAL_FIRST_ID
|
||||
string free_id_file_predicate;
|
||||
int limitID_predicate;
|
||||
TYPE_PREDICATE_ID limitID_predicate;
|
||||
BlockInfo* freelist_predicate;
|
||||
int allocPredicateID();
|
||||
void freePredicateID(int _id);
|
||||
TYPE_PREDICATE_ID allocPredicateID();
|
||||
void freePredicateID(TYPE_PREDICATE_ID _id);
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
void initIDinfo(); //initialize the members
|
||||
void resetIDinfo(); //reset the id info for build
|
||||
|
@ -158,11 +171,11 @@ private:
|
|||
//encode Triple into Object EntityBitSet
|
||||
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
|
||||
|
||||
bool calculateEntityBitSet(int _entity_id, EntityBitSet & _bitset);
|
||||
bool calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet & _bitset);
|
||||
|
||||
//check whether the relative 3-tuples exist
|
||||
//usually, through sp2olist
|
||||
bool exist_triple(int _sub_id, int _pre_id, int _obj_id);
|
||||
bool exist_triple(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
|
||||
//* _rdf_file denotes the path of the RDF file, where stores the rdf data
|
||||
//* there are many steps in this function, each one corresponds to a sub-function
|
||||
|
@ -174,24 +187,25 @@ private:
|
|||
//* 4. build: objID2subIDlist, <objIDpreID>2subIDlist objID2<preIDsubID>list
|
||||
//encodeRDF_new invoke new rdfParser to solve task 1 & 2 in one time scan.
|
||||
bool encodeRDF_new(const string _rdf_file);
|
||||
void build_s2xx(int**);
|
||||
void build_o2xx(int**);
|
||||
void build_p2xx(int**);
|
||||
void readIDTuples(ID_TUPLE*& _p_id_tuples);
|
||||
void build_s2xx(ID_TUPLE*);
|
||||
void build_o2xx(ID_TUPLE*);
|
||||
void build_p2xx(ID_TUPLE*);
|
||||
|
||||
//insert and delete, notice that modify is not needed here
|
||||
//we can read from file or use sparql syntax
|
||||
bool insertTriple(const TripleWithObjType& _triple, vector<int>* _vertices = NULL, vector<int>* _predicates = NULL);
|
||||
bool removeTriple(const TripleWithObjType& _triple, vector<int>* _vertices = NULL, vector<int>* _predicates = NULL);
|
||||
bool insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vertices = NULL, vector<unsigned>* _predicates = NULL);
|
||||
bool removeTriple(const TripleWithObjType& _triple, vector<unsigned>* _vertices = NULL, vector<unsigned>* _predicates = NULL);
|
||||
//NOTICE:one by one is too costly, sort and insert/delete at a time will be better
|
||||
int insert(const TripleWithObjType* _triples, int _triple_num);
|
||||
unsigned insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num);
|
||||
//bool insert(const vector<TripleWithObjType>& _triples, vector<int>& _vertices, vector<int>& _predicates);
|
||||
int remove(const TripleWithObjType* _triples, int _triple_num);
|
||||
unsigned remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num);
|
||||
//bool remove(const vector<TripleWithObjType>& _triples, vector<int>& _vertices, vector<int>& _predicates);
|
||||
|
||||
bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max);
|
||||
bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max);
|
||||
bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file);
|
||||
//bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, TYPE_TRIPLE_NUM _id_tuples_max);
|
||||
|
||||
bool objIDIsEntityID(int _id);
|
||||
bool objIDIsEntityID(TYPE_ENTITY_LITERAL_ID _id);
|
||||
|
||||
//* join on the vector of CandidateList, available after retrieve from the VSTREE
|
||||
//* and store the result in _result_set
|
||||
|
|
|
@ -16,7 +16,7 @@ Join::Join()
|
|||
this->result_list = NULL;
|
||||
}
|
||||
|
||||
Join::Join(KVstore* _kvstore, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal)
|
||||
Join::Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal)
|
||||
{
|
||||
this->kvstore = _kvstore;
|
||||
this->result_list = NULL;
|
||||
|
@ -103,7 +103,8 @@ Join::score_node(int var)
|
|||
continue;
|
||||
}
|
||||
//CHECK:if the pre id is valid (0<=p<limit_predicateID)
|
||||
int pid = this->basic_query->getEdgePreID(var, i);
|
||||
TYPE_PREDICATE_ID pid = this->basic_query->getEdgePreID(var, i);
|
||||
//DEBUG: if TYPE_PREDICATE_ID is changed to unsigned
|
||||
if(pid < 0 || pid >= this->limitID_predicate)
|
||||
{
|
||||
continue;
|
||||
|
@ -123,7 +124,7 @@ Join::score_node(int var)
|
|||
}
|
||||
|
||||
int
|
||||
Join::judge(int _smallest, int _biggest)
|
||||
Join::judge(unsigned _smallest, unsigned _biggest)
|
||||
{
|
||||
return 0; //DEBUG:remove when index_join is ok
|
||||
//BETTER?:use appropriate method according to size and structure
|
||||
|
@ -132,7 +133,8 @@ Join::judge(int _smallest, int _biggest)
|
|||
//BETTER:how to guess the size of can_lists
|
||||
double size = (_smallest + _biggest) / 2.0;
|
||||
double ans = Join::PARAM_DENSE * dense - size / Join::PARAM_SIZE;
|
||||
if (ans > Join::JUDGE_LIMIT)
|
||||
double limit = 1.0 / (double)Join::JUDGE_LIMIT;
|
||||
if (ans > limit)
|
||||
return 0; //multi_join method
|
||||
else
|
||||
return 1; //index_join method
|
||||
|
@ -321,7 +323,9 @@ Join::pre_var_handler()
|
|||
#ifdef DEBUG_JOIN
|
||||
//cout << sub_name << endl << triple.predicate << endl << obj_name << endl;
|
||||
#endif
|
||||
int sub_id = -1, obj_id = -1, var1 = -1, var2 = -1;
|
||||
TYPE_ENTITY_LITERAL_ID sub_id, obj_id;
|
||||
sub_id = obj_id = INVALID_ENTITY_LITERAL_ID;
|
||||
int var1 = -1, var2 = -1;
|
||||
|
||||
if (sub_name[0] != '?')
|
||||
{
|
||||
|
@ -339,7 +343,8 @@ Join::pre_var_handler()
|
|||
if (obj_name[0] != '?')
|
||||
{
|
||||
obj_id = this->kvstore->getIDByEntity(obj_name);
|
||||
if (obj_id == -1)
|
||||
//if (obj_id == -1)
|
||||
if (obj_id == INVALID_ENTITY_LITERAL_ID)
|
||||
obj_id = this->kvstore->getIDByLiteral(obj_name);
|
||||
}
|
||||
else
|
||||
|
@ -355,10 +360,11 @@ Join::pre_var_handler()
|
|||
cout<<"subid: "<<sub_id<<" objid: "<<obj_id<<endl;
|
||||
#endif
|
||||
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
unsigned* id_list = NULL;
|
||||
unsigned id_list_len = 0;
|
||||
//two vars in query
|
||||
if (sub_id == -1 && obj_id == -1)
|
||||
if (sub_id == INVALID_ENTITY_LITERAL_ID && obj_id == INVALID_ENTITY_LITERAL_ID)
|
||||
//if (sub_id == -1 && obj_id == -1)
|
||||
{
|
||||
if (var1 == -1 && var2 == -1)
|
||||
{
|
||||
|
@ -407,15 +413,15 @@ Join::pre_var_handler()
|
|||
}
|
||||
}
|
||||
//two constants in query
|
||||
else if (sub_id != -1 && obj_id != -1)
|
||||
else if (sub_id != INVALID_ENTITY_LITERAL_ID && obj_id != INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
//just use so2p in query graph to find predicates
|
||||
//this->kvstore->getpreIDlistBysubIDobjID(sub_id, obj_id, id_list, id_list_len);
|
||||
int sid = sub_id, oid = obj_id;
|
||||
TYPE_ENTITY_LITERAL_ID sid = sub_id, oid = obj_id;
|
||||
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len, true);
|
||||
}
|
||||
//sub is var while obj is constant
|
||||
else if (sub_id == -1 && obj_id != -1)
|
||||
else if (sub_id == INVALID_ENTITY_LITERAL_ID && obj_id != INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
if (var1 == -1)
|
||||
{
|
||||
|
@ -424,12 +430,12 @@ Join::pre_var_handler()
|
|||
else
|
||||
{
|
||||
this->kvstore->getpreIDlistBysubIDobjID((*it)[this->id2pos[var1]], obj_id, id_list, id_list_len, true);
|
||||
int sid = (*it)[this->id2pos[var1]], oid = obj_id;
|
||||
TYPE_ENTITY_LITERAL_ID sid = (*it)[this->id2pos[var1]], oid = obj_id;
|
||||
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len, true);
|
||||
}
|
||||
}
|
||||
//sub is constant while obj is var
|
||||
else if (sub_id != -1 && obj_id == -1)
|
||||
else if (sub_id != INVALID_ENTITY_LITERAL_ID && obj_id == INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
if (var2 == -1)
|
||||
{
|
||||
|
@ -439,7 +445,7 @@ Join::pre_var_handler()
|
|||
{
|
||||
//NOTICE:no need to add literals here because they are added in add_literal_candidate using s2o
|
||||
//this->kvstore->getpreIDlistBysubIDobjID(sub_id, (*it)[this->id2pos[var2]], id_list, id_list_len);
|
||||
int sid = sub_id, oid = (*it)[this->id2pos[var2]];
|
||||
TYPE_ENTITY_LITERAL_ID sid = sub_id, oid = (*it)[this->id2pos[var2]];
|
||||
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len, true);
|
||||
}
|
||||
}
|
||||
|
@ -465,7 +471,7 @@ Join::pre_var_handler()
|
|||
else
|
||||
{
|
||||
#ifdef DEBUG_JOIN
|
||||
for(int k = 0; k < valid_ans.size(); ++k)
|
||||
for(unsigned k = 0; k < valid_ans.size(); ++k)
|
||||
cout << this->kvstore->getPredicateByID(valid_ans[k])<<" ";
|
||||
cout<<endl;
|
||||
#endif
|
||||
|
@ -478,7 +484,7 @@ Join::pre_var_handler()
|
|||
//
|
||||
//NOTICE: we add all here(select/not) because they maybe needed by generating satellites
|
||||
//we need to copy only the selected ones in copyToResult
|
||||
int size = valid_ans.size();
|
||||
unsigned size = valid_ans.size();
|
||||
|
||||
//BETTER:only add pre vars which are selected or linked with satellite
|
||||
if (size > 0)
|
||||
|
@ -489,7 +495,7 @@ Join::pre_var_handler()
|
|||
//continue;
|
||||
//}
|
||||
it->push_back(valid_ans[0]);
|
||||
int begin = 1;
|
||||
unsigned begin = 1;
|
||||
if (!if_new_start && size > 1)
|
||||
{
|
||||
this->add_new_to_results(it, valid_ans[1]);
|
||||
|
@ -499,7 +505,7 @@ Join::pre_var_handler()
|
|||
this->new_start--;
|
||||
begin = 2;
|
||||
}
|
||||
for (int j = begin; j < size; ++j)
|
||||
for (unsigned j = begin; j < size; ++j)
|
||||
{
|
||||
this->add_new_to_results(it, valid_ans[j]);
|
||||
}
|
||||
|
@ -543,7 +549,7 @@ Join::copyToResult()
|
|||
cout << "core var num: " << core_var_num << " select var num: " << select_var_num << endl;
|
||||
#endif
|
||||
this->record_len = select_var_num + selected_pre_var_num;
|
||||
this->record = new int[this->record_len];
|
||||
this->record = new unsigned[this->record_len];
|
||||
|
||||
for (TableIterator it = this->current_table.begin(); it != this->current_table.end(); ++it)
|
||||
{
|
||||
|
@ -581,7 +587,7 @@ Join::copyToResult()
|
|||
for (i = 0; i < core_var_num; ++i)
|
||||
{
|
||||
int id = this->pos2id[i];
|
||||
int ele = (*it)[i];
|
||||
unsigned ele = (*it)[i];
|
||||
int degree = this->basic_query->getVarDegree(id);
|
||||
for (int j = 0; j < degree; ++j)
|
||||
{
|
||||
|
@ -591,12 +597,12 @@ Join::copyToResult()
|
|||
#ifdef DEBUG_JOIN
|
||||
//cout << "to generate "<<id2<<endl;
|
||||
#endif
|
||||
int* idlist = NULL;
|
||||
int idlist_len = 0;
|
||||
unsigned* idlist = NULL;
|
||||
unsigned idlist_len = 0;
|
||||
int triple_id = this->basic_query->getEdgeID(id, j);
|
||||
Triple triple = this->basic_query->getTriple(triple_id);
|
||||
|
||||
int preid = this->basic_query->getEdgePreID(id, j);
|
||||
TYPE_PREDICATE_ID preid = this->basic_query->getEdgePreID(id, j);
|
||||
if (preid == -2) //?p
|
||||
{
|
||||
string predicate = triple.predicate;
|
||||
|
@ -606,7 +612,7 @@ Join::copyToResult()
|
|||
preid = (*it)[this->id2pos[pre_var_id+this->var_num]];
|
||||
//}
|
||||
}
|
||||
else if (preid == -1)
|
||||
else if (preid == -1) //INVALID_PREDICATE_ID
|
||||
{
|
||||
//ERROR
|
||||
}
|
||||
|
@ -686,17 +692,17 @@ Join::cartesian(int pos, int end)
|
|||
{
|
||||
if (pos == end)
|
||||
{
|
||||
int* new_record = new int[this->record_len];
|
||||
memcpy(new_record, this->record, sizeof(int) * this->record_len);
|
||||
unsigned* new_record = new unsigned[this->record_len];
|
||||
memcpy(new_record, this->record, sizeof(unsigned) * this->record_len);
|
||||
this->result_list->push_back(new_record);
|
||||
return;
|
||||
}
|
||||
|
||||
int size = this->satellites[pos].idlist_len;
|
||||
unsigned size = this->satellites[pos].idlist_len;
|
||||
int id = this->satellites[pos].id;
|
||||
int vpos = this->basic_query->getSelectedVarPosition(id);
|
||||
int* list = this->satellites[pos].idlist;
|
||||
for (int i = 0; i < size; ++i)
|
||||
unsigned* list = this->satellites[pos].idlist;
|
||||
for (unsigned i = 0; i < size; ++i)
|
||||
{
|
||||
this->record[vpos] = list[i];
|
||||
this->cartesian(pos + 1, end);
|
||||
|
@ -740,13 +746,13 @@ Join::toStartJoin()
|
|||
for (int j = 0; j < var_degree; ++j)
|
||||
{
|
||||
//int neighbor_id = this->basic_query->getEdgeNeighborID(var_id, j);
|
||||
int predicate_id = this->basic_query->getEdgePreID(var_id, j);
|
||||
TYPE_PREDICATE_ID predicate_id = this->basic_query->getEdgePreID(var_id, j);
|
||||
int triple_id = this->basic_query->getEdgeID(var_id, j);
|
||||
Triple triple = this->basic_query->getTriple(triple_id);
|
||||
string neighbor_name = triple.subject;
|
||||
IDList this_edge_literal_list;
|
||||
int* object_list = NULL;
|
||||
int object_list_len = 0;
|
||||
unsigned* object_list = NULL;
|
||||
unsigned object_list_len = 0;
|
||||
|
||||
if (predicate_id >= 0)
|
||||
{
|
||||
|
@ -782,9 +788,9 @@ Join::toStartJoin()
|
|||
{
|
||||
cout<<"Special Case: star graph whose pres are all var"<<endl;
|
||||
//get all literals in this db
|
||||
for(int i = 0; i < this->limitID_literal; ++i)
|
||||
for(TYPE_ENTITY_LITERAL_ID i = 0; i < this->limitID_literal; ++i)
|
||||
{
|
||||
int id = i + Util::LITERAL_FIRST_ID;
|
||||
TYPE_ENTITY_LITERAL_ID id = i + Util::LITERAL_FIRST_ID;
|
||||
string literal = this->kvstore->getLiteralByID(id);
|
||||
if(literal == "")
|
||||
{
|
||||
|
@ -815,10 +821,38 @@ Join::join()
|
|||
|
||||
//the smallest candidate list size of the not-satellite vars
|
||||
int id = this->basic_query->getVarID_FirstProcessWhenJoin();
|
||||
int smallest = this->basic_query->getCandidateSize(id);
|
||||
unsigned smallest = 0;
|
||||
if(id >= 0)
|
||||
{
|
||||
smallest = this->basic_query->getCandidateSize(id);
|
||||
}
|
||||
else
|
||||
{
|
||||
cout<<"error in join() - id < 0"<<endl;
|
||||
return false;
|
||||
}
|
||||
if(!this->is_literal_var(id) && smallest == 0)
|
||||
{
|
||||
cout<<"join() - already empty"<<endl;
|
||||
return false; //empty result
|
||||
int biggest = this->basic_query->getVarID_MaxCandidateList();
|
||||
}
|
||||
|
||||
int id_max = this->basic_query->getVarID_MaxCandidateList();
|
||||
unsigned biggest = 0;
|
||||
if(id_max >= 0)
|
||||
{
|
||||
biggest = this->basic_query->getCandidateSize(id_max);
|
||||
}
|
||||
else
|
||||
{
|
||||
cout<<"error in join() - id < 0"<<endl;
|
||||
return false;
|
||||
}
|
||||
if(!this->is_literal_var(id_max) && biggest == 0)
|
||||
{
|
||||
cout<<"join() - already empty"<<endl;
|
||||
return false; //empty result
|
||||
}
|
||||
|
||||
int method = this->judge(smallest, biggest);
|
||||
bool ret = true;
|
||||
|
@ -911,7 +945,7 @@ Join::is_literal_var(int _id)
|
|||
//===================================================================================================
|
||||
|
||||
void
|
||||
Join::add_new_to_results(TableIterator it, int id)
|
||||
Join::add_new_to_results(TableIterator it, unsigned id)
|
||||
{
|
||||
//NTC:already have one more in *it if need to push back
|
||||
RecordType tmp(*it);
|
||||
|
@ -920,7 +954,7 @@ Join::add_new_to_results(TableIterator it, int id)
|
|||
}
|
||||
|
||||
void
|
||||
Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_list, int id_list_len, bool _is_literal)
|
||||
Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal)
|
||||
{
|
||||
if (valid_ans_list == NULL)
|
||||
{
|
||||
|
@ -928,7 +962,7 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_lis
|
|||
//valid_ans_list.unionList(_can_list);
|
||||
if (_is_literal)
|
||||
{
|
||||
int entity_len = 0;
|
||||
unsigned entity_len = 0;
|
||||
while (true)
|
||||
{
|
||||
if (entity_len == id_list_len || Util::is_literal_ele(id_list[entity_len]))
|
||||
|
@ -951,6 +985,11 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_lis
|
|||
}
|
||||
}
|
||||
|
||||
//TODO: multiple lists intersect, how about sort and intersect from small to big?
|
||||
//but this needs to generate all first, I think sorting by pre2num is better!
|
||||
//
|
||||
//TODO: set the entity_literal border in kvstore, and intersect entity part and literal part respectively
|
||||
|
||||
//NOTICE: consider two directions according to table1 size and table2 size
|
||||
//1. -> add ID mapping record for the first linking column, whole(offset, size) zengli
|
||||
//2. <- join using inverted index for each column, offset and size for each column, hulin
|
||||
|
@ -961,7 +1000,7 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_lis
|
|||
//However, the case is really rare in our test(the reason may be that the web graph is always very sparse)
|
||||
//If we add a buffer for this case, will cause worse performance
|
||||
bool
|
||||
Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_size, int _id, bool _is_literal)
|
||||
Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal)
|
||||
{
|
||||
if(_can_list_size == 0 && !_is_literal)
|
||||
{
|
||||
|
@ -1020,14 +1059,14 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
|
|||
#ifdef DEBUG_JOIN
|
||||
cout << "edge exists!" << endl;
|
||||
#endif
|
||||
int ele = *it1;
|
||||
unsigned ele = *it1;
|
||||
bool exist_constant_pre = false;
|
||||
bool s2o_pre_var = false;
|
||||
bool o2s_pre_var = false;
|
||||
for(RecordIterator it2 = edge_index.begin(); it2 != edge_index.end(); ++it2)
|
||||
for(vector<int>::iterator it2 = edge_index.begin(); it2 != edge_index.end(); ++it2)
|
||||
{
|
||||
int edge_type = this->basic_query->getEdgeType(_id, *it2);
|
||||
int pre_id = this->basic_query->getEdgePreID(_id, *it2);
|
||||
TYPE_PREDICATE_ID pre_id = this->basic_query->getEdgePreID(_id, *it2);
|
||||
|
||||
if (pre_id == -2) //predicate var
|
||||
{
|
||||
|
@ -1058,8 +1097,8 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
|
|||
break;
|
||||
}
|
||||
|
||||
int* id_list;
|
||||
int id_list_len;
|
||||
unsigned* id_list;
|
||||
unsigned id_list_len;
|
||||
if (edge_type == Util::EDGE_IN)
|
||||
{
|
||||
#ifdef DEBUG_JOIN
|
||||
|
@ -1113,8 +1152,8 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
|
|||
//all pres are variable, so use s2o or o2s to add
|
||||
if(s2o_pre_var)
|
||||
{
|
||||
int* id_list2;
|
||||
int id_list2_len;
|
||||
unsigned* id_list2;
|
||||
unsigned id_list2_len;
|
||||
this->kvstore->getobjIDlistBysubID(ele, id_list2, id_list2_len, true);
|
||||
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal);
|
||||
delete[] id_list2;
|
||||
|
@ -1126,8 +1165,8 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
|
|||
}
|
||||
if(o2s_pre_var)
|
||||
{
|
||||
int* id_list2;
|
||||
int id_list2_len;
|
||||
unsigned* id_list2;
|
||||
unsigned id_list2_len;
|
||||
this->kvstore->getsubIDlistByobjID(ele, id_list2, id_list2_len, true);
|
||||
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal);
|
||||
delete[] id_list2;
|
||||
|
@ -1145,10 +1184,10 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
|
|||
cout << "this record is matched!!" << endl;
|
||||
#endif
|
||||
found = true;
|
||||
int size = valid_ans_list->size();
|
||||
unsigned size = valid_ans_list->size();
|
||||
|
||||
it0->push_back((*valid_ans_list)[0]);
|
||||
int begin = 1;
|
||||
unsigned begin = 1;
|
||||
if (!if_new_start && size > 1)
|
||||
{
|
||||
this->add_new_to_results(it0, (*valid_ans_list)[1]);
|
||||
|
@ -1159,7 +1198,7 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
|
|||
begin = 2;
|
||||
}
|
||||
|
||||
for (int i = begin; i < size; ++i)
|
||||
for (unsigned i = begin; i < size; ++i)
|
||||
{
|
||||
//WARN+NOTICE:this strategy may cause that duplicates are not together!
|
||||
this->add_new_to_results(it0, (*valid_ans_list)[i]);
|
||||
|
@ -1222,13 +1261,13 @@ Join::multi_join()
|
|||
//this->filterBySatellites(this->start_id);
|
||||
|
||||
IDList& start_table = this->basic_query->getCandidateList(this->start_id);
|
||||
int start_size = this->basic_query->getCandidateSize(this->start_id);
|
||||
unsigned start_size = this->basic_query->getCandidateSize(this->start_id);
|
||||
#ifdef DEBUG_JOIN
|
||||
cout << "the start size " << start_size << endl;
|
||||
#endif
|
||||
for (int i = 0; i < start_size; ++i)
|
||||
for (unsigned i = 0; i < start_size; ++i)
|
||||
{
|
||||
int ele = start_table.getID(i);
|
||||
unsigned ele = start_table.getID(i);
|
||||
RecordType record(1, ele);
|
||||
this->current_table.push_back(record);
|
||||
//this->table_row_new.push_back(false);
|
||||
|
@ -1277,7 +1316,7 @@ Join::multi_join()
|
|||
//int* tmp_id_list;
|
||||
//int tmp_id_list_len;
|
||||
IDList& can_list = this->basic_query->getCandidateList(id2);
|
||||
int can_list_size = can_list.size();
|
||||
unsigned can_list_size = can_list.size();
|
||||
|
||||
for (int i = 0; i < this->id_pos; ++i)
|
||||
{
|
||||
|
@ -1333,7 +1372,7 @@ Join::multi_join()
|
|||
for (int i = 0; i < this->id_pos; ++i)
|
||||
{
|
||||
vector<int> edge_index = edges[i];
|
||||
for(RecordIterator it = edge_index.begin(); it != edge_index.end(); ++it)
|
||||
for(vector<int>::iterator it = edge_index.begin(); it != edge_index.end(); ++it)
|
||||
{
|
||||
int edge_id = this->basic_query->getEdgeID(id2, *it);
|
||||
dealed_triple[edge_id] = true;
|
||||
|
@ -1459,9 +1498,10 @@ Join::constant_edge_filter(int _var_i)
|
|||
this->dealed_triple[triple_id] = true;
|
||||
}
|
||||
|
||||
int pre_id = this->basic_query->getEdgePreID(_var_i, j);
|
||||
int lit_id = (this->kvstore)->getIDByEntity(neighbor_name);
|
||||
if (lit_id == -1)
|
||||
TYPE_PREDICATE_ID pre_id = this->basic_query->getEdgePreID(_var_i, j);
|
||||
TYPE_ENTITY_LITERAL_ID lit_id = (this->kvstore)->getIDByEntity(neighbor_name);
|
||||
//if (lit_id == -1)
|
||||
if (lit_id == INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
lit_id = (this->kvstore)->getIDByLiteral(neighbor_name);
|
||||
}
|
||||
|
@ -1477,8 +1517,8 @@ Join::constant_edge_filter(int _var_i)
|
|||
// Util::logging(_ss.str());
|
||||
// }
|
||||
|
||||
int id_list_len = 0;
|
||||
int* id_list = NULL;
|
||||
unsigned id_list_len = 0;
|
||||
unsigned* id_list = NULL;
|
||||
if (pre_id >= 0)
|
||||
{
|
||||
if (edge_type == Util::EDGE_OUT)
|
||||
|
@ -1531,7 +1571,7 @@ Join::constant_edge_filter(int _var_i)
|
|||
if (id_list_len == 0)
|
||||
{
|
||||
_list.clear();
|
||||
delete[]id_list;
|
||||
delete[] id_list;
|
||||
return false;
|
||||
}
|
||||
// cout << "\t\t can:" << can_list.to_str() << endl;
|
||||
|
@ -1598,7 +1638,7 @@ Join::add_literal_candidate()
|
|||
for (int j = 0; j < var_degree; j++)
|
||||
{
|
||||
int neighbor_id = this->basic_query->getEdgeNeighborID(var_id, j);
|
||||
int predicate_id = this->basic_query->getEdgePreID(var_id, j);
|
||||
TYPE_PREDICATE_ID predicate_id = this->basic_query->getEdgePreID(var_id, j);
|
||||
int triple_id = this->basic_query->getEdgeID(var_id, j);
|
||||
Triple triple = this->basic_query->getTriple(triple_id);
|
||||
string neighbor_name = triple.subject;
|
||||
|
@ -1607,19 +1647,21 @@ Join::add_literal_candidate()
|
|||
// if the neighbor of this edge is an entity, we can add all literals which have an exact predicate edge linking to this entity.
|
||||
if (neighbor_id == -1)
|
||||
{
|
||||
int subject_id = (this->kvstore)->getIDByEntity(neighbor_name);
|
||||
int* object_list = NULL;
|
||||
int object_list_len = 0;
|
||||
TYPE_ENTITY_LITERAL_ID subject_id = (this->kvstore)->getIDByEntity(neighbor_name);
|
||||
unsigned* object_list = NULL;
|
||||
unsigned object_list_len = 0;
|
||||
|
||||
if (predicate_id >= 0)
|
||||
{
|
||||
(this->kvstore)->getobjIDlistBysubIDpreID(subject_id, predicate_id, object_list, object_list_len, true);
|
||||
}
|
||||
else if (predicate_id == -2)
|
||||
{
|
||||
this->kvstore->getobjIDlistBysubID(subject_id, object_list, object_list_len, true);
|
||||
}
|
||||
//NOTICE:only literals should be unioned
|
||||
this_edge_literal_list.unionList(object_list, object_list_len, true);
|
||||
delete[]object_list;
|
||||
delete[] object_list;
|
||||
}
|
||||
// if the neighbor of this edge is variable, then the neighbor variable can not have any literal results,
|
||||
// we should add literals when join these two variables, see the Database::join function for details.
|
||||
|
@ -1732,7 +1774,7 @@ Join::preFilter(int _var)
|
|||
//if size is very large, the cost is high and not many can be filtered!
|
||||
//(keep state for each one-degree node, if considered)
|
||||
IDList& cans = this->basic_query->getCandidateList(_var);
|
||||
int size = this->basic_query->getCandidateSize(_var);
|
||||
unsigned size = this->basic_query->getCandidateSize(_var);
|
||||
|
||||
//result if already empty for non-literal variable
|
||||
if (size == 0)
|
||||
|
@ -1745,8 +1787,8 @@ Join::preFilter(int _var)
|
|||
|
||||
int var_degree = this->basic_query->getVarDegree(_var);
|
||||
//NOTICE:maybe several same predicates
|
||||
set<int> in_edge_pre_id;
|
||||
set<int> out_edge_pre_id;
|
||||
set<TYPE_PREDICATE_ID> in_edge_pre_id;
|
||||
set<TYPE_PREDICATE_ID> out_edge_pre_id;
|
||||
|
||||
for (int i = 0; i < var_degree; i++)
|
||||
{
|
||||
|
@ -1772,18 +1814,20 @@ Join::preFilter(int _var)
|
|||
//else
|
||||
//cout << "need to filter: " << neighbor_name << endl;
|
||||
|
||||
int pre_id = this->basic_query->getEdgePreID(_var, i);
|
||||
TYPE_PREDICATE_ID pre_id = this->basic_query->getEdgePreID(_var, i);
|
||||
//WARN+BETTER:invalid(should be discarded in Query) or ?p(should not be considered here)
|
||||
if (pre_id < 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
//TODO+BETTER: is any pre really used? do we need to loosen the restrictions?
|
||||
|
||||
//size:m<n; time:mlgn < n-m
|
||||
//The former time is computed because the m should be small if we select this p, tending to use binary-search
|
||||
//when doing intersectList operation(mlgn < m+n).
|
||||
//The latter time is computed due to the unnecessary copy cost if not using this p
|
||||
TNUM border = size / (Util::logarithm(2, size) + 1);
|
||||
TYPE_TRIPLE_NUM border = size / (Util::logarithm(2, size) + 1);
|
||||
//not use inefficient pre to filter
|
||||
if(this->dealed_triple[triple_id] || this->pre2num[pre_id] > border)
|
||||
{
|
||||
|
@ -1810,9 +1854,9 @@ Join::preFilter(int _var)
|
|||
}
|
||||
|
||||
//NOTICE:use p2s here, use s2p in only_pre_filter_after_join because pres there are not efficient
|
||||
set<int>::iterator it;
|
||||
int* list = NULL;
|
||||
int len = 0;
|
||||
set<TYPE_PREDICATE_ID>::iterator it;
|
||||
unsigned* list = NULL;
|
||||
unsigned len = 0;
|
||||
for(it = in_edge_pre_id.begin(); it != in_edge_pre_id.end(); ++it)
|
||||
{
|
||||
this->kvstore->getobjIDlistBypreID(*it, list, len, true);
|
||||
|
@ -1852,8 +1896,9 @@ Join::only_pre_filter_after_join()
|
|||
//cout<<"var: "<<this->basic_query->getVarName(var_id)<<endl;
|
||||
|
||||
//get all the only predicate filter edges for this variable.
|
||||
vector<int> in_edge_pre_id;
|
||||
vector<int> out_edge_pre_id;
|
||||
vector<TYPE_PREDICATE_ID> in_edge_pre_id;
|
||||
vector<TYPE_PREDICATE_ID> out_edge_pre_id;
|
||||
|
||||
for (int i = 0; i < var_degree; i++)
|
||||
{
|
||||
//cout<<"var linking edge: "<<i<<endl;
|
||||
|
@ -1892,7 +1937,7 @@ Join::only_pre_filter_after_join()
|
|||
//else
|
||||
//cout << "need to filter: " << neighbor_name << endl;
|
||||
|
||||
int pre_id = this->basic_query->getEdgePreID(var_id, i);
|
||||
TYPE_PREDICATE_ID pre_id = this->basic_query->getEdgePreID(var_id, i);
|
||||
if (pre_id < 0)
|
||||
{
|
||||
continue;
|
||||
|
@ -1916,9 +1961,9 @@ Join::only_pre_filter_after_join()
|
|||
|
||||
for (TableIterator it = this->current_table.begin(); it != this->current_table.end();)
|
||||
{
|
||||
int entity_id = (*it)[this->id2pos[var_id]];
|
||||
int* pair_list = NULL;
|
||||
int pair_len = 0;
|
||||
TYPE_ENTITY_LITERAL_ID entity_id = (*it)[this->id2pos[var_id]];
|
||||
unsigned* pair_list = NULL;
|
||||
unsigned pair_len = 0;
|
||||
bool exist_preid = true;
|
||||
|
||||
//NOTICE: four ways to judge if the predicates exist
|
||||
|
@ -1934,12 +1979,14 @@ Join::only_pre_filter_after_join()
|
|||
//(this->kvstore)->getpreIDsubIDlistByobjID(entity_id, pair_list, pair_len);
|
||||
(this->kvstore)->getpreIDlistByobjID(entity_id, pair_list, pair_len, true);
|
||||
|
||||
for (vector<int>::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++)
|
||||
for (vector<TYPE_PREDICATE_ID>::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++)
|
||||
{
|
||||
int pre_id = (*itr_pre);
|
||||
TYPE_PREDICATE_ID pre_id = (*itr_pre);
|
||||
//exist_preid = Util::bsearch_preid_uporder(pre_id, pair_list, pair_len);
|
||||
if (Util::bsearch_int_uporder(pre_id, pair_list, pair_len) == -1)
|
||||
if (Util::bsearch_int_uporder(pre_id, pair_list, pair_len) == INVALID)
|
||||
{
|
||||
exist_preid = false;
|
||||
}
|
||||
if (!exist_preid)
|
||||
{
|
||||
break;
|
||||
|
@ -1952,12 +1999,14 @@ Join::only_pre_filter_after_join()
|
|||
//(this->kvstore)->getpreIDobjIDlistBysubID(entity_id, pair_list, pair_len);
|
||||
(this->kvstore)->getpreIDlistBysubID(entity_id, pair_list, pair_len, true);
|
||||
|
||||
for (vector<int>::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++)
|
||||
for (vector<TYPE_PREDICATE_ID>::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++)
|
||||
{
|
||||
int pre_id = (*itr_pre);
|
||||
TYPE_PREDICATE_ID pre_id = (*itr_pre);
|
||||
//exist_preid = Util::bsearch_preid_uporder(pre_id, pair_list, pair_len);
|
||||
if (Util::bsearch_int_uporder(pre_id, pair_list, pair_len) == -1)
|
||||
if (Util::bsearch_int_uporder(pre_id, pair_list, pair_len) == INVALID)
|
||||
{
|
||||
exist_preid = false;
|
||||
}
|
||||
if (!exist_preid)
|
||||
{
|
||||
break;
|
||||
|
|
|
@ -15,23 +15,23 @@
|
|||
#include "../KVstore/KVstore.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
typedef vector<int> RecordType;
|
||||
typedef vector<int>::iterator RecordIterator;
|
||||
typedef vector<unsigned> RecordType;
|
||||
typedef vector<unsigned>::iterator RecordIterator;
|
||||
typedef list<RecordType> TableType;
|
||||
typedef list<RecordType>::iterator TableIterator;
|
||||
typedef list<RecordType>::reverse_iterator TableReverseIterator;
|
||||
//typedef list< vector<int> > TableType;
|
||||
//typedef list< vector<int> >::iterator TableIterator;
|
||||
//typedef list< vector<int> >::reverse_iterator TableReverseIterator;
|
||||
typedef vector< vector<int*> > IdLists;
|
||||
typedef vector< vector<int> > IdListsLen;
|
||||
//typedef vector< vector<int*> > IdLists;
|
||||
//typedef vector< vector<int> > IdListsLen;
|
||||
|
||||
typedef struct Satellite
|
||||
{
|
||||
int id;
|
||||
int* idlist;
|
||||
int idlist_len;
|
||||
Satellite(int _id, int* _idlist, int _idlist_len)
|
||||
unsigned* idlist;
|
||||
unsigned idlist_len;
|
||||
Satellite(int _id, unsigned* _idlist, unsigned _idlist_len)
|
||||
{
|
||||
this->id = _id;
|
||||
this->idlist = _idlist;
|
||||
|
@ -47,16 +47,21 @@ private:
|
|||
int var_num;
|
||||
BasicQuery* basic_query;
|
||||
KVstore* kvstore;
|
||||
TNUM* pre2num;
|
||||
int limitID_predicate;
|
||||
int limitID_literal;
|
||||
TYPE_TRIPLE_NUM* pre2num;
|
||||
TYPE_PREDICATE_ID limitID_predicate;
|
||||
TYPE_ENTITY_LITERAL_ID limitID_literal;
|
||||
//used by score_node for parameters
|
||||
static const unsigned PARAM_DEGREE = 1;
|
||||
static const unsigned PARAM_SIZE = 1000000;
|
||||
static const unsigned PARAM_PRE = 10000;
|
||||
static const unsigned PARAM_DENSE = 1;
|
||||
static const double JUDGE_LIMIT = 0.5;
|
||||
static const int LIMIT_CANDIDATE_LIST_SIZE = 1000;
|
||||
|
||||
static const unsigned JUDGE_LIMIT = 2;
|
||||
//NOTICE+DEBUG: please use the constexpr form below instead of the declaration above (constexpr is supported since C++11)
|
||||
//http://www.cnblogs.com/wanyuanchun/p/4041080.html
|
||||
//constexpr static const double JUDGE_LIMIT = 0.5;
|
||||
|
||||
static const unsigned LIMIT_CANDIDATE_LIST_SIZE = 1000;
|
||||
//BETTER?:predefine size to avoid copy cost
|
||||
TableType current_table;
|
||||
TableIterator new_start; //keep to end() as default
|
||||
|
@ -69,10 +74,10 @@ private:
|
|||
bool* dealed_triple;
|
||||
stack<int> mystack;
|
||||
|
||||
vector<int*>* result_list;
|
||||
vector<unsigned*>* result_list;
|
||||
vector<Satellite> satellites;
|
||||
int* record;
|
||||
int record_len;
|
||||
unsigned* record;
|
||||
unsigned record_len;
|
||||
|
||||
void init(BasicQuery* _basic_query);
|
||||
void clear();
|
||||
|
@ -81,7 +86,7 @@ private:
|
|||
|
||||
//judge which method should be used according to
|
||||
//the size of candidates and structure of the query graph
|
||||
int judge(int _smallest, int _biggest);
|
||||
int judge(unsigned _smallest, unsigned _biggest);
|
||||
|
||||
//select the start point and search order
|
||||
void select();
|
||||
|
@ -108,20 +113,20 @@ private:
|
|||
|
||||
//functions for help
|
||||
//copy/add to the end of current_table and set true
|
||||
void add_new_to_results(TableIterator it, int id);
|
||||
void add_new_to_results(TableIterator it, unsigned id);
|
||||
|
||||
//void set_results_old(list<bool>::iterator it);
|
||||
int choose_next_node(int id);
|
||||
|
||||
bool is_literal_var(int id);
|
||||
bool is_literal_ele(int _id);
|
||||
//bool is_literal_ele(int _id);
|
||||
|
||||
void copyToResult();
|
||||
|
||||
//BETTER?:change these params to members in class
|
||||
void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, int _can_list_size);
|
||||
void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_list, int id_list_len, bool _is_literal);
|
||||
bool join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_size, int _id, bool _is_literal);
|
||||
//void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, unsigned _can_list_size);
|
||||
void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal);
|
||||
bool join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal);
|
||||
|
||||
bool multi_join();
|
||||
//NOTICE:this is only used to join a BasicQuery
|
||||
|
@ -129,7 +134,7 @@ private:
|
|||
|
||||
public:
|
||||
Join();
|
||||
Join(KVstore* _kvstore, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal);
|
||||
Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal);
|
||||
//these functions can be called by Database
|
||||
bool join_sparql(SPARQLquery& _sparql_query);
|
||||
bool join_basic(BasicQuery* _basic_query);
|
||||
|
|
|
@ -18,7 +18,7 @@ Strategy::Strategy()
|
|||
//this->prepare_handler();
|
||||
}
|
||||
|
||||
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal)
|
||||
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal)
|
||||
{
|
||||
this->method = 0;
|
||||
this->kvstore = _kvstore;
|
||||
|
@ -58,7 +58,7 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
|
|||
{
|
||||
this->method = -1;
|
||||
|
||||
vector<int*>& result_list = (*iter)->getResultList();
|
||||
vector<unsigned*>& result_list = (*iter)->getResultList();
|
||||
//int select_var_num = (*iter)->getSelectVarNum();
|
||||
//the num of vars needing to be joined, i.e. selectVarNum if only one triple
|
||||
int varNum = (*iter)->getVarNum();
|
||||
|
@ -157,7 +157,7 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
|
|||
}
|
||||
|
||||
void
|
||||
Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list, ResultFilter* _result_filter)
|
||||
Strategy::handler0(BasicQuery* _bq, vector<unsigned*>& _result_list, ResultFilter* _result_filter)
|
||||
{
|
||||
//long before_filter = Util::get_cur_time();
|
||||
cout << "this BasicQuery use query strategy 0" << endl;
|
||||
|
@ -215,7 +215,7 @@ Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list, ResultFilter* _r
|
|||
}
|
||||
|
||||
void
|
||||
Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
Strategy::handler1(BasicQuery* _bq, vector<unsigned*>& _result_list)
|
||||
{
|
||||
long before_filter = Util::get_cur_time();
|
||||
cout << "this BasicQuery use query strategy 1" << endl;
|
||||
|
@ -223,14 +223,15 @@ Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
char edge_type = _bq->getEdgeType(0, 0);
|
||||
int triple_id = _bq->getEdgeID(0, 0);
|
||||
Triple triple = _bq->getTriple(triple_id);
|
||||
int pre_id = _bq->getEdgePreID(0, 0);
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
TYPE_PREDICATE_ID pre_id = _bq->getEdgePreID(0, 0);
|
||||
unsigned* id_list = NULL;
|
||||
unsigned id_list_len = 0;
|
||||
if (edge_type == Util::EDGE_OUT)
|
||||
{
|
||||
//cout<<"edge out!!!"<<endl;
|
||||
int nid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
if (nid == -1)
|
||||
TYPE_ENTITY_LITERAL_ID nid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
//if (nid == -1)
|
||||
if (nid == INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
nid = (this->kvstore)->getIDByLiteral(triple.object);
|
||||
}
|
||||
|
@ -246,9 +247,9 @@ Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
cout << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
|
||||
_result_list.clear();
|
||||
//cout<<"now to copy result to list"<<endl;
|
||||
for (int i = 0; i < id_list_len; ++i)
|
||||
for (unsigned i = 0; i < id_list_len; ++i)
|
||||
{
|
||||
int* record = new int[1]; //only this var is selected
|
||||
unsigned* record = new unsigned[1]; //only this var is selected
|
||||
record[0] = id_list[i];
|
||||
//cout<<this->kvstore->getEntityByID(record[0])<<endl;
|
||||
_result_list.push_back(record);
|
||||
|
@ -260,20 +261,20 @@ Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
}
|
||||
|
||||
void
|
||||
Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
Strategy::handler2(BasicQuery* _bq, vector<unsigned*>& _result_list)
|
||||
{
|
||||
long before_filter = Util::get_cur_time();
|
||||
cout << "this BasicQuery use query strategy 2" << endl;
|
||||
int triple_id = _bq->getEdgeID(0, 0);
|
||||
Triple triple = _bq->getTriple(triple_id);
|
||||
int pre_id = _bq->getEdgePreID(0, 0);
|
||||
TYPE_PREDICATE_ID pre_id = _bq->getEdgePreID(0, 0);
|
||||
|
||||
//NOTICE:it is ok for var1 or var2 to be -1, i.e. not encoded
|
||||
int var1_id = _bq->getIDByVarName(triple.subject);
|
||||
int var2_id = _bq->getIDByVarName(triple.object);
|
||||
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
unsigned* id_list = NULL;
|
||||
unsigned id_list_len = 0;
|
||||
if (var1_id == 0) //subject var selected
|
||||
{
|
||||
//use p2s directly
|
||||
|
@ -291,9 +292,9 @@ Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
long after_filter = Util::get_cur_time();
|
||||
cout << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
|
||||
_result_list.clear();
|
||||
for (int i = 0; i < id_list_len; ++i)
|
||||
for (unsigned i = 0; i < id_list_len; ++i)
|
||||
{
|
||||
int* record = new int[1]; //only one var
|
||||
unsigned* record = new unsigned[1]; //only one var
|
||||
record[0] = id_list[i];
|
||||
_result_list.push_back(record);
|
||||
}
|
||||
|
@ -304,15 +305,15 @@ Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
}
|
||||
|
||||
void
|
||||
Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
Strategy::handler3(BasicQuery* _bq, vector<unsigned*>& _result_list)
|
||||
{
|
||||
long before_filter = Util::get_cur_time();
|
||||
cout << "this BasicQuery use query strategy 3" << endl;
|
||||
int triple_id = _bq->getEdgeID(0, 0);
|
||||
Triple triple = _bq->getTriple(triple_id);
|
||||
int pre_id = _bq->getEdgePreID(0, 0);
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
TYPE_PREDICATE_ID pre_id = _bq->getEdgePreID(0, 0);
|
||||
unsigned* id_list = NULL;
|
||||
unsigned id_list_len = 0;
|
||||
|
||||
_result_list.clear();
|
||||
this->kvstore->getsubIDobjIDlistBypreID(pre_id, id_list, id_list_len);
|
||||
|
@ -328,9 +329,9 @@ Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
long after_filter = Util::get_cur_time();
|
||||
cout << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
|
||||
|
||||
for (int i = 0; i < id_list_len; i += 2)
|
||||
for (unsigned i = 0; i < id_list_len; i += 2)
|
||||
{
|
||||
int* record = new int[2]; //2 vars and selected
|
||||
unsigned* record = new unsigned[2]; //2 vars and selected
|
||||
record[var1_id] = id_list[i];
|
||||
record[var2_id] = id_list[i + 1];
|
||||
_result_list.push_back(record);
|
||||
|
@ -343,7 +344,7 @@ Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
}
|
||||
|
||||
void
|
||||
Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
Strategy::handler4(BasicQuery* _bq, vector<unsigned*>& _result_list)
|
||||
{
|
||||
cout<<"Special Case: consider pre var in this triple"<<endl;
|
||||
int varNum = _bq->getVarNum();
|
||||
|
@ -354,8 +355,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
int selected_var_num = _bq->getSelectVarNum();
|
||||
Triple triple = _bq->getTriple(0);
|
||||
int pvpos = _bq->getSelectedPreVarPosition(triple.predicate);
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
|
||||
unsigned* id_list = NULL;
|
||||
unsigned id_list_len = 0;
|
||||
_result_list.clear();
|
||||
|
||||
//cout<<"total num: "<<total_num <<endl;
|
||||
|
@ -369,9 +371,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
cout<<"predicate: "<<triple.predicate<<" "<<pvpos<<endl;
|
||||
//very special case, to find all triples, select ?s (?p) ?o where { ?s ?p ?o . }
|
||||
//filter and join is too costly, should enum all predicates and use p2so
|
||||
for(int i = 0; i < this->limitID_predicate; ++i)
|
||||
for(TYPE_PREDICATE_ID i = 0; i < this->limitID_predicate; ++i)
|
||||
{
|
||||
int pid = i;
|
||||
TYPE_PREDICATE_ID pid = i;
|
||||
this->kvstore->getsubIDobjIDlistBypreID(pid, id_list, id_list_len);
|
||||
int rsize = selected_var_num;
|
||||
if(selected_pre_var_num == 1)
|
||||
|
@ -380,9 +382,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
}
|
||||
|
||||
//always place s/o before p in result list
|
||||
for (int j = 0; j < id_list_len; j += 2)
|
||||
for (unsigned j = 0; j < id_list_len; j += 2)
|
||||
{
|
||||
int* record = new int[rsize];
|
||||
unsigned* record = new unsigned[rsize];
|
||||
//check the s/o var if selected, need to ensure the placement order
|
||||
if(ovpos >= 0)
|
||||
{
|
||||
|
@ -409,14 +411,15 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
int vpos = -1;
|
||||
if(triple.subject[0] != '?') //constant
|
||||
{
|
||||
int sid = (this->kvstore)->getIDByEntity(triple.subject);
|
||||
TYPE_ENTITY_LITERAL_ID sid = (this->kvstore)->getIDByEntity(triple.subject);
|
||||
this->kvstore->getpreIDobjIDlistBysubID(sid, id_list, id_list_len);
|
||||
vpos = _bq->getSelectedVarPosition(triple.object);
|
||||
}
|
||||
else if(triple.object[0] != '?') //constant
|
||||
{
|
||||
int oid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
if (oid == -1)
|
||||
TYPE_ENTITY_LITERAL_ID oid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
//if (oid == -1)
|
||||
if (oid == INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
oid = (this->kvstore)->getIDByLiteral(triple.object);
|
||||
}
|
||||
|
@ -430,9 +433,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
rsize++;
|
||||
}
|
||||
//always place s/o before p in result list
|
||||
for (int i = 0; i < id_list_len; i += 2)
|
||||
for (unsigned i = 0; i < id_list_len; i += 2)
|
||||
{
|
||||
int* record = new int[rsize];
|
||||
unsigned* record = new unsigned[rsize];
|
||||
if(vpos >= 0)
|
||||
{
|
||||
record[vpos] = id_list[i + 1]; //for the s/o var
|
||||
|
@ -448,8 +451,8 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
{
|
||||
cout<<"Special Case 3"<<endl;
|
||||
//just use so2p
|
||||
int sid = (this->kvstore)->getIDByEntity(triple.subject);
|
||||
int oid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
unsigned sid = (this->kvstore)->getIDByEntity(triple.subject);
|
||||
unsigned oid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
if (oid == -1)
|
||||
{
|
||||
oid = (this->kvstore)->getIDByLiteral(triple.object);
|
||||
|
@ -457,9 +460,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
|
||||
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);
|
||||
//copy to result list
|
||||
for (int i = 0; i < id_list_len; ++i)
|
||||
for (unsigned i = 0; i < id_list_len; ++i)
|
||||
{
|
||||
int* record = new int[1];
|
||||
unsigned* record = new unsigned[1];
|
||||
record[0] = id_list[i];
|
||||
_result_list.push_back(record);
|
||||
}
|
||||
|
@ -471,38 +474,42 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
//TODO:if any constants in a query are not found in kvstore, then this BGP should end to speed up the processing
|
||||
|
||||
void
|
||||
Strategy::handler5(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
Strategy::handler5(BasicQuery* _bq, vector<unsigned*>& _result_list)
|
||||
{
|
||||
cout<<"Special Case: consider constant triple"<<endl;
|
||||
Triple triple = _bq->getTriple(0);
|
||||
_result_list.clear();
|
||||
|
||||
int subid = this->kvstore->getIDByEntity(triple.subject);
|
||||
if(subid == -1) //not found
|
||||
TYPE_ENTITY_LITERAL_ID subid = this->kvstore->getIDByEntity(triple.subject);
|
||||
//if(subid == -1) //not found
|
||||
if(subid == INVALID_ENTITY_LITERAL_ID) //not found
|
||||
{
|
||||
return;
|
||||
}
|
||||
int preid = this->kvstore->getIDByPredicate(triple.predicate);
|
||||
if(preid == -1) //not found
|
||||
TYPE_PREDICATE_ID preid = this->kvstore->getIDByPredicate(triple.predicate);
|
||||
//if(preid == -1) //not found
|
||||
if(preid == INVALID_PREDICATE_ID) //not found
|
||||
{
|
||||
return;
|
||||
}
|
||||
int objid = this->kvstore->getIDByEntity(triple.object);
|
||||
if(objid == -1)
|
||||
TYPE_ENTITY_LITERAL_ID objid = this->kvstore->getIDByEntity(triple.object);
|
||||
//if(objid == -1)
|
||||
if(objid == INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
objid = this->kvstore->getIDByLiteral(triple.object);
|
||||
}
|
||||
if(objid == -1)
|
||||
//if(objid == -1)
|
||||
if(objid == INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
unsigned* id_list = NULL;
|
||||
unsigned id_list_len = 0;
|
||||
(this->kvstore)->getobjIDlistBysubIDpreID(subid, preid, id_list, id_list_len);
|
||||
if (Util::bsearch_int_uporder(objid, id_list, id_list_len) != -1)
|
||||
if (Util::bsearch_int_uporder(objid, id_list, id_list_len) != INVALID)
|
||||
{
|
||||
int* record = new int[3];
|
||||
unsigned* record = new unsigned[3];
|
||||
record[0] = subid;
|
||||
record[1] = preid;
|
||||
record[2] = objid;
|
||||
|
|
|
@ -23,7 +23,7 @@ class Strategy
|
|||
{
|
||||
public:
|
||||
Strategy();
|
||||
Strategy(KVstore*, VSTree*, TNUM*, int, int);
|
||||
Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID);
|
||||
~Strategy();
|
||||
//select efficient strategy to do the sparql query
|
||||
bool handle(SPARQLquery&, ResultFilter* _result_filter = NULL);
|
||||
|
@ -32,22 +32,25 @@ private:
|
|||
int method;
|
||||
KVstore* kvstore;
|
||||
VSTree* vstree;
|
||||
TNUM* pre2num;
|
||||
int limitID_predicate;
|
||||
int limitID_literal;
|
||||
TYPE_TRIPLE_NUM* pre2num;
|
||||
TYPE_PREDICATE_ID limitID_predicate;
|
||||
TYPE_ENTITY_LITERAL_ID limitID_literal;
|
||||
|
||||
void handler0(BasicQuery*, vector<int*>&, ResultFilter* _result_filter = NULL);
|
||||
void handler1(BasicQuery*, vector<int*>&);
|
||||
void handler2(BasicQuery*, vector<int*>&);
|
||||
void handler3(BasicQuery*, vector<int*>&);
|
||||
void handler4(BasicQuery*, vector<int*>&);
|
||||
void handler5(BasicQuery*, vector<int*>&);
|
||||
//NOTICE: even if the ID type is int, it is no problem and no waste to use unsigned in the answer
|
||||
//(because -1, -2 or other invalid IDs can not be in answer)
|
||||
void handler0(BasicQuery*, vector<unsigned*>&, ResultFilter* _result_filter = NULL);
|
||||
void handler1(BasicQuery*, vector<unsigned*>&);
|
||||
void handler2(BasicQuery*, vector<unsigned*>&);
|
||||
void handler3(BasicQuery*, vector<unsigned*>&);
|
||||
void handler4(BasicQuery*, vector<unsigned*>&);
|
||||
void handler5(BasicQuery*, vector<unsigned*>&);
|
||||
//QueryHandler *dispatch;
|
||||
//void prepare_handler();
|
||||
};
|
||||
|
||||
//function pointer array
|
||||
static const unsigned QUERY_HANDLER_NUM = 4;
|
||||
typedef void (Strategy::*QueryHandler[QUERY_HANDLER_NUM]) (BasicQuery*, vector<int*>&);
|
||||
typedef void (Strategy::*QueryHandler[QUERY_HANDLER_NUM]) (BasicQuery*, vector<unsigned*>&);
|
||||
//QueryHandler dispatch;
|
||||
|
||||
#endif //_DATABASE_STRATEGY_H
|
||||
|
|
|
@ -20,7 +20,7 @@ ISTree::ISTree()
|
|||
TSM = NULL;
|
||||
storepath = "";
|
||||
filename = "";
|
||||
transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
|
||||
//transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
|
||||
this->stream = NULL;
|
||||
this->request = 0;
|
||||
}
|
||||
|
@ -37,10 +37,10 @@ ISTree::ISTree(string _storepath, string _filename, string _mode, unsigned long
|
|||
this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
|
||||
else
|
||||
this->root = NULL;
|
||||
this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
|
||||
//this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
|
||||
this->stream = NULL;
|
||||
this->request = 0;
|
||||
}
|
||||
|
@ -51,30 +51,30 @@ ISTree::getFilePath()
|
|||
return storepath + "/" + filename;
|
||||
}
|
||||
|
||||
void //WARN: not check _str and _len
|
||||
ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
|
||||
{
|
||||
if (_index > 2)
|
||||
return;
|
||||
/*
|
||||
if(_str == NULL || _len == 0)
|
||||
{
|
||||
printf("error in CopyToTransfer: empty string\n");
|
||||
return;
|
||||
}
|
||||
*/
|
||||
//unsigned length = _bstr->getLen();
|
||||
unsigned length = _len;
|
||||
if (length + 1 > this->transfer_size[_index])
|
||||
{
|
||||
transfer[_index].release();
|
||||
transfer[_index].setStr((char*)malloc(length + 1));
|
||||
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
|
||||
}
|
||||
memcpy(this->transfer[_index].getStr(), _str, length);
|
||||
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
this->transfer[_index].setLen(length);
|
||||
}
|
||||
//void //WARN: not check _str and _len
|
||||
//ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
|
||||
//{
|
||||
//if (_index > 2)
|
||||
//return;
|
||||
//[>
|
||||
//if(_str == NULL || _len == 0)
|
||||
//{
|
||||
//printf("error in CopyToTransfer: empty string\n");
|
||||
//return;
|
||||
//}
|
||||
//*/
|
||||
////unsigned length = _bstr->getLen();
|
||||
//unsigned length = _len;
|
||||
//if (length + 1 > this->transfer_size[_index])
|
||||
//{
|
||||
//transfer[_index].release();
|
||||
//transfer[_index].setStr((char*)malloc(length + 1));
|
||||
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
|
||||
//}
|
||||
//memcpy(this->transfer[_index].getStr(), _str, length);
|
||||
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
//this->transfer[_index].setLen(length);
|
||||
//}
|
||||
|
||||
unsigned
|
||||
ISTree::getHeight() const
|
||||
|
@ -104,41 +104,46 @@ ISTree::prepare(ISNode* _np)
|
|||
}
|
||||
|
||||
bool
|
||||
ISTree::search(int _key, char*& _str, int& _len)
|
||||
ISTree::search(unsigned _key, char*& _str, unsigned& _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
printf("error in ISTree-search: empty string\n");
|
||||
return false;
|
||||
}
|
||||
//DEBUG
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in ISTree-search: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
this->request = 0;
|
||||
int store;
|
||||
ISNode* ret = this->find(_key, &store, false);
|
||||
//cout<<"to find the position: "<<store<<endl;
|
||||
if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const Bstr* val = ret->getValue(store);
|
||||
this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
|
||||
_str = this->transfer[0].getStr();
|
||||
_len = this->transfer[0].getLen();
|
||||
//this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
|
||||
//_str = this->transfer[0].getStr();
|
||||
//_len = this->transfer[0].getLen();
|
||||
_str = val->getStr();
|
||||
_len = val->getLen();
|
||||
|
||||
this->TSM->request(request);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ISTree::insert(int _key, const char* _str, unsigned _len)
|
||||
ISTree::insert(unsigned _key, char* _str, unsigned _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
printf("error in ISTree-insert: empty string\n");
|
||||
return false;
|
||||
}
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in ISTree-insert: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
this->CopyToTransfer(_str, _len, 2);
|
||||
const Bstr* val = &(this->transfer[2]);
|
||||
//this->CopyToTransfer(_str, _len, 2);
|
||||
//const Bstr* val = &(this->transfer[2]);
|
||||
this->request = 0;
|
||||
ISNode* ret;
|
||||
if (this->root == NULL) //tree is empty
|
||||
|
@ -222,29 +227,33 @@ ISTree::insert(int _key, const char* _str, unsigned _len)
|
|||
else
|
||||
{
|
||||
p->addKey(_key, i);
|
||||
p->addValue(val, i, true);
|
||||
p->addValue(_str, _len, i, true);
|
||||
p->addNum();
|
||||
request += val->getLen();
|
||||
request += _len;
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
//_key->clear();
|
||||
//_value->clear();
|
||||
}
|
||||
this->TSM->request(request);
|
||||
//if(_key == 0)
|
||||
//{
|
||||
//cout<<"the 0th element is: "<<_str[0]<<endl;
|
||||
//}
|
||||
return !ifexist; //QUERY(which case:return false)
|
||||
}
|
||||
|
||||
bool
|
||||
ISTree::modify(int _key, const char* _str, unsigned _len)
|
||||
ISTree::modify(unsigned _key, char* _str, unsigned _len)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
printf("error in ISTree-modify: empty string\n");
|
||||
return false;
|
||||
}
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in ISTree-modify: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
this->CopyToTransfer(_str, _len, 2); //not check value
|
||||
const Bstr* val = &(this->transfer[2]);
|
||||
//this->CopyToTransfer(_str, _len, 2); //not check value
|
||||
//const Bstr* val = &(this->transfer[2]);
|
||||
this->request = 0;
|
||||
int store;
|
||||
ISNode* ret = this->find(_key, &store, true);
|
||||
|
@ -255,22 +264,23 @@ ISTree::modify(int _key, const char* _str, unsigned _len)
|
|||
}
|
||||
//cout<<"ISTree::modify() - key is found, now to remove"<<endl;
|
||||
unsigned len = ret->getValue(store)->getLen();
|
||||
ret->setValue(val, store, true);
|
||||
ret->setValue(_str, _len, store, true);
|
||||
//cout<<"value reset"<<endl;
|
||||
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
|
||||
//request += (val->getLen() - len);
|
||||
this->request = val->getLen();
|
||||
this->request = _len;
|
||||
this->request -= len;
|
||||
ret->setDirty();
|
||||
//cout<<"to request"<<endl;
|
||||
this->TSM->request(request);
|
||||
//cout<<"memory requested"<<endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//this function is useful for search and modify, and range-query
|
||||
ISNode* //return the first key's position that >= *_key
|
||||
ISTree::find(int _key, int* _store, bool ifmodify)
|
||||
ISTree::find(unsigned _key, int* _store, bool ifmodify)
|
||||
{ //to assign value for this->bstr, function shouldn't be const!
|
||||
if (this->root == NULL)
|
||||
return NULL; //ISTree Is Empty
|
||||
|
@ -300,6 +310,7 @@ ISTree::find(int _key, int* _store, bool ifmodify)
|
|||
*_store = -1; //Not Found
|
||||
else
|
||||
*_store = i;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
@ -311,13 +322,14 @@ ISTree::find(unsigned _len, const char* _str, int* store) const
|
|||
*/
|
||||
|
||||
bool
|
||||
ISTree::remove(int _key)
|
||||
ISTree::remove(unsigned _key)
|
||||
{
|
||||
if (_key < 0)
|
||||
{
|
||||
printf("error in ISTree-remove: empty string\n");
|
||||
return false;
|
||||
}
|
||||
//DEBUG
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in ISTree-remove: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
this->request = 0;
|
||||
ISNode* ret;
|
||||
|
@ -443,7 +455,7 @@ ISTree::resetStream()
|
|||
}
|
||||
|
||||
bool //special case: not exist, one-edge-case
|
||||
ISTree::range_query(int _key1, int _key2)
|
||||
ISTree::range_query(unsigned _key1, unsigned _key2)
|
||||
{ //the range is: *_key1 <= x < *_key2
|
||||
//if(_key1 <0 && _key2 <0)
|
||||
//return false;
|
||||
|
@ -516,7 +528,7 @@ ISTree::range_query(int _key1, int _key2)
|
|||
delete this->stream;
|
||||
this->stream = NULL;
|
||||
}
|
||||
vector<int> keys;
|
||||
vector<unsigned> keys;
|
||||
vector<bool> desc;
|
||||
this->stream = new Stream(keys, desc, ansNum, 1, false);
|
||||
|
||||
|
@ -570,6 +582,7 @@ ISTree::release(ISNode* _np) const
|
|||
return;
|
||||
}
|
||||
int cnt = _np->getNum();
|
||||
//WARN: do not change cnt to an unsigned type here (otherwise the cnt >= 0 loop never ends)
|
||||
for (; cnt >= 0; --cnt)
|
||||
release(_np->getChild(cnt));
|
||||
delete _np;
|
||||
|
@ -655,3 +668,4 @@ ISTree::print(string s)
|
|||
else;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:44
|
||||
# Description: struct and interface of the B+ tree
|
||||
# Description: ID2string, including id2entity, id2literal and id2predicate
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_ISTREE_ISTREE_H
|
||||
|
@ -19,7 +19,7 @@
|
|||
class ISTree
|
||||
{
|
||||
protected:
|
||||
unsigned int height; //0 indicates an empty tree
|
||||
unsigned height; //0 indicates an empty tree
|
||||
ISNode* root;
|
||||
ISNode* leaves_head; //the head of LeafNode-list
|
||||
ISNode* leaves_tail; //the tail of LeafNode-list
|
||||
|
@ -36,8 +36,8 @@ protected:
|
|||
//so lock is a must. Add lock to transfer is better than to add
|
||||
//lock to every key/value. However, modify requires a lock for a
|
||||
//key/value, and multiple search for different keys are ok!!!
|
||||
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
|
||||
unsigned transfer_size[3];
|
||||
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
|
||||
//unsigned transfer_size[3];
|
||||
|
||||
//tree's operations should be atom(if read nodes)
|
||||
//sum the request and send to ISStorage at last
|
||||
|
@ -49,25 +49,25 @@ protected:
|
|||
std::string filename; //ok for user to change
|
||||
/* some private functions */
|
||||
std::string getFilePath(); //in UNIX system
|
||||
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
|
||||
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
|
||||
void release(ISNode* _np) const;
|
||||
|
||||
public:
|
||||
ISTree(); //always need to initial transfer
|
||||
ISTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
|
||||
unsigned int getHeight() const;
|
||||
unsigned getHeight() const;
|
||||
void setHeight(unsigned _h);
|
||||
ISNode* getRoot() const;
|
||||
//void setRoot(Node* _root);
|
||||
//insert, search, remove, set
|
||||
bool search(int _key, char*& _str, int& _len);
|
||||
bool insert(int _key, const char* _str, unsigned _len);
|
||||
bool modify(int _key, const char* _str, unsigned _len);
|
||||
ISNode* find(int _key, int* store, bool ifmodify);
|
||||
bool remove(int _key);
|
||||
bool search(unsigned _key, char*& _str, unsigned& _len);
|
||||
bool insert(unsigned _key, char* _str, unsigned _len);
|
||||
bool modify(unsigned _key, char* _str, unsigned _len);
|
||||
ISNode* find(unsigned _key, int* store, bool ifmodify);
|
||||
bool remove(unsigned _key);
|
||||
const Bstr* getRangeValue();
|
||||
void resetStream();
|
||||
bool range_query(int _key1, int _key2);
|
||||
bool range_query(unsigned _key1, unsigned _key2);
|
||||
bool save();
|
||||
~ISTree();
|
||||
void print(std::string s); //DEBUG(print the tree)
|
||||
|
@ -76,4 +76,5 @@ public:
|
|||
//(problem range between two extremes: not-modified, totally-modified)
|
||||
//After saved, it's ok to continue operations on tree!
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -38,4 +38,5 @@ public:
|
|||
void print(std::string s); //DEBUG
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -131,7 +131,7 @@ ISIntlNode::split(ISNode* _father, int _index)
|
|||
p->addNum();
|
||||
}
|
||||
p->addChild(this->childs[i], k);
|
||||
int tp = this->keys[MIN_KEY_NUM];
|
||||
unsigned tp = this->keys[MIN_KEY_NUM];
|
||||
this->setNum(MIN_KEY_NUM);
|
||||
_father->addKey(tp, _index);
|
||||
_father->addChild(p, _index + 1); //DEBUG(check the index)
|
||||
|
@ -177,7 +177,7 @@ ISIntlNode::coalesce(ISNode* _father, int _index)
|
|||
}
|
||||
}
|
||||
|
||||
int tmp = 0;
|
||||
unsigned tmp = 0;
|
||||
switch (ccase)
|
||||
{
|
||||
case 1: //union right to this
|
||||
|
@ -290,4 +290,4 @@ ISIntlNode::print(string s)
|
|||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,4 +45,5 @@ public:
|
|||
*/
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -82,6 +82,7 @@ ISLeafNode::getValue(int _index) const
|
|||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
//cout<<"null in getValue: "<<_index<<endl;
|
||||
//print(string("error in getValue: Invalid index ") + Util::int2string(_index));
|
||||
return NULL;
|
||||
}
|
||||
|
@ -125,13 +126,53 @@ ISLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ISLeafNode::setValue(char* _str, unsigned _len, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
//print(string("error in setValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
this->values[_index].release(); //NOTICE: only used in modify
|
||||
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ISLeafNode::addValue(char* _str, unsigned _len, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
//cout<<"addValue: "<<num<<" "<<_index<<endl;
|
||||
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
//print(string("error in addValue: Invalid index ") + Util::int2string(_index));
|
||||
//cout<<"error in addValue: "<<_index<<" "<<num<<endl;
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for (i = num - 1; i >= _index; --i)
|
||||
this->values[i + 1] = this->values[i];
|
||||
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
//cout<<"show: "<<this->values[_index].getLen()<<" "<<this->values[_index].getStr()[0]<<endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ISLeafNode::subValue(int _index, bool ifdel)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in subValue: Invalid index ") + Util::int2string(_index));
|
||||
//print(string("error in subValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
|
@ -181,7 +222,7 @@ ISLeafNode::split(ISNode* _father, int _index)
|
|||
p->addValue(this->values + i, k);
|
||||
p->addNum();
|
||||
}
|
||||
int tp = this->keys[MIN_KEY_NUM];
|
||||
unsigned tp = this->keys[MIN_KEY_NUM];
|
||||
this->setNum(MIN_KEY_NUM);
|
||||
_father->addKey(tp, _index);
|
||||
_father->addChild(p, _index + 1); //DEBUG(check the index)
|
||||
|
@ -226,7 +267,7 @@ ISLeafNode::coalesce(ISNode* _father, int _index)
|
|||
}
|
||||
}
|
||||
|
||||
int tmp = 0;
|
||||
unsigned tmp = 0;
|
||||
switch (ccase)
|
||||
{
|
||||
case 1: //union right to this
|
||||
|
@ -283,8 +324,8 @@ ISLeafNode::coalesce(ISNode* _father, int _index)
|
|||
p->subNum();
|
||||
break;
|
||||
default:
|
||||
print("error in coalesce: Invalid case!");
|
||||
//printf("error in coalesce: Invalid case!");
|
||||
//print("error in coalesce: Invalid case!");
|
||||
cout<<"error in coalesce: Invalid case!"<<endl;
|
||||
}
|
||||
_father->setDirty();
|
||||
p->setDirty();
|
||||
|
@ -373,4 +414,5 @@ ISLeafNode::print(string s)
|
|||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ public:
|
|||
void Normal();
|
||||
ISNode* getPrev() const;
|
||||
ISNode* getNext() const;
|
||||
|
||||
const Bstr* getValue(int _index) const;
|
||||
bool setValue(const Bstr* _value, int _index, bool ifcopy = false);
|
||||
bool addValue(const Bstr* _value, int _index, bool ifcopy = false);
|
||||
|
@ -34,6 +35,10 @@ public:
|
|||
void setPrev(ISNode* _prev);
|
||||
void setNext(ISNode* _next);
|
||||
unsigned getSize() const;
|
||||
|
||||
bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
|
||||
bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
|
||||
|
||||
ISNode* split(ISNode* _father, int _index);
|
||||
ISNode* coalesce(ISNode* _father, int _index);
|
||||
void release();
|
||||
|
@ -47,4 +52,5 @@ public:
|
|||
};
|
||||
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -203,7 +203,7 @@ ISNode::setFlag(unsigned _flag)
|
|||
this->flag = _flag;
|
||||
}
|
||||
|
||||
int
|
||||
unsigned
|
||||
ISNode::getKey(int _index) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
|
@ -211,14 +211,15 @@ ISNode::getKey(int _index) const
|
|||
{
|
||||
//print(string("error in getKey: Invalid index ") + Util::int2string(_index));
|
||||
printf("error in getKey: Invalid index\n");
|
||||
return -1;
|
||||
//return -1;
|
||||
return INVALID;
|
||||
}
|
||||
else
|
||||
return this->keys[_index];
|
||||
}
|
||||
|
||||
bool
|
||||
ISNode::setKey(int _key, int _index)
|
||||
ISNode::setKey(unsigned _key, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
|
@ -231,7 +232,7 @@ ISNode::setKey(int _key, int _index)
|
|||
}
|
||||
|
||||
bool
|
||||
ISNode::addKey(int _key, int _index)
|
||||
ISNode::addKey(unsigned _key, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
|
@ -264,7 +265,7 @@ ISNode::subKey(int _index)
|
|||
}
|
||||
|
||||
int
|
||||
ISNode::searchKey_less(int _key) const
|
||||
ISNode::searchKey_less(unsigned _key) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
|
@ -290,7 +291,7 @@ ISNode::searchKey_less(int _key) const
|
|||
}
|
||||
|
||||
int
|
||||
ISNode::searchKey_equal(int _key) const
|
||||
ISNode::searchKey_equal(unsigned _key) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
|
@ -305,7 +306,7 @@ ISNode::searchKey_equal(int _key) const
|
|||
}
|
||||
|
||||
int
|
||||
ISNode::searchKey_lessEqual(int _key) const
|
||||
ISNode::searchKey_lessEqual(unsigned _key) const
|
||||
{
|
||||
//int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
|
@ -317,4 +318,5 @@ ISNode::searchKey_lessEqual(int _key) const
|
|||
return ret - 1;
|
||||
else
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -63,15 +63,15 @@ public:
|
|||
void setStore(unsigned _store);
|
||||
unsigned getFlag() const;
|
||||
void setFlag(unsigned _flag);
|
||||
int getKey(int _index) const; //need to check the index
|
||||
bool setKey(int _key, int _index);
|
||||
bool addKey(int _key, int _index);
|
||||
unsigned getKey(int _index) const; //need to check the index
|
||||
bool setKey(unsigned _key, int _index);
|
||||
bool addKey(unsigned _key, int _index);
|
||||
bool subKey(int _index);
|
||||
|
||||
//several binary key search utilities
|
||||
int searchKey_less(int _key) const;
|
||||
int searchKey_equal(int _key) const;
|
||||
int searchKey_lessEqual(int _key) const;
|
||||
int searchKey_less(unsigned _key) const;
|
||||
int searchKey_equal(unsigned _key) const;
|
||||
int searchKey_lessEqual(unsigned _key) const;
|
||||
|
||||
//virtual functions: polymorphic
|
||||
virtual ISNode* getChild(int _index) const { return NULL; };
|
||||
|
@ -80,12 +80,18 @@ public:
|
|||
virtual bool subChild(int _index) { return true; };
|
||||
virtual ISNode* getPrev() const { return NULL; };
|
||||
virtual ISNode* getNext() const { return NULL; };
|
||||
|
||||
virtual const Bstr* getValue(int _index) const { return NULL; };
|
||||
virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool subValue(int _index, bool ifdel = false) { return true; };
|
||||
virtual void setPrev(ISNode* _prev) {};
|
||||
virtual void setNext(ISNode* _next) {};
|
||||
|
||||
virtual bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
|
||||
|
||||
//pure virtual function
|
||||
virtual void Virtual() = 0;
|
||||
virtual void Normal() = 0;
|
||||
virtual unsigned getSize() const = 0; //return all memory owned
|
||||
|
@ -110,4 +116,5 @@ public:
|
|||
*to release the whole(pointer is invalid and rebuild problem)
|
||||
*/
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ ISStorage::ISStorage(string& _filepath, string& _mode, unsigned* _height, unsign
|
|||
else //_mode == "open"
|
||||
{
|
||||
//read basic information
|
||||
int rootnum;
|
||||
unsigned rootnum;
|
||||
char c;
|
||||
fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
|
||||
fread(&rootnum, sizeof(unsigned), 1, this->treefp);
|
||||
|
@ -274,7 +274,8 @@ ISStorage::readNode(ISNode* _np, long long* _request)
|
|||
fseek(treefp, 4 * (num + 1), SEEK_CUR);
|
||||
|
||||
//to read all keys
|
||||
int tmp = -1;
|
||||
unsigned tmp = INVALID;
|
||||
//int tmp = -1;
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
fread(&tmp, sizeof(int), 1, treefp);
|
||||
|
@ -384,7 +385,8 @@ ISStorage::writeNode(ISNode* _np)
|
|||
}
|
||||
}
|
||||
|
||||
int tmp = 0;
|
||||
//int tmp = 0;
|
||||
unsigned tmp = INVALID;
|
||||
//to write all keys
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
|
@ -397,7 +399,13 @@ ISStorage::writeNode(ISNode* _np)
|
|||
{
|
||||
//to write all values
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock);
|
||||
//if(_np->getKey(0) == 0)
|
||||
//{
|
||||
//cout<<"the 0th value: "<<_np->getValue(i)->getStr()[0]<<endl;
|
||||
//}
|
||||
}
|
||||
}
|
||||
fseek(treefp, Address(blocknum), SEEK_SET);
|
||||
if (SpecialBlock)
|
||||
|
@ -408,6 +416,7 @@ ISStorage::writeNode(ISNode* _np)
|
|||
//NOTICE:we may store the dirty bit into the tree file, but that is ok
|
||||
//Each time we read the tree file to construct a node, we always set the dirty bit to 0
|
||||
_np->delDirty();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -419,7 +428,8 @@ ISStorage::readBstr(Bstr* _bp, unsigned* _next)
|
|||
fread(&len, sizeof(unsigned), 1, this->treefp);
|
||||
this->ReadAlign(_next);
|
||||
//this->request(len);
|
||||
char* s = (char*)malloc(len);
|
||||
//char* s = (char*)malloc(len);
|
||||
char* s = new char[len];
|
||||
_bp->setLen(len);
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
|
@ -437,6 +447,7 @@ ISStorage::readBstr(Bstr* _bp, unsigned* _next)
|
|||
fseek(treefp, j, SEEK_CUR);
|
||||
this->ReadAlign(_next);
|
||||
_bp->setStr(s);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -675,3 +686,4 @@ ISStorage::print(string s)
|
|||
fputs("\n", Util::debug_kvstore);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -43,6 +43,8 @@ private:
|
|||
//Because the bstr' size is controlled, so is the node.
|
||||
unsigned long long freemem; //free memory to use, non-negative
|
||||
//unsigned long long time; //QUERY(achieving an old-swap startegy?)
|
||||
|
||||
//QUERY: should this be long long? (otherwise will be different in 32-bit and 64-bit machine)
|
||||
long Address(unsigned _blocknum) const;
|
||||
unsigned Blocknum(long address) const;
|
||||
unsigned AllocBlock();
|
||||
|
@ -70,3 +72,4 @@ public:
|
|||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -0,0 +1,702 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVTree.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:45
|
||||
# Description: achieve functions in IVTree.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "IVTree.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//Default constructor: an empty tree that is not attached to any file.
//Every pointer member is NULL and every counter is zero.
IVTree::IVTree()
{
	//location of the tree on disk (none yet)
	this->storepath = "";
	this->filename = "";
	this->mode = "";

	//in-memory shape: height 0 means an empty tree
	this->height = 0;
	this->root = NULL;
	this->leaves_head = NULL;
	this->leaves_tail = NULL;

	//helper objects and bookkeeping
	this->TSM = NULL;
	this->value_list = NULL;
	this->stream = NULL;
	this->request = 0;
}
|
||||
|
||||
//Construct a tree stored at _storepath/_filename.
//  _storepath, _filename: joined by getFilePath() into the tree file path; the value
//                         list lives next to it in "<path>_vlist"
//  _mode:                 passed to VList and IVStorage; when it equals "open" the
//                         storage manager is asked to pre-read root and the leaf list ends
//  _buffer_size:          forwarded to IVStorage
//All node pointers are set to NULL before anything else, so that in every mode other
//than "open" the tree starts as a well-defined empty tree (previously leaves_head and
//leaves_tail were left uninitialised there), and preRead never receives indeterminate values.
IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long long _buffer_size)
{
	storepath = _storepath;
	filename = _filename;
	this->height = 0;
	this->mode = string(_mode);

	//empty shape first; preRead below overwrites these in "open" mode
	this->root = NULL;
	this->leaves_head = NULL;
	this->leaves_tail = NULL;

	string filepath = this->getFilePath();

	string vlist_file = filepath + "_vlist";
	this->value_list = new VList(vlist_file, this->mode, 1<<30);

	TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list);
	if (this->mode == "open")
		this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);

	this->stream = NULL;
	this->request = 0;
}
|
||||
|
||||
//Return the path of the tree file: storepath and filename joined with a '/'.
string
IVTree::getFilePath()
{
	string path(storepath);
	path += "/";
	path += filename;
	return path;
}
|
||||
|
||||
//void //WARN: not check _str and _len
|
||||
//IVTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
|
||||
//{
|
||||
//if (_index > 2)
|
||||
//return;
|
||||
//[>
|
||||
//if(_str == NULL || _len == 0)
|
||||
//{
|
||||
//printf("error in CopyToTransfer: empty string\n");
|
||||
//return;
|
||||
//}
|
||||
//*/
|
||||
////unsigned length = _bstr->getLen();
|
||||
//unsigned length = _len;
|
||||
//if (length + 1 > this->transfer_size[_index])
|
||||
//{
|
||||
//transfer[_index].release();
|
||||
//transfer[_index].setStr((char*)malloc(length + 1));
|
||||
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
|
||||
//}
|
||||
//memcpy(this->transfer[_index].getStr(), _str, length);
|
||||
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
//this->transfer[_index].setLen(length);
|
||||
//}
|
||||
|
||||
unsigned
|
||||
IVTree::getHeight() const
|
||||
{
|
||||
return this->height;
|
||||
}
|
||||
|
||||
void
|
||||
IVTree::setHeight(unsigned _h)
|
||||
{
|
||||
this->height = _h;
|
||||
}
|
||||
|
||||
//Return the current root node pointer (NULL when the tree is empty).
IVNode*
IVTree::getRoot() const
{
	return root;
}
|
||||
|
||||
void
|
||||
IVTree::prepare(IVNode* _np)
|
||||
{
|
||||
//this->request = 0;
|
||||
bool flag = _np->inMem();
|
||||
if (!flag)
|
||||
{
|
||||
this->TSM->readNode(_np, &request); //readNode deal with request
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
IVTree::search(unsigned _key, char*& _str, unsigned& _len)
|
||||
{
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in IVTree-search: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
this->request = 0;
|
||||
int store;
|
||||
IVNode* ret = this->find(_key, &store, false);
|
||||
if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
ret->getValue(this->value_list, store, _str, _len);
|
||||
//const Bstr* val = ret->getValue(store);
|
||||
//this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
|
||||
//_str = this->transfer[0].getStr();
|
||||
//_len = this->transfer[0].getLen();
|
||||
|
||||
this->TSM->request(request);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVTree::insert(unsigned _key, char* _str, unsigned _len)
|
||||
{
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in IVTree-insert: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
//this->CopyToTransfer(_str, _len, 2);
|
||||
//const Bstr* val = &(this->transfer[2]);
|
||||
this->request = 0;
|
||||
IVNode* ret;
|
||||
if (this->root == NULL) //tree is empty
|
||||
{
|
||||
leaves_tail = leaves_head = root = new IVLeafNode;
|
||||
request += IVNode::LEAF_SIZE;
|
||||
this->height = 1;
|
||||
root->setHeight(1); //add to heap later
|
||||
}
|
||||
|
||||
//this->prepare(this->root); //root must be in-mem
|
||||
if (root->getNum() == IVNode::MAX_KEY_NUM)
|
||||
{
|
||||
IVNode* father = new IVIntlNode;
|
||||
request += IVNode::INTL_SIZE;
|
||||
father->addChild(root, 0);
|
||||
ret = root->split(father, 0);
|
||||
if (ret->isLeaf() && ret->getNext() == NULL)
|
||||
this->leaves_tail = ret;
|
||||
if (ret->isLeaf())
|
||||
request += IVNode::LEAF_SIZE;
|
||||
else
|
||||
request += IVNode::INTL_SIZE;
|
||||
this->height++; //height rises only when root splits
|
||||
//WARN: height area in Node: 4 bit!
|
||||
father->setHeight(this->height); //add to heap later
|
||||
this->TSM->updateHeap(ret, ret->getRank(), false);
|
||||
this->root = father;
|
||||
}
|
||||
|
||||
IVNode* p = this->root;
|
||||
IVNode* q;
|
||||
int i;
|
||||
while (!p->isLeaf())
|
||||
{
|
||||
//j = p->getNum();
|
||||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
//NOTICE: using binary search is better here
|
||||
i = p->searchKey_less(_key);
|
||||
|
||||
q = p->getChild(i);
|
||||
this->prepare(q);
|
||||
if (q->getNum() == IVNode::MAX_KEY_NUM)
|
||||
{
|
||||
ret = q->split(p, i);
|
||||
if (ret->isLeaf() && ret->getNext() == NULL)
|
||||
this->leaves_tail = ret;
|
||||
if (ret->isLeaf())
|
||||
request += IVNode::LEAF_SIZE;
|
||||
else
|
||||
request += IVNode::INTL_SIZE;
|
||||
//BETTER: in loop may update multiple times
|
||||
this->TSM->updateHeap(ret, ret->getRank(), false);
|
||||
this->TSM->updateHeap(q, q->getRank(), true);
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
if (_key < p->getKey(i))
|
||||
p = q;
|
||||
else
|
||||
p = ret;
|
||||
}
|
||||
else
|
||||
{
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
p = q;
|
||||
}
|
||||
}
|
||||
//j = p->getNum();
|
||||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_less(_key);
|
||||
|
||||
//insert existing key is ok, but not inserted in
|
||||
//however, the tree-shape may change due to possible split in former code
|
||||
bool ifexist = false;
|
||||
if (i > 0 && _key == p->getKey(i - 1))
|
||||
ifexist = true;
|
||||
else
|
||||
{
|
||||
p->addKey(_key, i);
|
||||
p->addValue(this->value_list, i, _str, _len, true);
|
||||
p->addNum();
|
||||
//NOTICE: is this is a vlist, then it will be freed, and should not be included in the request memory
|
||||
if(!VList::isLongList(_len))
|
||||
{
|
||||
request += _len;
|
||||
}
|
||||
//request += val->getLen();
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
//_key->clear();
|
||||
//_value->clear();
|
||||
}
|
||||
|
||||
this->TSM->request(request);
|
||||
return !ifexist; //QUERY(which case:return false)
|
||||
}
|
||||
|
||||
bool
|
||||
IVTree::modify(unsigned _key, char* _str, unsigned _len)
|
||||
{
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in IVTree-modify: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
//this->CopyToTransfer(_str, _len, 2); //not check value
|
||||
//const Bstr* val = &(this->transfer[2]);
|
||||
this->request = 0;
|
||||
int store;
|
||||
IVNode* ret = this->find(_key, &store, true);
|
||||
if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
|
||||
{
|
||||
cerr << "tree is empty or not found" << endl;
|
||||
return false;
|
||||
}
|
||||
//cout<<"IVTree::modify() - key is found, now to remove"<<endl;
|
||||
|
||||
//NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr
|
||||
unsigned len = ret->getValue(store)->getLen();
|
||||
if(ret->getValue(store)->isBstrLongList())
|
||||
{
|
||||
len = 0;
|
||||
}
|
||||
ret->setValue(this->value_list, store, _str, _len, true);
|
||||
//ret->setValue(val, store, true);
|
||||
//cout<<"value reset"<<endl;
|
||||
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
|
||||
|
||||
//request += (val->getLen() - len);
|
||||
if(!VList::isLongList(_len))
|
||||
{
|
||||
this->request += _len;
|
||||
}
|
||||
//this->request = val->getLen();
|
||||
this->request -= len;
|
||||
ret->setDirty();
|
||||
//cout<<"to request"<<endl;
|
||||
this->TSM->request(request);
|
||||
//cout<<"memory requested"<<endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//this function is useful for search and modify, and range-query
|
||||
//Descend to the leaf that should hold _key.
//Returns NULL for an empty tree; otherwise returns the leaf and writes into *_store
//the position given by searchKey_lessEqual (the first key >= _key), or -1 when that
//position equals the number of keys in the leaf.
//When ifmodify is true every internal node on the path is marked dirty.
//Nodes are loaded through prepare(), so this->request grows; the function cannot be const.
IVNode*
IVTree::find(unsigned _key, int* _store, bool ifmodify)
{
	if (this->root == NULL)
		return NULL;	//IVTree Is Empty

	IVNode* node = root;
	while (!node->isLeaf())
	{
		if (ifmodify)
			node->setDirty();

		const int childPos = node->searchKey_less(_key);
		node = node->getChild(childPos);
		this->prepare(node);
	}

	const int count = node->getNum();
	const int pos = node->searchKey_lessEqual(_key);
	if (pos == count)
		*_store = -1;	//Not Found
	else
		*_store = pos;

	return node;
}
|
||||
|
||||
/*
|
||||
Node*
|
||||
IVTree::find(unsigned _len, const char* _str, int* store) const
|
||||
{
|
||||
}
|
||||
*/
|
||||
|
||||
bool
|
||||
IVTree::remove(unsigned _key)
|
||||
{
|
||||
//if (_key < 0)
|
||||
//{
|
||||
//printf("error in IVTree-remove: empty string\n");
|
||||
//return false;
|
||||
//}
|
||||
|
||||
this->request = 0;
|
||||
IVNode* ret;
|
||||
if (this->root == NULL) //tree is empty
|
||||
return false;
|
||||
|
||||
IVNode* p = this->root;
|
||||
IVNode* q;
|
||||
int i, j;
|
||||
while (!p->isLeaf())
|
||||
{
|
||||
j = p->getNum();
|
||||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_less(_key);
|
||||
|
||||
q = p->getChild(i);
|
||||
this->prepare(q);
|
||||
if (q->getNum() < IVNode::MIN_CHILD_NUM) //==MIN_KEY_NUM
|
||||
{
|
||||
if (i > 0)
|
||||
this->prepare(p->getChild(i - 1));
|
||||
if (i < j)
|
||||
this->prepare(p->getChild(i + 1));
|
||||
ret = q->coalesce(p, i);
|
||||
if (ret != NULL)
|
||||
this->TSM->updateHeap(ret, 0, true);//non-sense node
|
||||
this->TSM->updateHeap(q, q->getRank(), true);
|
||||
if (q->isLeaf())
|
||||
{
|
||||
if (q->getPrev() == NULL)
|
||||
this->leaves_head = q;
|
||||
if (q->getNext() == NULL)
|
||||
this->leaves_tail = q;
|
||||
}
|
||||
if (p->getNum() == 0) //root shrinks
|
||||
{
|
||||
//this->leaves_head = q;
|
||||
this->root = q;
|
||||
this->TSM->updateHeap(p, 0, true); //instead of delete p
|
||||
this->height--;
|
||||
}
|
||||
}
|
||||
else
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
p = q;
|
||||
}
|
||||
|
||||
bool flag = false;
|
||||
//j = p->getNum(); //LeafNode(maybe root)
|
||||
//for(i = 0; i < j; ++i)
|
||||
// if(bstr == *(p->getKey(i)))
|
||||
// {
|
||||
// request -= p->getKey(i)->getLen();
|
||||
// request -= p->getValue(i)->getLen();
|
||||
// p->subKey(i, true); //to release
|
||||
// p->subValue(i, true); //to release
|
||||
// p->subNum();
|
||||
// if(p->getNum() == 0) //root leaf 0 key
|
||||
// {
|
||||
// this->root = NULL;
|
||||
// this->leaves_head = NULL;
|
||||
// this->leaves_tail = NULL;
|
||||
// this->height = 0;
|
||||
// this->TSM->updateHeap(p, 0, true); //instead of delete p
|
||||
// }
|
||||
// p->setDirty();
|
||||
// flag = true;
|
||||
// break;
|
||||
// }
|
||||
i = p->searchKey_equal(_key);
|
||||
//WARN+NOTICE:here must check, because the key to remove maybe not exist
|
||||
if (i != (int)p->getNum())
|
||||
{
|
||||
if(!p->getValue(i)->isBstrLongList())
|
||||
{
|
||||
request -= p->getValue(i)->getLen();
|
||||
}
|
||||
p->subKey(i); //to release
|
||||
p->subValue(this->value_list, i, true); //to release
|
||||
p->subNum();
|
||||
if (p->getNum() == 0) //root leaf 0 key
|
||||
{
|
||||
this->root = NULL;
|
||||
this->leaves_head = NULL;
|
||||
this->leaves_tail = NULL;
|
||||
this->height = 0;
|
||||
this->TSM->updateHeap(p, 0, true); //instead of delete p
|
||||
}
|
||||
p->setDirty();
|
||||
flag = true;
|
||||
}
|
||||
|
||||
this->TSM->request(request);
|
||||
return flag; //i == j, not found
|
||||
}
|
||||
|
||||
//Return the next record of the current range-query stream.
//Returns NULL (after a message on stderr) when no stream exists or the stream has
//been read to its end.
//NOTICE: the returned pointer is one record owned by the stream - do not free it.
const Bstr*
IVTree::getRangeValue()
{
	if (this->stream == NULL)
	{
		fprintf(stderr, "IVTree::getRangeValue(): no results now!\n");
		return NULL;
	}

	const bool exhausted = this->stream->isEnd();
	if (exhausted)
	{
		fprintf(stderr, "IVTree::getRangeValue(): read till end now!\n");
		return NULL;
	}

	//NOTICE:Bstr[] but only one element, used as Bstr*
	return this->stream->read();
}
|
||||
|
||||
void
|
||||
IVTree::resetStream()
|
||||
{
|
||||
if (this->stream == NULL)
|
||||
{
|
||||
fprintf(stderr, "no results now!\n");
|
||||
return;
|
||||
}
|
||||
this->stream->setEnd();
|
||||
}
|
||||
|
||||
//TODO: change to using value list, getValue() maybe not get real long list
|
||||
bool //special case: not exist, one-edge-case
|
||||
IVTree::range_query(unsigned _key1, unsigned _key2)
|
||||
{ //the range is: *_key1 <= x < *_key2
|
||||
//if(_key1 <0 && _key2 <0)
|
||||
//return false;
|
||||
//ok to search one-edge, requiring only one be negative
|
||||
//find and write value
|
||||
int store1, store2;
|
||||
IVNode *p1, *p2;
|
||||
if (_key1 >= 0)
|
||||
{
|
||||
request = 0;
|
||||
p1 = this->find(_key1, &store1, false);
|
||||
if (p1 == NULL || store1 == -1)
|
||||
return false; //no element
|
||||
this->TSM->request(request);
|
||||
}
|
||||
else
|
||||
{
|
||||
p1 = this->leaves_head;
|
||||
store1 = 0;
|
||||
}
|
||||
if (_key2 >= 0)
|
||||
{ //QUERY: another strategy is to getnext and compare every time to tell end
|
||||
request = 0;
|
||||
p2 = this->find(_key2, &store2, false);
|
||||
if (p2 == NULL)
|
||||
return false;
|
||||
else if (store2 == -1)
|
||||
store2 = p2->getNum();
|
||||
else if (store2 == 0)
|
||||
{
|
||||
p2 = p2->getPrev();
|
||||
if (p2 == NULL)
|
||||
return false; //no element
|
||||
store2 = p2->getNum();
|
||||
}
|
||||
this->TSM->request(request);
|
||||
}
|
||||
else
|
||||
{
|
||||
p2 = this->leaves_tail;
|
||||
store2 = p2->getNum();
|
||||
}
|
||||
|
||||
IVNode* p = p1;
|
||||
unsigned i, l, r;
|
||||
//get the num of answers first, not need to prepare the node
|
||||
unsigned ansNum = 0;
|
||||
while (true)
|
||||
{
|
||||
//request = 0;
|
||||
//this->prepare(p);
|
||||
if (p == p1)
|
||||
l = store1;
|
||||
else
|
||||
l = 0;
|
||||
if (p == p2)
|
||||
r = store2;
|
||||
else
|
||||
r = p->getNum();
|
||||
ansNum += (r - l);
|
||||
//this->TSM->request(request);
|
||||
if (p != p2)
|
||||
p = p->getNext();
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
if (this->stream != NULL)
|
||||
{
|
||||
delete this->stream;
|
||||
this->stream = NULL;
|
||||
}
|
||||
vector<unsigned> keys;
|
||||
vector<bool> desc;
|
||||
this->stream = new Stream(keys, desc, ansNum, 1, false);
|
||||
|
||||
p = p1;
|
||||
while (1)
|
||||
{
|
||||
request = 0;
|
||||
this->prepare(p);
|
||||
if (p == p1)
|
||||
l = store1;
|
||||
else
|
||||
l = 0;
|
||||
if (p == p2)
|
||||
r = store2;
|
||||
else
|
||||
r = p->getNum();
|
||||
for (i = l; i < r; ++i)
|
||||
{
|
||||
//NOTICE:Bstr* in an array, used as Bstr[]
|
||||
//DEBUG+TODO: if long list?? clean
|
||||
this->stream->write(p->getValue(i));
|
||||
}
|
||||
this->TSM->request(request);
|
||||
if (p != p2)
|
||||
p = p->getNext();
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
this->stream->setEnd();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVTree::save() //save the whole tree to disk
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
printf("now to save tree!\n");
|
||||
#endif
|
||||
if (TSM->writeTree(this->root))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
IVTree::release(IVNode* _np) const
|
||||
{
|
||||
if (_np == NULL) return;
|
||||
if (_np->isLeaf())
|
||||
{
|
||||
delete _np;
|
||||
return;
|
||||
}
|
||||
int cnt = _np->getNum();
|
||||
for (; cnt >= 0; --cnt)
|
||||
release(_np->getChild(cnt));
|
||||
delete _np;
|
||||
}
|
||||
|
||||
//Destroy the tree: free the long-list store, the range-query stream and the
//storage manager, then delete every node reachable from the root.
//NOTICE: nothing is saved here; save() has to be called beforehand.
IVTree::~IVTree()
{
	delete this->value_list;
	delete this->stream;	//may be NULL, deleting NULL is harmless
	delete this->TSM;

#ifdef DEBUG_KVSTORE
	printf("already empty the buffer, now to delete all nodes in tree!\n");
#endif

	//recursively delete each node
	this->release(this->root);
}
|
||||
|
||||
void
|
||||
IVTree::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
fputs(Util::showtime().c_str(), Util::debug_kvstore);
|
||||
fputs("Class IVTree\n", Util::debug_kvstore);
|
||||
fputs("Message: ", Util::debug_kvstore);
|
||||
fputs(s.c_str(), Util::debug_kvstore);
|
||||
fputs("\n", Util::debug_kvstore);
|
||||
fprintf(Util::debug_kvstore, "Height: %d\n", this->height);
|
||||
if (s == "tree" || s == "TREE")
|
||||
{
|
||||
if (this->root == NULL)
|
||||
{
|
||||
fputs("Null IVTree\n", Util::debug_kvstore);
|
||||
return;
|
||||
}
|
||||
IVNode** ns = new IVNode*[this->height];
|
||||
int* ni = new int[this->height];
|
||||
IVNode* np;
|
||||
int i, pos = 0;
|
||||
ns[pos] = this->root;
|
||||
ni[pos] = this->root->getNum();
|
||||
pos++;
|
||||
while (pos > 0)
|
||||
{
|
||||
np = ns[pos - 1];
|
||||
i = ni[pos - 1];
|
||||
this->prepare(np);
|
||||
if (np->isLeaf() || i < 0) //LeafNode or ready IntlNode
|
||||
{ //child-num ranges: 0~num
|
||||
if (s == "tree")
|
||||
np->print("node");
|
||||
else
|
||||
np->print("NODE"); //print full node-information
|
||||
pos--;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
ns[pos] = np->getChild(i);
|
||||
ni[pos - 1]--;
|
||||
ni[pos] = ns[pos]->getNum();
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
delete[] ns;
|
||||
delete[] ni;
|
||||
}
|
||||
else if (s == "LEAVES" || s == "leaves")
|
||||
{
|
||||
IVNode* np;
|
||||
for (np = this->leaves_head; np != NULL; np = np->getNext())
|
||||
{
|
||||
this->prepare(np);
|
||||
if (s == "leaves")
|
||||
np->print("node");
|
||||
else
|
||||
np->print("NODE");
|
||||
}
|
||||
}
|
||||
else if (s == "check tree")
|
||||
{
|
||||
//check the tree, if satisfy B+ definition
|
||||
//TODO
|
||||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVTree.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:44
|
||||
# Description: ID2valueList, including s2po, p2so and o2ps
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_IVTREE_IVTREE_H
|
||||
#define _KVSTORE_IVTREE_IVTREE_H
|
||||
|
||||
#include "../../Util/Util.h"
|
||||
#include "../../Util/Stream.h"
|
||||
#include "../../Util/VList.h"
|
||||
#include "node/IVNode.h"
|
||||
#include "node/IVIntlNode.h"
|
||||
#include "node/IVLeafNode.h"
|
||||
#include "storage/IVStorage.h"
|
||||
|
||||
//TODO: for long list, do not read in time, just on need
|
||||
//the memory is kept with the node, updated with the node
|
||||
//NOTICE: to release the node, maybe the value list is NULL
|
||||
//value bstr: unsigned=address, NULL
|
||||
//BETTER?: build a new block store for long list??
|
||||
|
||||
//NOTICE: we do not need to use transfer bstr here, in neither direction
|
||||
//when insert/query, we do not release the value in kvstore
|
||||
|
||||
class IVTree
|
||||
{
|
||||
protected:
|
||||
unsigned height; //0 indicates an empty tree
|
||||
IVNode* root;
|
||||
IVNode* leaves_head; //the head of LeafNode-list
|
||||
IVNode* leaves_tail; //the tail of LeafNode-list
|
||||
std::string mode; //BETTER(to use enum)
|
||||
IVStorage* TSM; //Tree-Storage-Manage
|
||||
//BETTER:multiple stream maybe needed:)
|
||||
Stream* stream;
|
||||
|
||||
//always alloc one more byte than length, then user can add a '\0'
|
||||
//to get a real string, instead of new and copy
|
||||
//other operations will be harmful to search, so store value in
|
||||
//transfer temporally, while length adjusted.
|
||||
//TODO: in multi-user case, multiple-search will cause problem,
|
||||
//so lock is a must. Add lock to transfer is better than to add
|
||||
//lock to every key/value. However, modify requires a lock for a
|
||||
//key/value, and multiple search for different keys are ok!!!
|
||||
//Bstr transfer;
|
||||
//unsigned transfer_size;
|
||||
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
|
||||
//unsigned transfer_size[3];
|
||||
|
||||
//tree's operations should be atom(if read nodes)
|
||||
//sum the request and send to IVStorage at last
|
||||
//ensure that all nodes operated are in memory
|
||||
long long request;
|
||||
void prepare(IVNode* _np);
|
||||
|
||||
std::string storepath;
|
||||
std::string filename; //ok for user to change
|
||||
/* some private functions */
|
||||
std::string getFilePath(); //in UNIX system
|
||||
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
|
||||
//void CopyToTransfer(const char* _str, unsigned _len);
|
||||
void release(IVNode* _np) const;
|
||||
|
||||
//very long value list are stored in a separate file(with large block)
|
||||
//
|
||||
//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
|
||||
//<10%: 5000000~100M bytes
|
||||
VList* value_list;
|
||||
|
||||
public:
|
||||
IVTree(); //always need to initial transfer
|
||||
IVTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
|
||||
unsigned getHeight() const;
|
||||
void setHeight(unsigned _h);
|
||||
IVNode* getRoot() const;
|
||||
//void setRoot(Node* _root);
|
||||
//insert, search, remove, set
|
||||
bool search(unsigned _key, char*& _str, unsigned& _len);
|
||||
bool insert(unsigned _key, char* _str, unsigned _len);
|
||||
bool modify(unsigned _key, char* _str, unsigned _len);
|
||||
IVNode* find(unsigned _key, int* store, bool ifmodify);
|
||||
bool remove(unsigned _key);
|
||||
const Bstr* getRangeValue();
|
||||
void resetStream();
|
||||
bool range_query(unsigned _key1, unsigned _key2);
|
||||
bool save();
|
||||
~IVTree();
|
||||
void print(std::string s); //DEBUG(print the tree)
|
||||
};
|
||||
//NOTICE: need to save tree manually before delete, otherwise will cause problem.
|
||||
//(problem range between two extremes: not-modified, totally-modified)
|
||||
//After saved, it's ok to continue operations on tree!
|
||||
|
||||
#endif
|
|
@ -0,0 +1,186 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVHeap.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:37
|
||||
# Description: achieve functions in IVHeap.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "IVHeap.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//Build an empty heap without any storage (capacity 0, no array).
IVHeap::IVHeap()
{
	this->heap = NULL;
	this->size = 0;
	this->length = 0;
}
|
||||
|
||||
//Build an empty heap with room for _size node pointers.
//NOTE: a plain new[] signals failure by throwing, so the NULL branch below
//is only a defensive leftover.
IVHeap::IVHeap(unsigned _size)
{
	this->size = _size;
	this->length = 0;
	this->heap = new IVNode*[this->size];
	if (this->heap != NULL)
		return;

	this->print("error in IVHeap: Allocation fail!");
	exit(1);
}
|
||||
|
||||
//Return the element at the top of the heap (index 0), or NULL when empty.
IVNode*
IVHeap::getTop() const
{
	if (this->length == 0)
		return NULL;
	return this->heap[0];
}
|
||||
|
||||
unsigned
|
||||
IVHeap::getLen() const
|
||||
{
|
||||
return this->length;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVHeap::getSize() const
|
||||
{
|
||||
return this->size;
|
||||
}
|
||||
|
||||
bool
|
||||
IVHeap::isEmpty() const
|
||||
{
|
||||
return this->length == 0;
|
||||
}
|
||||
|
||||
bool
|
||||
IVHeap::insert(IVNode* _np)
|
||||
{
|
||||
if (this->length == this->size) //when full, reallocate
|
||||
{
|
||||
this->heap = (IVNode**)realloc(this->heap, 2 * this->size * sizeof(IVNode*));
|
||||
if (this->heap == NULL)
|
||||
{
|
||||
print("error in isert: Reallocation fail!");
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
this->npmap = (struct Map*)realloc(this->npmap, 2 * this->size * sizeof(struct Map));
|
||||
if(this->npmap == NULL)
|
||||
{
|
||||
print("error in insert: Reallocation fail!");
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
this->size = 2 * this->size;
|
||||
}
|
||||
unsigned i = this->length, j;
|
||||
while (i != 0)
|
||||
{
|
||||
j = (i - 1) / 2;
|
||||
if (_np->getRank() >= this->heap[j]->getRank())
|
||||
break;
|
||||
heap[i] = heap[j];
|
||||
//this->npmap[k].pos = i; //adjust the position
|
||||
i = j;
|
||||
}
|
||||
this->heap[i] = _np;
|
||||
this->length++;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVHeap::remove()
|
||||
{
|
||||
if (this->length == 0)
|
||||
{
|
||||
print("error in remove: remove from empty heap!");
|
||||
return false;
|
||||
}
|
||||
//Node* tp = this->heap[0];
|
||||
this->length--;
|
||||
if (this->length == 0)
|
||||
return true;
|
||||
IVNode* xp = this->heap[this->length];
|
||||
unsigned i = 0, j = 1;
|
||||
while (j < this->length)
|
||||
{
|
||||
if (j < this->length - 1 && this->heap[j]->getRank() > this->heap[j + 1]->getRank())
|
||||
j++;
|
||||
if (xp->getRank() <= this->heap[j]->getRank())
|
||||
break;
|
||||
this->heap[i] = this->heap[j];
|
||||
i = j;
|
||||
j = 2 * i + 1;
|
||||
}
|
||||
this->heap[i] = xp;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVHeap::modify(IVNode* _np, bool _flag) //control direction
|
||||
{
|
||||
//search and adjust
|
||||
unsigned i, j;
|
||||
for (i = 0; i < this->length; ++i)
|
||||
if (this->heap[i] == _np)
|
||||
break;
|
||||
if (_flag == true) //move up
|
||||
{
|
||||
while (i != 0)
|
||||
{
|
||||
j = (i - 1) / 2;
|
||||
if (_np->getRank() < heap[j]->getRank())
|
||||
{
|
||||
heap[i] = heap[j];
|
||||
heap[j] = _np;
|
||||
i = j;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
else //move down
|
||||
{
|
||||
j = 2 * i + 1;
|
||||
while (j < this->length)
|
||||
{
|
||||
if (j < this->length - 1 && heap[j]->getRank() > heap[j + 1]->getRank())
|
||||
j++;
|
||||
if (heap[j]->getRank() < _np->getRank())
|
||||
{
|
||||
heap[i] = heap[j];
|
||||
heap[j] = _np;
|
||||
i = j;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//Free the pointer array (the nodes themselves are not deleted here) and
//reset the counters.
IVHeap::~IVHeap()
{
	delete[] this->heap;
	this->heap = NULL;
	this->size = 0;
	this->length = 0;
}
|
||||
|
||||
void
|
||||
IVHeap::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVHeap.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:37
|
||||
# Description: set and deal of IVNode*s in memory
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_IVTREE_HEAP_IVHEAP_H
|
||||
#define _KVSTORE_IVTREE_HEAP_IVHEAP_H
|
||||
|
||||
#include "../../../Util/Util.h"
|
||||
#include "../node/IVNode.h"
|
||||
|
||||
/* add, sub, modify: all can be done within O(logn) using adjust-function */
|
||||
//QUERY: when modified, finding right position consumes O(n). How about keeping smallest?
|
||||
//(add O(1), sub O(2n), modify O(n))
|
||||
//TODO: to solve this problem, use another hash: (pointer, pos), to find the right position of
|
||||
//given p in O(lgn) time
|
||||
|
||||
class IVHeap
|
||||
{
|
||||
private:
|
||||
IVNode** heap; //dynamic array
|
||||
unsigned length; //valid elements num
|
||||
unsigned size; //max-size of heap
|
||||
public:
|
||||
IVHeap();
|
||||
IVHeap(unsigned _size);
|
||||
IVNode* getTop() const; //return the top element
|
||||
unsigned getLen() const;
|
||||
unsigned getSize() const;
|
||||
bool isEmpty() const;
|
||||
bool insert(IVNode* _np); //insert and adjust
|
||||
bool remove(); //remove top and adjust
|
||||
bool modify(IVNode* _np, bool _flag); //searech modified element and adjust
|
||||
~IVHeap();
|
||||
void print(std::string s); //DEBUG
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,293 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVIntlNode.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:40
|
||||
# Description: achieve functions in IVIntlNode.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "IVIntlNode.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/*
|
||||
void
|
||||
IVIntlNode::AllocChilds()
|
||||
{
|
||||
childs = (Node**)malloc(sizeof(Node*) * MAX_CHILD_NUM);
|
||||
}
|
||||
*/
|
||||
|
||||
//Create an internal node with every child slot set to NULL.
//FIX: childs is declared with MAX_CHILD_NUM + 1 slots, but only MAX_CHILD_NUM
//were cleared; sizeof(childs) covers the whole array.
IVIntlNode::IVIntlNode()
{
	memset(childs, 0, sizeof(childs));
	//this->AllocChilds();
}
|
||||
|
||||
//Create an internal node with every child slot set to NULL.
//isVirtual is not used here; the base-class default constructor runs automatically.
//FIX: childs is declared with MAX_CHILD_NUM + 1 slots, but only MAX_CHILD_NUM
//were cleared; sizeof(childs) covers the whole array.
IVIntlNode::IVIntlNode(bool isVirtual)
{
	memset(childs, 0, sizeof(childs));
	//this->AllocChilds();
}
|
||||
|
||||
/*
|
||||
IVIntlNode::IntlNode(Storage* TSM) //QUERY
|
||||
{
|
||||
TSM->readNode(this, Storage::OVER);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
IVIntlNode::Virtual()
|
||||
{
|
||||
//this->FreeKeys();
|
||||
this->release();
|
||||
this->delMem();
|
||||
}
|
||||
|
||||
void
|
||||
IVIntlNode::Normal()
|
||||
{
|
||||
this->AllocKeys();
|
||||
this->setMem();
|
||||
}
|
||||
|
||||
//Return the child stored at _index, or NULL when _index is outside [0, num].
//(num keys means num+1 children.)
IVNode*
IVIntlNode::getChild(int _index) const
{
	const int key_num = this->getNum();
	if (_index >= 0 && _index <= key_num)
		return childs[_index];
	return NULL;
}
|
||||
|
||||
bool
|
||||
IVIntlNode::setChild(IVNode* _child, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in setChild: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
this->childs[_index] = _child;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVIntlNode::addChild(IVNode* _child, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num + 1)
|
||||
{
|
||||
print(string("error in addChild: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for (i = num; i >= _index; --i) //DEBUG: right bounder!!!
|
||||
childs[i + 1] = childs[i];
|
||||
childs[_index] = _child;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVIntlNode::subChild(int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in subchild: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for (i = _index; i < num; ++i) //DEBUG: right bounder!!!
|
||||
childs[i] = childs[i + 1];
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVIntlNode::getSize() const
|
||||
{
|
||||
//unsigned sum = INTL_SIZE, num = this->getNum(), i;
|
||||
//return sum;
|
||||
return INTL_SIZE;
|
||||
}
|
||||
|
||||
//Split this internal node. Keys/children from MIN_CHILD_NUM on move to a new
//right sibling, key[MIN_KEY_NUM] moves up into _father at _index, and the new
//sibling becomes _father's child at _index + 1. This node keeps MIN_KEY_NUM
//keys. All three nodes are marked dirty. Returns the new right sibling.
IVNode*
IVIntlNode::split(IVNode* _father, int _index)
{
	const int total = this->getNum();
	IVNode* right = new IVIntlNode;	//right child
	right->setHeight(this->getHeight());

	int src = MIN_CHILD_NUM;
	int dst = 0;
	while (src < total)
	{
		right->addKey(this->keys[src], dst);
		right->addChild(this->childs[src], dst);
		right->addNum();
		++src;
		++dst;
	}
	//one more child than keys
	right->addChild(this->childs[src], dst);

	//the separator key goes up to the father
	int tp = this->keys[MIN_KEY_NUM];
	this->setNum(MIN_KEY_NUM);
	_father->addKey(tp, _index);
	_father->addChild(right, _index + 1);	//DEBUG(check the index)
	_father->addNum();

	_father->setDirty();
	right->setDirty();
	this->setDirty();
	return right;
}
|
||||
|
||||
//Refill this node, which is child _index of _father, from a neighbour.
//Strategy, in order of preference:
//  case 2: borrow one key/child from the right neighbour (it has > MIN_KEY_NUM keys)
//  case 4: borrow one key/child from the left neighbour  (it has > MIN_KEY_NUM keys)
//  case 1: merge the right neighbour into this node
//  case 3: merge the left neighbour into this node
//Returns the neighbour emptied by a merge (cases 1 and 3; its num is set to 0
//and it is not deleted here), otherwise NULL.
//
//FIX: p used to be uninitialized and was dereferenced by p->setDirty() when
//there was no neighbour at all (the "Invalid case" path). It now starts as
//NULL and is only marked dirty when a neighbour was chosen.
IVNode*
IVIntlNode::coalesce(IVNode* _father, int _index)
{
	int i, j = _father->getNum(), k = 0;	//BETTER: unsigned?
	IVNode* p = NULL;
	int ccase = 0;

	if (_index < j)	//the right neighbor
	{
		p = _father->getChild(_index + 1);
		k = p->getNum();
		if ((unsigned)k > MIN_KEY_NUM)
			ccase = 2;
		else	//==MIN_KEY_NUM
			ccase = 1;
	}
	if (_index > 0)	//the left neighbor
	{
		IVNode* tp = _father->getChild(_index - 1);
		unsigned tk = tp->getNum();
		if (ccase < 2)
		{
			if (ccase == 0)
				ccase = 3;
			if (tk > MIN_KEY_NUM)
				ccase = 4;
		}
		if (ccase > 2)
		{
			p = tp;
			k = tk;
		}
	}

	unsigned tmp = 0;
	switch (ccase)
	{
	case 1:	//union right to this
		//the separator key comes down from the father first
		this->addKey(_father->getKey(_index), this->getNum());
		this->addNum();
		for (i = 0; i < k; ++i)
		{
			this->addKey(p->getKey(i), this->getNum());
			this->addChild(p->getChild(i), this->getNum());
			this->addNum();
		}
		this->setChild(p->getChild(i), this->getNum());
		_father->subKey(_index);
		_father->subChild(_index + 1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 2:	//move one from right
		this->addKey(_father->getKey(_index), this->getNum());
		_father->setKey(p->getKey(0), _index);
		p->subKey(0);
		this->addChild(p->getChild(0), this->getNum() + 1);
		p->subChild(0);
		this->addNum();
		p->subNum();
		break;
	case 3:	//union left to this
		this->addKey(_father->getKey(_index - 1), 0);
		this->addNum();
		for (i = k; i > 0; --i)
		{
			int t = i - 1;
			this->addKey(p->getKey(t), 0);
			this->addChild(p->getChild(i), 0);
			this->addNum();
		}
		this->addChild(p->getChild(0), 0);
		_father->subKey(_index - 1);
		_father->subChild(_index - 1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 4:	//move one from left
		tmp = p->getKey(k - 1);
		p->subKey(k - 1);
		this->addKey(_father->getKey(_index - 1), 0);
		_father->setKey(tmp, _index - 1);
		this->addChild(p->getChild(k), 0);
		p->subChild(k);
		this->addNum();
		p->subNum();
		break;
	default:
		print("error in coalesce: Invalid case!");
		//printf("error in coalesce: Invalid case!");
		break;
	}

	_father->setDirty();
	if (p != NULL)	//no neighbour was touched in the invalid case
		p->setDirty();
	this->setDirty();

	if (ccase == 1 || ccase == 3)
		return p;
	else
		return NULL;
}
|
||||
|
||||
void
|
||||
IVIntlNode::release()
|
||||
{
|
||||
if (!this->inMem())
|
||||
return;
|
||||
//unsigned num = this->getNum();
|
||||
delete[] keys; //this will release all!!!
|
||||
}
|
||||
|
||||
//Destructor: frees the key storage through release(); children are not deleted here.
IVIntlNode::~IVIntlNode()
{
	this->release();
}
|
||||
|
||||
void
|
||||
IVIntlNode::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
int num = this->getNum();
|
||||
fputs(Util::showtime().c_str(), Util::debug_kvstore);
|
||||
fputs("Class IVIntlNode\n", Util::debug_kvstore);
|
||||
fputs("Message: ", Util::debug_kvstore);
|
||||
fputs(s.c_str(), Util::debug_kvstore);
|
||||
fputs("\n", Util::debug_kvstore);
|
||||
if (s == "node" || s == "NODE")
|
||||
{
|
||||
fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
|
||||
/*
|
||||
int i;
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
if (s == "node")
|
||||
this->keys[i].print("bstr");
|
||||
else
|
||||
this->keys[i].print("BSTR");
|
||||
}
|
||||
*/
|
||||
}
|
||||
else if (s == "check node")
|
||||
{
|
||||
//TODO(check node, if satisfy B+ definition)
|
||||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVIntlNode.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:40
|
||||
# Description: the internal-node of a B+ tree
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_IVTREE_NODE_IVINTLNODE_H
|
||||
#define _KVSTORE_IVTREE_NODE_IVINTLNODE_H
|
||||
|
||||
#include "IVNode.h"
|
||||
|
||||
class IVIntlNode : public IVNode
|
||||
{
|
||||
protected:
|
||||
IVNode* childs[MAX_CHILD_NUM + 1];
|
||||
//Node** childs;
|
||||
//void AllocChilds();
|
||||
public:
|
||||
IVIntlNode();
|
||||
IVIntlNode(bool isVirtual);
|
||||
//IntlNode(Storage* TSM);
|
||||
void Virtual();
|
||||
void Normal();
|
||||
IVNode* getChild(int _index) const;
|
||||
bool setChild(IVNode* _child, int _index);
|
||||
bool addChild(IVNode* _child, int _index);
|
||||
bool subChild(int _index);
|
||||
unsigned getSize() const;
|
||||
IVNode* split(IVNode* _father, int _index);
|
||||
IVNode* coalesce(IVNode* _father, int _index);
|
||||
void release();
|
||||
~IVIntlNode();
|
||||
void print(std::string s); //DEBUG
|
||||
/*non-sense functions: polymorphic
|
||||
Node* getPrev() const;
|
||||
Node* getNext() const;
|
||||
const Bstr* getValue(int _index) const;
|
||||
bool setValue(const Bstr* _value, int _index);
|
||||
bool addValue(const Bstr* _value, int _index);
|
||||
bool subValue(int _index);
|
||||
void setPrev(Node* _prev);
|
||||
void setNext(Node* _next);
|
||||
*/
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,538 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVLeafNode.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:40
|
||||
# Description: achieve functions in IVLeafNode.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "IVLeafNode.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
IVLeafNode::AllocValues()
|
||||
{
|
||||
values = new Bstr[MAX_KEY_NUM];
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
IVLeafNode::FreeValues()
|
||||
{
|
||||
delete[] values;
|
||||
}
|
||||
*/
|
||||
|
||||
//Create an in-memory leaf: set the leaf flag, clear the list links and
//allocate the value array.
IVLeafNode::IVLeafNode()
{
	this->flag |= NF_IL;	//leaf flag
	this->prev = NULL;
	this->next = NULL;
	this->AllocValues();
}
|
||||
|
||||
//Create a leaf: set the leaf flag and clear the list links; the value array
//is allocated only when the node is not virtual.
IVLeafNode::IVLeafNode(bool isVirtual)
{
	this->flag |= NF_IL;
	this->prev = NULL;
	this->next = NULL;
	if (isVirtual)
		return;
	this->AllocValues();
}
|
||||
|
||||
/*
|
||||
IVLeafNode::LeafNode(Storage* TSM)
|
||||
{
|
||||
AllocValues();
|
||||
TSM->readNode(this, Storage::OVER);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
IVLeafNode::Virtual()
|
||||
{
|
||||
//this->FreeKeys();
|
||||
//this->FreeValues();
|
||||
this->release();
|
||||
this->delMem();
|
||||
}
|
||||
|
||||
void
|
||||
IVLeafNode::Normal()
|
||||
{
|
||||
this->AllocKeys();
|
||||
this->AllocValues();
|
||||
this->setMem();
|
||||
}
|
||||
|
||||
//Previous leaf in the leaf list (NULL is the initial value).
IVNode*
IVLeafNode::getPrev() const
{
	return this->prev;
}
|
||||
|
||||
//Next leaf in the leaf list (NULL is the initial value).
IVNode*
IVLeafNode::getNext() const
{
	return this->next;
}
|
||||
|
||||
//Return a pointer to the value slot at _index, or NULL when _index is
//outside [0, num).
const Bstr*
IVLeafNode::getValue(int _index) const
{
	const int key_num = this->getNum();
	if (_index >= 0 && _index < key_num)
		return this->values + _index;
	return NULL;
}
|
||||
|
||||
bool
|
||||
IVLeafNode::setValue(const Bstr* _value, int _index, bool _ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
this->values[_index].release(); //NOTICE: only used in modify
|
||||
|
||||
if(_ifcopy)
|
||||
{
|
||||
this->values[_index].copy(_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->values[_index] = *_value;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
//print(string("error in getValue: Invalid index ") + Util::int2string(_index));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//read long list
|
||||
if(this->values[_index].isBstrLongList())
|
||||
{
|
||||
#ifdef DEBUG_VLIST
|
||||
cout<<"this is a vlist in get()"<<endl;
|
||||
#endif
|
||||
unsigned block_num = this->values[_index].getLen();
|
||||
_vlist->readValue(block_num, _str, _len);
|
||||
}
|
||||
else
|
||||
{
|
||||
_str = this->values[_index].getStr();
|
||||
_len = this->values[_index].getLen();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
if(this->values[_index].isBstrLongList())
|
||||
{
|
||||
#ifdef DEBUG_VLIST
|
||||
cout<<"this is a vlist in set()"<<endl;
|
||||
#endif
|
||||
unsigned block_num = this->values[_index].getLen();
|
||||
_vlist->removeValue(block_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->values[_index].release(); //NOTICE: only used in modify
|
||||
}
|
||||
|
||||
//DEBUG: we do not need to copy here
|
||||
//we just need to ensure that the pointer's memory is not released
|
||||
|
||||
//if (ifcopy)
|
||||
//{
|
||||
//this->values[_index].copy(_value);
|
||||
//}
|
||||
//else
|
||||
//{
|
||||
//this->values[_index] = *_value;
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
unsigned block_num = _vlist->writeValue(_str, _len);
|
||||
this->values[_index].setStr(NULL);
|
||||
this->values[_index].setLen(block_num);
|
||||
//NOTICE: we need to free the long list value
|
||||
delete[] _str;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
}
|
||||
//}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = num - 1; i >= _index; --i)
|
||||
this->values[i + 1] = this->values[i];
|
||||
|
||||
//if (ifcopy)
|
||||
//this->values[_index].copy(_value);
|
||||
//else
|
||||
//this->values[_index] = *_value;
|
||||
|
||||
if(VList::isLongList(_len))
|
||||
{
|
||||
#ifdef DEBUG_VLIST
|
||||
cout<<"this is a vlist in add()"<<endl;
|
||||
#endif
|
||||
unsigned block_num = _vlist->writeValue(_str, _len);
|
||||
this->values[_index].setStr(NULL);
|
||||
this->values[_index].setLen(block_num);
|
||||
//NOTICE: we need to free the long list value
|
||||
delete[] _str;
|
||||
#ifdef DEBUG_VLIST
|
||||
//cout<<"to check vlist: "<<this->values[_index].getLen()<<endl;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
this->values[_index].setStr(_str);
|
||||
this->values[_index].setLen(_len);
|
||||
}
|
||||
//this->values[_index].setStr(_str);
|
||||
//this->values[_index].setLen(_len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in subValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
if(this->values[_index].isBstrLongList())
|
||||
{
|
||||
unsigned block_num = this->values[_index].getLen();
|
||||
_vlist->removeValue(block_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ifdel)
|
||||
{
|
||||
values[_index].release();
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = _index; i < num - 1; ++i)
|
||||
this->values[i] = this->values[i + 1];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for (i = num - 1; i >= _index; --i)
|
||||
this->values[i + 1] = this->values[i];
|
||||
|
||||
if (ifcopy)
|
||||
this->values[_index].copy(_value);
|
||||
else
|
||||
this->values[_index] = *_value;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVLeafNode::subValue(int _index, bool ifdel)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in subValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
|
||||
int i;
|
||||
if (ifdel)
|
||||
values[_index].release();
|
||||
for (i = _index; i < num - 1; ++i)
|
||||
this->values[i] = this->values[i + 1];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
IVLeafNode::setPrev(IVNode* _prev)
|
||||
{
|
||||
this->prev = _prev;
|
||||
}
|
||||
|
||||
void
|
||||
IVLeafNode::setNext(IVNode* _next)
|
||||
{
|
||||
this->next = _next;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVLeafNode::getSize() const
|
||||
{
|
||||
unsigned sum = LEAF_SIZE, num = this->getNum(), i;
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
sum += values[i].getLen();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
//Split this leaf. Keys/values from MIN_KEY_NUM on move to a new right sibling,
//which is linked into the leaf list directly after this node. key[MIN_KEY_NUM]
//is added to _father at _index and the new sibling becomes _father's child at
//_index + 1. This node keeps MIN_KEY_NUM entries. All three nodes are marked
//dirty. Returns the new right sibling.
//
//FIX: the old successor's prev link still pointed at this node after the
//split; it is now redirected to the new sibling so the doubly linked leaf
//list stays consistent (coalesce keeps the prev links up to date in the same way).
IVNode*
IVLeafNode::split(IVNode* _father, int _index)
{
	int num = this->getNum();
	IVNode* p = new IVLeafNode;	//right child
	p->setHeight(this->getHeight());	//NOTICE: assign height for new node

	//insert p between this node and its old successor
	IVNode* old_next = this->next;
	p->setNext(old_next);
	if (old_next != NULL)
		old_next->setPrev(p);
	this->setNext(p);
	p->setPrev(this);

	int i, k;
	for (i = MIN_KEY_NUM, k = 0; i < num; ++i, ++k)
	{
		p->addKey(this->keys[i], k);
		p->addValue(this->values + i, k);
		p->addNum();
	}

	//the first key of the right sibling is the separator stored in the father
	int tp = this->keys[MIN_KEY_NUM];
	this->setNum(MIN_KEY_NUM);
	_father->addKey(tp, _index);
	_father->addChild(p, _index + 1);	//DEBUG(check the index)
	_father->addNum();

	_father->setDirty();
	p->setDirty();
	this->setDirty();
	return p;
}
|
||||
|
||||
//Refill this leaf from a neighbour under the same father, either by merging
//a whole neighbour into this node or by moving a single entry over.
//  _father: parent node of this leaf
//  _index:  position of this node among _father's children
//Cases (chosen from the neighbours' key counts):
//  1: merge the right neighbour into this    2: move one entry from the right
//  3: merge the left neighbour into this     4: move one entry from the left
//Returns the emptied neighbour for cases 1 and 3 (it is not deleted here),
//NULL otherwise. If no neighbour exists an error is logged and NULL returned.
IVNode*
IVLeafNode::coalesce(IVNode* _father, int _index)
{
    int i, j = _father->getNum(), k = 0; //BETTER: unsigned?
    IVNode* p = NULL;
    int ccase = 0;

    if (_index < j) //the right neighbor
    {
        p = _father->getChild(_index + 1);
        k = p->getNum();
        if ((unsigned)k > MIN_KEY_NUM)
            ccase = 2;
        else //==MIN_KEY_NUM
            ccase = 1;
    }
    if (_index > 0) //the left neighbor
    {
        IVNode* tp = _father->getChild(_index - 1);
        unsigned tk = tp->getNum();
        if (ccase < 2)
        {
            if (ccase == 0)
                ccase = 3;
            if (tk > MIN_KEY_NUM)
                ccase = 4;
        }
        if (ccase > 2)
        {
            p = tp;
            k = tk;
        }
    }

    unsigned tmp = 0;
    switch (ccase)
    {
    case 1: //union right to this
        for (i = 0; i < k; ++i)
        {
            this->addKey(p->getKey(i), this->getNum());
            this->addValue(p->getValue(i), this->getNum());
            this->addNum();
        }
        _father->subKey(_index);
        _father->subChild(_index + 1);
        _father->subNum();
        this->next = p->getNext();
        if (this->next != NULL)
            this->next->setPrev(this);
        p->setNum(0); //NOTICE: adjust num before delete!
        break;
    case 2: //move one from right
        this->addKey(p->getKey(0), this->getNum());
        //the right neighbour's second key becomes its first, hence the new separator
        _father->setKey(p->getKey(1), _index);
        p->subKey(0);
        this->addValue(p->getValue(0), this->getNum());
        p->subValue(0);
        this->addNum();
        p->subNum();
        break;
    case 3: //union left to this
        //BETTER: move all keys/etc one time
        for (i = k; i > 0; --i)
        {
            int t = i - 1;
            this->addKey(p->getKey(t), 0);
            this->addValue(p->getValue(t), 0);
            this->addNum();
        }
        _father->subKey(_index - 1);
        _father->subChild(_index - 1);
        _father->subNum();
        this->prev = p->getPrev();
        if (this->prev != NULL) //else: leaves-list
            this->prev->setNext(this);
        p->setNum(0);
        break;
    case 4: //move one from left
        tmp = p->getKey(k - 1);
        p->subKey(k - 1);
        this->addKey(tmp, 0);
        _father->setKey(tmp, _index - 1);
        //subKey does not change the key count, so slot k - 1 is still addressable
        this->addValue(p->getValue(k - 1), 0);
        p->subValue(k - 1);
        this->addNum();
        p->subNum();
        break;
    default:
        print("error in coalesce: Invalid case!");
    }

    _father->setDirty();
    //p is still NULL when no neighbour was found (invalid case)
    if (p != NULL)
        p->setDirty();
    this->setDirty();

    if (ccase == 1 || ccase == 3)
        return p;
    else
        return NULL;
}
|
||||
|
||||
void
|
||||
IVLeafNode::release()
|
||||
{
|
||||
if (!this->inMem())
|
||||
return;
|
||||
unsigned num = this->getNum();
|
||||
/*
|
||||
for(int i = 0; i < num; ++i)
|
||||
{
|
||||
keys[i].release();
|
||||
values[i].release();
|
||||
}
|
||||
*/
|
||||
for (unsigned i = num; i < MAX_KEY_NUM; ++i)
|
||||
{
|
||||
values[i].clear();
|
||||
}
|
||||
delete[] keys;
|
||||
delete[] values;
|
||||
}
|
||||
|
||||
//Destructor: frees the node's arrays through release().
IVLeafNode::~IVLeafNode()
{
    this->release();
}
|
||||
|
||||
void
|
||||
IVLeafNode::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
unsigned num = this->getNum();
|
||||
fputs(Util::showtime().c_str(), Util::debug_kvstore);
|
||||
fputs("Class IVLeafNode\n", Util::debug_kvstore);
|
||||
fputs("Message: ", Util::debug_kvstore);
|
||||
fputs(s.c_str(), Util::debug_kvstore);
|
||||
fputs("\n", Util::debug_kvstore);
|
||||
unsigned i;
|
||||
if (s == "NODE")
|
||||
{
|
||||
fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
|
||||
fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next);
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
//this->keys[i].print("BSTR");
|
||||
this->values[i].print("BSTR");
|
||||
}
|
||||
}
|
||||
else if (s == "node")
|
||||
{
|
||||
fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
|
||||
fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next);
|
||||
}
|
||||
else if (s == "check node")
|
||||
{
|
||||
//check the node, if satisfy B+ definition
|
||||
bool flag = true;
|
||||
if (num < MIN_KEY_NUM || num > MAX_KEY_NUM)
|
||||
flag = false;
|
||||
if (flag)
|
||||
{
|
||||
for (i = 1; i < num; ++i)
|
||||
{
|
||||
if (keys[i] > keys[i - 1])
|
||||
continue;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (i < num)
|
||||
flag = false;
|
||||
}
|
||||
this->print("node");
|
||||
if (flag)
|
||||
fprintf(Util::debug_kvstore, "This node is good\n");
|
||||
else
|
||||
fprintf(Util::debug_kvstore, "This node is bad\n");
|
||||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVLeafNode.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:39
|
||||
# Description: the leaf-node of a B+ tree
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_IVTREE_NODE_IVLEAFNODE_H
|
||||
#define _KVSTORE_IVTREE_NODE_IVLEAFNODE_H
|
||||
|
||||
#include "IVNode.h"
|
||||
|
||||
class IVLeafNode : public IVNode
|
||||
{
|
||||
protected:
|
||||
IVNode* prev; //LeafNode
|
||||
IVNode* next;
|
||||
Bstr* values;
|
||||
void AllocValues();
|
||||
//void FreeValues();
|
||||
public:
|
||||
IVLeafNode();
|
||||
IVLeafNode(bool isVirtual);
|
||||
//LeafNode(Storage* TSM);
|
||||
void Virtual();
|
||||
void Normal();
|
||||
IVNode* getPrev() const;
|
||||
IVNode* getNext() const;
|
||||
const Bstr* getValue(int _index) const;
|
||||
bool setValue(const Bstr* _value, int _index, bool _ifcopy=false);
|
||||
|
||||
bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const;
|
||||
bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false);
|
||||
|
||||
bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false);
|
||||
bool subValue(VList* _vlist, int _index, bool ifdel = false);
|
||||
bool addValue(const Bstr* _val, int _index, bool ifcopy = false);
|
||||
bool subValue(int _index, bool ifdel = false);
|
||||
|
||||
void setPrev(IVNode* _prev);
|
||||
void setNext(IVNode* _next);
|
||||
unsigned getSize() const;
|
||||
IVNode* split(IVNode* _father, int _index);
|
||||
IVNode* coalesce(IVNode* _father, int _index);
|
||||
void release();
|
||||
~IVLeafNode();
|
||||
void print(std::string s); //DEBUG
|
||||
/*non-sense virtual function
|
||||
Node* getChild(int _index) const;
|
||||
bool addChild(Node* _child, int _index);
|
||||
bool subChild(int _index);
|
||||
*/
|
||||
};
|
||||
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,320 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVNode.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:39
|
||||
# Description: achieve functions in IVNode.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "IVNode.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
IVNode::AllocKeys()
|
||||
{
|
||||
keys = new unsigned[MAX_KEY_NUM];
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
IVNode::FreeKeys()
|
||||
{
|
||||
delete[] keys;
|
||||
}
|
||||
*/
|
||||
|
||||
//Create an in-memory node: store 0, only the in-memory flag set, keys allocated.
IVNode::IVNode()
{
    this->store = 0;
    this->flag = NF_IM;
    this->AllocKeys();
}
|
||||
|
||||
//Create a node that is either virtual (no key array, in-memory flag clear)
//or a normal in-memory node.
//  isVirtual: true to skip allocation; keys is then NULL instead of being
//             left uninitialized
IVNode::IVNode(bool isVirtual)
{
    store = flag = 0;
    keys = NULL;
    if (!isVirtual)
    {
        flag |= NF_IM;
        AllocKeys();
    }
}
|
||||
|
||||
/*
|
||||
IVNode::Node(Storage* TSM)
|
||||
{
|
||||
AllocKeys();
|
||||
TSM->readIVNode(this, Storage::OVER);
|
||||
}
|
||||
*/
|
||||
bool
|
||||
IVNode::isLeaf() const
|
||||
{
|
||||
return this->flag & NF_IL;
|
||||
}
|
||||
|
||||
bool
|
||||
IVNode::isDirty() const
|
||||
{
|
||||
return this->flag & NF_ID;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::setDirty()
|
||||
{
|
||||
this->flag |= NF_ID;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::delDirty()
|
||||
{
|
||||
this->flag &= ~NF_ID;
|
||||
}
|
||||
|
||||
bool
|
||||
IVNode::inMem() const
|
||||
{
|
||||
return this->flag & NF_IM;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::setMem()
|
||||
{
|
||||
this->flag |= NF_IM;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::delMem()
|
||||
{
|
||||
this->flag &= ~NF_IM;
|
||||
}
|
||||
|
||||
/*
|
||||
bool
|
||||
IVNode::isVirtual() const
|
||||
{
|
||||
return this->flag & NF_IV;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::setVirtual()
|
||||
{
|
||||
this->flag |= NF_IV;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::delVirtual()
|
||||
{
|
||||
this->flag &= ~NF_IV;
|
||||
}
|
||||
*/
|
||||
|
||||
unsigned
|
||||
IVNode::getRank() const
|
||||
{
|
||||
return this->flag & NF_RK;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::setRank(unsigned _rank)
|
||||
{
|
||||
this->flag &= ~NF_RK;
|
||||
this->flag |= _rank;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVNode::getHeight() const
|
||||
{
|
||||
return (this->flag & NF_HT) >> 20;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::setHeight(unsigned _h)
|
||||
{
|
||||
this->flag &= ~NF_HT;
|
||||
this->flag |= (_h << 20);
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVNode::getNum() const
|
||||
{
|
||||
return (this->flag & NF_KN) >> 12;
|
||||
}
|
||||
|
||||
bool
|
||||
IVNode::setNum(int _num)
|
||||
{
|
||||
if (_num < 0 || (unsigned)_num > MAX_KEY_NUM)
|
||||
{
|
||||
print(string("error in setNum: Invalid num ") + Util::int2string(_num));
|
||||
return false;
|
||||
}
|
||||
this->flag &= ~NF_KN;
|
||||
this->flag |= (_num << 12);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVNode::addNum()
|
||||
{
|
||||
if (this->getNum() + 1 > MAX_KEY_NUM)
|
||||
{
|
||||
print("error in addNum: Invalid!");
|
||||
return false;
|
||||
}
|
||||
this->flag += (1 << 12);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVNode::subNum()
|
||||
{
|
||||
if (this->getNum() < 1)
|
||||
{
|
||||
print("error in subNum: Invalid!");
|
||||
return false;
|
||||
}
|
||||
this->flag -= (1 << 12);
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVNode::getStore() const
|
||||
{
|
||||
return this->store;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::setStore(unsigned _store)
|
||||
{
|
||||
this->store = _store;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVNode::getFlag() const
|
||||
{
|
||||
return flag;
|
||||
}
|
||||
|
||||
void
|
||||
IVNode::setFlag(unsigned _flag)
|
||||
{
|
||||
this->flag = _flag;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVNode::getKey(int _index) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
//print(string("error in getKey: Invalid index ") + Util::int2string(_index));
|
||||
printf("error in getKey: Invalid index\n");
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
return this->keys[_index];
|
||||
}
|
||||
|
||||
bool
|
||||
IVNode::setKey(unsigned _key, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in setKey: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
keys[_index] = _key;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVNode::addKey(unsigned _key, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addKey: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
//NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!!
|
||||
//however. tree operations ensure that: when node is full, not add but split first!
|
||||
for (i = num - 1; i >= _index; --i)
|
||||
keys[i + 1] = keys[i];
|
||||
keys[_index] = _key;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVNode::subKey(int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in subKey: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for (i = _index; i < num - 1; ++i)
|
||||
keys[i] = keys[i + 1];
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
IVNode::searchKey_less(unsigned _key) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
|
||||
int low = 0, high = num - 1, mid = -1;
|
||||
while (low <= high)
|
||||
{
|
||||
mid = (low + high) / 2;
|
||||
if (this->keys[mid] > _key)
|
||||
{
|
||||
if (low == mid)
|
||||
break;
|
||||
high = mid;
|
||||
}
|
||||
else
|
||||
{
|
||||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
return low;
|
||||
}
|
||||
|
||||
int
|
||||
IVNode::searchKey_equal(unsigned _key) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
// if(bstr == *(p->getKey(i)))
|
||||
// {
|
||||
|
||||
int ret = this->searchKey_less(_key);
|
||||
if (ret > 0 && this->keys[ret - 1] == _key)
|
||||
return ret - 1;
|
||||
else
|
||||
return num;
|
||||
}
|
||||
|
||||
int
|
||||
IVNode::searchKey_lessEqual(unsigned _key) const
|
||||
{
|
||||
//int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
//if(bstr <= *(p->getKey(i)))
|
||||
//break;
|
||||
|
||||
int ret = this->searchKey_less(_key);
|
||||
if (ret > 0 && this->keys[ret - 1] == _key)
|
||||
return ret - 1;
|
||||
else
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVNode.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:38
|
||||
# Description: basic Node class, father of IVIntlNode and IVLeafNode
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_IVTREE_NODE_IVNODE_H
|
||||
#define _KVSTORE_IVTREE_NODE_IVNODE_H
|
||||
|
||||
#include "../../../Util/Util.h"
|
||||
#include "../../../Util/Bstr.h"
|
||||
#include "../../../Util/VList.h"
|
||||
|
||||
class IVNode //abstract basic class
|
||||
{
|
||||
public:
|
||||
static const unsigned DEGREE = 2 * 63; //the degree of B+ tree
|
||||
static const unsigned MAX_CHILD_NUM = DEGREE;
|
||||
static const unsigned MIN_CHILD_NUM = DEGREE >> 1;
|
||||
static const unsigned MAX_KEY_NUM = MAX_CHILD_NUM - 1; //max key-num
|
||||
static const unsigned MIN_KEY_NUM = MIN_CHILD_NUM - 1; //min key-num
|
||||
/* diffrent flags for tree-nodes, 32-bit put rank in low-bits means no need to move*/
|
||||
static const unsigned NF_IL = 0x80000000; //is leaf
|
||||
static const unsigned NF_ID = 0x00080000; //is dirty, in rank-area
|
||||
static const unsigned NF_IM = 0x20000000; //in memory, not virtual
|
||||
//static const unsigned NF_IV = 0x10000000; //is virtual
|
||||
static const unsigned NF_RK = 0x00ffffff; //select-rank, in Storage
|
||||
static const unsigned NF_HT = 0xf00000; //height area in rank
|
||||
static const unsigned NF_KN = 0x07f000; //NOTICE: decided by DEGREE
|
||||
static const unsigned INTL_SIZE = sizeof(int) * MAX_KEY_NUM;
|
||||
static const unsigned LEAF_SIZE = INTL_SIZE + sizeof(Bstr) * MAX_KEY_NUM;
|
||||
protected:
|
||||
unsigned store; //store address, the BLock index
|
||||
unsigned flag; //NF_RK, NF_IL,NF_ID, NF_IV, propety
|
||||
//int num; //totle keys num
|
||||
//Node* father; //point to father-node, which must be IntlNode
|
||||
unsigned* keys;
|
||||
void AllocKeys();
|
||||
//void FreeKeys();
|
||||
public:
|
||||
IVNode();
|
||||
IVNode(bool isVirtual);
|
||||
bool isLeaf() const;
|
||||
bool isDirty() const;
|
||||
void setDirty();
|
||||
void delDirty();
|
||||
bool inMem() const;
|
||||
void setMem();
|
||||
void delMem();
|
||||
//bool isVirtual() const;
|
||||
//void setVirtual();
|
||||
//void delVirtual();
|
||||
unsigned getRank() const;
|
||||
void setRank(unsigned _rank);
|
||||
unsigned getHeight() const;
|
||||
void setHeight(unsigned _h);
|
||||
unsigned getNum() const;
|
||||
bool setNum(int _num);
|
||||
bool addNum();
|
||||
bool subNum();
|
||||
unsigned getStore() const;
|
||||
void setStore(unsigned _store);
|
||||
unsigned getFlag() const;
|
||||
void setFlag(unsigned _flag);
|
||||
unsigned getKey(int _index) const; //need to check the index
|
||||
bool setKey(unsigned _key, int _index);
|
||||
bool addKey(unsigned _key, int _index);
|
||||
bool subKey(int _index);
|
||||
|
||||
//several binary key search utilities
|
||||
int searchKey_less(unsigned _key) const;
|
||||
int searchKey_equal(unsigned _key) const;
|
||||
int searchKey_lessEqual(unsigned _key) const;
|
||||
|
||||
//virtual functions: polymorphic
|
||||
virtual IVNode* getChild(int _index) const { return NULL; };
|
||||
virtual bool setChild(IVNode* _child, int _index) { return true; };
|
||||
virtual bool addChild(IVNode* _child, int _index) { return true; };
|
||||
virtual bool subChild(int _index) { return true; };
|
||||
virtual IVNode* getPrev() const { return NULL; };
|
||||
virtual IVNode* getNext() const { return NULL; };
|
||||
|
||||
virtual const Bstr* getValue(int _index) const { return NULL; };
|
||||
virtual bool setValue(const Bstr* _value, int _index, bool _ifcopy=false) { return true; };
|
||||
virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return NULL; };
|
||||
virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; };
|
||||
|
||||
virtual bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; };
|
||||
virtual bool subValue(VList* _vlist, int _index, bool ifdel = false) { return true; };
|
||||
virtual bool addValue(const Bstr* _val, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool subValue(int _index, bool ifdel = false) { return true; };
|
||||
|
||||
virtual void setPrev(IVNode* _prev) {};
|
||||
virtual void setNext(IVNode* _next) {};
|
||||
|
||||
//pure virtual functions
|
||||
virtual void Virtual() = 0;
|
||||
virtual void Normal() = 0;
|
||||
virtual unsigned getSize() const = 0; //return all memory owned
|
||||
virtual IVNode* split(IVNode* _father, int _index) = 0;
|
||||
virtual IVNode* coalesce(IVNode* _father, int _index) = 0;
|
||||
virtual void release() = 0; //release the node, only remain necessary information
|
||||
virtual ~IVNode() {};
|
||||
virtual void print(std::string s) = 0; //DEBUG(print the Node)
|
||||
};
|
||||
|
||||
/*NOTICE(operations in release())
|
||||
*To save memory, we can only remain store and flag(childs added for Leaf).
|
||||
*However, in this way childs'pointers is ok to change, use Node** or Node*& is also nonsense
|
||||
*because the pointer variable may die.
|
||||
*Another way is only to release dynamic memory, and store the whole, read the Bstr only to
|
||||
*build. In this way nodes'pointer doesn't change, and operation is simplified, while memory
|
||||
*is consumed a bit more. Because Bstrs consume the most memory, and memory-disk swapping is
|
||||
*the most time-consuming thing, it seems to be a better way.
|
||||
*WARN:when a node is in memory and not deleted, its basic content is always being! If nodes are
|
||||
*really too many, this will cause disaster because we can't swap them out until tree is closed!
|
||||
*To solve this problem, there should be two types of release-function: one to release Bstr, one
|
||||
*to release the whole(pointer is invalid and rebuild problem)
|
||||
*/
|
||||
|
||||
#endif
|
|
@ -0,0 +1,738 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVStorage.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:43
|
||||
# Description: achieve functions in IVStorage.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "IVStorage.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//Default constructor: no file is opened and no free list or heap is created.
//Sizes are derived from Util::MAX_BUFFER_SIZE; every pointer member,
//including treeheight, starts out as NULL.
IVStorage::IVStorage()
{ //not use ../logs/, notice the location of program
    cur_block_num = SET_BLOCK_NUM;
    filepath = "";
    freelist = NULL;
    treefp = NULL;
    treeheight = NULL;
    max_buffer_size = Util::MAX_BUFFER_SIZE;
    heap_size = max_buffer_size / IVNode::INTL_SIZE;
    freemem = max_buffer_size;
    minheap = NULL;
    this->value_list = NULL;
}
|
||||
|
||||
//Open or create the tree file and build the in-memory list of free blocks.
//  _filepath:    path of the tree file
//  _mode:        "build" (create/truncate, "w+b") or "open" (existing, "r+b")
//  _height:      where the tree height is kept; filled from the file in "open" mode
//  _buffer_size: memory budget, also used to size the heap
//  _vlist:       value list remembered in value_list
//On an invalid mode or a failed fopen an error is logged and the constructor
//returns early; all members are given defined values before that can happen.
//In "open" mode the file position is left at Address(rootnum) on return.
IVStorage::IVStorage(string& _filepath, string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist)
{
    cur_block_num = SET_BLOCK_NUM; //initialize
    this->filepath = _filepath;

    //define every member up front so the early returns below never leave
    //uninitialized pointers or sizes behind
    this->treefp = NULL;
    this->freelist = NULL;
    this->minheap = NULL;
    this->treeheight = _height; //originally set to 0
    this->value_list = _vlist;
    this->max_buffer_size = _buffer_size;
    this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE;
    this->freemem = this->max_buffer_size;

    if (_mode == string("build"))
        treefp = fopen(_filepath.c_str(), "w+b");
    else if (_mode == string("open"))
        treefp = fopen(_filepath.c_str(), "r+b");
    else
    {
        print(string("error in IVStorage: Invalid mode ") + _mode);
        return;
    }
    if (treefp == NULL)
    {
        print(string("error in IVStorage: Open error ") + _filepath);
        return;
    }

    this->freelist = new BlockInfo; //null-head
    unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE
    BlockInfo* bp;
    if (_mode == "build")
    { //write basic information
        i = 0;
        fwrite(&i, sizeof(unsigned), 1, this->treefp); //height
        fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum
        fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num
        fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
        bp = this->freelist;
        j = cur_block_num / 8;
        //one zero byte per 8 blocks; every block starts out free
        for (i = 0; i < j; ++i)
        {
            fputc(0, this->treefp);
            for (k = 0; k < 8; ++k)
            {
                bp->next = new BlockInfo(i * 8 + k + 1, NULL);
                bp = bp->next;
            }
        }
    }
    else //_mode == "open"
    {
        //read basic information
        unsigned rootnum;
        char c;
        fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
        fread(&rootnum, sizeof(unsigned), 1, this->treefp);
        fread(&cur_block_num, sizeof(unsigned), 1, this->treefp);
        fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
        bp = this->freelist;
        j = cur_block_num / 8;
        for (i = 0; i < j; ++i)
        {
            c = fgetc(treefp);
            for (k = 0; k < 8; ++k)
            {
                //a zero bit marks a free block; bit k of byte i maps to block i*8 + 7 - k + 1
                if ((c & (1 << k)) == 0)
                {
                    bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL);
                    bp = bp->next;
                }
            }
        }
        fseek(treefp, Address(rootnum), SEEK_SET);
        //treefp is now ahead of root-block
    }

    this->minheap = new IVHeap(this->heap_size);
}
|
||||
|
||||
bool
|
||||
IVStorage::preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail) //pre-read and build whole tree
|
||||
{ //set root(in memory) and leaves_head
|
||||
//TODO: false when exceed memory
|
||||
_leaves_tail = _leaves_head = _root = NULL;
|
||||
if (ftell(this->treefp) == 0) //root is null
|
||||
{
|
||||
return true;
|
||||
}
|
||||
unsigned next, store, j, pos = 0;
|
||||
unsigned h = *this->treeheight;
|
||||
IVNode* p;
|
||||
//read root node
|
||||
this->createNode(p);
|
||||
_root = p;
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
//use stack to achieve
|
||||
long address[h]; //current address
|
||||
unsigned used[h]; //used child num
|
||||
unsigned total[h]; //total child num
|
||||
unsigned block[h]; //next block num
|
||||
IVNode* nodes[h];
|
||||
address[pos] = ftell(treefp);
|
||||
used[pos] = 0;
|
||||
total[pos] = p->getNum() + 1;
|
||||
block[pos] = next;
|
||||
nodes[pos] = p;
|
||||
pos++;
|
||||
IVNode* prev = NULL;
|
||||
while (pos > 0)
|
||||
{
|
||||
j = pos - 1;
|
||||
if (nodes[j]->isLeaf() || used[j] == total[j]) //LeafNode or ready IntlNode
|
||||
{
|
||||
if (nodes[j]->isLeaf())
|
||||
{
|
||||
if (prev != NULL)
|
||||
{
|
||||
prev->setNext(nodes[j]);
|
||||
nodes[j]->setPrev(prev);
|
||||
}
|
||||
prev = nodes[j];
|
||||
}
|
||||
pos--;
|
||||
continue;
|
||||
}
|
||||
fseek(this->treefp, address[j], SEEK_SET);
|
||||
fread(&store, sizeof(unsigned), 1, treefp);
|
||||
this->ReadAlign(block + j);
|
||||
address[j] = ftell(treefp);
|
||||
fseek(treefp, Address(store), SEEK_SET);
|
||||
this->createNode(p);
|
||||
nodes[j]->setChild(p, used[j]);
|
||||
used[j]++;
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
address[pos] = ftell(treefp);
|
||||
used[pos] = 0;
|
||||
total[pos] = p->getNum() + 1;
|
||||
block[pos] = next;
|
||||
nodes[pos] = p;
|
||||
pos++;
|
||||
}
|
||||
//set leaves and read root, which is always keeped in-mem
|
||||
p = _root;
|
||||
while (!p->isLeaf())
|
||||
{
|
||||
p = p->getChild(0);
|
||||
}
|
||||
_leaves_head = p;
|
||||
p = _root;
|
||||
while (!p->isLeaf())
|
||||
{
|
||||
p = p->getChild(p->getNum());
|
||||
}
|
||||
_leaves_tail = p;
|
||||
long long memory = 0;
|
||||
this->readNode(_root, &memory);
|
||||
this->request(memory);
|
||||
return true;
|
||||
}
|
||||
|
||||
long //8-byte in 64-bit machine
|
||||
IVStorage::Address(unsigned _blocknum) const //BETTER: inline function
|
||||
{
|
||||
if (_blocknum == 0)
|
||||
return 0;
|
||||
else if (_blocknum > cur_block_num)
|
||||
{
|
||||
//print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum));
|
||||
return -1; //address should be non-negative
|
||||
}
|
||||
//NOTICE: here should explictly use long
|
||||
return (long)(this->SuperNum + _blocknum - 1) * (long)BLOCK_SIZE;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVStorage::Blocknum(long address) const
|
||||
{
|
||||
return (address / BLOCK_SIZE) + 1 - this->SuperNum;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IVStorage::AllocBlock()
|
||||
{
|
||||
BlockInfo* p = this->freelist->next;
|
||||
if (p == NULL)
|
||||
{
|
||||
for (unsigned i = 0; i < SET_BLOCK_INC; ++i)
|
||||
{
|
||||
cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM
|
||||
this->FreeBlock(cur_block_num);
|
||||
}
|
||||
p = this->freelist->next;
|
||||
}
|
||||
unsigned t = p->num;
|
||||
this->freelist->next = p->next;
|
||||
delete p;
|
||||
return t;
|
||||
}
|
||||
|
||||
void
|
||||
IVStorage::FreeBlock(unsigned _blocknum)
|
||||
{ //QUERY: head-sub and tail-add will be better?
|
||||
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
|
||||
this->freelist->next = bp;
|
||||
}
|
||||
|
||||
//NOTICE: all reads are aligned to 4 bytes(including a string)
|
||||
//a string may span several blocks
|
||||
|
||||
void
|
||||
IVStorage::ReadAlign(unsigned* _next)
|
||||
{
|
||||
if (ftell(treefp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
fseek(treefp, Address(*_next), SEEK_SET);
|
||||
fread(_next, sizeof(unsigned), 1, treefp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
|
||||
{
|
||||
if (ftell(treefp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
fseek(treefp, Address(*_curnum), SEEK_SET);
|
||||
if (_SpecialBlock)
|
||||
{
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
_SpecialBlock = false;
|
||||
}
|
||||
fwrite(&blocknum, sizeof(unsigned), 1, treefp);
|
||||
fseek(treefp, Address(blocknum) + 4, SEEK_SET);
|
||||
*_curnum = blocknum;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
IVStorage::readNode(IVNode* _np, long long* _request)
|
||||
{
|
||||
if (_np == NULL || _np->inMem())
|
||||
return false; //can't read or needn't
|
||||
|
||||
fseek(treefp, Address(_np->getStore()), SEEK_SET);
|
||||
bool flag = _np->isLeaf();
|
||||
unsigned next;
|
||||
unsigned i, num = _np->getNum();
|
||||
Bstr bstr;
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
|
||||
//read data, use readBstr...
|
||||
//fread(treefp, "%u", &num);
|
||||
//_np->setNum(num);
|
||||
if (flag)
|
||||
*_request += IVNode::LEAF_SIZE;
|
||||
else
|
||||
*_request += IVNode::INTL_SIZE;
|
||||
_np->Normal();
|
||||
if (!flag)
|
||||
fseek(treefp, 4 * (num + 1), SEEK_CUR);
|
||||
|
||||
//to read all keys
|
||||
//int tmp = -1;
|
||||
unsigned tmp = INVALID;
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
fread(&tmp, sizeof(int), 1, treefp);
|
||||
this->ReadAlign(&next);
|
||||
_np->setKey(tmp, i);
|
||||
}
|
||||
|
||||
if (flag)
|
||||
{
|
||||
//to read all values
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
this->readBstr(&bstr, &next);
|
||||
//if not long list value
|
||||
if(bstr.getStr() != NULL)
|
||||
{
|
||||
*_request += bstr.getLen();
|
||||
}
|
||||
_np->setValue(&bstr, i);
|
||||
}
|
||||
}
|
||||
//_np->setFlag((_np->getFlag() & ~Node::NF_IV & ~Node::NF_ID) | Node::NF_IM);
|
||||
//_np->delVirtual();
|
||||
_np->delDirty();
|
||||
//_np->setMem();
|
||||
this->updateHeap(_np, _np->getRank(), false);
|
||||
bstr.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVStorage::createNode(IVNode*& _np) //cretae virtual nodes, not in-mem
|
||||
{
|
||||
/*
|
||||
if(ftell(this->treefp)== 0) //null root
|
||||
{
|
||||
_np = NULL;
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
unsigned t; //QUERY: maybe next-flag... will be better-storage?
|
||||
bool flag = false; //IntlNode
|
||||
fread(&t, sizeof(unsigned), 1, treefp);
|
||||
if ((t & IVNode::NF_IL) > 0) //WARN: according to setting
|
||||
flag = true; //LeafNode
|
||||
if (flag)
|
||||
{
|
||||
//this->request(sizeof(LeafNode));
|
||||
_np = new IVLeafNode(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
//this->request(sizeof(IntlNode));
|
||||
_np = new IVIntlNode(true);
|
||||
}
|
||||
//fseek(treefp, -4, SEEK_CUR);
|
||||
//_np->setFlag(_np->getFlag() | (t & Node::NF_RK));
|
||||
//_np->setRank(t);
|
||||
_np->setFlag(t);
|
||||
_np->delDirty();
|
||||
_np->delMem();
|
||||
_np->setStore(Blocknum(ftell(treefp) - 4));
|
||||
return true;
|
||||
}
|
||||
|
||||
//BETTER: Does SpecialBlock really needed? why can't we place next before flag??
|
||||
//
|
||||
//NOTICE: root num begins from 1, if root num is 0, then it is invalid, i.e. the tree is NULL
|
||||
//(and ftell(root address) will be 0 either)
|
||||
|
||||
bool
|
||||
IVStorage::writeNode(IVNode* _np)
|
||||
{
|
||||
if (_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty()))
|
||||
return false; //not need to write back
|
||||
|
||||
unsigned num = _np->getNum(), i;
|
||||
bool flag = _np->isLeaf(), SpecialBlock = true;
|
||||
/*
|
||||
if(!flag)
|
||||
{
|
||||
for(i = 0; i <= num; ++i)
|
||||
if(_np->getChild(i)->isDirty())
|
||||
return false; //NOTICE: all childs must be clean!
|
||||
}
|
||||
*/
|
||||
//to release original blocks
|
||||
unsigned store = _np->getStore(), next;
|
||||
//if first store is 0, meaning a new node
|
||||
fseek(this->treefp, Address(store) + 4, SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
while (store != 0)
|
||||
{
|
||||
this->FreeBlock(store);
|
||||
store = next;
|
||||
fseek(treefp, Address(store), SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
}
|
||||
if (num == 0)
|
||||
return true; //node is empty!
|
||||
unsigned t;
|
||||
//write Node information
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
_np->setStore(blocknum);
|
||||
long address = this->Address(blocknum);
|
||||
fseek(this->treefp, address, SEEK_SET);
|
||||
t = _np->getFlag();
|
||||
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
if (!flag)
|
||||
{
|
||||
for (i = 0; i <= num; ++i)
|
||||
{
|
||||
t = _np->getChild(i)->getStore();
|
||||
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
|
||||
this->WriteAlign(&blocknum, SpecialBlock);
|
||||
}
|
||||
}
|
||||
|
||||
//int tmp = 0;
|
||||
unsigned tmp = INVALID;
|
||||
//to write all keys
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
tmp = _np->getKey(i);
|
||||
fwrite(&tmp, sizeof(int), 1, treefp);
|
||||
this->WriteAlign(&blocknum, SpecialBlock);
|
||||
}
|
||||
|
||||
if (flag)
|
||||
{
|
||||
//to write all values
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock);
|
||||
}
|
||||
}
|
||||
fseek(treefp, Address(blocknum), SEEK_SET);
|
||||
if (SpecialBlock)
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
t = 0;
|
||||
fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block
|
||||
//_np->setFlag(_np->getFlag() & ~Node::NF_ID);
|
||||
//NOTICE:we may store the dirty bit into the tree file, but that is ok
|
||||
//Each time we read the tree file to construct a node, we always set the drity bit to 0
|
||||
_np->delDirty();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVStorage::readBstr(Bstr* _bp, unsigned* _next)
|
||||
{
|
||||
//long address;
|
||||
unsigned len, i, j;
|
||||
fread(&len, sizeof(unsigned), 1, this->treefp);
|
||||
this->ReadAlign(_next);
|
||||
|
||||
//NOTICE: if this is a long list as value
|
||||
if(len == 0)
|
||||
{
|
||||
unsigned addr = 0;
|
||||
fread(&addr, sizeof(unsigned), 1, this->treefp);
|
||||
#ifdef DEBUG_VLIST
|
||||
cout<<"read a vlist in IVStorage - addr: "<<addr<<endl;
|
||||
#endif
|
||||
_bp->setLen(addr);
|
||||
_bp->setStr(NULL);
|
||||
this->ReadAlign(_next);
|
||||
return true;
|
||||
}
|
||||
|
||||
//this->request(len);
|
||||
|
||||
//NOTICE: we use new for all, consistent with Bstr and KVstore
|
||||
//char* s = (char*)malloc(len);
|
||||
char* s = new char[len];
|
||||
_bp->setLen(len);
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fread(s + i, sizeof(char), 4, treefp);
|
||||
this->ReadAlign(_next);
|
||||
}
|
||||
while (i < len)
|
||||
{
|
||||
fread(s + i, sizeof(char), 1, treefp); //BETTER
|
||||
i++;
|
||||
}
|
||||
j = len % 4;
|
||||
if (j > 0)
|
||||
j = 4 - j;
|
||||
fseek(treefp, j, SEEK_CUR);
|
||||
this->ReadAlign(_next);
|
||||
_bp->setStr(s);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
|
||||
{
|
||||
unsigned i, j, len = _bp->getLen();
|
||||
|
||||
//NOTICE: to write long list value
|
||||
if(_bp->getStr() == NULL)
|
||||
{
|
||||
unsigned flag = 0;
|
||||
fwrite(&flag, sizeof(unsigned), 1, treefp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
//then this is the real block num
|
||||
fwrite(&len, sizeof(unsigned), 1, treefp);
|
||||
#ifdef DEBUG_VLIST
|
||||
cout<<"to write a vlist in IVStorage::writeBstr() - blocknum: "<<len<<endl;
|
||||
#endif
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
return true;
|
||||
}
|
||||
|
||||
fwrite(&len, sizeof(unsigned), 1, treefp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
|
||||
char* s = _bp->getStr();
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fwrite(s + i, sizeof(char), 4, treefp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
}
|
||||
while (i < len)
|
||||
{
|
||||
fwrite(s + i, sizeof(char), 1, treefp);
|
||||
i++;
|
||||
}
|
||||
|
||||
j = len % 4;
|
||||
if (j > 0)
|
||||
j = 4 - j;
|
||||
fseek(treefp, j, SEEK_CUR);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVStorage::writeTree(IVNode* _root) //write the whole tree back and close treefp
|
||||
{
|
||||
fseek(this->treefp, 0, SEEK_SET);
|
||||
fwrite(this->treeheight, sizeof(unsigned), 1, treefp);
|
||||
//delete all nonsense-node in heap, otherwise will waste storage permanently
|
||||
IVNode* p;
|
||||
while (1)
|
||||
{ //all non-sense nodes will be in-head-area, due to minimal rank
|
||||
p = minheap->getTop();
|
||||
if (p == NULL) //heap is empty, only when root==NULL
|
||||
break;
|
||||
if (p->getRank() == 0) //indicate non-sense node
|
||||
{
|
||||
this->minheap->remove();
|
||||
this->writeNode(p);
|
||||
delete p;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned i, j, t;
|
||||
//QUERY: another way to write all nodes back is to print out all nodes in heap
|
||||
//but this method will cause no node in heap any more, while operations may be
|
||||
//afetr tree-saving. Which method is better?
|
||||
//write nodes recursively using stack, including root-num
|
||||
if (_root != NULL)
|
||||
{
|
||||
IVNode* p = _root;
|
||||
unsigned h = *this->treeheight, pos = 0;
|
||||
IVNode* ns[h];
|
||||
int ni[h];
|
||||
ns[pos] = p;
|
||||
ni[pos] = p->getNum();
|
||||
pos++;
|
||||
while (pos > 0)
|
||||
{
|
||||
j = pos - 1;
|
||||
p = ns[j];
|
||||
if (p->isLeaf() || ni[j] < 0) //leaf or all childs are ready
|
||||
{
|
||||
this->writeNode(p);
|
||||
pos--;
|
||||
continue;
|
||||
}
|
||||
ns[pos] = p->getChild(ni[j]);
|
||||
ni[pos] = ns[pos]->getNum();
|
||||
pos++;
|
||||
ni[j]--;
|
||||
}
|
||||
t = _root->getStore();
|
||||
}
|
||||
else
|
||||
t = 0;
|
||||
|
||||
fseek(this->treefp, 4, SEEK_SET);
|
||||
fwrite(&t, sizeof(unsigned), 1, treefp); //write the root num
|
||||
fwrite(&cur_block_num, sizeof(unsigned), 1, treefp);//write current blocks num
|
||||
fseek(treefp, BLOCK_SIZE, SEEK_SET);
|
||||
j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
|
||||
//reset to 1 first
|
||||
for (i = 0; i < j; ++i)
|
||||
{
|
||||
fputc(0xff, treefp);
|
||||
}
|
||||
char c;
|
||||
BlockInfo* bp = this->freelist->next;
|
||||
while (bp != NULL)
|
||||
{
|
||||
//if not-use then set 0, aligned to byte!
|
||||
#ifdef DEBUG_KVSTORE
|
||||
if (bp->num > cur_block_num)
|
||||
{
|
||||
printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
j = bp->num - 1;
|
||||
i = j / 8;
|
||||
j = 7 - j % 8;
|
||||
fseek(treefp, BLOCK_SIZE + i, SEEK_SET);
|
||||
c = fgetc(treefp);
|
||||
fseek(treefp, -1, SEEK_CUR);
|
||||
fputc(c & ~(1 << j), treefp);
|
||||
bp = bp->next;
|
||||
}
|
||||
//fclose(this->treefp);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
IVStorage::updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const
|
||||
{
|
||||
if (_inheap) //already in heap, to modify
|
||||
{
|
||||
unsigned t = _np->getRank();
|
||||
_np->setRank(_rank);
|
||||
if (t < _rank)
|
||||
this->minheap->modify(_np, false);
|
||||
else if (t > _rank)
|
||||
this->minheap->modify(_np, true);
|
||||
else;
|
||||
}
|
||||
else //not in heap, to add
|
||||
{
|
||||
_np->setRank(_rank);
|
||||
this->minheap->insert(_np);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
IVStorage::request(long long _needmem) //aligned to byte
|
||||
{ //NOTICE: <0 means release
|
||||
//cout<<"freemem: "<<this->freemem<<" needmem: "<<_needmem<<endl;
|
||||
if (_needmem > 0 && this->freemem < (unsigned long long)_needmem)
|
||||
if (!this->handler(_needmem - freemem)) //disaster in buffer memory
|
||||
{
|
||||
print(string("error in request: out of buffer-mem, now to exit"));
|
||||
//exit(1);
|
||||
return false;
|
||||
}
|
||||
this->freemem -= _needmem;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IVStorage::handler(unsigned long long _needmem) //>0
|
||||
{
|
||||
//cout<<"swap happen"<<endl;
|
||||
IVNode* p;
|
||||
unsigned long long size;
|
||||
//if(_needmem < SET_BUFFER_SIZE) //to recover to SET_BUFFER_SIZE buffer
|
||||
// _needmem = SET_BUFFER_SIZE;
|
||||
//cout<<"IVStorage::handler() - now to loop to release nodes"<<endl;
|
||||
while (1)
|
||||
{
|
||||
p = this->minheap->getTop();
|
||||
//cout<<"get heap top"<<endl;
|
||||
if (p == NULL)
|
||||
{
|
||||
cout << "the heap top is null" << endl;
|
||||
return false; //can't satisfy or can't recover to SET_BUFFER_SIZE
|
||||
}
|
||||
|
||||
this->minheap->remove();
|
||||
//cout<<"node removed in heap"<<endl;
|
||||
size = p->getSize();
|
||||
this->freemem += size;
|
||||
this->writeNode(p);
|
||||
//cout<<"node write back"<<endl;
|
||||
if (p->getNum() > 0)
|
||||
p->Virtual();
|
||||
else
|
||||
delete p; //non-sense node
|
||||
//cout<<"node memory released"<<endl;
|
||||
if (_needmem > size)
|
||||
{
|
||||
//cout<<"reduce the request"<<endl;
|
||||
_needmem -= size;
|
||||
}
|
||||
else
|
||||
{
|
||||
//cout<<"ok to break"<<endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
//cout<<"IVStorage::handler() -- finished"<<endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
//Release the free-block list (including its head node), the buffer heap and
//the tree file handle.
IVStorage::~IVStorage()
{
	//release heap and freelist...
#ifdef DEBUG_KVSTORE
	printf("now to release the kvstore!\n");
#endif
	BlockInfo* bp = this->freelist;
	BlockInfo* next;
	while (bp != NULL)
	{
		next = bp->next;
		delete bp;
		bp = next;
	}
	this->freelist = NULL;
#ifdef DEBUG_KVSTORE
	printf("already empty the freelist!\n");
#endif
	delete this->minheap;
	this->minheap = NULL;
#ifdef DEBUG_KVSTORE
	printf("already empty the buffer heap!\n");
#endif
	//NOTICE: fclose() on a null stream is undefined behaviour, so only close a
	//file that is actually open
	if (this->treefp != NULL)
	{
		fclose(this->treefp);
		this->treefp = NULL;
	}
	//#ifdef DEBUG_KVSTORE
	//NOTICE:there is more than one tree
	//fclose(Util::debug_kvstore);	//NULL is ok!
	//Util::debug_kvstore = NULL;
	//#endif
}
|
||||
|
||||
void
|
||||
IVStorage::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
fputs(Util::showtime().c_str(), Util::debug_kvstore);
|
||||
fputs("Class IVStorage\n", Util::debug_kvstore);
|
||||
fputs("Message: ", Util::debug_kvstore);
|
||||
fputs(s.c_str(), Util::debug_kvstore);
|
||||
fputs("\n", Util::debug_kvstore);
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
/*=============================================================================
|
||||
# Filename: IVStorage.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:43
|
||||
# Description: swap between memory and disk, achieving system-like method
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
|
||||
#define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
|
||||
|
||||
#include "../../../Util/VList.h"
|
||||
#include "../node/IVIntlNode.h"
|
||||
#include "../node/IVLeafNode.h"
|
||||
#include "../heap/IVHeap.h"
|
||||
|
||||
//It controls read, write, swap
|
||||
class IVStorage
|
||||
{
|
||||
public:
|
||||
static const unsigned BLOCK_SIZE = Util::STORAGE_BLOCK_SIZE; //fixed size of disk-block
|
||||
//there are 18 B+Tree indexes and one vstree index, so set 3G buffer size
|
||||
//static const unsigned long long MAX_BUFFER_SIZE = Util::MAX_BUFFER_SIZE; //max buffer size
|
||||
//static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size
|
||||
//static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE / IVNode::INTL_SIZE;
|
||||
|
||||
//DEBUG: maybe need to set larger, now the file size is 64G at most
|
||||
static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num
|
||||
//below two constants: must can be exactly divided by 8
|
||||
static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num
|
||||
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
|
||||
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
|
||||
//static const unsigned TRANSFER_CAPACITY = BLOCK_SIZE;
|
||||
//enum ReadType { OVER = 0, EXPAND, NORMAL };
|
||||
|
||||
private:
|
||||
unsigned long long max_buffer_size;
|
||||
unsigned heap_size;
|
||||
unsigned cur_block_num;
|
||||
std::string filepath;
|
||||
unsigned* treeheight;
|
||||
BlockInfo* freelist;
|
||||
FILE* treefp; //file: tree nodes
|
||||
IVHeap* minheap; //heap of Nodes's pointer, sorted in NF_RK
|
||||
|
||||
//very long value list are stored in a separate file(with large block)
|
||||
//
|
||||
//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
|
||||
//<10%: 5000000~100M bytes
|
||||
VList* value_list;
|
||||
|
||||
//NOTICE: freemem's type is long long here, due to large memory in server.
|
||||
//However, needmem in handler() and request() is ok to be int/unsigned.
|
||||
//Because the bstr' size is controlled, so is the node.
|
||||
unsigned long long freemem; //free memory to use, non-negative
|
||||
//unsigned long long time; //QUERY(achieving an old-swap startegy?)
|
||||
long Address(unsigned _blocknum) const;
|
||||
unsigned Blocknum(long address) const;
|
||||
unsigned AllocBlock();
|
||||
void FreeBlock(unsigned _blocknum);
|
||||
void ReadAlign(unsigned* _next);
|
||||
void WriteAlign(unsigned* _next, bool& _SpecialBlock);
|
||||
|
||||
public:
|
||||
IVStorage();
|
||||
IVStorage(std::string& _filepath, std::string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist); //create a fixed-size file or open an existence
|
||||
bool preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail); //read and build all nodes, only root in memory
|
||||
bool readNode(IVNode* _np, long long* _request); //read, if virtual
|
||||
bool createNode(IVNode*& _np); //use fp to create a new node
|
||||
//NOTICE(if children and child not exist, build children's Nodes)
|
||||
bool writeNode(IVNode* _np);
|
||||
bool readBstr(Bstr* _bp, unsigned* _next);
|
||||
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock);
|
||||
bool writeTree(IVNode* _np);
|
||||
void updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const;
|
||||
bool request(long long _needmem); //deal with memory request
|
||||
bool handler(unsigned long long _needmem); //swap some nodes out
|
||||
//bool update(); //update InMem Node's rank, with clock
|
||||
~IVStorage();
|
||||
void print(std::string s); //DEBUG
|
||||
};
|
||||
|
||||
#endif
|
||||
|
4421
KVstore/KVstore.cpp
4421
KVstore/KVstore.cpp
File diff suppressed because it is too large
Load Diff
|
@ -1,207 +1,229 @@
|
|||
/*=============================================================================
|
||||
# Filename: KVstore.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-23 14:23
|
||||
# Description: Modified by Wang Libo
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_KVSTORE_H
|
||||
#define _KVSTORE_KVSTORE_H
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "Tree.h"
|
||||
|
||||
class KVstore
|
||||
{
|
||||
public:
|
||||
static const int READ_WRITE_MODE = 1; //Open a B tree, which must exist
|
||||
static const int CREATE_MODE = 2; //Build a new B tree and delete existing ones (if any)
|
||||
|
||||
KVstore(std::string _store_path = ".");
|
||||
~KVstore();
|
||||
void flush();
|
||||
void release();
|
||||
void open();
|
||||
|
||||
//===============================================================================
|
||||
|
||||
//including IN-neighbor & OUT-neighbor
|
||||
int getEntityDegree(int _entity_id) const;
|
||||
int getEntityInDegree(int _entity_id) const;
|
||||
int getEntityOutDegree(int _entity_id) const;
|
||||
|
||||
int getLiteralDegree(int _literal_id) const;
|
||||
int getPredicateDegree(int _predicate_id) const;
|
||||
|
||||
int getSubjectPredicateDegree(int _subid, int _preid) const;
|
||||
int getObjectPredicateDegree(int _objid, int _preid) const;
|
||||
|
||||
//===============================================================================
|
||||
//Before calling these functions, we are sure that the triples doesn't exist.
|
||||
|
||||
bool updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id);
|
||||
bool updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id);
|
||||
|
||||
bool updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id);
|
||||
bool updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id);
|
||||
bool updateInsert_s2values(int _subid, const std::vector<int>& _pidoidlist);
|
||||
bool updateRemove_s2values(int _subid, const std::vector<int>& _pidoidlist);
|
||||
|
||||
bool updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id);
|
||||
bool updateRemove_o2values(int _sub_id, int _pre_id, int _obj_id);
|
||||
bool updateInsert_o2values(int _objid, const std::vector<int>& _pidsidlist);
|
||||
bool updateRemove_o2values(int _objid, const std::vector<int>& _pidsidlist);
|
||||
|
||||
bool updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id);
|
||||
bool updateRemove_p2values(int _sub_id, int _pre_id, int _obj_id);
|
||||
bool updateInsert_p2values(int _preid, const std::vector<int>& _sidoidlist);
|
||||
bool updateRemove_p2values(int _preid, const std::vector<int>& _sidoidlist);
|
||||
|
||||
//===============================================================================
|
||||
|
||||
//for entity2id
|
||||
bool open_entity2id(int _mode);
|
||||
bool close_entity2id();
|
||||
bool subIDByEntity(std::string _entity);
|
||||
int getIDByEntity(std::string _entity) const;
|
||||
bool setIDByEntity(std::string _entity, int _id);
|
||||
|
||||
//for id2entity
|
||||
bool open_id2entity(int _mode);
|
||||
bool close_id2entity();
|
||||
bool subEntityByID(int _id);
|
||||
std::string getEntityByID(int _id) const;
|
||||
bool setEntityByID(int _id, std::string _entity);
|
||||
|
||||
//for predicate2id
|
||||
bool open_predicate2id(int _mode);
|
||||
bool close_predicate2id();
|
||||
bool subIDByPredicate(std::string _predicate);
|
||||
int getIDByPredicate(std::string _predicate) const;
|
||||
bool setIDByPredicate(std::string _predicate, int _id);
|
||||
|
||||
//for id2predicate
|
||||
bool open_id2predicate(int _mode);
|
||||
bool close_id2predicate();
|
||||
bool subPredicateByID(int _id);
|
||||
std::string getPredicateByID(int _id) const;
|
||||
bool setPredicateByID(int _id, std::string _predicate);
|
||||
|
||||
//for literal2id
|
||||
bool open_literal2id(int _mode);
|
||||
bool close_literal2id();
|
||||
bool subIDByLiteral(std::string _literal);
|
||||
int getIDByLiteral(std::string _literal) const;
|
||||
bool setIDByLiteral(std::string _literal, int _id);
|
||||
|
||||
//for id2literal
|
||||
bool open_id2literal(int _mode);
|
||||
bool close_id2literal();
|
||||
bool subLiteralByID(int _id);
|
||||
std::string getLiteralByID(int _id) const;
|
||||
bool setLiteralByID(int _id, std::string _literal);
|
||||
|
||||
//===============================================================================
|
||||
|
||||
//for subID2values
|
||||
bool open_subID2values(int _mode);
|
||||
bool close_subID2values();
|
||||
bool build_subID2values(int** _p_id_tuples, int _triples_num);
|
||||
bool getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
bool getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
bool getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
bool getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
|
||||
//for objID2values
|
||||
bool open_objID2values(int _mode);
|
||||
bool close_objID2values();
|
||||
bool build_objID2values(int** _p_id_tuples, int _triples_num);
|
||||
bool getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
bool getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
bool getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
bool getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
|
||||
//for preID2values
|
||||
bool open_preID2values(int _mode);
|
||||
bool close_preID2values();
|
||||
bool build_preID2values(int** _p_id_tuples, int _triples_num);
|
||||
bool getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
bool getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
bool getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
|
||||
//for so2p
|
||||
bool getpreIDlistBysubIDobjID(int _subID, int _objID, int*& _preidlist, int& _list_len, bool _no_duplicate = false) const;
|
||||
|
||||
|
||||
private:
|
||||
std::string store_path;
|
||||
|
||||
SITree* entity2id;
|
||||
ISTree* id2entity;
|
||||
static std::string s_entity2id;
|
||||
static std::string s_id2entity;
|
||||
static unsigned short buffer_entity2id_build;
|
||||
static unsigned short buffer_id2entity_build;
|
||||
static unsigned short buffer_entity2id_query;
|
||||
static unsigned short buffer_id2entity_query;
|
||||
|
||||
SITree* predicate2id;
|
||||
ISTree* id2predicate;
|
||||
static std::string s_predicate2id;
|
||||
static std::string s_id2predicate;
|
||||
static unsigned short buffer_predicate2id_build;
|
||||
static unsigned short buffer_id2predicate_build;
|
||||
static unsigned short buffer_predicate2id_query;
|
||||
static unsigned short buffer_id2predicate_query;
|
||||
|
||||
SITree* literal2id;
|
||||
ISTree* id2literal;
|
||||
static std::string s_literal2id;
|
||||
static std::string s_id2literal;
|
||||
static unsigned short buffer_literal2id_build;
|
||||
static unsigned short buffer_id2literal_build;
|
||||
static unsigned short buffer_literal2id_query;
|
||||
static unsigned short buffer_id2literal_query;
|
||||
|
||||
ISTree* subID2values;
|
||||
ISTree* objID2values;
|
||||
ISTree* preID2values;
|
||||
static std::string s_sID2values;
|
||||
static std::string s_oID2values;
|
||||
static std::string s_pID2values;
|
||||
static unsigned short buffer_sID2values_build;
|
||||
static unsigned short buffer_oID2values_build;
|
||||
static unsigned short buffer_pID2values_build;
|
||||
static unsigned short buffer_sID2values_query;
|
||||
static unsigned short buffer_oID2values_query;
|
||||
static unsigned short buffer_pID2values_query;
|
||||
|
||||
//===============================================================================
|
||||
|
||||
bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
|
||||
bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
|
||||
|
||||
void flush(SITree* _p_btree);
|
||||
void flush(ISTree* _p_btree);
|
||||
|
||||
bool addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val);
|
||||
bool addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen);
|
||||
|
||||
bool setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val);
|
||||
bool setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen);
|
||||
|
||||
bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const;
|
||||
bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const;
|
||||
|
||||
int getIDByStr(SITree* _p_btree, const char* _key, int _klen) const;
|
||||
|
||||
bool removeKey(SITree* _p_btree, const char* _key, int _klen);
|
||||
bool removeKey(ISTree* _p_btree, int _key);
|
||||
|
||||
static std::vector<int> intersect(const int* _list1, const int* _list2, int _len1, int _len2);
|
||||
static int binarySearch(int key, const int* _list, int _list_len, int step = 1);
|
||||
static bool isEntity(int id);
|
||||
};
|
||||
|
||||
#endif //_KVSTORE_KVSTORE_H
|
||||
/*=============================================================================
|
||||
# Filename: KVstore.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-23 14:23
|
||||
# Description: Modified by Wang Libo
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_KVSTORE_H
|
||||
#define _KVSTORE_KVSTORE_H
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Util/VList.h"
|
||||
#include "Tree.h"
|
||||
|
||||
//TODO: is it needed to keep a length in Bstr?? especially for IVTree?
|
||||
//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment)
|
||||
//add a \0 in tail: only add 1 char
|
||||
//QUERY: but to count the length each time maybe very costly?
|
||||
//No, because triple num is stored in char* now!!!! we do not need to save it again
|
||||
//TODO: entity_border in s2values list is not needed!!! not waste memory here
|
||||
//
|
||||
//QUERY: but to implement vlist, we need a unsigned flag
|
||||
//What is more, we need to store the string in disk, how can we store it if without the length?
|
||||
//unsigned type stored as chars, maybe will have '\0'
|
||||
//In memory, we do not know when the oidlist ends if without the original length (but the triple num will answer this!)
|
||||
|
||||
class KVstore
|
||||
{
|
||||
public:
|
||||
static const int READ_WRITE_MODE = 1; //Open a B tree, which must exist
|
||||
static const int CREATE_MODE = 2; //Build a new B tree and delete existing ones (if any)
|
||||
|
||||
KVstore(std::string _store_path = ".");
|
||||
~KVstore();
|
||||
void flush();
|
||||
void release();
|
||||
void open();
|
||||
|
||||
//===============================================================================
|
||||
|
||||
//including IN-neighbor & OUT-neighbor
|
||||
unsigned getEntityDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const;
|
||||
unsigned getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const;
|
||||
unsigned getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const;
|
||||
|
||||
unsigned getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const;
|
||||
unsigned getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const;
|
||||
|
||||
unsigned getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid) const;
|
||||
unsigned getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid) const;
|
||||
|
||||
//===============================================================================
|
||||
//Before calling these functions, we are sure that the triples doesn't exist.
|
||||
|
||||
bool updateTupleslist_insert(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
bool updateTupleslist_remove(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
|
||||
bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector<unsigned>& _pidoidlist);
|
||||
bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector<unsigned>& _pidoidlist);
|
||||
|
||||
bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector<unsigned>& _pidsidlist);
|
||||
bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector<unsigned>& _pidsidlist);
|
||||
|
||||
bool updateInsert_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
bool updateRemove_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
|
||||
bool updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector<unsigned>& _sidoidlist);
|
||||
bool updateRemove_p2values(TYPE_PREDICATE_ID _preid, const std::vector<unsigned>& _sidoidlist);
|
||||
|
||||
//===============================================================================
|
||||
|
||||
//for entity2id
|
||||
bool open_entity2id(int _mode);
|
||||
bool close_entity2id();
|
||||
bool subIDByEntity(std::string _entity);
|
||||
TYPE_ENTITY_LITERAL_ID getIDByEntity(std::string _entity) const;
|
||||
bool setIDByEntity(std::string _entity, TYPE_ENTITY_LITERAL_ID _id);
|
||||
|
||||
//for id2entity
|
||||
bool open_id2entity(int _mode);
|
||||
bool close_id2entity();
|
||||
bool subEntityByID(TYPE_ENTITY_LITERAL_ID _id);
|
||||
std::string getEntityByID(TYPE_ENTITY_LITERAL_ID _id) const;
|
||||
bool setEntityByID(TYPE_ENTITY_LITERAL_ID _id, std::string _entity);
|
||||
|
||||
//for predicate2id
|
||||
bool open_predicate2id(int _mode);
|
||||
bool close_predicate2id();
|
||||
bool subIDByPredicate(std::string _predicate);
|
||||
TYPE_PREDICATE_ID getIDByPredicate(std::string _predicate) const;
|
||||
bool setIDByPredicate(std::string _predicate, TYPE_PREDICATE_ID _id);
|
||||
|
||||
//for id2predicate
|
||||
bool open_id2predicate(int _mode);
|
||||
bool close_id2predicate();
|
||||
bool subPredicateByID(TYPE_PREDICATE_ID _id);
|
||||
std::string getPredicateByID(TYPE_PREDICATE_ID _id) const;
|
||||
bool setPredicateByID(TYPE_PREDICATE_ID _id, std::string _predicate);
|
||||
|
||||
//for literal2id
|
||||
bool open_literal2id(int _mode);
|
||||
bool close_literal2id();
|
||||
bool subIDByLiteral(std::string _literal);
|
||||
TYPE_ENTITY_LITERAL_ID getIDByLiteral(std::string _literal) const;
|
||||
bool setIDByLiteral(std::string _literal, TYPE_ENTITY_LITERAL_ID _id);
|
||||
|
||||
//for id2literal
|
||||
bool open_id2literal(int _mode);
|
||||
bool close_id2literal();
|
||||
bool subLiteralByID(TYPE_ENTITY_LITERAL_ID _id);
|
||||
std::string getLiteralByID(TYPE_ENTITY_LITERAL_ID _id) const;
|
||||
bool setLiteralByID(TYPE_ENTITY_LITERAL_ID _id, std::string _literal);
|
||||
|
||||
//===============================================================================
|
||||
|
||||
//for subID2values
|
||||
bool open_subID2values(int _mode);
|
||||
bool close_subID2values();
|
||||
bool build_subID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num);
|
||||
bool getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
bool getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
bool getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
bool getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
|
||||
//for objID2values
|
||||
bool open_objID2values(int _mode);
|
||||
bool close_objID2values();
|
||||
bool build_objID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num);
|
||||
bool getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
bool getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
bool getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
bool getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preid_subidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
|
||||
//for preID2values
|
||||
bool open_preID2values(int _mode);
|
||||
bool close_preID2values();
|
||||
bool build_preID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num);
|
||||
bool getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
bool getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
bool getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
|
||||
//for so2p
|
||||
bool getpreIDlistBysubIDobjID(TYPE_ENTITY_LITERAL_ID _subID, TYPE_ENTITY_LITERAL_ID _objID, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const;
|
||||
|
||||
|
||||
private:
|
||||
std::string store_path;
|
||||
|
||||
SITree* entity2id;
|
||||
ISTree* id2entity;
|
||||
static std::string s_entity2id;
|
||||
static std::string s_id2entity;
|
||||
static unsigned short buffer_entity2id_build;
|
||||
static unsigned short buffer_id2entity_build;
|
||||
static unsigned short buffer_entity2id_query;
|
||||
static unsigned short buffer_id2entity_query;
|
||||
|
||||
SITree* predicate2id;
|
||||
ISTree* id2predicate;
|
||||
static std::string s_predicate2id;
|
||||
static std::string s_id2predicate;
|
||||
static unsigned short buffer_predicate2id_build;
|
||||
static unsigned short buffer_id2predicate_build;
|
||||
static unsigned short buffer_predicate2id_query;
|
||||
static unsigned short buffer_id2predicate_query;
|
||||
|
||||
SITree* literal2id;
|
||||
ISTree* id2literal;
|
||||
static std::string s_literal2id;
|
||||
static std::string s_id2literal;
|
||||
static unsigned short buffer_literal2id_build;
|
||||
static unsigned short buffer_id2literal_build;
|
||||
static unsigned short buffer_literal2id_query;
|
||||
static unsigned short buffer_id2literal_query;
|
||||
|
||||
IVTree* subID2values;
|
||||
IVTree* objID2values;
|
||||
IVTree* preID2values;
|
||||
static std::string s_sID2values;
|
||||
static std::string s_oID2values;
|
||||
static std::string s_pID2values;
|
||||
static unsigned short buffer_sID2values_build;
|
||||
static unsigned short buffer_oID2values_build;
|
||||
static unsigned short buffer_pID2values_build;
|
||||
static unsigned short buffer_sID2values_query;
|
||||
static unsigned short buffer_oID2values_query;
|
||||
static unsigned short buffer_pID2values_query;
|
||||
|
||||
//===============================================================================
|
||||
|
||||
bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
|
||||
bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
|
||||
bool open(IVTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
|
||||
|
||||
void flush(SITree* _p_btree);
|
||||
void flush(ISTree* _p_btree);
|
||||
void flush(IVTree* _p_btree);
|
||||
|
||||
bool addValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val);
|
||||
bool addValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
|
||||
bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
|
||||
|
||||
bool setValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val);
|
||||
bool setValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
|
||||
bool setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
|
||||
|
||||
bool getValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned* _val) const;
|
||||
bool getValueByKey(ISTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const;
|
||||
bool getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const;
|
||||
|
||||
|
||||
|
||||
TYPE_ENTITY_LITERAL_ID getIDByStr(SITree* _p_btree, const char* _key, unsigned _klen) const;
|
||||
|
||||
bool removeKey(SITree* _p_btree, const char* _key, unsigned _klen);
|
||||
bool removeKey(ISTree* _p_btree, unsigned _key);
|
||||
bool removeKey(IVTree* _p_btree, unsigned _key);
|
||||
|
||||
static std::vector<unsigned> intersect(const unsigned* _list1, const unsigned* _list2, unsigned _len1, unsigned _len2);
|
||||
static unsigned binarySearch(unsigned key, const unsigned* _list, unsigned _list_len, int step = 1);
|
||||
static bool isEntity(TYPE_ENTITY_LITERAL_ID id);
|
||||
};
|
||||
|
||||
#endif //_KVSTORE_KVSTORE_H
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ SITree::SITree()
|
|||
TSM = NULL;
|
||||
storepath = "";
|
||||
filename = "";
|
||||
transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
|
||||
//transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
|
||||
this->request = 0;
|
||||
}
|
||||
|
||||
|
@ -36,10 +36,10 @@ SITree::SITree(string _storepath, string _filename, string _mode, unsigned long
|
|||
this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
|
||||
else
|
||||
this->root = NULL;
|
||||
this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
|
||||
//this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
//this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
|
||||
this->request = 0;
|
||||
}
|
||||
|
||||
|
@ -49,30 +49,30 @@ SITree::getFilePath()
|
|||
return storepath + "/" + filename;
|
||||
}
|
||||
|
||||
void //WARN: not check _str and _len
|
||||
SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
|
||||
{
|
||||
if (_index > 2)
|
||||
return;
|
||||
/*
|
||||
if(_str == NULL || _len == 0)
|
||||
{
|
||||
printf("error in CopyToTransfer: empty string\n");
|
||||
return;
|
||||
}
|
||||
*/
|
||||
//unsigned length = _bstr->getLen();
|
||||
unsigned length = _len;
|
||||
if (length + 1 > this->transfer_size[_index])
|
||||
{
|
||||
transfer[_index].release();
|
||||
transfer[_index].setStr((char*)malloc(length + 1));
|
||||
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
|
||||
}
|
||||
memcpy(this->transfer[_index].getStr(), _str, length);
|
||||
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
this->transfer[_index].setLen(length);
|
||||
}
|
||||
//void //WARN: not check _str and _len
|
||||
//SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
|
||||
//{
|
||||
//if (_index > 2)
|
||||
//return;
|
||||
//[>
|
||||
//if(_str == NULL || _len == 0)
|
||||
//{
|
||||
//printf("error in CopyToTransfer: empty string\n");
|
||||
//return;
|
||||
//}
|
||||
//*/
|
||||
////unsigned length = _bstr->getLen();
|
||||
//unsigned length = _len;
|
||||
//if (length + 1 > this->transfer_size[_index])
|
||||
//{
|
||||
//transfer[_index].release();
|
||||
//transfer[_index].setStr((char*)malloc(length + 1));
|
||||
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
|
||||
//}
|
||||
//memcpy(this->transfer[_index].getStr(), _str, length);
|
||||
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
//this->transfer[_index].setLen(length);
|
||||
//}
|
||||
|
||||
unsigned
|
||||
SITree::getHeight() const
|
||||
|
@ -102,40 +102,47 @@ SITree::prepare(SINode* _np)
|
|||
}
|
||||
|
||||
bool
|
||||
SITree::search(const char* _str, unsigned _len, int* _val)
|
||||
SITree::search(const char* _str, unsigned _len, unsigned* _val)
|
||||
{
|
||||
if (_str == NULL || _len == 0)
|
||||
{
|
||||
printf("error in SITree-search: empty string\n");
|
||||
*_val = -1;
|
||||
//*_val = -1;
|
||||
return false;
|
||||
}
|
||||
this->CopyToTransfer(_str, _len, 1);
|
||||
//this->CopyToTransfer(_str, _len, 1);
|
||||
|
||||
request = 0;
|
||||
Bstr bstr = this->transfer[1]; //not to modify its memory
|
||||
//Bstr bstr = this->transfer[1]; //not to modify its memory
|
||||
//Bstr bstr(_str, _len, true);
|
||||
int store;
|
||||
SINode* ret = this->find(&transfer[1], &store, false);
|
||||
if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
|
||||
SINode* ret = this->find(_str, _len, &store, false);
|
||||
if (ret == NULL || store == -1) //tree is empty or not found
|
||||
{
|
||||
//bstr.clear();
|
||||
return false;
|
||||
}
|
||||
const Bstr* tmp = ret->getKey(store);
|
||||
if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found
|
||||
{
|
||||
bstr.clear();
|
||||
return false;
|
||||
}
|
||||
*_val = ret->getValue(store);
|
||||
this->TSM->request(request);
|
||||
bstr.clear();
|
||||
|
||||
//bstr.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
SITree::insert(const char* _str, unsigned _len, int _val)
|
||||
SITree::insert(char* _str, unsigned _len, unsigned _val)
|
||||
{
|
||||
if (_str == NULL || _len == 0)
|
||||
{
|
||||
printf("error in SITree-insert: empty string\n");
|
||||
return false;
|
||||
}
|
||||
this->CopyToTransfer(_str, _len, 1);
|
||||
//this->CopyToTransfer(_str, _len, 1);
|
||||
|
||||
this->request = 0;
|
||||
SINode* ret;
|
||||
|
@ -170,8 +177,8 @@ SITree::insert(const char* _str, unsigned _len, int _val)
|
|||
SINode* p = this->root;
|
||||
SINode* q;
|
||||
int i;
|
||||
const Bstr* _key = &transfer[1];
|
||||
Bstr bstr = *_key;
|
||||
//const Bstr* _key = &transfer[1];
|
||||
//Bstr bstr = *_key;
|
||||
while (!p->isLeaf())
|
||||
{
|
||||
//j = p->getNum();
|
||||
|
@ -179,7 +186,7 @@ SITree::insert(const char* _str, unsigned _len, int _val)
|
|||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
//NOTICE: using binary search is better here
|
||||
i = p->searchKey_less(bstr);
|
||||
i = p->searchKey_less(_str, _len);
|
||||
|
||||
q = p->getChild(i);
|
||||
this->prepare(q);
|
||||
|
@ -196,7 +203,10 @@ SITree::insert(const char* _str, unsigned _len, int _val)
|
|||
this->TSM->updateHeap(ret, ret->getRank(), false);
|
||||
this->TSM->updateHeap(q, q->getRank(), true);
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
if (bstr < *(p->getKey(i)))
|
||||
//if (bstr < *(p->getKey(i)))
|
||||
const Bstr* tmp = p->getKey(i);
|
||||
int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen());
|
||||
if (cmp_res < 0)
|
||||
p = q;
|
||||
else
|
||||
p = ret;
|
||||
|
@ -212,63 +222,82 @@ SITree::insert(const char* _str, unsigned _len, int _val)
|
|||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_less(bstr);
|
||||
i = p->searchKey_less(_str, _len);
|
||||
|
||||
//insert existing key is ok, but not inserted in
|
||||
//however, the tree-shape may change due to possible split in former code
|
||||
bool ifexist = false;
|
||||
if (i > 0 && bstr == *(p->getKey(i - 1)))
|
||||
ifexist = true;
|
||||
else
|
||||
//if (i > 0 && bstr == *(p->getKey(i - 1)))
|
||||
if (i > 0)
|
||||
{
|
||||
p->addKey(_key, i, true);
|
||||
const Bstr* tmp = p->getKey(i-1);
|
||||
int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen());
|
||||
if(cmp_res == 0)
|
||||
{
|
||||
ifexist = true;
|
||||
}
|
||||
}
|
||||
|
||||
if(!ifexist)
|
||||
{
|
||||
p->addKey(_str, _len, i, true);
|
||||
p->addValue(_val, i);
|
||||
p->addNum();
|
||||
request += _key->getLen();
|
||||
request += _len;
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
}
|
||||
|
||||
this->TSM->request(request);
|
||||
bstr.clear(); //NOTICE: must be cleared!
|
||||
//bstr.clear(); //NOTICE: must be cleared!
|
||||
|
||||
return !ifexist; //QUERY(which case:return false)
|
||||
}
|
||||
|
||||
bool
|
||||
SITree::modify(const char* _str, unsigned _len, int _val)
|
||||
SITree::modify(const char* _str, unsigned _len, unsigned _val)
|
||||
{
|
||||
if (_str == NULL || _len == 0)
|
||||
{
|
||||
printf("error in SITree-modify: empty string\n");
|
||||
return false;
|
||||
}
|
||||
this->CopyToTransfer(_str, _len, 1);
|
||||
//this->CopyToTransfer(_str, _len, 1);
|
||||
|
||||
this->request = 0;
|
||||
const Bstr* _key = &transfer[1];
|
||||
Bstr bstr = *_key;
|
||||
//const Bstr* _key = &transfer[1];
|
||||
//Bstr bstr = *_key;
|
||||
int store;
|
||||
SINode* ret = this->find(_key, &store, true);
|
||||
if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
|
||||
SINode* ret = this->find(_str, _len, &store, true);
|
||||
if (ret == NULL || store == -1) //tree is empty or not found
|
||||
{
|
||||
bstr.clear();
|
||||
//bstr.clear();
|
||||
return false;
|
||||
}
|
||||
const Bstr* tmp = ret->getKey(store);
|
||||
if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
ret->setValue(_val, store);
|
||||
ret->setDirty();
|
||||
this->TSM->request(request);
|
||||
bstr.clear();
|
||||
//bstr.clear();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//this function is useful for search and modify, and range-query
|
||||
SINode* //return the first key's position that >= *_key
|
||||
SITree::find(const Bstr* _key, int* _store, bool ifmodify)
|
||||
SITree::find(const char* _str, unsigned _len, int* _store, bool ifmodify)
|
||||
{ //to assign value for this->bstr, function shouldn't be const!
|
||||
if (this->root == NULL)
|
||||
return NULL; //SITree Is Empty
|
||||
|
||||
SINode* p = root;
|
||||
int i, j;
|
||||
Bstr bstr = *_key; //local Bstr: multiple delete
|
||||
//Bstr bstr = *_key; //local Bstr: multiple delete
|
||||
while (!p->isLeaf())
|
||||
{
|
||||
if (ifmodify)
|
||||
|
@ -277,7 +306,7 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify)
|
|||
//for(i = 0; i < j; ++i) //BETTER(Binary-Search)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_less(bstr);
|
||||
i = p->searchKey_less(_str, _len);
|
||||
|
||||
p = p->getChild(i);
|
||||
this->prepare(p);
|
||||
|
@ -287,13 +316,15 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify)
|
|||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr <= *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_lessEqual(bstr);
|
||||
i = p->searchKey_lessEqual(_str, _len);
|
||||
|
||||
if (i == j)
|
||||
*_store = -1; //Not Found
|
||||
else
|
||||
*_store = i;
|
||||
bstr.clear();
|
||||
|
||||
//bstr.clear();
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
@ -312,24 +343,25 @@ SITree::remove(const char* _str, unsigned _len)
|
|||
printf("error in SITree-remove: empty string\n");
|
||||
return false;
|
||||
}
|
||||
this->CopyToTransfer(_str, _len, 1);
|
||||
//this->CopyToTransfer(_str, _len, 1);
|
||||
|
||||
request = 0;
|
||||
const Bstr* _key = &transfer[1];
|
||||
//const Bstr* _key = &transfer[1];
|
||||
SINode* ret;
|
||||
if (this->root == NULL) //tree is empty
|
||||
return false;
|
||||
|
||||
SINode* p = this->root;
|
||||
SINode* q;
|
||||
int i, j;
|
||||
Bstr bstr = *_key;
|
||||
//Bstr bstr = *_key;
|
||||
while (!p->isLeaf())
|
||||
{
|
||||
j = p->getNum();
|
||||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_less(bstr);
|
||||
i = p->searchKey_less(_str, _len);
|
||||
|
||||
q = p->getChild(i);
|
||||
this->prepare(q);
|
||||
|
@ -343,6 +375,7 @@ SITree::remove(const char* _str, unsigned _len)
|
|||
if (ret != NULL)
|
||||
this->TSM->updateHeap(ret, 0, true);//non-sense node
|
||||
this->TSM->updateHeap(q, q->getRank(), true);
|
||||
|
||||
if (q->isLeaf())
|
||||
{
|
||||
if (q->getPrev() == NULL)
|
||||
|
@ -350,6 +383,7 @@ SITree::remove(const char* _str, unsigned _len)
|
|||
if (q->getNext() == NULL)
|
||||
this->leaves_tail = q;
|
||||
}
|
||||
|
||||
if (p->getNum() == 0) //root shrinks
|
||||
{
|
||||
//this->leaves_head = q;
|
||||
|
@ -365,7 +399,7 @@ SITree::remove(const char* _str, unsigned _len)
|
|||
}
|
||||
|
||||
bool flag = false;
|
||||
i = p->searchKey_equal(bstr);
|
||||
i = p->searchKey_equal(_str, _len);
|
||||
//WARN+NOTICE:here must check, because the key to remove maybe not exist
|
||||
if (i != (int)p->getNum())
|
||||
{
|
||||
|
@ -386,7 +420,8 @@ SITree::remove(const char* _str, unsigned _len)
|
|||
}
|
||||
|
||||
this->TSM->request(request);
|
||||
bstr.clear();
|
||||
//bstr.clear();
|
||||
|
||||
return flag; //i == j, not found
|
||||
}
|
||||
|
||||
|
@ -495,4 +530,5 @@ SITree::print(string s)
|
|||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:44
|
||||
# Description: struct and interface of the B+ tree
|
||||
# Description: string2ID, including entity2id, literal2id, predicate2id
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_SITREE_SITREE_H
|
||||
|
@ -21,7 +21,7 @@
|
|||
class SITree
|
||||
{
|
||||
private:
|
||||
unsigned int height; //0 indicates an empty tree
|
||||
unsigned height; //0 indicates an empty tree
|
||||
SINode* root;
|
||||
SINode* leaves_head; //the head of LeafNode-list
|
||||
SINode* leaves_tail; //the tail of LeafNode-list
|
||||
|
@ -36,13 +36,19 @@ private:
|
|||
//so lock is a must. Add lock to transfer is better than to add
|
||||
//lock to every key/value. However, modify requires a lock for a
|
||||
//key/value, and multiple search for different keys are ok!!!
|
||||
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
|
||||
unsigned transfer_size[3];
|
||||
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
|
||||
//unsigned transfer_size[3];
|
||||
|
||||
	//TODO: in all B+ trees, update operations should lock the whole tree, while search operations need not
|
||||
	//However, the transfer bstr may cause parallelism errors!
|
||||
//Why we need the transfer? It is ok to pass the original string pointer to return
|
||||
	//A problem is that the tree cannot be modified before the caller finishes (so a read-write lock is required)
|
||||
|
||||
std::string storepath;
|
||||
std::string filename; //ok for user to change
|
||||
/* some private functions */
|
||||
std::string getFilePath(); //in UNIX system
|
||||
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
|
||||
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
|
||||
void release(SINode* _np) const;
|
||||
|
||||
	//tree's operations should be atomic (if reading nodes)
|
||||
|
@ -54,14 +60,15 @@ private:
|
|||
public:
|
||||
SITree(); //always need to initial transfer
|
||||
SITree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
|
||||
unsigned int getHeight() const;
|
||||
unsigned getHeight() const;
|
||||
void setHeight(unsigned _h);
|
||||
SINode* getRoot() const;
|
||||
//insert, search, remove, set
|
||||
bool search(const char* _str, unsigned _len, int* _val);
|
||||
bool insert(const char* _str, unsigned _len, int _val);
|
||||
bool modify(const char* _str, unsigned _len, int _val);
|
||||
bool search(const char* _str, unsigned _len, unsigned* _val);
|
||||
bool insert(char* _str, unsigned _len, unsigned _val);
|
||||
bool modify(const char* _str, unsigned _len, unsigned _val);
|
||||
SINode* find(const Bstr* _key, int* store, bool ifmodify);
|
||||
SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify);
|
||||
bool remove(const char* _str, unsigned _len);
|
||||
bool save();
|
||||
~SITree();
|
||||
|
@ -71,4 +78,5 @@ public:
|
|||
//(problem range between two extremes: not-modified, totally-modified)
|
||||
//After saved, it's ok to continue operations on tree!
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -183,4 +183,5 @@ SIHeap::print(string s)
|
|||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -38,4 +38,5 @@ public:
|
|||
void print(std::string s); //DEBUG
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -75,6 +75,7 @@ SIIntlNode::setChild(SINode* _child, int _index)
|
|||
return false;
|
||||
}
|
||||
this->childs[_index] = _child;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -91,6 +92,7 @@ SIIntlNode::addChild(SINode* _child, int _index)
|
|||
for (i = num; i >= _index; --i) //DEBUG: right bounder!!!
|
||||
childs[i + 1] = childs[i];
|
||||
childs[_index] = _child;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -106,6 +108,7 @@ SIIntlNode::subChild(int _index)
|
|||
int i;
|
||||
for (i = _index; i < num; ++i) //DEBUG: right bounder!!!
|
||||
childs[i] = childs[i + 1];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -115,6 +118,7 @@ SIIntlNode::getSize() const
|
|||
unsigned sum = INTL_SIZE, num = this->getNum(), i;
|
||||
for (i = 0; i < num; ++i)
|
||||
sum += keys[i].getLen();
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
@ -140,6 +144,7 @@ SIIntlNode::split(SINode* _father, int _index)
|
|||
_father->setDirty();
|
||||
p->setDirty();
|
||||
this->setDirty();
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
@ -235,6 +240,7 @@ SIIntlNode::coalesce(SINode* _father, int _index)
|
|||
print("error in coalesce: Invalid case!");
|
||||
//printf("error in coalesce: Invalid case!");
|
||||
}
|
||||
|
||||
_father->setDirty();
|
||||
p->setDirty();
|
||||
this->setDirty();
|
||||
|
@ -290,4 +296,5 @@ SIIntlNode::print(string s)
|
|||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -45,4 +45,5 @@ public:
|
|||
*/
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ using namespace std;
|
|||
void
|
||||
SILeafNode::AllocValues()
|
||||
{
|
||||
values = new int[MAX_KEY_NUM];
|
||||
values = new unsigned[MAX_KEY_NUM];
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -76,7 +76,7 @@ SILeafNode::getNext() const
|
|||
return next;
|
||||
}
|
||||
|
||||
int
|
||||
unsigned
|
||||
SILeafNode::getValue(int _index) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
|
@ -90,7 +90,7 @@ SILeafNode::getValue(int _index) const
|
|||
}
|
||||
|
||||
bool
|
||||
SILeafNode::setValue(int _val, int _index)
|
||||
SILeafNode::setValue(unsigned _val, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index >= num)
|
||||
|
@ -99,11 +99,12 @@ SILeafNode::setValue(int _val, int _index)
|
|||
return false;
|
||||
}
|
||||
this->values[_index] = _val;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
SILeafNode::addValue(int _val, int _index)
|
||||
SILeafNode::addValue(unsigned _val, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
|
@ -115,6 +116,7 @@ SILeafNode::addValue(int _val, int _index)
|
|||
for (i = num - 1; i >= _index; --i)
|
||||
this->values[i + 1] = this->values[i];
|
||||
this->values[_index] = _val;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -130,6 +132,7 @@ SILeafNode::subValue(int _index)
|
|||
int i;
|
||||
for (i = _index; i < num - 1; ++i)
|
||||
this->values[i] = this->values[i + 1];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -180,6 +183,7 @@ SILeafNode::split(SINode* _father, int _index)
|
|||
_father->setDirty();
|
||||
p->setDirty();
|
||||
this->setDirty();
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
@ -275,6 +279,7 @@ SILeafNode::coalesce(SINode* _father, int _index)
|
|||
print("error in coalesce: Invalid case!");
|
||||
//printf("error in coalesce: Invalid case!");
|
||||
}
|
||||
|
||||
_father->setDirty();
|
||||
p->setDirty();
|
||||
this->setDirty();
|
||||
|
@ -362,4 +367,5 @@ SILeafNode::print(string s)
|
|||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ class SILeafNode : public SINode
|
|||
protected:
|
||||
SINode* prev; //LeafNode
|
||||
SINode* next;
|
||||
int* values;
|
||||
unsigned* values;
|
||||
void AllocValues();
|
||||
//void FreeValues();
|
||||
public:
|
||||
|
@ -27,18 +27,24 @@ public:
|
|||
void Normal();
|
||||
SINode* getPrev() const;
|
||||
SINode* getNext() const;
|
||||
int getValue(int _index) const;
|
||||
bool setValue(int _val, int _index);
|
||||
bool addValue(int _val, int _index);
|
||||
unsigned getValue(int _index) const;
|
||||
bool setValue(unsigned _val, int _index);
|
||||
bool addValue(unsigned _val, int _index);
|
||||
|
||||
bool subValue(int _index);
|
||||
|
||||
void setPrev(SINode* _prev);
|
||||
void setNext(SINode* _next);
|
||||
|
||||
unsigned getSize() const;
|
||||
|
||||
SINode* split(SINode* _father, int _index);
|
||||
SINode* coalesce(SINode* _father, int _index);
|
||||
|
||||
void release();
|
||||
~SILeafNode();
|
||||
void print(std::string s); //DEBUG
|
||||
|
||||
/*non-sense virtual function
|
||||
Node* getChild(int _index) const;
|
||||
bool addChild(Node* _child, int _index);
|
||||
|
@ -47,4 +53,5 @@ public:
|
|||
};
|
||||
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -251,6 +251,28 @@ SINode::addKey(const Bstr* _key, int _index, bool ifcopy)
|
|||
keys[_index].copy(_key);
|
||||
else
|
||||
keys[_index] = *_key;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
SINode::addKey(char* _str, unsigned _len, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if (_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addKey: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
//NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!!
|
||||
//however. tree operations ensure that: when node is full, not add but split first!
|
||||
for (i = num - 1; i >= _index; --i)
|
||||
keys[i + 1] = keys[i];
|
||||
|
||||
keys[_index].setStr(_str);
|
||||
keys[_index].setLen(_len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -268,6 +290,7 @@ SINode::subKey(int _index, bool ifdel)
|
|||
keys[_index].release();
|
||||
for (i = _index; i < num - 1; ++i)
|
||||
keys[i] = keys[i + 1];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -294,6 +317,7 @@ SINode::searchKey_less(const Bstr& _bstr) const
|
|||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return low;
|
||||
}
|
||||
|
||||
|
@ -325,4 +349,57 @@ SINode::searchKey_lessEqual(const Bstr& _bstr) const
|
|||
return ret - 1;
|
||||
else
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
SINode::searchKey_less(const char* _str, unsigned _len) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
|
||||
int low = 0, high = num - 1, mid = -1;
|
||||
while (low <= high)
|
||||
{
|
||||
mid = (low + high) / 2;
|
||||
//if (this->keys[mid] > _bstr)
|
||||
if (Util::compare(this->keys[mid].getStr(), this->keys[mid].getLen(), _str, _len) > 0)
|
||||
{
|
||||
if (low == mid)
|
||||
break;
|
||||
high = mid;
|
||||
}
|
||||
else
|
||||
{
|
||||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return low;
|
||||
}
|
||||
|
||||
int
|
||||
SINode::searchKey_equal(const char* _str, unsigned _len) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
// if(bstr == *(p->getKey(i)))
|
||||
// {
|
||||
|
||||
int ret = this->searchKey_less(_str, _len);
|
||||
//if (ret > 0 && this->keys[ret - 1] == _bstr)
|
||||
if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0)
|
||||
return ret - 1;
|
||||
else
|
||||
return num;
|
||||
}
|
||||
|
||||
int
|
||||
SINode::searchKey_lessEqual(const char* _str, unsigned _len) const
|
||||
{
|
||||
int ret = this->searchKey_less(_str, _len);
|
||||
//if (ret > 0 && this->keys[ret - 1] == _bstr)
|
||||
if (ret > 0 && Util::compare(this->keys[ret-1].getStr(), this->keys[ret-1].getLen(), _str, _len) == 0)
|
||||
return ret - 1;
|
||||
else
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ public:
|
|||
static const unsigned NF_HT = 0xf00000; //height area in rank
|
||||
static const unsigned NF_KN = 0x07f000; //NOTICE: decided by DEGREE
|
||||
static const unsigned INTL_SIZE = sizeof(Bstr) * MAX_KEY_NUM;
|
||||
static const unsigned LEAF_SIZE = sizeof(int) * MAX_KEY_NUM + INTL_SIZE;
|
||||
static const unsigned LEAF_SIZE = sizeof(unsigned) * MAX_KEY_NUM + INTL_SIZE;
|
||||
protected:
|
||||
unsigned store; //store address, the BLock index
|
||||
unsigned flag; //NF_RK, NF_IL,NF_ID, NF_IV, propety
|
||||
|
@ -64,9 +64,11 @@ public:
|
|||
void setStore(unsigned _store);
|
||||
unsigned getFlag() const;
|
||||
void setFlag(unsigned _flag);
|
||||
|
||||
const Bstr* getKey(int _index) const; //need to check the index
|
||||
bool setKey(const Bstr* _key, int _index, bool ifcopy = false);
|
||||
bool addKey(const Bstr* _key, int _index, bool ifcopy = false);
|
||||
bool addKey(char* _str, unsigned _len, int _index, bool ifcopy = false);
|
||||
bool subKey(int _index, bool ifdel = false);
|
||||
|
||||
//several binary key search utilities
|
||||
|
@ -74,19 +76,26 @@ public:
|
|||
int searchKey_equal(const Bstr& _bstr) const;
|
||||
int searchKey_lessEqual(const Bstr& _bstr) const;
|
||||
|
||||
int searchKey_less(const char* _str, unsigned _len) const;
|
||||
int searchKey_equal(const char* _str, unsigned _len) const;
|
||||
int searchKey_lessEqual(const char* _str, unsigned _len) const;
|
||||
|
||||
//virtual functions: polymorphic
|
||||
//NOTICE: not pure-virtual, not required to be implemented again, can be used now
|
||||
virtual SINode* getChild(int _index) const { return NULL; };
|
||||
virtual bool setChild(SINode* _child, int _index) { return true; };
|
||||
virtual bool addChild(SINode* _child, int _index) { return true; };
|
||||
virtual bool subChild(int _index) { return true; };
|
||||
virtual SINode* getPrev() const { return NULL; };
|
||||
virtual SINode* getNext() const { return NULL; };
|
||||
virtual int getValue(int _index) const { return -1; };
|
||||
virtual bool setValue(int _val, int _index) { return true; };
|
||||
virtual bool addValue(int _val, int _index) { return true; };
|
||||
virtual unsigned getValue(int _index) const { return -1; };
|
||||
virtual bool setValue(unsigned _val, int _index) { return true; };
|
||||
virtual bool addValue(unsigned _val, int _index) { return true; };
|
||||
virtual bool subValue(int _index) { return true; };
|
||||
virtual void setPrev(SINode* _prev) {};
|
||||
virtual void setNext(SINode* _next) {};
|
||||
|
||||
//NOTICE: pure-virtual, must to be implemented again in the sub-class
|
||||
virtual void Virtual() = 0;
|
||||
virtual void Normal() = 0;
|
||||
virtual unsigned getSize() const = 0; //return all memory owned
|
||||
|
@ -111,4 +120,5 @@ public:
|
|||
*to release the whole(pointer is invalid and rebuild problem)
|
||||
*/
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -71,7 +71,8 @@ SIStorage::SIStorage(string& _filepath, string& _mode, unsigned* _height, unsign
|
|||
else //_mode == "open"
|
||||
{
|
||||
//read basic information
|
||||
int rootnum;
|
||||
unsigned rootnum;
|
||||
//int rootnum;
|
||||
char c;
|
||||
fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
|
||||
fread(&rootnum, sizeof(unsigned), 1, this->treefp);
|
||||
|
@ -214,6 +215,7 @@ SIStorage::AllocBlock()
|
|||
unsigned t = p->num;
|
||||
this->freelist->next = p->next;
|
||||
delete p;
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
|
@ -286,10 +288,11 @@ SIStorage::readNode(SINode* _np, long long* _request)
|
|||
if (flag)
|
||||
{
|
||||
//to read all values
|
||||
int tmp = -1;
|
||||
unsigned tmp = INVALID;
|
||||
//int tmp = -1;
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
fread(&tmp, sizeof(int), 1, treefp);
|
||||
fread(&tmp, sizeof(unsigned), 1, treefp);
|
||||
this->ReadAlign(&next);
|
||||
_np->setValue(tmp, i);
|
||||
}
|
||||
|
@ -300,6 +303,7 @@ SIStorage::readNode(SINode* _np, long long* _request)
|
|||
//_np->setMem();
|
||||
this->updateHeap(_np, _np->getRank(), false);
|
||||
bstr.clear();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -335,6 +339,7 @@ SIStorage::createNode(SINode*& _np) //cretae virtual nodes, not in-mem
|
|||
_np->delDirty();
|
||||
_np->delMem();
|
||||
_np->setStore(Blocknum(ftell(treefp) - 4));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -343,6 +348,7 @@ SIStorage::writeNode(SINode* _np)
|
|||
{
|
||||
if (_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty()))
|
||||
return false; //not need to write back
|
||||
|
||||
unsigned num = _np->getNum(), i;
|
||||
bool flag = _np->isLeaf(), SpecialBlock = true;
|
||||
/*
|
||||
|
@ -392,12 +398,13 @@ SIStorage::writeNode(SINode* _np)
|
|||
|
||||
if (flag)
|
||||
{
|
||||
int tmp = -1;
|
||||
//int tmp = -1;
|
||||
unsigned tmp = INVALID;
|
||||
//to write all values
|
||||
for (i = 0; i < num; ++i)
|
||||
{
|
||||
tmp = _np->getValue(i);
|
||||
fwrite(&tmp, sizeof(int), 1, treefp);
|
||||
fwrite(&tmp, sizeof(unsigned), 1, treefp);
|
||||
this->WriteAlign(&blocknum, SpecialBlock);
|
||||
}
|
||||
}
|
||||
|
@ -408,6 +415,7 @@ SIStorage::writeNode(SINode* _np)
|
|||
fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block
|
||||
//_np->setFlag(_np->getFlag() & ~Node::NF_ID);
|
||||
_np->delDirty();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -419,7 +427,8 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next)
|
|||
fread(&len, sizeof(unsigned), 1, this->treefp);
|
||||
this->ReadAlign(_next);
|
||||
//this->request(len);
|
||||
char* s = (char*)malloc(len);
|
||||
//char* s = (char*)malloc(len);
|
||||
char* s = new char[len];
|
||||
_bp->setLen(len);
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
|
@ -437,6 +446,7 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next)
|
|||
fseek(treefp, j, SEEK_CUR);
|
||||
this->ReadAlign(_next);
|
||||
_bp->setStr(s);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -462,6 +472,7 @@ SIStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
|
|||
j = 4 - j;
|
||||
fseek(treefp, j, SEEK_CUR);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -552,6 +563,7 @@ SIStorage::writeTree(SINode* _root) //write the whole tree back and close treefp
|
|||
bp = bp->next;
|
||||
}
|
||||
//fclose(this->treefp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -586,6 +598,7 @@ SIStorage::request(long long _needmem) //aligned to byte
|
|||
return false;;
|
||||
}
|
||||
this->freemem -= _needmem;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -614,6 +627,7 @@ SIStorage::handler(unsigned long long _needmem) //>0
|
|||
else
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -657,3 +671,4 @@ SIStorage::print(string s)
|
|||
fputs("\n", Util::debug_kvstore);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,14 @@
|
|||
#include "../node/SILeafNode.h"
|
||||
#include "../heap/SIHeap.h"
|
||||
|
||||
//TODO: whether to use heap or not, is a big question
|
||||
//For single-query application, it seems that an LRU list like VSTree is a better choice (not much cost in the buffer itself)
|
||||
//But in the multiple-queries case, things may be different
|
||||
//BETTER:
|
||||
//add a heap position in node, to speed up the node-pointer searching
|
||||
//lower the update times of heap, if the size is 128M, then each update is 27 at most
|
||||
//if not updated in time, then the heap may no longer be a heap, then why do we use heap? why not a simple array?
|
||||
|
||||
//It controls read, write, swap
|
||||
class SIStorage
|
||||
{
|
||||
|
@ -70,3 +78,4 @@ public:
|
|||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
//headers wrapper for all kinds of BPlusTree
|
||||
|
||||
#include "ISTree/ISTree.h"
|
||||
#include "SITree/SITree.h"
|
||||
#include "SITree/SITree.h"
|
||||
#include "IVTree/IVTree.h"
|
||||
|
|
|
@ -7,9 +7,9 @@ int
|
|||
main(int argc, char * argv[])
|
||||
{
|
||||
//chdir(dirname(argv[0]));
|
||||
#ifdef DEBUG
|
||||
//#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
cout << "argc: " << argc << "\t";
|
||||
cout << "DB_store:" << argv[1] << "\t";
|
||||
|
|
|
@ -17,9 +17,9 @@ int
|
|||
main(int argc, char * argv[])
|
||||
{
|
||||
//chdir(dirname(argv[0]));
|
||||
#ifdef DEBUG
|
||||
//#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
//#endif
|
||||
if(argc < 3) //./gbuild
|
||||
{
|
||||
//output help info here
|
||||
|
|
|
@ -12,9 +12,9 @@
|
|||
int main(int argc, char * argv[])
|
||||
{
|
||||
//chdir(dirname(argv[0]));
|
||||
#ifdef DEBUG
|
||||
//#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
std::string ip = Socket::DEFAULT_SERVER_IP;
|
||||
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
|
||||
|
@ -38,4 +38,4 @@ int main(int argc, char * argv[])
|
|||
client.run();
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,9 +122,9 @@ main(int argc, char **argv)
|
|||
//NOTICE:this is needed to ensure the file path is the work path
|
||||
//chdir(dirname(argv[0]));
|
||||
//NOTICE:this is needed to set several debug files
|
||||
#ifdef DEBUG
|
||||
//#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
db_home = Util::global_config["db_home"];
|
||||
|
||||
|
|
|
@ -38,9 +38,9 @@ int
|
|||
main(int argc, char * argv[])
|
||||
{
|
||||
//chdir(dirname(argv[0]));
|
||||
#ifdef DEBUG
|
||||
//#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
if (argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
|
||||
{
|
||||
|
|
115
Main/gserver.cpp
115
Main/gserver.cpp
|
@ -11,9 +11,9 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
#define GSERVER_PORT_FILE "bin/.gserver_port"
|
||||
#define GSERVER_PORT_SWAP "bin/.gserver_port.swap"
|
||||
#define GSERVER_LOG "logs/gserver.log"
|
||||
//#define GSERVER_PORT_FILE "bin/.gserver_port"
|
||||
//#define GSERVER_PORT_SWAP "bin/.gserver_port.swap"
|
||||
//#define GSERVER_LOG "logs/gserver.log"
|
||||
|
||||
bool isOnlyProcess(const char* argv0);
|
||||
void checkSwap();
|
||||
|
@ -22,9 +22,9 @@ bool stopServer();
|
|||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
#ifdef DEBUG
|
||||
//#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
string mode;
|
||||
if (argc == 1) {
|
||||
|
@ -61,7 +61,7 @@ int main(int argc, char* argv[])
|
|||
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
|
||||
if (argc == 3) {
|
||||
if (!Util::isValidPort(string(argv[2]))) {
|
||||
cout << "Invalid port: " << argv[2] << endl;
|
||||
cerr << "Invalid port: " << argv[2] << endl;
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
|
@ -70,9 +70,9 @@ int main(int argc, char* argv[])
|
|||
}
|
||||
}
|
||||
if (!isOnlyProcess(argv[0])) {
|
||||
ofstream out(GSERVER_PORT_SWAP, ios::out);
|
||||
ofstream out(Util::gserver_port_swap.c_str());
|
||||
if (!out) {
|
||||
cout << "Failed to change port!" << endl;
|
||||
cerr << "Failed to change port!" << endl;
|
||||
return -1;
|
||||
}
|
||||
out << port;
|
||||
|
@ -80,9 +80,9 @@ int main(int argc, char* argv[])
|
|||
cout << "Port will be changed to " << port << " after the current server stops or restarts." << endl;
|
||||
return 0;
|
||||
}
|
||||
ofstream out(GSERVER_PORT_FILE, ios::out);
|
||||
ofstream out(Util::gserver_port_file.c_str());
|
||||
if (!out) {
|
||||
cout << "Failed to change port!" << endl;
|
||||
cerr << "Failed to change port!" << endl;
|
||||
return -1;
|
||||
}
|
||||
out << port;
|
||||
|
@ -93,10 +93,15 @@ int main(int argc, char* argv[])
|
|||
|
||||
if (mode == "-s" || mode == "--start") {
|
||||
if (!isOnlyProcess(argv[0])) {
|
||||
cout << "gServer already running!" << endl;
|
||||
cerr << "gServer already running!" << endl;
|
||||
return -1;
|
||||
}
|
||||
if (startServer()) {
|
||||
sleep(1);
|
||||
if (isOnlyProcess(argv[0])) {
|
||||
cerr << "Server stopped unexpectedly. Check for port conflicts!" << endl;
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
|
@ -106,7 +111,7 @@ int main(int argc, char* argv[])
|
|||
|
||||
if (mode == "-t" || mode == "--stop") {
|
||||
if (isOnlyProcess(argv[0])) {
|
||||
cout << "gServer not running!" << endl;
|
||||
cerr << "gServer not running!" << endl;
|
||||
return -1;
|
||||
}
|
||||
if (stopServer()) {
|
||||
|
@ -119,7 +124,7 @@ int main(int argc, char* argv[])
|
|||
|
||||
if (mode == "-r" || mode == "--restart") {
|
||||
if (isOnlyProcess(argv[0])) {
|
||||
cout << "gServer not running!" << endl;
|
||||
cerr << "gServer not running!" << endl;
|
||||
return -1;
|
||||
}
|
||||
if (!stopServer()) {
|
||||
|
@ -133,14 +138,14 @@ int main(int argc, char* argv[])
|
|||
|
||||
if (mode == "-P" || mode == "--printport") {
|
||||
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
|
||||
ifstream in(GSERVER_PORT_FILE);
|
||||
ifstream in(Util::gserver_port_file.c_str());
|
||||
if (in) {
|
||||
in >> port;
|
||||
in.close();
|
||||
}
|
||||
cout << "Current connection port is " << port << '.' << endl;
|
||||
unsigned short portSwap = 0;
|
||||
ifstream inSwap(GSERVER_PORT_SWAP);
|
||||
ifstream inSwap(Util::gserver_port_swap.c_str());
|
||||
if (inSwap) {
|
||||
inSwap >> portSwap;
|
||||
inSwap.close();
|
||||
|
@ -153,14 +158,14 @@ int main(int argc, char* argv[])
|
|||
|
||||
if (mode == "-k" || mode == "--kill") {
|
||||
if (isOnlyProcess(argv[0])) {
|
||||
cout << "No process to kill!" << endl;
|
||||
cerr << "No process to kill!" << endl;
|
||||
return -1;
|
||||
}
|
||||
execl("/usr/bin/killall", "killall", Util::getExactPath(argv[0]).c_str(), NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
cout << "Invalid arguments! Input \"bin/gserver -h\" for help." << endl;
|
||||
cerr << "Invalid arguments! Type \"bin/gserver -h\" for help." << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -169,38 +174,38 @@ bool isOnlyProcess(const char* argv0) {
|
|||
}
|
||||
|
||||
void checkSwap() {
|
||||
if (access(GSERVER_PORT_SWAP, 00) != 0) {
|
||||
if (access(Util::gserver_port_swap.c_str(), 00) != 0) {
|
||||
return;
|
||||
}
|
||||
ifstream in(GSERVER_PORT_SWAP, ios::in);
|
||||
ifstream in(Util::gserver_port_swap.c_str());
|
||||
if (!in) {
|
||||
cout << "Failed in checkSwap(), port may not be changed." << endl;
|
||||
cerr << "Failed in checkSwap(), port may not be changed." << endl;
|
||||
return;
|
||||
}
|
||||
unsigned short port;
|
||||
in >> port;
|
||||
in.close();
|
||||
ofstream out(GSERVER_PORT_FILE, ios::out);
|
||||
ofstream out(Util::gserver_port_file.c_str());
|
||||
if (!out) {
|
||||
cout << "Failed in checkSwap(), port may not be changed." << endl;
|
||||
cerr << "Failed in checkSwap(), port may not be changed." << endl;
|
||||
return;
|
||||
}
|
||||
out << port;
|
||||
out.close();
|
||||
chmod(GSERVER_PORT_FILE, 0644);
|
||||
string cmd = string("rm ") + GSERVER_PORT_SWAP;
|
||||
chmod(Util::gserver_port_file.c_str(), 0644);
|
||||
string cmd = string("rm ") + Util::gserver_port_swap;
|
||||
system(cmd.c_str());
|
||||
}
|
||||
|
||||
bool startServer() {
|
||||
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
|
||||
ifstream in(GSERVER_PORT_FILE, ios::in);
|
||||
ifstream in(Util::gserver_port_file.c_str());
|
||||
if (!in) {
|
||||
ofstream out(GSERVER_PORT_FILE, ios::out);
|
||||
ofstream out(Util::gserver_port_file.c_str());
|
||||
if (out) {
|
||||
out << port;
|
||||
out.close();
|
||||
chmod(GSERVER_PORT_FILE, 0644);
|
||||
chmod(Util::gserver_port_file.c_str(), 0644);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -215,47 +220,75 @@ bool startServer() {
|
|||
if (!Util::dir_exist("logs")) {
|
||||
Util::create_dir("logs");
|
||||
}
|
||||
freopen(GSERVER_LOG, "a", stdout);
|
||||
freopen(GSERVER_LOG, "a", stderr);
|
||||
Server server(port);
|
||||
if (!server.createConnection()) {
|
||||
cout << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl;
|
||||
return false;
|
||||
freopen(Util::gserver_log.c_str(), "a", stdout);
|
||||
freopen(Util::gserver_log.c_str(), "a", stderr);
|
||||
|
||||
int status;
|
||||
|
||||
while (true) {
|
||||
fpid = fork();
|
||||
|
||||
// child, main process
|
||||
if (fpid == 0) {
|
||||
Server server(port);
|
||||
if (!server.createConnection()) {
|
||||
cerr << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl;
|
||||
return false;
|
||||
}
|
||||
cout << Util::getTimeString() << "Server started at port " << port << '.' << endl;
|
||||
server.listen();
|
||||
exit(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// parent, daemon process
|
||||
else if (fpid > 0) {
|
||||
waitpid(fpid, &status, 0);
|
||||
if (WIFEXITED(status)) {
|
||||
exit(0);
|
||||
return true;
|
||||
}
|
||||
cerr << Util::getTimeString() << "Server stopped abnormally, restarting server..." << endl;
|
||||
}
|
||||
|
||||
// fork failure
|
||||
else {
|
||||
cerr << Util::getTimeString() << "Failed to start server: deamon fork failure." << endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
cout << Util::getTimeString() << "Server started at port " << port << '.' << endl;
|
||||
server.listen();
|
||||
exit(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// parent
|
||||
else if (fpid > 0) {
|
||||
cout << "Server started at port " << port << '.' << endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// fork failure
|
||||
else {
|
||||
cout << "Failed to start server at port " << port << '.' << endl;
|
||||
cerr << "Failed to start server at port " << port << '.' << endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool stopServer() {
|
||||
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
|
||||
ifstream in(GSERVER_PORT_FILE, ios::in);
|
||||
ifstream in(Util::gserver_port_file.c_str());
|
||||
if (in) {
|
||||
in >> port;
|
||||
in.close();
|
||||
}
|
||||
Socket socket;
|
||||
if (!socket.create() || !socket.connect("127.0.0.1", port) || !socket.send("stop")) {
|
||||
cout << "Failed to stop server at port " << port << '.' << endl;
|
||||
cerr << "Failed to stop server at port " << port << '.' << endl;
|
||||
return false;
|
||||
}
|
||||
string recv_msg;
|
||||
socket.recv(recv_msg);
|
||||
socket.close();
|
||||
if (recv_msg != "server stopped.") {
|
||||
cout << "Failed to stop server at port " << port << '.' << endl;
|
||||
cerr << "Failed to stop server at port " << port << '.' << endl;
|
||||
return false;
|
||||
}
|
||||
cout << "Server stopped at port " << port << '.' << endl;
|
||||
|
|
|
@ -12,9 +12,9 @@ int
|
|||
main(int argc, char * argv[])
|
||||
{
|
||||
//chdir(dirname(argv[0]));
|
||||
#ifdef DEBUG
|
||||
//#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
cout << "argc: " << argc << "\t";
|
||||
cout << "DB_store:" << argv[1] << "\t";
|
||||
|
|
58
NOTES.md
58
NOTES.md
|
@ -7,14 +7,26 @@
|
|||
在使用gserver时,不能在数据库没有unload时再用gbuild或其他命令修改数据库,仅限于C/S模式
|
||||
将IRC聊天放到gstore文档上,freenode #gStore
|
||||
|
||||
storage中大量使用long类型,文件大小也可能达到64G,最好在64位机器上运行。
|
||||
|
||||
# 推广
|
||||
|
||||
必须建立一个官方网站,可以展示下团队、demo,需要建立社区/论坛并维护
|
||||
另外要有桌面应用或者网页应用,以可视化的方式操作数据库,类似virtuoso和neo4j那种
|
||||
server 118.89.115.42 gstore-pku.com
|
||||
|
||||
自己的网站可以用实验室的服务器,gstore网站最好用云服务,图个稳定
|
||||
但用实验室主机,备案时是否更麻烦?得以企业为单位,而且解析是否更麻烦?
|
||||
gstore网站中的demo应用的主体可以放在实验室主机上,至少是gstore数据库应抽离出来,但若实验室主机不开外网,应如何配置代理?
|
||||
demo应用全部外链,具体服务放在实验室公开的主机上,通过ip:port连接
|
||||
考虑使用hbase,结合云平台
|
||||
|
||||
---
|
||||
|
||||
论文:新的join策略,特殊的子图同态问题,如何选择顺序
|
||||
动态估价的评估函数要考虑方向性,因为可能含literal变量,对应的候选集大小不可靠,只能单向。
|
||||
但每条边总是含subject的,所以每条边总是可以备选的。不过问题是literal变量应该先做还是后做
|
||||
|
||||
另一种过滤方式:直接用key-value索引,比如?x-?y-constant,可能就比较适合,如果用vstree先过滤出的候选集太大的话
|
||||
考虑对线状查询或星形查询做特殊处理,这里的形状仅指需要join的部分
|
||||
比如?x-?y-constant, why not just use key-value to generate sequentially
|
||||
但这种没有考虑到更远的约束,可能导致不少中间解是无效的,实际上线状图的拼接顺序也不一定是从两端开始
|
||||
|
||||
---
|
||||
|
||||
|
@ -79,13 +91,18 @@ http://blog.csdn.net/infoworld/article/details/8670951
|
|||
要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的(int扩展为unsigned)
|
||||
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集,就像jena和virtuoso一样,慢不要紧
|
||||
|
||||
同时将ID的编码改为unsigned,无效标志-1改为最大值的宏, triple数目的类型也要改为unsigned
|
||||
注意pre的ID还可以为-2,或者对于pre仍然用int,或者改函数的返回值为long long (还有一些没有用-1而是>=0)
|
||||
type分支中query过程可能还有问题,需要修改Query/里面的类型,另外stringindex中也要修改,分界线已经是20亿且非法不再是-1
|
||||
remove signature.binary, 合并两个分支type value
|
||||
vstree在build和query时可以用不同大小的缓存,来加速build过程
|
||||
---
|
||||
将B+tree中叶节点的大的value分离出来,新建一套缓存,使用block机制,标记length为0表示未读取
|
||||
类型bstr的length问题也需要解决
|
||||
类型bstr的length问题也需要解决(新建Istr类型)
|
||||
如果把类型直接改成long long,空间开销一下子就上升了一倍
|
||||
解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用long long*和unsigned来表示,这样最高可支持到40亿triple
|
||||
注意:在B+树中是以long long*的方式存,但读出后应该全部换成unsigned*和unsigned搭配的方式(最长支持20亿个po对)
|
||||
UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long,空间开销一下子就上升了一倍
|
||||
解决方法:对于ID2string,仍然用char*和unsigned,但对于s2xx p2xx o2xx,应该用unsigned long long*和unsigned来表示,这样最高可支持到40亿triple
|
||||
(其实这个不是特别必要,很少会有这种情况,我们处理的triple数目一般限制在20亿,就算是type这种边,po对数也就是跟entity数目持平,很难达到5亿)
|
||||
---
|
||||
那么是否可以调整entity与literal的分界线,如果entity数目一般都比literal数目多的话
|
||||
直接把literal从大到小编号,可在ID模块中指定顺序,这样每个Database模块应该有自己独特的分界线,其他模块用时也需要注意
|
||||
|
@ -465,6 +482,8 @@ build db error if triple num > 500M
|
|||
|
||||
# BETTER
|
||||
|
||||
#### 添加数据访问层,数据范式和生成数据访问的源码
|
||||
|
||||
#### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询,返回空值!
|
||||
|
||||
#### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?)
|
||||
|
@ -515,6 +534,8 @@ http://www.oschina.net/question/188977_58777
|
|||
|
||||
# ADVICE
|
||||
|
||||
#### 考虑利用hdfs或者hbase,这样就可以利用各公司已有的数据库系统,但这是否会和已有的内外存交换冲突?
|
||||
|
||||
#### 数值型查询 实数域 [-bound, bound] 类型很难匹配,有必要单独编码么? 数据集中不应有范围 Query中编码过滤后还需验证
|
||||
x>a, x<b, >=, <=, a<x<b, x=c
|
||||
vstree中遇到"1237"^^<...integer>时不直接取字符串,而是转换为数值并编码
|
||||
|
@ -599,3 +620,26 @@ Consider the use of Bloom Filter and FM-sketches
|
|||
|
||||
http://www.hprd.org/download/
|
||||
|
||||
|
||||
|
||||
## GIT USAGE
|
||||
|
||||
http://www.ruanyifeng.com/blog/2014/06/git_remote.html
|
||||
https://git-scm.com/book/zh/v1/%E8%B5%B7%E6%AD%A5-%E5%88%9D%E6%AC%A1%E8%BF%90%E8%A1%8C-Git-%E5%89%8D%E7%9A%84%E9%85%8D%E7%BD%AE
|
||||
|
||||
#### how to commit a message
|
||||
|
||||
package.json
|
||||
http://www.json.cn/
|
||||
https://www.oschina.net/news/69705/git-commit-message-and-changelog-guide
|
||||
https://sanwen8.cn/p/44eCof7.html
|
||||
|
||||
1. commit one by one, a commit just do one thing
|
||||
|
||||
2. place an empty line between head and body, body and footer
|
||||
|
||||
3. the first letter of header should be in uppercase, and the header should not be too long, just a wonderful summary
|
||||
FIX: ... ADD:... REF:... 代码重构 SUB:...
|
||||
|
||||
4. each line should not be too long, add your real name and the influence in footer (it may cause the code structure to change)
|
||||
|
||||
|
|
|
@ -102,7 +102,7 @@ BasicQuery::getVarName(int _var)
|
|||
}
|
||||
|
||||
// get triples number, also sentences number
|
||||
int
|
||||
unsigned
|
||||
BasicQuery::getTripleNum()
|
||||
{
|
||||
return this->triple_vt.size();
|
||||
|
@ -121,7 +121,8 @@ int BasicQuery::getEdgeNeighborID(int _var, int _i_th_edge)
|
|||
}
|
||||
|
||||
// get the ID of the i-th edge of _var
|
||||
int BasicQuery::getEdgePreID(int _var, int _i_th_edge)
|
||||
TYPE_PREDICATE_ID
|
||||
BasicQuery::getEdgePreID(int _var, int _i_th_edge)
|
||||
{
|
||||
return this->edge_pre_id[_var][_i_th_edge];
|
||||
}
|
||||
|
@ -177,20 +178,20 @@ BasicQuery::getCandidateList(int _var)
|
|||
return candidate_list[_var];
|
||||
}
|
||||
|
||||
int
|
||||
unsigned
|
||||
BasicQuery::getCandidateSize(int _var)
|
||||
{
|
||||
return this->candidate_list[_var].size();
|
||||
}
|
||||
|
||||
// get the result list of _var in the query graph
|
||||
vector<int*>&
|
||||
vector<unsigned*>&
|
||||
BasicQuery::getResultList()
|
||||
{
|
||||
return result_list;
|
||||
}
|
||||
|
||||
vector<int*>*
|
||||
vector<unsigned*>*
|
||||
BasicQuery::getResultListPointer()
|
||||
{
|
||||
return &result_list;
|
||||
|
@ -339,18 +340,20 @@ BasicQuery::setReady(int _var)
|
|||
}
|
||||
|
||||
void
|
||||
BasicQuery::updateSubSig(int _sub_var_id, int _pre_id, int _obj_id, int _line_id, int _obj_var_id)
|
||||
BasicQuery::updateSubSig(int _sub_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id, int _line_id, int _obj_var_id)
|
||||
{
|
||||
cout<<"sub var id: "<<_sub_var_id<<endl;
|
||||
// update var(sub)_signature according this triple
|
||||
//bool obj_is_str = (_obj_id == -1) && (_obj.at(0) != '?');
|
||||
//if(obj_is_str)
|
||||
if(_obj_id >= 0)
|
||||
if(_obj_id != INVALID_ENTITY_LITERAL_ID)
|
||||
//if(_obj_id >= 0)
|
||||
{
|
||||
//Signature::encodeStr2Entity(_obj.c_str(), this->var_sig[_sub_id]);
|
||||
Signature::encodeStr2Entity(this->var_sig[_sub_var_id], _obj_id, Util::EDGE_OUT);
|
||||
}
|
||||
|
||||
//DEBUG: if the type of pre id is changed to unsigned, this will cause an error
|
||||
if(_pre_id >= 0)
|
||||
{
|
||||
Signature::encodePredicate2Entity(this->var_sig[_sub_var_id], _pre_id, Util::EDGE_OUT);
|
||||
|
@ -367,13 +370,14 @@ BasicQuery::updateSubSig(int _sub_var_id, int _pre_id, int _obj_id, int _line_id
|
|||
}
|
||||
|
||||
void
|
||||
BasicQuery::updateObjSig(int _obj_var_id, int _pre_id, int _sub_id, int _line_id, int _sub_var_id)
|
||||
BasicQuery::updateObjSig(int _obj_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _sub_id, int _line_id, int _sub_var_id)
|
||||
{
|
||||
cout<<"obj var id: "<<_obj_var_id<<endl;
|
||||
// update var(obj)_signature
|
||||
//bool sub_is_str = (_sub_id == -1) && (_sub.at(0) != '?');
|
||||
//if(sub_is_str)
|
||||
if(_sub_id >= 0)
|
||||
if(_sub_id != INVALID_ENTITY_LITERAL_ID)
|
||||
//if(_sub_id >= 0)
|
||||
{
|
||||
//cout << "str2entity" << endl;
|
||||
Signature::encodeStr2Entity(this->var_sig[_obj_var_id], _sub_id, Util::EDGE_IN);
|
||||
|
@ -506,7 +510,8 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
|
|||
string& pre = this->triple_vt[i].predicate;
|
||||
string& obj = this->triple_vt[i].object;
|
||||
|
||||
int pre_id = -1; //not found
|
||||
//int pre_id = -1; //not found
|
||||
TYPE_PREDICATE_ID pre_id = INVALID_PREDICATE_ID; //not found
|
||||
if(pre[0] == '?') //pre var
|
||||
{
|
||||
pre_id = -2; //mark that this is a pre var
|
||||
|
@ -547,11 +552,12 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
|
|||
bool sub_is_var = (sub_var_id != -1);
|
||||
if(sub_is_var)
|
||||
{
|
||||
int obj_id = -1;
|
||||
//int obj_id = -1;
|
||||
TYPE_ENTITY_LITERAL_ID obj_id = INVALID_ENTITY_LITERAL_ID;
|
||||
if(obj.at(0) != '?')
|
||||
{
|
||||
obj_id = _p_kvstore->getIDByEntity(obj);
|
||||
if(obj_id == -1)
|
||||
if(obj_id == INVALID_ENTITY_LITERAL_ID)
|
||||
{
|
||||
obj_id = _p_kvstore->getIDByLiteral(obj);
|
||||
}
|
||||
|
@ -572,7 +578,8 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
|
|||
bool obj_is_var = (obj_var_id != -1);
|
||||
if(obj_is_var)
|
||||
{
|
||||
int sub_id = -1;
|
||||
//int sub_id = -1;
|
||||
TYPE_ENTITY_LITERAL_ID sub_id = INVALID_ENTITY_LITERAL_ID;
|
||||
if(sub.at(0) != '?')
|
||||
{
|
||||
sub_id = _p_kvstore->getIDByEntity(sub);
|
||||
|
@ -994,45 +1001,47 @@ BasicQuery::print(ostream& _out_stream)
|
|||
|
||||
// WARN:not used because this also considers the candidate not
|
||||
// adding literals
|
||||
int
|
||||
int
|
||||
BasicQuery::getVarID_MinCandidateList()
|
||||
{
|
||||
int min_var = -1;
|
||||
int min_size = Util::TRIPLE_NUM_MAX;
|
||||
unsigned min_size = Util::TRIPLE_NUM_MAX;
|
||||
for(int i = 0; i < this->graph_var_num; i ++)
|
||||
{
|
||||
int tmp_size = (this->candidate_list[i]).size();
|
||||
unsigned tmp_size = (this->candidate_list[i]).size();
|
||||
if(tmp_size < min_size)
|
||||
{
|
||||
min_var = i;
|
||||
min_size = tmp_size;
|
||||
}
|
||||
}
|
||||
|
||||
return min_var;
|
||||
}
|
||||
|
||||
int
|
||||
int
|
||||
BasicQuery::getVarID_MaxCandidateList()
|
||||
{
|
||||
int max_var = -1;
|
||||
int max_size = -1;
|
||||
unsigned max_size = 0;
|
||||
for(int i = 0; i < this->graph_var_num; i ++)
|
||||
{
|
||||
int tmp_size = (this->candidate_list[i]).size();
|
||||
unsigned tmp_size = (this->candidate_list[i]).size();
|
||||
if(tmp_size > max_size)
|
||||
{
|
||||
max_var = i;
|
||||
max_size = tmp_size;
|
||||
}
|
||||
}
|
||||
|
||||
return max_var;
|
||||
}
|
||||
|
||||
int
|
||||
int
|
||||
BasicQuery::getVarID_FirstProcessWhenJoin()
|
||||
{
|
||||
int min_var = -1;
|
||||
int min_size = Util::TRIPLE_NUM_MAX;
|
||||
unsigned min_size = Util::TRIPLE_NUM_MAX;
|
||||
//int min_var2 = -1;
|
||||
//int min_size2 = Util::TRIPLE_NUM_MAX;
|
||||
for(int i = 0; i < this->graph_var_num; ++i)
|
||||
|
@ -1049,7 +1058,7 @@ BasicQuery::getVarID_FirstProcessWhenJoin()
|
|||
else
|
||||
cout<<"var "<<i<<" is ready!"<<endl;
|
||||
|
||||
int tmp_size = (this->candidate_list[i]).size();
|
||||
unsigned tmp_size = (this->candidate_list[i]).size();
|
||||
//if(this->isLiteralVariable(i))
|
||||
//{
|
||||
//if(tmp_size < min_size2)
|
||||
|
@ -1112,10 +1121,12 @@ string BasicQuery::triple_str()
|
|||
stringstream _ss;
|
||||
|
||||
_ss<<"Triple num:"<<this->getTripleNum()<<endl;
|
||||
for (int i=0;i<getTripleNum();i++)
|
||||
|
||||
for (int i = 0; i < getTripleNum(); i++)
|
||||
{
|
||||
_ss<<(this->getTriple(i).toString())<<endl;
|
||||
}
|
||||
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
|
|
|
@ -120,7 +120,7 @@ private:
|
|||
int retrieve_var_num;
|
||||
string* var_name;
|
||||
IDList* candidate_list;
|
||||
vector<int*> result_list;
|
||||
vector<unsigned*> result_list;
|
||||
int* var_degree;
|
||||
|
||||
//whether has added the variable's literal candidate
|
||||
|
@ -162,8 +162,8 @@ private:
|
|||
|
||||
//void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
|
||||
//void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
|
||||
void updateSubSig(int _sub_var_id, int _pre_id, int _obj_id, int _line_id, int _obj_var_id);
|
||||
void updateObjSig(int _obj_var_id, int _pre_id, int _sub_id, int _line_id, int _sub_var_id);
|
||||
void updateSubSig(int _sub_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id, int _line_id, int _obj_var_id);
|
||||
void updateObjSig(int _obj_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _sub_id, int _line_id, int _sub_var_id);
|
||||
|
||||
//infos for predicate variables
|
||||
vector<PreVar> pre_var;
|
||||
|
@ -175,8 +175,8 @@ private:
|
|||
map<int, int> selected_var_position;
|
||||
|
||||
public:
|
||||
static const int MAX_VAR_NUM = 10;
|
||||
static const int MAX_PRE_VAR_NUM = 10;
|
||||
static const int MAX_VAR_NUM = 20;
|
||||
static const int MAX_PRE_VAR_NUM = 20;
|
||||
static const char NOT_JUST_SELECT = 'a';
|
||||
static const char SELECT_VAR = 's';
|
||||
|
||||
|
@ -203,7 +203,7 @@ public:
|
|||
int getIDByVarName(const string& _name);
|
||||
|
||||
// get triples number, also sentences number
|
||||
int getTripleNum();
|
||||
unsigned getTripleNum();
|
||||
|
||||
//check if a normal var is in select
|
||||
bool isVarSelected(const std::string& _name) const;
|
||||
|
@ -220,7 +220,7 @@ public:
|
|||
int getEdgeNeighborID(int _var, int _i_th_edge);
|
||||
|
||||
// get the preID of the i-th edge of _var
|
||||
int getEdgePreID(int _var, int _i_th_edge);
|
||||
TYPE_PREDICATE_ID getEdgePreID(int _var, int _i_th_edge);
|
||||
|
||||
// get the type of the i-th edge of _var
|
||||
char getEdgeType(int _var, int _i_th_edge);
|
||||
|
@ -236,11 +236,11 @@ public:
|
|||
// get the candidate list of _var in the query graph
|
||||
IDList& getCandidateList(int _var);
|
||||
|
||||
int getCandidateSize(int _var);
|
||||
unsigned getCandidateSize(int _var);
|
||||
|
||||
// get the result list of _var in the query graph
|
||||
vector<int*>& getResultList();
|
||||
vector<int*>* getResultListPointer();
|
||||
vector<unsigned*>& getResultList();
|
||||
vector<unsigned*>* getResultListPointer();
|
||||
|
||||
// get the entity signature of _var in the query graph
|
||||
const EntityBitSet& getEntitySignature(int _var);
|
||||
|
|
|
@ -1828,7 +1828,7 @@ void GeneralEvaluation::dfsJoinableResultGraph(int x, vector < pair<char, int> >
|
|||
|
||||
int varnum = (int)temp->results[0].var.varset.size();
|
||||
|
||||
vector<int*> &basicquery_result =this->sparql_query.getBasicQuery(blockid).getResultList();
|
||||
vector<unsigned*> &basicquery_result =this->sparql_query.getBasicQuery(blockid).getResultList();
|
||||
int basicquery_result_num = (int)basicquery_result.size();
|
||||
|
||||
temp->results[0].res.reserve(basicquery_result_num);
|
||||
|
@ -2191,7 +2191,7 @@ void GeneralEvaluation::queryRewriteEncodeRetrieveJoin(int dep)
|
|||
sub_temp->results[0].var = Varset(encode_varset[i]);
|
||||
int varnum = (int)encode_varset[i].size();
|
||||
|
||||
vector<int*> &basicquery_result = this->expansion_evaluation_stack[dep].sparql_query.getBasicQuery(i).getResultList();
|
||||
vector<unsigned*> &basicquery_result = this->expansion_evaluation_stack[dep].sparql_query.getBasicQuery(i).getResultList();
|
||||
int basicquery_result_num = (int)basicquery_result.size();
|
||||
|
||||
sub_temp->results[0].res.reserve(basicquery_result_num);
|
||||
|
@ -2356,7 +2356,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &result_str)
|
|||
}
|
||||
else
|
||||
{
|
||||
vector <int> keys;
|
||||
vector <unsigned> keys;
|
||||
vector <bool> desc;
|
||||
for (int i = 0; i < (int)this->query_tree.getOrder().size(); i++)
|
||||
{
|
||||
|
@ -2552,7 +2552,7 @@ void GeneralEvaluation::releaseResultStack()
|
|||
delete results_id;
|
||||
}
|
||||
|
||||
void GeneralEvaluation::prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, int &update_triple_num)
|
||||
void GeneralEvaluation::prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, unsigned &update_triple_num)
|
||||
{
|
||||
update_pattern.getVarset();
|
||||
|
||||
|
@ -2613,3 +2613,4 @@ void GeneralEvaluation::prepareUpdateTriple(QueryTree::GroupPattern &update_patt
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,16 +34,16 @@ class GeneralEvaluation
|
|||
std::vector <Varset> sparql_query_varset;
|
||||
VSTree *vstree;
|
||||
KVstore *kvstore;
|
||||
TNUM* pre2num;
|
||||
int limitID_predicate;
|
||||
int limitID_literal;
|
||||
TYPE_TRIPLE_NUM* pre2num;
|
||||
TYPE_PREDICATE_ID limitID_predicate;
|
||||
TYPE_ENTITY_LITERAL_ID limitID_literal;
|
||||
StringIndex *stringindex;
|
||||
Strategy strategy;
|
||||
ResultFilter result_filter;
|
||||
bool need_output_answer;
|
||||
|
||||
public:
|
||||
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal):
|
||||
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal):
|
||||
vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), need_output_answer(false)
|
||||
{
|
||||
}
|
||||
|
@ -239,7 +239,7 @@ class GeneralEvaluation
|
|||
void getFinalResult(ResultSet &result_str);
|
||||
void releaseResultStack();
|
||||
|
||||
void prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, int &update_triple_num);
|
||||
void prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, unsigned &update_triple_num);
|
||||
};
|
||||
|
||||
#endif // _QUERY_GENERALEVALUATION_H
|
||||
|
|
124
Query/IDList.cpp
124
Query/IDList.cpp
|
@ -16,38 +16,40 @@ IDList::IDList()
|
|||
}
|
||||
|
||||
//return the _i-th id of the list; if _i is out of range, return INVALID
|
||||
int
|
||||
IDList::getID(int _i)const
|
||||
unsigned
|
||||
IDList::getID(unsigned _i) const
|
||||
{
|
||||
if (this->size() > _i)
|
||||
{
|
||||
return this->id_list[_i];
|
||||
}
|
||||
return -1;
|
||||
|
||||
//return -1;
|
||||
return INVALID;
|
||||
}
|
||||
|
||||
bool
|
||||
IDList::addID(int _id)
|
||||
IDList::addID(unsigned _id)
|
||||
{
|
||||
//a check for duplicate case will be more reliable
|
||||
this->id_list.push_back(_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::size()const
|
||||
unsigned
|
||||
IDList::size() const
|
||||
{
|
||||
return this->id_list.size();
|
||||
}
|
||||
|
||||
bool
|
||||
IDList::empty()const
|
||||
IDList::empty() const
|
||||
{
|
||||
return this->id_list.size() == 0;
|
||||
}
|
||||
|
||||
bool
|
||||
IDList::isExistID(int _id)const
|
||||
IDList::isExistID(unsigned _id) const
|
||||
{
|
||||
// naive implementation of searching(linear search).
|
||||
// you can use binary search when the id list is sorted, if necessary.
|
||||
|
@ -62,15 +64,14 @@ IDList::isExistID(int _id)const
|
|||
return false;
|
||||
}
|
||||
|
||||
const vector<int>*
|
||||
IDList::getList()const
|
||||
const vector<unsigned>*
|
||||
IDList::getList() const
|
||||
{
|
||||
return &(this->id_list);
|
||||
}
|
||||
|
||||
|
||||
int&
|
||||
IDList::operator[](const int& _i)
|
||||
unsigned&
|
||||
IDList::operator[](const unsigned& _i)
|
||||
{
|
||||
if (this->size() > _i)
|
||||
{
|
||||
|
@ -105,7 +106,7 @@ IDList::clear()
|
|||
}
|
||||
|
||||
void
|
||||
IDList::copy(const vector<int>& _new_idlist)
|
||||
IDList::copy(const vector<unsigned>& _new_idlist)
|
||||
{
|
||||
this->id_list = _new_idlist;
|
||||
}
|
||||
|
@ -116,8 +117,8 @@ IDList::copy(const IDList* _new_idlist)
|
|||
this->id_list = *(_new_idlist->getList());
|
||||
}
|
||||
|
||||
int
|
||||
IDList::intersectList(const int* _id_list, int _list_len)
|
||||
unsigned
|
||||
IDList::intersectList(const unsigned* _id_list, unsigned _list_len)
|
||||
{
|
||||
if (_id_list == NULL || _list_len == 0)
|
||||
{
|
||||
|
@ -160,9 +161,9 @@ IDList::intersectList(const int* _id_list, int _list_len)
|
|||
{
|
||||
case 0:
|
||||
{ //this bracket is needed if vars are defined in case
|
||||
int id_i = 0;
|
||||
int index_move_forward = 0;
|
||||
vector<int>::iterator it = this->id_list.begin();
|
||||
unsigned id_i = 0;
|
||||
unsigned index_move_forward = 0;
|
||||
vector<unsigned>::iterator it = this->id_list.begin();
|
||||
while (it != (this->id_list).end())
|
||||
{
|
||||
int can_id = *it;
|
||||
|
@ -186,16 +187,16 @@ IDList::intersectList(const int* _id_list, int _list_len)
|
|||
it++;
|
||||
}
|
||||
remove_number = this->id_list.size() - index_move_forward;
|
||||
vector<int>::iterator new_end = this->id_list.begin() + index_move_forward;
|
||||
vector<unsigned>::iterator new_end = this->id_list.begin() + index_move_forward;
|
||||
(this->id_list).erase(new_end, this->id_list.end());
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
vector<int> new_id_list;
|
||||
for (int i = 0; i < _list_len; ++i)
|
||||
vector<unsigned> new_id_list;
|
||||
for (unsigned i = 0; i < _list_len; ++i)
|
||||
{
|
||||
if (Util::bsearch_vec_uporder(_id_list[i], this->getList()) != -1)
|
||||
if (Util::bsearch_vec_uporder(_id_list[i], this->getList()) != INVALID)
|
||||
new_id_list.push_back(_id_list[i]);
|
||||
}
|
||||
this->id_list = new_id_list;
|
||||
|
@ -204,11 +205,11 @@ IDList::intersectList(const int* _id_list, int _list_len)
|
|||
}
|
||||
case 2:
|
||||
{
|
||||
vector<int> new_id_list;
|
||||
int m = this->id_list.size(), i;
|
||||
vector<unsigned> new_id_list;
|
||||
unsigned m = this->id_list.size(), i;
|
||||
for (i = 0; i < m; ++i)
|
||||
{
|
||||
if (Util::bsearch_int_uporder(this->id_list[i], _id_list, _list_len) != -1)
|
||||
if (Util::bsearch_int_uporder(this->id_list[i], _id_list, _list_len) != INVALID)
|
||||
new_id_list.push_back(this->id_list[i]);
|
||||
}
|
||||
this->id_list = new_id_list;
|
||||
|
@ -223,25 +224,25 @@ IDList::intersectList(const int* _id_list, int _list_len)
|
|||
return remove_number;
|
||||
}
|
||||
|
||||
int
|
||||
unsigned
|
||||
IDList::intersectList(const IDList& _id_list)
|
||||
{
|
||||
// copy _id_list to the temp array first.
|
||||
int temp_list_len = _id_list.size();
|
||||
int* temp_list = new int[temp_list_len];
|
||||
unsigned temp_list_len = _id_list.size();
|
||||
unsigned* temp_list = new unsigned[temp_list_len];
|
||||
//BETTER: avoid this copy and do the intersection directly here
|
||||
for (int i = 0; i < temp_list_len; i++)
|
||||
for (unsigned i = 0; i < temp_list_len; i++)
|
||||
{
|
||||
temp_list[i] = _id_list.getID(i);
|
||||
}
|
||||
|
||||
int remove_number = this->intersectList(temp_list, temp_list_len);
|
||||
delete[]temp_list;
|
||||
unsigned remove_number = this->intersectList(temp_list, temp_list_len);
|
||||
delete[] temp_list;
|
||||
return remove_number;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
|
||||
unsigned
|
||||
IDList::unionList(const unsigned* _id_list, unsigned _list_len, bool only_literal)
|
||||
{
|
||||
if (_id_list == NULL || _list_len == 0)
|
||||
return 0;
|
||||
|
@ -249,23 +250,25 @@ IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
|
|||
if (only_literal)
|
||||
{
|
||||
//NOTICE: this means that the original list contains no literals and we need to add them from a list (containing entities/literals)
|
||||
int k = 0;
|
||||
unsigned k = 0;
|
||||
//NOTICE:literal id > entity id; the list is ordered
|
||||
for (; k < _list_len; ++k)
|
||||
if (Util::is_literal_ele(_id_list[k]))
|
||||
break;
|
||||
|
||||
//TODO+BETTER: speed up the process to find the first literal
|
||||
for (; k < _list_len; ++k)
|
||||
this->addID(_id_list[k]);
|
||||
return _list_len - k;
|
||||
}
|
||||
// O(n)
|
||||
int origin_size = (this->id_list).size();
|
||||
int* temp_list = new int[origin_size + _list_len];
|
||||
int temp_list_len = 0;
|
||||
unsigned origin_size = (this->id_list).size();
|
||||
unsigned* temp_list = new unsigned[origin_size + _list_len];
|
||||
unsigned temp_list_len = 0;
|
||||
|
||||
// union
|
||||
{
|
||||
int i = 0, j = 0;
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < origin_size && j < _list_len)
|
||||
{
|
||||
if (this->id_list[i] == _id_list[j])
|
||||
|
@ -298,7 +301,7 @@ IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
|
|||
}
|
||||
}
|
||||
|
||||
int add_number = temp_list_len - origin_size;
|
||||
unsigned add_number = temp_list_len - origin_size;
|
||||
|
||||
// update this IDList
|
||||
this->clear();
|
||||
|
@ -344,24 +347,24 @@ IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
|
|||
*/
|
||||
}
|
||||
|
||||
int
|
||||
unsigned
|
||||
IDList::unionList(const IDList& _id_list, bool only_literal)
|
||||
{
|
||||
// copy _id_list to the temp array first.
|
||||
int temp_list_len = _id_list.size();
|
||||
int* temp_list = new int[temp_list_len];
|
||||
unsigned temp_list_len = _id_list.size();
|
||||
unsigned* temp_list = new unsigned[temp_list_len];
|
||||
//BETTER: avoid this copy and do the union directly here
|
||||
for (int i = 0; i < temp_list_len; i++)
|
||||
for (unsigned i = 0; i < temp_list_len; i++)
|
||||
{
|
||||
temp_list[i] = _id_list.getID(i);
|
||||
}
|
||||
int ret = this->unionList(temp_list, temp_list_len, only_literal);
|
||||
unsigned ret = this->unionList(temp_list, temp_list_len, only_literal);
|
||||
delete[] temp_list;
|
||||
return ret;
|
||||
}
|
||||
|
||||
IDList*
|
||||
IDList::intersect(const IDList& _id_list, const int* _list, int _len)
|
||||
IDList::intersect(const IDList& _id_list, const unsigned* _list, unsigned _len)
|
||||
{
|
||||
IDList* p = new IDList;
|
||||
//if (_list == NULL || _len == 0) //just copy _id_list
|
||||
|
@ -379,7 +382,7 @@ IDList::intersect(const IDList& _id_list, const int* _list, int _len)
|
|||
//compare n(k+1) and nklogn: k0 = log(n/2)2 requiring that n>2
|
||||
//k<=k0 binary search; k>k0 intersect
|
||||
int method = -1; //0: intersect 1: search in vector 2: search in int*
|
||||
int n = _id_list.size();
|
||||
unsigned n = _id_list.size();
|
||||
double k = 0;
|
||||
if (n < _len)
|
||||
{
|
||||
|
@ -407,10 +410,10 @@ IDList::intersect(const IDList& _id_list, const int* _list, int _len)
|
|||
case 0:
|
||||
{ //this bracket is needed if vars are defined in case
|
||||
int id_i = 0;
|
||||
int num = _id_list.size();
|
||||
for (int i = 0; i < num; ++i)
|
||||
unsigned num = _id_list.size();
|
||||
for (unsigned i = 0; i < num; ++i)
|
||||
{
|
||||
int can_id = _id_list.getID(i);
|
||||
unsigned can_id = _id_list.getID(i);
|
||||
while ((id_i < _len) && (_list[id_i] < can_id))
|
||||
{
|
||||
id_i++;
|
||||
|
@ -431,20 +434,20 @@ IDList::intersect(const IDList& _id_list, const int* _list, int _len)
|
|||
}
|
||||
case 1:
|
||||
{
|
||||
for (int i = 0; i < _len; ++i)
|
||||
for (unsigned i = 0; i < _len; ++i)
|
||||
{
|
||||
if (Util::bsearch_vec_uporder(_list[i], _id_list.getList()) != -1)
|
||||
if (Util::bsearch_vec_uporder(_list[i], _id_list.getList()) != INVALID)
|
||||
p->addID(_list[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
int m = _id_list.size(), i;
|
||||
unsigned m = _id_list.size(), i;
|
||||
for (i = 0; i < m; ++i)
|
||||
{
|
||||
int t = _id_list.getID(i);
|
||||
if (Util::bsearch_int_uporder(t, _list, _len) != -1)
|
||||
unsigned t = _id_list.getID(i);
|
||||
if (Util::bsearch_int_uporder(t, _list, _len) != INVALID)
|
||||
p->addID(t);
|
||||
}
|
||||
break;
|
||||
|
@ -457,15 +460,16 @@ IDList::intersect(const IDList& _id_list, const int* _list, int _len)
|
|||
return p;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::erase(int i)
|
||||
bool
|
||||
IDList::erase(unsigned i)
|
||||
{
|
||||
id_list.erase(id_list.begin() + i, id_list.end());
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::bsearch_uporder(int _key)
|
||||
unsigned
|
||||
IDList::bsearch_uporder(unsigned _key)
|
||||
{
|
||||
return Util::bsearch_vec_uporder(_key, this->getList());
|
||||
}
|
||||
|
||||
|
|
|
@ -15,31 +15,31 @@ class IDList
|
|||
{
|
||||
public:
|
||||
IDList();
|
||||
int getID(int _i)const;
|
||||
bool addID(int _id);
|
||||
unsigned getID(unsigned _i) const;
|
||||
bool addID(unsigned _id);
|
||||
|
||||
//check whether _id exists in this IDList.
|
||||
bool isExistID(int _id) const;
|
||||
int size() const;
|
||||
bool isExistID(unsigned _id) const;
|
||||
unsigned size() const;
|
||||
bool empty() const;
|
||||
const std::vector<int>* getList()const;
|
||||
int& operator[] (const int & _i);
|
||||
const std::vector<unsigned>* getList()const;
|
||||
unsigned& operator[] (const unsigned & _i);
|
||||
std::string to_str();
|
||||
int sort();
|
||||
void clear();
|
||||
void copy(const std::vector<int>& _new_idlist);
|
||||
void copy(const std::vector<unsigned>& _new_idlist);
|
||||
void copy(const IDList* _new_idlist);
|
||||
|
||||
// intersect/union _id_list to this IDList, note that the two list must be ordered before using these two functions.
|
||||
int intersectList(const int* _id_list, int _list_len);
|
||||
int intersectList(const IDList&);
|
||||
int unionList(const int* _id_list, int _list_len, bool only_literal=false);
|
||||
int unionList(const IDList&, bool only_literal=false);
|
||||
int bsearch_uporder(int _key);
|
||||
static IDList* intersect(const IDList&, const int*, int);
|
||||
unsigned intersectList(const unsigned* _id_list, unsigned _list_len);
|
||||
unsigned intersectList(const IDList&);
|
||||
unsigned unionList(const unsigned* _id_list, unsigned _list_len, bool only_literal=false);
|
||||
unsigned unionList(const IDList&, bool only_literal=false);
|
||||
unsigned bsearch_uporder(unsigned _key);
|
||||
static IDList* intersect(const IDList&, const unsigned*, unsigned);
|
||||
private:
|
||||
std::vector<int> id_list;
|
||||
int erase(int i);
|
||||
std::vector<unsigned> id_list;
|
||||
bool erase(unsigned i);
|
||||
};
|
||||
|
||||
#endif //_QUERY_IDLIST_H
|
||||
|
|
|
@ -23,7 +23,7 @@ void ResultFilter::changeResultHashTable(SPARQLquery &query, int value)
|
|||
for (int i = 0; i < query.getBasicQueryNum(); i++)
|
||||
{
|
||||
BasicQuery &basicquery = query.getBasicQuery(i);
|
||||
vector<int*> &basicquery_result =basicquery.getResultList();
|
||||
vector<unsigned*> &basicquery_result =basicquery.getResultList();
|
||||
int result_num = basicquery_result.size();
|
||||
int var_num = basicquery.getVarNum();
|
||||
|
||||
|
@ -79,3 +79,4 @@ void ResultFilter::candFilterWithResultHashTable(BasicQuery &basicquery)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,11 +13,13 @@
|
|||
#include "SPARQLquery.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
//TODO: adjust the type to unsigned
|
||||
|
||||
class ResultFilter
|
||||
{
|
||||
private:
|
||||
static const int MAX_SIZE = 1048576;
|
||||
inline int hash(unsigned int x)
|
||||
inline int hash(unsigned x)
|
||||
{
|
||||
x = (x + 0x7ed55d16) + (x << 12);
|
||||
x = (x ^ 0xc761c23c) ^ (x >> 19);
|
||||
|
|
|
@ -27,7 +27,7 @@ ResultSet::~ResultSet()
|
|||
delete[] this->var_name;
|
||||
if (!this->useStream)
|
||||
{
|
||||
for(int i = 0; i < this->ansNum; i++)
|
||||
for(unsigned i = 0; i < this->ansNum; i++)
|
||||
{
|
||||
delete[] this->answer[i];
|
||||
}
|
||||
|
@ -68,7 +68,7 @@ ResultSet::checkUseStream()
|
|||
}
|
||||
|
||||
void
|
||||
ResultSet::setOutputOffsetLimit(int _output_offset, int _output_limit)
|
||||
ResultSet::setOutputOffsetLimit(unsigned _output_offset, unsigned _output_limit)
|
||||
{
|
||||
this->output_offset = _output_offset;
|
||||
this->output_limit = _output_limit;
|
||||
|
@ -89,7 +89,7 @@ ResultSet::setVar(const vector<string> & _var_names)
|
|||
string
|
||||
ResultSet::to_str()
|
||||
{
|
||||
int ans_num = max(this->ansNum - this->output_offset, 0);
|
||||
unsigned ans_num = max((long long)this->ansNum - this->output_offset, (long long)0);
|
||||
if (this->output_limit != -1)
|
||||
ans_num = min(ans_num, this->output_limit);
|
||||
if(ans_num == 0)
|
||||
|
@ -111,7 +111,7 @@ ResultSet::to_str()
|
|||
this->resetStream();
|
||||
|
||||
const Bstr* bp;
|
||||
for(int i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
|
||||
for(unsigned i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
|
||||
{
|
||||
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
|
||||
break;
|
||||
|
@ -161,7 +161,7 @@ ResultSet::to_JSON()
|
|||
this->resetStream();
|
||||
|
||||
const Bstr* bp;
|
||||
for(int i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
|
||||
for(unsigned i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
|
||||
{
|
||||
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
|
||||
break;
|
||||
|
@ -234,7 +234,7 @@ ResultSet::output(FILE* _fp)
|
|||
{
|
||||
if (this->useStream)
|
||||
{
|
||||
int ans_num = max(this->ansNum - this->output_offset, 0);
|
||||
unsigned ans_num = max((long long)this->ansNum - this->output_offset, (long long)0);
|
||||
if (this->output_limit != -1)
|
||||
ans_num = min(ans_num, this->output_limit);
|
||||
if(ans_num == 0)
|
||||
|
@ -251,7 +251,7 @@ ResultSet::output(FILE* _fp)
|
|||
fprintf(_fp, "\n");
|
||||
|
||||
const Bstr* bp;
|
||||
for(int i = 0; i < this->ansNum; i++)
|
||||
for(unsigned i = 0; i < this->ansNum; i++)
|
||||
{
|
||||
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
|
||||
break;
|
||||
|
@ -275,7 +275,7 @@ ResultSet::output(FILE* _fp)
|
|||
}
|
||||
|
||||
void
|
||||
ResultSet::openStream(std::vector<int> &_keys, std::vector<bool> &_desc)
|
||||
ResultSet::openStream(vector<unsigned> &_keys, vector<bool> &_desc)
|
||||
{
|
||||
if (this->useStream)
|
||||
{
|
||||
|
|
|
@ -21,9 +21,9 @@ private:
|
|||
public:
|
||||
int select_var_num;
|
||||
std::string* var_name;
|
||||
int ansNum;
|
||||
unsigned ansNum;
|
||||
std::string** answer;
|
||||
int output_offset, output_limit;
|
||||
unsigned output_offset, output_limit;
|
||||
|
||||
ResultSet();
|
||||
~ResultSet();
|
||||
|
@ -31,7 +31,7 @@ public:
|
|||
|
||||
void setUseStream();
|
||||
bool checkUseStream();
|
||||
void setOutputOffsetLimit(int _output_offset, int _output_limit);
|
||||
void setOutputOffsetLimit(unsigned _output_offset, unsigned _output_limit);
|
||||
|
||||
//convert to binary string
|
||||
//Bstr* to_bstr();
|
||||
|
@ -44,7 +44,7 @@ public:
|
|||
void setVar(const std::vector<std::string> & _var_names);
|
||||
|
||||
//operations on private stream from caller
|
||||
void openStream(std::vector<int> &_keys, std::vector<bool> &_desc);
|
||||
void openStream(std::vector<unsigned> &_keys, std::vector<bool> &_desc);
|
||||
void resetStream();
|
||||
void writeToStream(std::string& _s);
|
||||
const Bstr* getOneRecord();
|
||||
|
|
|
@ -36,3 +36,4 @@ class Varset
|
|||
|
||||
#endif // _QUERY_VARSET_H
|
||||
|
||||
|
||||
|
|
|
@ -13,10 +13,11 @@ using namespace std;
|
|||
SigEntry::SigEntry()
|
||||
{
|
||||
(this->sig).entityBitSet.reset();
|
||||
this->entity_id = -1;
|
||||
this->entity_id = INVALID_ENTITY_LITERAL_ID;
|
||||
//this->entity_id = -1;
|
||||
}
|
||||
|
||||
SigEntry::SigEntry(int _entity_id, EntityBitSet& _bitset)
|
||||
SigEntry::SigEntry(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet& _bitset)
|
||||
{
|
||||
this->entity_id = _entity_id;
|
||||
(this->sig).entityBitSet |= _bitset;
|
||||
|
@ -28,7 +29,7 @@ SigEntry::SigEntry(const SigEntry& _sig_entry)
|
|||
this->entity_id = _sig_entry.entity_id;
|
||||
}
|
||||
|
||||
SigEntry::SigEntry(const EntitySig& _sig, int _entity_id)
|
||||
SigEntry::SigEntry(const EntitySig& _sig, TYPE_ENTITY_LITERAL_ID _entity_id)
|
||||
{
|
||||
this->sig = _sig;
|
||||
this->entity_id = _entity_id;
|
||||
|
@ -40,7 +41,7 @@ SigEntry::getEntitySig() const
|
|||
return this->sig;
|
||||
}
|
||||
|
||||
int
|
||||
TYPE_ENTITY_LITERAL_ID
|
||||
SigEntry::getEntityId() const
|
||||
{
|
||||
return this->entity_id;
|
||||
|
|
|
@ -17,14 +17,14 @@ class SigEntry
|
|||
private:
|
||||
EntitySig sig;
|
||||
//invalid id (-1 in the int version, INVALID_ENTITY_LITERAL_ID in the typed version) if not in leaf node
|
||||
int entity_id;
|
||||
TYPE_ENTITY_LITERAL_ID entity_id;
|
||||
public:
|
||||
SigEntry();
|
||||
SigEntry(int _entity_id, EntityBitSet& _bitset);
|
||||
SigEntry(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet& _bitset);
|
||||
SigEntry(const SigEntry& _sig_entry);
|
||||
SigEntry(const EntitySig& sig, int _entity_id);
|
||||
SigEntry(const EntitySig& sig, TYPE_ENTITY_LITERAL_ID _entity_id);
|
||||
const EntitySig& getEntitySig() const;
|
||||
int getEntityId() const;
|
||||
TYPE_ENTITY_LITERAL_ID getEntityId() const;
|
||||
int getSigCount() const;
|
||||
SigEntry& operator=(const SigEntry _sig_entry);
|
||||
SigEntry& operator|=(const SigEntry _sig_entry);
|
||||
|
|
|
@ -32,7 +32,7 @@ Signature::BitSet2str(const EntityBitSet& _bitset)
|
|||
}
|
||||
|
||||
void
|
||||
Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, int _pre_id, int _neighbor_id, const char _type)
|
||||
Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type)
|
||||
{
|
||||
Signature::encodePredicate2Entity(_entity_bs, _pre_id, _type);
|
||||
|
||||
|
@ -47,29 +47,29 @@ Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, int _pre_id, int _neighbo
|
|||
}
|
||||
|
||||
void
|
||||
Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, int _pre_id, const char _type)
|
||||
Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, const char _type)
|
||||
{
|
||||
//NOTICE:this not used now
|
||||
if (Signature::PREDICATE_ENCODE_METHOD == 0)
|
||||
{
|
||||
//WARN: update this branch before using it again, because the encoding method has changed!
|
||||
int pos = ((_pre_id + 10) % Signature::EDGE_SIG_LENGTH) + Signature::STR_SIG_LENGTH;
|
||||
unsigned pos = ((_pre_id + 10) % Signature::EDGE_SIG_LENGTH) + Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
//NOTICE: in * maybe the int will overflow
|
||||
//NOTICE: in * maybe the unsigned will overflow
|
||||
long long id = _pre_id;
|
||||
int seed_num = id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
unsigned seed_num = id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
|
||||
if (_type == Util::EDGE_OUT)
|
||||
{
|
||||
seed_num += Signature::EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
}
|
||||
|
||||
//int primeSize = 5;
|
||||
//int prime1[]={5003,5009,5011,5021,5023};
|
||||
//int prime2[]={49943,49957,49991,49993,49999};
|
||||
//unsigned primeSize = 5;
|
||||
//unsigned prime1[]={5003,5009,5011,5021,5023};
|
||||
//unsigned prime2[]={49943,49957,49991,49993,49999};
|
||||
|
||||
//NOTICE: more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
|
||||
// when the data set is big enough, cutting down the size of the candidate list should be our primary consideration.
|
||||
|
@ -77,49 +77,49 @@ Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, int _pre_id, const c
|
|||
// also, when the data set is small, hash conflicts can hardly happen.
|
||||
// therefore, I think using 2 primes(set up two ones in bitset) is enough.
|
||||
// --by hanshuo.
|
||||
//int primeSize = 2;
|
||||
//int prime1[] = {5003, 5011};
|
||||
//int prime2[] = {49957, 49993};
|
||||
//unsigned primeSize = 2;
|
||||
//unsigned prime1[] = {5003, 5011};
|
||||
//unsigned prime2[] = {49957, 49993};
|
||||
|
||||
//for(int i = 0; i < primeSize; i++)
|
||||
//for(unsigned i = 0; i < primeSize; i++)
|
||||
//{
|
||||
//int seed = _pre_id * prime1[i] % prime2[i];
|
||||
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
//unsigned seed = _pre_id * prime1[i] % prime2[i];
|
||||
//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
//_entity_bs.set(pos);
|
||||
//}
|
||||
int seed = id * 5003 % 49957;
|
||||
int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
unsigned seed = id * 5003 % 49957;
|
||||
unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
_entity_bs.set(pos);
|
||||
}
|
||||
}
|
||||
|
||||
//void
|
||||
//Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
|
||||
//Signature::encodePredicate2Edge(unsigned _pre_id, EdgeBitSet& _edge_bs)
|
||||
//{
|
||||
//if (Signature::PREDICATE_ENCODE_METHOD == 0)
|
||||
//{
|
||||
//int pos = (_pre_id + 10) % Signature::EDGE_SIG_LENGTH;
|
||||
//unsigned pos = (_pre_id + 10) % Signature::EDGE_SIG_LENGTH;
|
||||
//_edge_bs.set(pos);
|
||||
//}
|
||||
//else
|
||||
//{
|
||||
//int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
////int primeSize = 5;
|
||||
////int prime1[]={5003,5009,5011,5021,5023};
|
||||
////int prime2[]={49943,49957,49991,49993,49999};
|
||||
//unsigned seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
////unsigned primeSize = 5;
|
||||
////unsigned prime1[]={5003,5009,5011,5021,5023};
|
||||
////unsigned prime2[]={49943,49957,49991,49993,49999};
|
||||
|
||||
////int primeSize = 2;
|
||||
////int prime1[] = {5003,5011};
|
||||
////int prime2[] = {49957,49993};
|
||||
////unsigned primeSize = 2;
|
||||
////unsigned prime1[] = {5003,5011};
|
||||
////unsigned prime2[] = {49957,49993};
|
||||
|
||||
////for (int i = 0; i < primeSize; i++)
|
||||
////for (unsigned i = 0; i < primeSize; i++)
|
||||
////{
|
||||
////int seed = _pre_id * prime1[i] % prime2[i];
|
||||
////int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
////unsigned seed = _pre_id * prime1[i] % prime2[i];
|
||||
////unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
////_edge_bs.set(pos);
|
||||
////}
|
||||
//int seed = _pre_id * 5003 % 49957;
|
||||
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
//unsigned seed = _pre_id * 5003 % 49957;
|
||||
//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
//_edge_bs.set(pos);
|
||||
//}
|
||||
//}
|
||||
|
@ -127,11 +127,11 @@ Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, int _pre_id, const c
|
|||
//NOTICE: no need to encode itself because only variable in query need to be filtered!
|
||||
//So only consider all neighbors!
|
||||
void
|
||||
Signature::encodeStr2Entity(EntityBitSet& _entity_bs, int _neighbor_id, const char _type)
|
||||
Signature::encodeStr2Entity(EntityBitSet& _entity_bs, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type)
|
||||
{
|
||||
//NOTICE: we assume the parameter is always valid(invalid args should not be passed here)
|
||||
long long id = _neighbor_id;
|
||||
//NOTICE: in * maybe the int will overflow
|
||||
//NOTICE: in * maybe the unsigned will overflow
|
||||
long long seed = id * 5003 % 49957;
|
||||
seed = seed % Signature::STR_SIG_INTERVAL_BASE;
|
||||
seed = seed + (id % Signature::STR_SIG_INTERVAL_NUM) * Signature::STR_SIG_INTERVAL_BASE;
|
||||
|
@ -159,14 +159,14 @@ Signature::encodeStr2Entity(EntityBitSet& _entity_bs, int _neighbor_id, const ch
|
|||
//_str is subject or object or literal
|
||||
//if (strlen(_str) >0 && _str[0] == '?')
|
||||
//return;
|
||||
//int length = (int)strlen(_str);
|
||||
//unsigned int hashKey = 0;
|
||||
//unsigned int pos = 0;
|
||||
//unsigned length = (unsigned)strlen(_str);
|
||||
//unsigned hashKey = 0;
|
||||
//unsigned pos = 0;
|
||||
//char *str2 = (char*)calloc(length + 1, sizeof(char));
|
||||
//strcpy(str2, _str);
|
||||
//char *str = str2;
|
||||
//unsigned base = Signature::STR_SIG_BASE * (Signature::HASH_NUM - 1);
|
||||
//for (int i = Signature::HASH_NUM - 1; i >= 0; --i)
|
||||
//for (unsigned i = Signature::HASH_NUM - 1; i >= 0; --i)
|
||||
//{
|
||||
//HashFunction hf = Util::hash[i];
|
||||
//if (hf == NULL)
|
||||
|
@ -198,7 +198,7 @@ Signature::encodeStr2Entity(EntityBitSet& _entity_bs, int _neighbor_id, const ch
|
|||
}
|
||||
|
||||
//void
|
||||
//Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs)
|
||||
//Signature::encodeStrID2Entity(unsigned _str_id, EntityBitSet& _entity_bs)
|
||||
//{
|
||||
////NOT USED NOW
|
||||
//}
|
||||
|
|
|
@ -24,13 +24,13 @@ public:
|
|||
|
||||
//static HashFunction hash[HashNum];
|
||||
//must make sure: ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH
|
||||
static const int STR_SIG_INTERVAL_NUM = 20;
|
||||
//static const int STR_SIG_INTERVAL_NUM = 16;
|
||||
static const int STR_SIG_INTERVAL_BASE = 10;
|
||||
static const int STR_SIG_LITERAL = STR_SIG_INTERVAL_NUM * STR_SIG_INTERVAL_BASE;
|
||||
static const int STR_SIG_ENTITY = STR_SIG_LITERAL * 2;
|
||||
static const unsigned STR_SIG_INTERVAL_NUM = 20;
|
||||
//static const unsigned STR_SIG_INTERVAL_NUM = 16;
|
||||
static const unsigned STR_SIG_INTERVAL_BASE = 10;
|
||||
static const unsigned STR_SIG_LITERAL = STR_SIG_INTERVAL_NUM * STR_SIG_INTERVAL_BASE;
|
||||
static const unsigned STR_SIG_ENTITY = STR_SIG_LITERAL * 2;
|
||||
//here we divide as entity neighbors and literal neighbors: ENTITY(in and out), LITERAL(only for out edges)
|
||||
static const int STR_SIG_LENGTH = STR_SIG_ENTITY + STR_SIG_LITERAL; //600
|
||||
static const unsigned STR_SIG_LENGTH = STR_SIG_ENTITY + STR_SIG_LITERAL; //600
|
||||
|
||||
//NOTICE: after vstree filter, all constant neighbors will be used again to do precise filtering
|
||||
//however, only a few constant predicates will be used again for filtering later
|
||||
|
@ -39,15 +39,15 @@ public:
|
|||
|
||||
//str filter is more important in VSTree than predicate, because
|
||||
//a predicate may correspond to a lot of entities and predicate num is usually small
|
||||
static const int EDGE_SIG_INTERVAL_NUM_HALF = 10; //in edge or out edge
|
||||
//static const int EDGE_SIG_INTERVAL_NUM_HALF = 16; //in edge or out edge
|
||||
static const int EDGE_SIG_INTERVAL_NUM = 2 * EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
static const int EDGE_SIG_INTERVAL_BASE = 10;
|
||||
static const int EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //200
|
||||
//static const int EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE;
|
||||
static const unsigned EDGE_SIG_INTERVAL_NUM_HALF = 10; //in edge or out edge
|
||||
//static const unsigned EDGE_SIG_INTERVAL_NUM_HALF = 16; //in edge or out edge
|
||||
static const unsigned EDGE_SIG_INTERVAL_NUM = 2 * EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
static const unsigned EDGE_SIG_INTERVAL_BASE = 10;
|
||||
static const unsigned EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //200
|
||||
//static const unsigned EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE;
|
||||
|
||||
static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH; //1000
|
||||
//static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
|
||||
static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH; //1000
|
||||
//static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
|
||||
|
||||
//QUERY: the num of bitset must be based on 16, i.e. unsigned short? 1000 is not allowed
|
||||
//but 800, 500 is ok
|
||||
|
@ -58,12 +58,12 @@ public:
|
|||
static std::string BitSet2str(const EntityBitSet& _bitset);
|
||||
|
||||
//NOTICE: there are two predicate encoding method now, see the encoding functions @Signature.cpp for details
|
||||
const static int PREDICATE_ENCODE_METHOD = 1;
|
||||
static void encodePredicate2Entity(EntityBitSet& _entity_bs, int _pre_id, const char _type);
|
||||
static void encodeStr2Entity(EntityBitSet& _entity_bs, int _neighbor_id, const char _type);
|
||||
static void encodeEdge2Entity(EntityBitSet& _entity_bs, int _pre_id, int _neighbor_id, const char _type);
|
||||
//static void encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs);
|
||||
//static void encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs);
|
||||
const static unsigned PREDICATE_ENCODE_METHOD = 1;
|
||||
static void encodePredicate2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, const char _type);
|
||||
static void encodeStr2Entity(EntityBitSet& _entity_bs, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type);
|
||||
static void encodeEdge2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type);
|
||||
//static void encodeStrID2Entity(unsigned _str_id, EntityBitSet& _entity_bs);
|
||||
//static void encodePredicate2Edge(unsigned _pre_id, EdgeBitSet& _edge_bs);
|
||||
|
||||
//Signature()
|
||||
//{
|
||||
|
@ -109,7 +109,7 @@ public:
|
|||
bool operator!=(const EntitySig& _sig)const;
|
||||
EntitySig& operator=(const EntitySig& _sig);
|
||||
const EntityBitSet& getBitset()const;
|
||||
void encode(const char * _str, int _pre_id);
|
||||
void encode(const char * _str, TYPE_PREDICATE_ID _pre_id);
|
||||
std::string to_str() const;
|
||||
};
|
||||
|
||||
|
|
|
@ -345,7 +345,7 @@ void StringIndex::trySequenceAccess()
|
|||
this->predicate.trySequenceAccess();
|
||||
}
|
||||
|
||||
void StringIndex::change(std::vector<int> &ids, KVstore &kv_store, bool is_entity_or_literal)
|
||||
void StringIndex::change(std::vector<unsigned> &ids, KVstore &kv_store, bool is_entity_or_literal)
|
||||
{
|
||||
if (is_entity_or_literal)
|
||||
{
|
||||
|
@ -364,7 +364,7 @@ void StringIndex::change(std::vector<int> &ids, KVstore &kv_store, bool is_entit
|
|||
}
|
||||
}
|
||||
|
||||
void StringIndex::disable(std::vector<int> &ids, bool is_entity_or_literal)
|
||||
void StringIndex::disable(std::vector<unsigned> &ids, bool is_entity_or_literal)
|
||||
{
|
||||
if (is_entity_or_literal)
|
||||
{
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
#include "../KVstore/KVstore.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
//TODO: adjust the type
|
||||
|
||||
class StringIndexFile
|
||||
{
|
||||
public:
|
||||
|
@ -124,8 +126,8 @@ class StringIndex
|
|||
void addRequest(int id, std::string *str, bool is_entity_or_literal = true);
|
||||
void trySequenceAccess();
|
||||
|
||||
void change(std::vector<int> &ids, KVstore &kv_store, bool is_entity_or_literal = true);
|
||||
void disable(std::vector<int> &ids, bool is_entity_or_literal = true);
|
||||
void change(std::vector<unsigned> &ids, KVstore &kv_store, bool is_entity_or_literal = true);
|
||||
void disable(std::vector<unsigned> &ids, bool is_entity_or_literal = true);
|
||||
};
|
||||
|
||||
#endif // _STRING_INDEX_H
|
||||
|
|
|
@ -17,13 +17,20 @@ Bstr::Bstr()
|
|||
this->str = NULL;
|
||||
}
|
||||
|
||||
Bstr::Bstr(const char* _str, unsigned _len)
|
||||
Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy)
|
||||
{
|
||||
//WARN: if need a string .please add '\0' in your own!
|
||||
this->length = _len;
|
||||
//DEBUG:if copy memory?
|
||||
//this->str = _str; //not valid:const char* -> char*
|
||||
this->str = (char*)malloc(_len);
|
||||
|
||||
//if(_nocopy)
|
||||
//{
|
||||
//this->str = _str; //not valid:const char* -> char*
|
||||
//return;
|
||||
//}
|
||||
|
||||
//NOTICE: we decide to use new/delete in global area
|
||||
//this->str = (char*)malloc(_len);
|
||||
this->str = new char[_len];
|
||||
memcpy(this->str, _str, sizeof(char) * _len);
|
||||
//this->str[_len]='\0';
|
||||
}
|
||||
|
@ -116,6 +123,15 @@ Bstr::operator != (const Bstr& _bstr)
|
|||
unsigned
|
||||
Bstr::getLen() const
|
||||
{
|
||||
//WARN: we should not include too complicate logic here!!!!
|
||||
|
||||
//NOTICE: this is for VList
|
||||
//if(this->isBstrLongList())
|
||||
////if(this->str == NULL)
|
||||
//{
|
||||
//return 0;
|
||||
//}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
@ -146,15 +162,18 @@ Bstr::copy(const Bstr* _bp)
|
|||
this->length = _bp->getLen();
|
||||
//DEBUG!!!
|
||||
//cerr<<"bstr length: "<<this->length<<endl;
|
||||
this->str = (char*)malloc(this->length);
|
||||
memcpy(this->str, _bp->getStr(), this->length);
|
||||
|
||||
//this->str = (char*)malloc(this->length);
|
||||
this->str = new char[this->length];
|
||||
memcpy(this->str, _bp->getStr(), sizeof(char) * this->length);
|
||||
}
|
||||
|
||||
void
|
||||
Bstr::copy(const char* _str, unsigned _len)
|
||||
{
|
||||
this->length = _len;
|
||||
this->str = (char*)malloc(this->length);
|
||||
//this->str = (char*)malloc(this->length);
|
||||
this->str = new char[this->length];
|
||||
memcpy(this->str, _str, this->length);
|
||||
}
|
||||
|
||||
|
@ -168,7 +187,8 @@ Bstr::clear()
|
|||
void
|
||||
Bstr::release()
|
||||
{
|
||||
free(this->str); //ok to be null, do nothing
|
||||
//free(this->str); //ok to be null, do nothing
|
||||
delete[] this->str;
|
||||
clear();
|
||||
}
|
||||
|
||||
|
@ -203,3 +223,9 @@ Bstr::print(string s) const
|
|||
//#endif
|
||||
}
|
||||
|
||||
bool
|
||||
Bstr::isBstrLongList() const
|
||||
{
|
||||
return this->str == NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,13 +18,14 @@ class Bstr
|
|||
{
|
||||
private:
|
||||
char* str; //pointers consume 8 byte in 64-bit system
|
||||
//TODO: the length maybe not needed
|
||||
unsigned length;
|
||||
|
||||
public:
|
||||
Bstr();
|
||||
//if copy memory, then use const char*, but slow
|
||||
//else, can not use const char* -> char*
|
||||
Bstr(const char* _str, unsigned _len);
|
||||
Bstr(const char* _str, unsigned _len, bool _nocopy = false);
|
||||
//Bstr(char* _str, unsigned _len);
|
||||
Bstr(const Bstr& _bstr);
|
||||
//Bstr& operate = (const Bstr& _bstr);
|
||||
|
@ -47,6 +48,9 @@ public:
|
|||
//int write(FILE* _fp);
|
||||
~Bstr();
|
||||
void print(std::string s) const; //DEBUG
|
||||
|
||||
//judge whether this Bstr represents a long list value (presumably one whose content is fetched each time on need)
|
||||
bool isBstrLongList() const;
|
||||
};
|
||||
|
||||
#endif // _UTIL_BSTR_H
|
||||
|
|
|
@ -41,7 +41,7 @@ Stream::Stream()
|
|||
this->init();
|
||||
}
|
||||
|
||||
Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag)
|
||||
Stream::Stream(std::vector<TYPE_ENTITY_LITERAL_ID>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag)
|
||||
{
|
||||
this->init();
|
||||
#ifdef DEBUG_STREAM
|
||||
|
@ -58,7 +58,8 @@ Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rown
|
|||
this->record_size = new unsigned[this->colnum];
|
||||
for(unsigned i = 0; i < this->colnum; ++i)
|
||||
{
|
||||
this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE));
|
||||
char* tmptr = new char[Util::TRANSFER_SIZE];
|
||||
this->record[i].setStr(tmptr);
|
||||
this->record_size[i] = Util::TRANSFER_SIZE;
|
||||
}
|
||||
|
||||
|
@ -148,7 +149,8 @@ Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx)
|
|||
if(length + 1 > this->record_size[_idx])
|
||||
{
|
||||
this->record[_idx].release();
|
||||
this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char)));
|
||||
char* tmptr = new char[length+1];
|
||||
this->record[_idx].setStr(tmptr);
|
||||
this->record_size[_idx] = length + 1; //one more byte: convenient to add \0
|
||||
}
|
||||
|
||||
|
@ -187,7 +189,8 @@ Stream::outputCache()
|
|||
{
|
||||
unsigned len;
|
||||
fread(&len, sizeof(unsigned), 1, this->tempfp);
|
||||
char* p = (char*)malloc(len * sizeof(char));
|
||||
//char* p = (char*)malloc(len * sizeof(char));
|
||||
char* p = new char[len];
|
||||
fread(p, sizeof(char), len, this->tempfp);
|
||||
bp[i].setLen(len);
|
||||
bp[i].setStr(p);
|
||||
|
@ -320,13 +323,16 @@ Stream::read()
|
|||
//FILE* fp = (FILE*)(this->ans);
|
||||
for(unsigned i = 0; i < this->colnum; ++i)
|
||||
{
|
||||
//BETTER:alloca and reuse the space in Bstr?
|
||||
//BETTER:alloc and reuse the space in Bstr?
|
||||
unsigned len;
|
||||
fread(&len, sizeof(unsigned), 1, this->ansDisk);
|
||||
char* s = (char*)calloc(len + 1, sizeof(char));
|
||||
//char* s = (char*)calloc(len + 1, sizeof(char));
|
||||
char* s = new char[len+1];
|
||||
fread(s, sizeof(char), len, this->ansDisk);
|
||||
s[len] = '\0';
|
||||
this->copyToRecord(s, len, i);
|
||||
free(s);
|
||||
//free(s);
|
||||
delete[] s;
|
||||
}
|
||||
}
|
||||
this->xpos++;
|
||||
|
@ -420,7 +426,9 @@ Stream::mergeSort()
|
|||
#endif
|
||||
break;
|
||||
}
|
||||
s = (char*)malloc(sizeof(char) * len);
|
||||
|
||||
//s = (char*)malloc(sizeof(char) * len);
|
||||
s = new char[len];
|
||||
fread(s, sizeof(char), len, tp);
|
||||
bp[i].setLen(len);
|
||||
bp[i].setStr(s);
|
||||
|
|
|
@ -38,17 +38,17 @@
|
|||
struct ResultCmp
|
||||
{
|
||||
int result_len;
|
||||
std::vector<int> keys;
|
||||
std::vector<TYPE_ENTITY_LITERAL_ID> keys;
|
||||
std::vector<bool> desc;
|
||||
//ResultCmp(int _l):result_len(_l){}
|
||||
ResultCmp()
|
||||
{
|
||||
this->result_len = 0;
|
||||
}
|
||||
ResultCmp(int _l, std::vector<int>& _keys, std::vector<bool> &_desc)
|
||||
ResultCmp(int _l, std::vector<TYPE_ENTITY_LITERAL_ID>& _keys, std::vector<bool> &_desc)
|
||||
{
|
||||
this->result_len = _l;
|
||||
this->keys = std::vector<int>(_keys);
|
||||
this->keys = std::vector<TYPE_ENTITY_LITERAL_ID>(_keys);
|
||||
this->desc = std::vector<bool>(_desc);
|
||||
}
|
||||
bool operator() (Bstr* const& a, Bstr* const& b)
|
||||
|
@ -140,7 +140,7 @@ public:
|
|||
static const unsigned BASE_MEMORY_LIMIT = 1 << 30;
|
||||
|
||||
Stream();
|
||||
Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag);
|
||||
Stream(std::vector<TYPE_ENTITY_LITERAL_ID>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag);
|
||||
|
||||
//read/write should be based on the unit of record
|
||||
|
||||
|
|
3154
Util/Util.cpp
3154
Util/Util.cpp
File diff suppressed because it is too large
Load Diff
98
Util/Util.h
98
Util/Util.h
|
@ -37,6 +37,7 @@ in the sparql query can point to the same node in data graph)
|
|||
#include <sys/file.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
|
@ -87,9 +88,10 @@ in the sparql query can point to the same node in data graph)
|
|||
//#define DEBUG_STREAM
|
||||
//#define DEBUG_PRECISE 1 all information
|
||||
//#define DEBUG_KVSTORE 1 //in KVstore
|
||||
#define DEBUG_VSTREE 1 //in Database
|
||||
//#define DEBUG_VSTREE 1 //in Database
|
||||
//#define DEBUG_LRUCACHE 1
|
||||
//#define DEBUG_DATABASE 1 //in Database
|
||||
//#define DEBUG_VLIST 1
|
||||
//
|
||||
//
|
||||
|
||||
|
@ -123,6 +125,12 @@ in the sparql query can point to the same node in data graph)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_VLIST
|
||||
#ifndef DEBUG
|
||||
#define DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef DEBUG
|
||||
//#define DEBUG
|
||||
#endif
|
||||
|
@ -141,21 +149,46 @@ typedef unsigned(*HashFunction)(const char*);
|
|||
//http://www.cppblog.com/aurain/archive/2010/07/06/119463.html
|
||||
//http://blog.csdn.net/mycomputerxiaomei/article/details/7641221
|
||||
//http://kb.cnblogs.com/page/189480/
|
||||
//
|
||||
//type for the triple num
|
||||
//TODO:this should use unsigned (triple num may > 2500000000)
|
||||
typedef int TNUM;
|
||||
//type for entity/literal/predicate ID
|
||||
typedef int ELPID;
|
||||
|
||||
//TODO:typedef several ID typesand new a ID module
|
||||
//what is more, the str length and Block ID in kvstore
|
||||
typedef unsigned PREDICATE_ID;
|
||||
//type for the triple num
|
||||
//NOTICE: this should use unsigned (triple num may > 2500000000)
|
||||
typedef unsigned TYPE_TRIPLE_NUM;
|
||||
|
||||
//type for entity/literal ID
|
||||
typedef unsigned TYPE_ENTITY_LITERAL_ID;
|
||||
static const TYPE_ENTITY_LITERAL_ID INVALID_ENTITY_LITERAL_ID = UINT_MAX;
|
||||
//static const TYPE_ENTITY_LITERAL_ID INVALID_ENTITY_LITERAL_ID = -1;
|
||||
//#define INVALID_ENTITY_LITERAL_ID UINT_MAX
|
||||
|
||||
//type for predicate ID
|
||||
typedef int TYPE_PREDICATE_ID;
|
||||
static const TYPE_PREDICATE_ID INVALID_PREDICATE_ID = -1;
|
||||
//static const TYPE_PREDICATE_ID INVALID_PREDICATE_ID = -1;
|
||||
//#define INVALID_PREDICATE_ID -1
|
||||
|
||||
|
||||
//TODO:typedef several ID types and new a ID module
|
||||
|
||||
//TODO:encode entity from low to high, encode literal from high to low(finally select the mid of space as border)
|
||||
typedef unsigned ENTITY_LITERAL_ID;
|
||||
typedef unsigned NODE_ID;
|
||||
|
||||
//TODO: what is more, the Block ID in kvstore
|
||||
//typedef unsigned NODE_ID;
|
||||
|
||||
//can use `man limits.h` to see more
|
||||
#define INVALID UINT_MAX
|
||||
static const unsigned INVALID = UINT_MAX;
|
||||
//static const int INVALID = -1;
|
||||
//#define INVALID UINT_MAX
|
||||
|
||||
//NOTICE: always use unsigned for query result matrix
|
||||
//
|
||||
//NOTICE: if use define, the type is none
|
||||
|
||||
typedef struct TYPE_ID_TUPLE
|
||||
{
|
||||
TYPE_ENTITY_LITERAL_ID subid;
|
||||
TYPE_ENTITY_LITERAL_ID preid;
|
||||
TYPE_ENTITY_LITERAL_ID objid;
|
||||
}ID_TUPLE;
|
||||
|
||||
/******** all static&universal constants and functions ********/
|
||||
class Util
|
||||
|
@ -168,13 +201,16 @@ public:
|
|||
|
||||
static const unsigned MB = 1048576;
|
||||
static const unsigned GB = 1073741824;
|
||||
static const int TRIPLE_NUM_MAX = 1000*1000*1000;
|
||||
//static const int TRIPLE_NUM_MAX = 1000*1000*1000;
|
||||
static const TYPE_TRIPLE_NUM TRIPLE_NUM_MAX = INVALID;
|
||||
static const char EDGE_IN = 'i';
|
||||
static const char EDGE_OUT= 'o';
|
||||
|
||||
//In order to differentiate the sub-part and literal-part of object
|
||||
//let subid begin with 0, while literalid begins with LITERAL_FIRST_ID
|
||||
//used in Database and Join
|
||||
static const int LITERAL_FIRST_ID = 1000*1000*1000;
|
||||
static const int LITERAL_FIRST_ID = 2 * 1000*1000*1000;
|
||||
|
||||
//initial transfer buffer size in Tree/ and Stream/
|
||||
static const unsigned TRANSFER_SIZE = 1 << 20; //1M
|
||||
//NOTICE:the larger the faster, but need to care the memory usage(not use 1<<33, negative)
|
||||
|
@ -204,11 +240,12 @@ public:
|
|||
static int compIIpair(int _a1, int _b1, int _a2, int _b2);
|
||||
static std::string showtime();
|
||||
static int cmp_int(const void* _i1, const void* _i2);
|
||||
static void sort(int*& _id_list, int _list_len);
|
||||
static int bsearch_int_uporder(int _key, const int* _array,int _array_num);
|
||||
static bool bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len);
|
||||
static int bsearch_vec_uporder(int _key, const std::vector<int>* _vec);
|
||||
static std::string result_id_str(std::vector<int*>& _v, int _var_num);
|
||||
static int cmp_unsigned(const void* _i1, const void* _i2);
|
||||
static void sort(unsigned*& _id_list, unsigned _list_len);
|
||||
static unsigned bsearch_int_uporder(unsigned _key, const unsigned* _array, unsigned _array_num);
|
||||
static bool bsearch_preid_uporder(TYPE_PREDICATE_ID _preid, unsigned* _pair_idlist, unsigned _list_len);
|
||||
static unsigned bsearch_vec_uporder(unsigned _key, const std::vector<unsigned>* _vec);
|
||||
static std::string result_id_str(std::vector<unsigned*>& _v, int _var_num);
|
||||
static bool dir_exist(const std::string _dir);
|
||||
static bool create_dir(const std:: string _dir);
|
||||
static long get_cur_time();
|
||||
|
@ -218,13 +255,17 @@ public:
|
|||
static std::string getTimeString();
|
||||
static std::string node2string(const char* _raw_str);
|
||||
|
||||
static bool is_literal_ele(int);
|
||||
static int removeDuplicate(int*, int);
|
||||
static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID id);
|
||||
static bool is_entity_ele(TYPE_ENTITY_LITERAL_ID id);
|
||||
|
||||
static unsigned removeDuplicate(unsigned*, unsigned);
|
||||
|
||||
static std::string getQueryFromFile(const char* _file_path);
|
||||
static std::string getSystemOutput(std::string cmd);
|
||||
static std::string getExactPath(const char* path);
|
||||
static std::string getItemsFromDir(std::string path);
|
||||
static void logging(std::string _str);
|
||||
static void empty_file(const char* _fname);
|
||||
|
||||
// Below are some useful hash functions for string
|
||||
static unsigned simpleHash(const char *_str);
|
||||
|
@ -248,7 +289,7 @@ public:
|
|||
static HashFunction hash[];
|
||||
|
||||
static double logarithm(double _a, double _b);
|
||||
static void intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2);
|
||||
static void intersect(unsigned*& _id_list, unsigned& _id_list_len, const unsigned* _list1, unsigned _len1, const unsigned* _list2, unsigned _len2);
|
||||
|
||||
static char* l_trim(char *szOutput, const char *szInput);
|
||||
static char* r_trim(char *szOutput, const char *szInput);
|
||||
|
@ -258,6 +299,9 @@ public:
|
|||
Util();
|
||||
~Util();
|
||||
static std::string profile;
|
||||
//NOTICE: this function must be called out of any Database to config the basic settings
|
||||
//You can call it by Util util in the first of your main program
|
||||
//Another way is to build a GstoreApplication program, and do this configure in the initialization of the application
|
||||
static bool configure(); //read init.conf and set the parameters for this system
|
||||
static bool config_setting();
|
||||
static bool config_advanced();
|
||||
|
@ -270,6 +314,10 @@ public:
|
|||
static int _spo_cmp(const void* _a, const void* _b);
|
||||
static int _ops_cmp(const void* _a, const void* _b);
|
||||
static int _pso_cmp(const void* _a, const void* _b);
|
||||
//sort functions for sort on ID_TUPLE
|
||||
static bool spo_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
|
||||
static bool ops_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
|
||||
static bool pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
|
||||
|
||||
static std::string tmp_path;
|
||||
// these are for debugging
|
||||
|
@ -279,6 +327,10 @@ public:
|
|||
static FILE* debug_database;
|
||||
static FILE* debug_vstree;
|
||||
|
||||
static std::string gserver_port_file;
|
||||
static std::string gserver_port_swap;
|
||||
static std::string gserver_log;
|
||||
|
||||
|
||||
private:
|
||||
static bool isValidIPV4(std::string);
|
||||
|
|
|
@ -0,0 +1,348 @@
|
|||
/*=============================================================================
|
||||
# Filename: VList.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2017-03-27 15:47
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#include "VList.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool
|
||||
VList::isLongList(unsigned _len)
|
||||
{
|
||||
return _len > VList::LENGTH_BORDER;
|
||||
}
|
||||
|
||||
//Default constructor: builds an object that is not attached to any file.
//The free list is empty (NULL) and no file handle is open.
VList::VList()
{	//not use ../logs/, notice the location of program
	cur_block_num = SET_BLOCK_NUM;
	filepath = "";
	freelist = NULL;
	//the file pointer must not be left indeterminate: later code can only
	//tell "no file" apart from "open file" if it is explicitly NULL
	valfp = NULL;
	max_buffer_size = Util::MAX_BUFFER_SIZE;
	freemem = max_buffer_size;
}
|
||||
|
||||
//Create ("build") or open ("open") the block file at _filepath and set up the
//in-memory list of free block numbers.
//
//_filepath:    path of the value-list file
//_mode:        "build" truncates/creates the file, "open" reuses an existing one
//_buffer_size: stored as max_buffer_size and as the initial freemem
//
//File layout used here: the current block count sits at offset 0, and the
//block-usage bitmap starts at offset BLOCK_SIZE (bit == 0 means the block is free).
//On an invalid mode or a failed fopen an error is printed and the object is
//left with valfp == NULL and freelist == NULL.
VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size)
{
	//give every member a defined value first, so that the early-return paths
	//below never leave the object half-initialized
	cur_block_num = SET_BLOCK_NUM;
	this->filepath = _filepath;
	this->max_buffer_size = _buffer_size;
	this->freemem = this->max_buffer_size;
	this->freelist = NULL;
	this->valfp = NULL;

	const bool building = (_mode == string("build"));
	if (building)
	{
		valfp = fopen(_filepath.c_str(), "w+b");
	}
	else if (_mode == string("open"))
	{
		valfp = fopen(_filepath.c_str(), "r+b");
	}
	else
	{
		cout<<string("error in VList: Invalid mode ") + _mode<<endl;
		return;
	}
	if (valfp == NULL)
	{
		cout<<string("error in VList: Open error ") + _filepath<<endl;
		return;
	}

	this->freelist = new BlockInfo;	//null-head

	//TODO: read/write by char is too slow, how about read all and deal, then clear?
	//
	//BETTER: how about assigning IDs in a dynamic way?
	//limitID freelist
	//QUERY: can the free id list consume very large memory??

	unsigned i, j, k;	//j = (SuperNum-1)*BLOCK_SIZE
	BlockInfo* bp = this->freelist;
	if (building)
	{	//write basic information
		fwrite(&cur_block_num, sizeof(unsigned), 1, this->valfp);	//current block num
		//NOTICE: use a 1M block for a unsigned?? not ok!
		fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
		j = cur_block_num / 8;
		for (i = 0; i < j; ++i)
		{
			//a fresh file has every block free: one zero byte per 8 blocks
			fputc(0, this->valfp);
			for (k = 0; k < 8; ++k)
			{
				bp->next = new BlockInfo(i * 8 + k + 1, NULL);
				bp = bp->next;
			}
		}
	}
	else	//_mode == "open"
	{
		//read basic information
		fread(&cur_block_num, sizeof(unsigned), 1, this->valfp);
		fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
		j = cur_block_num / 8;
		for (i = 0; i < j; ++i)
		{
			//keep the result as int: on EOF every tested bit reads as set,
			//i.e. the 8 blocks of that byte are treated as in use
			const int c = fgetc(valfp);
			for (k = 0; k < 8; ++k)
			{
				//bit k of byte i describes block i*8 + (7-k) + 1
				if ((c & (1 << k)) == 0)
				{
					bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL);
					bp = bp->next;
				}
			}
		}
	}

	//NOTICE: we do not need to alloc the blocks for free block id list, AllocBlock is only for later blocks
}
|
||||
|
||||
long //8-byte in 64-bit machine
|
||||
VList::Address(unsigned _blocknum) const //BETTER: inline function
|
||||
{
|
||||
if (_blocknum == 0)
|
||||
return 0;
|
||||
else if (_blocknum > cur_block_num)
|
||||
{
|
||||
//print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum));
|
||||
return -1; //address should be non-negative
|
||||
}
|
||||
//NOTICE: here should explictly use long
|
||||
return (long)(this->SuperNum + _blocknum - 1) * (long)BLOCK_SIZE;
|
||||
}
|
||||
|
||||
unsigned
|
||||
VList::Blocknum(long address) const
|
||||
{
|
||||
return (address / BLOCK_SIZE) + 1 - this->SuperNum;
|
||||
}
|
||||
|
||||
unsigned
|
||||
VList::AllocBlock()
|
||||
{
|
||||
BlockInfo* p = this->freelist->next;
|
||||
if (p == NULL)
|
||||
{
|
||||
for (unsigned i = 0; i < SET_BLOCK_INC; ++i)
|
||||
{
|
||||
cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM
|
||||
this->FreeBlock(cur_block_num);
|
||||
}
|
||||
p = this->freelist->next;
|
||||
}
|
||||
unsigned t = p->num;
|
||||
this->freelist->next = p->next;
|
||||
delete p;
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
void
|
||||
VList::FreeBlock(unsigned _blocknum)
|
||||
{ //QUERY: head-sub and tail-add will be better?
|
||||
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
|
||||
this->freelist->next = bp;
|
||||
}
|
||||
|
||||
//NOTICE: all reads are aligned to 4 bytes(including a string)
|
||||
//a string may span several blocks
|
||||
//
|
||||
//NOTICE: not use buffer, read/write on need, update at once, so no need to write back at last
|
||||
//NOTICE: the next is placed at the begin of a block
|
||||
|
||||
|
||||
void
|
||||
VList::ReadAlign(unsigned* _next)
|
||||
{
|
||||
if (ftell(valfp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
fseek(valfp, Address(*_next), SEEK_SET);
|
||||
fread(_next, sizeof(unsigned), 1, valfp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
VList::WriteAlign(unsigned* _curnum)
|
||||
{
|
||||
if (ftell(valfp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
fseek(valfp, Address(*_curnum), SEEK_SET);
|
||||
fwrite(&blocknum, sizeof(unsigned), 1, valfp);
|
||||
fseek(valfp, Address(blocknum) + 4, SEEK_SET);
|
||||
*_curnum = blocknum;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
|
||||
{
|
||||
#ifdef DEBUG_VLIST
|
||||
cout<<"to get value of block num: "<<_block_num<<endl;
|
||||
#endif
|
||||
fseek(valfp, Address(_block_num), SEEK_SET);
|
||||
unsigned next;
|
||||
fread(&next, sizeof(unsigned), 1, valfp);
|
||||
this->readBstr(_str, _len, &next);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
VList::writeValue(const char* _str, unsigned _len)
|
||||
{
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
unsigned curnum = blocknum;
|
||||
|
||||
//NOTICE: here we must skip the next position first
|
||||
fseek(valfp, Address(curnum) + 4, SEEK_SET);
|
||||
this->writeBstr(_str, _len, &curnum);
|
||||
|
||||
#ifdef DEBUG_VLIST
|
||||
cout<<"to write value - block num: "<<blocknum<<endl;
|
||||
#endif
|
||||
return blocknum;
|
||||
}
|
||||
|
||||
bool
|
||||
VList::removeValue(unsigned _block_num)
|
||||
{
|
||||
unsigned store = _block_num, next;
|
||||
fseek(this->valfp, Address(store), SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, valfp);
|
||||
|
||||
while (store != 0)
|
||||
{
|
||||
this->FreeBlock(store);
|
||||
store = next;
|
||||
fseek(valfp, Address(store), SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, valfp);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
|
||||
{
|
||||
//long address;
|
||||
unsigned len, i, j;
|
||||
fread(&len, sizeof(unsigned), 1, this->valfp);
|
||||
#ifdef DEBUG_VLIST
|
||||
cout<<"the length of value: "<<len<<endl;
|
||||
#endif
|
||||
this->ReadAlign(_next);
|
||||
|
||||
//char* s = (char*)malloc(len);
|
||||
char* s = new char[len];
|
||||
_len = len;
|
||||
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fread(s + i, sizeof(char), 4, valfp);
|
||||
this->ReadAlign(_next);
|
||||
}
|
||||
while (i < len)
|
||||
{
|
||||
fread(s + i, sizeof(char), 1, valfp); //BETTER
|
||||
i++;
|
||||
}
|
||||
|
||||
j = len % 4;
|
||||
if (j > 0)
|
||||
j = 4 - j;
|
||||
fseek(valfp, j, SEEK_CUR);
|
||||
|
||||
//NOTICE+DEBUG: I think no need to align here, later no data to read
|
||||
//(if need to read, then fseek again to find a new value)
|
||||
//this->ReadAlign(_next);
|
||||
|
||||
_str = s;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
|
||||
{
|
||||
unsigned i, j, len = _len;
|
||||
fwrite(&len, sizeof(unsigned), 1, valfp);
|
||||
this->WriteAlign(_curnum);
|
||||
//cout<<"to write bstr, length: "<<len<<endl;
|
||||
|
||||
//BETTER: compute this need how many blocks first, then write a block a time
|
||||
|
||||
const char* s = _str;
|
||||
for (i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fwrite(s + i, sizeof(char), 4, valfp);
|
||||
this->WriteAlign(_curnum);
|
||||
}
|
||||
while (i < len)
|
||||
{
|
||||
fwrite(s + i, sizeof(char), 1, valfp);
|
||||
i++;
|
||||
}
|
||||
|
||||
j = len % 4;
|
||||
if (j > 0)
|
||||
j = 4 - j;
|
||||
fseek(valfp, j, SEEK_CUR);
|
||||
|
||||
//NOTICE+DEBUG: I think no need to align here, later no data to write
|
||||
//(if need to write, then fseek again to write a new value)
|
||||
//this->WriteAlign(_curnum);
|
||||
fseek(valfp, Address(*_curnum), SEEK_SET);
|
||||
unsigned t = 0;
|
||||
fwrite(&t, sizeof(unsigned), 1, valfp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//Write the block count and the block-usage bitmap back to the file, release
//the in-memory free list and close the file.
//
//Bitmap encoding: block n is bit (7 - (n-1) % 8) of byte (n-1) / 8, counted
//from offset BLOCK_SIZE; a 0 bit means the block is free.
VList::~VList()
{
	//a NULL free list means no file was ever attached to this object (the
	//default constructor sets it so); the file pointer must not be touched then
	if (this->freelist == NULL)
	{
		return;
	}

	//write the info back
	fseek(this->valfp, 0, SEEK_SET);
	fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);	//write current blocks num
	fseek(valfp, BLOCK_SIZE, SEEK_SET);

	//mark every block as used first ...
	const unsigned bitmap_bytes = cur_block_num / 8;	//(SuperNum-1)*BLOCK_SIZE;
	for (unsigned n = 0; n < bitmap_bytes; ++n)
	{
		fputc(0xff, valfp);
	}

	//... then clear the bit of each block that is on the free list
	for (BlockInfo* bp = this->freelist->next; bp != NULL; bp = bp->next)
	{
		//if not-use then set 0, aligned to byte!
#ifdef DEBUG_KVSTORE
		if (bp->num > cur_block_num)
		{
			printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
			exit(1);
		}
#endif
		const unsigned index = bp->num - 1;
		const unsigned byte_pos = index / 8;
		const unsigned bit_pos = 7 - index % 8;
		fseek(valfp, BLOCK_SIZE + byte_pos, SEEK_SET);
		//fgetc returns int; only the low byte is written back by fputc
		const int c = fgetc(valfp);
		fseek(valfp, -1, SEEK_CUR);
		fputc(c & ~(1 << bit_pos), valfp);
	}

	//release the list nodes, head included
	BlockInfo* node = this->freelist;
	while (node != NULL)
	{
		BlockInfo* following = node->next;
		delete node;
		node = following;
	}
	this->freelist = NULL;

	fclose(this->valfp);
	this->valfp = NULL;
}
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
/*=============================================================================
|
||||
# Filename: VList.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2017-03-27 15:40
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _UTIL_VLIST_H
|
||||
#define _UTIL_VLIST_H
|
||||
|
||||
#include "Util.h"
|
||||
#include "Bstr.h"
|
||||
|
||||
//NOTICE: not keep long list in memory, read each time
|
||||
//but when can you free the long list(kvstore should release it after parsing)
|
||||
//
|
||||
//CONSIDER: if to keep long list in memory, should adjust the bstr in memory:
|
||||
//unsigned: 0 char*: an object (if in memory, if modified, length, content, block num)
|
||||
//when reading a long list in a node, generate the object first, and the object will tell you whether
|
||||
//the list is in memory or not
|
||||
|
||||
//BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts)
|
||||
|
||||
//STRUCT: a long list real-address is the block ID in file2(only for long value lists, a list across several 1M blocks)
|
||||
//tree-value Bstr: unsigned=the real address char*=NULL
|
||||
//in disk:
|
||||
//file1 is tree file, the long list is represented as: 0 real-address
|
||||
//NOTICE: long list is not kept in memory for cache, it is read/updated each time on need!
|
||||
|
||||
//TODO: use fread/fwrite here instead of fgetc/fputc
|
||||
//including other trees
|
||||
|
||||
class VList
|
||||
{
|
||||
public:
|
||||
//NOTICE:the border is 10^6, but the block is larger, 1M
|
||||
//this is not choosed intuitively, we make sure that using vlist is better: transferring time>random seek time(x/40M>0.006)
|
||||
//Also notice that if no modification on data, read a node is almost sequentially in normal IVTree
|
||||
//In VList, case is the same and using VList may bring another seek cost!(it is not easy to setup cache for IVTree due to data struct)
|
||||
static const unsigned LENGTH_BORDER = 1000000;
|
||||
//static const unsigned LENGTH_BORDER = 100;
|
||||
static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block
|
||||
static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num
|
||||
//below two constants: must can be exactly divided by 8
|
||||
static const unsigned SET_BLOCK_NUM = 1 << 3; //initial blocks num
|
||||
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
|
||||
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
|
||||
|
||||
private:
|
||||
unsigned long long max_buffer_size;
|
||||
unsigned cur_block_num;
|
||||
std::string filepath;
|
||||
BlockInfo* freelist;
|
||||
//very long value list are stored in a separate file(with large block)
|
||||
//
|
||||
//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
|
||||
//<10%: 5000000~100M bytes
|
||||
FILE* valfp;
|
||||
|
||||
//NOTICE: freemem's type is long long here, due to large memory in server.
|
||||
//However, needmem in handler() and request() is ok to be int/unsigned.
|
||||
//Because the bstr' size is controlled, so is the node.
|
||||
unsigned long long freemem; //free memory to use, non-negative
|
||||
//unsigned long long time; //QUERY(achieving an old-swap startegy?)
|
||||
long Address(unsigned _blocknum) const;
|
||||
unsigned Blocknum(long address) const;
|
||||
unsigned AllocBlock();
|
||||
void FreeBlock(unsigned _blocknum);
|
||||
void ReadAlign(unsigned* _next);
|
||||
void WriteAlign(unsigned* _next);
|
||||
bool readBstr(char*& _bp, unsigned& _len, unsigned* _next);
|
||||
bool writeBstr(const char* _str, unsigned _len, unsigned* _curnum);
|
||||
|
||||
public:
|
||||
VList();
|
||||
VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence
|
||||
bool readValue(unsigned _block_num, char*& _str, unsigned& _len);
|
||||
unsigned writeValue(const char* _str, unsigned _len);
|
||||
bool removeValue(unsigned _block_num);
|
||||
~VList();
|
||||
|
||||
static bool isLongList(unsigned _len);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -25,8 +25,10 @@ using namespace std;
|
|||
//int LRUCache::DEFAULT_CAPACITY = 10000000;
|
||||
int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000; //about 20G memory for vstree
|
||||
//int LRUCache::DEFAULT_CAPACITY = 1000;
|
||||
//TODO:10^6 is a good parameter, at most use 20G
|
||||
//NOTICE:10^6 is a good parameter, at most use 20G
|
||||
|
||||
//NOTICE: it is ok to set it 4000000 when building!!! better to adjust according to the current memory usage
|
||||
//also use 2000000 or smaller for query()
|
||||
LRUCache::LRUCache(int _capacity)
|
||||
{
|
||||
//initialize the lock
|
||||
|
@ -39,7 +41,7 @@ LRUCache::LRUCache(int _capacity)
|
|||
cout << "LRUCache initial..." << endl;
|
||||
this->capacity = _capacity > 0 ? _capacity : LRUCache::DEFAULT_CAPACITY;
|
||||
|
||||
// TODO+DEBUG:it seems that a minium size is required, for example, multiple path down(the height?)
|
||||
//DEBUG:it seems that a minimum size is required, for example, multiple path down(the height?)
|
||||
//at least 3*h
|
||||
//
|
||||
// we should guarantee the cache is big enough.
|
||||
|
|
|
@ -160,14 +160,18 @@ void VSTree::retrieve(SPARQLquery& _query)
|
|||
//NOTICE:this can only be done by one thread
|
||||
//build the VSTree from the _entity_signature_file.
|
||||
bool
|
||||
VSTree::buildTree(std::string _entry_file_path)
|
||||
VSTree::buildTree(std::string _entry_file_path, int _cache_size)
|
||||
{
|
||||
Util::logging("IN VSTree::buildTree");
|
||||
|
||||
//NOTICE: entry buffer don't need to store all entities, just loop, read and deal
|
||||
//not so much memory: 2 * 10^6 * (4+800/8) < 1G
|
||||
|
||||
// create the entry buffer and node buffer.
|
||||
this->entry_buffer = new EntryBuffer(EntryBuffer::DEFAULT_CAPACITY);
|
||||
//cout<<"entry buffer newed"<<endl;
|
||||
this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY);
|
||||
this->node_buffer = new LRUCache(_cache_size);
|
||||
//this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY);
|
||||
|
||||
// create the root node.
|
||||
//VNode* rootNodePtr = new VNode();
|
||||
|
@ -643,10 +647,11 @@ VSTree::saveTree()
|
|||
}
|
||||
|
||||
bool
|
||||
VSTree::loadTree()
|
||||
VSTree::loadTree(int _cache_size)
|
||||
{
|
||||
cout << "load VSTree..." << endl;
|
||||
(this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY);
|
||||
(this->node_buffer) = new LRUCache(_cache_size);
|
||||
//(this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY);
|
||||
cout<<"LRU cache built"<<endl;
|
||||
|
||||
bool flag = this->loadTreeInfo();
|
||||
|
@ -929,7 +934,7 @@ VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode*
|
|||
bool is_leaf = oldNodePtr->isLeaf();
|
||||
// then create a new node to act as BEntryIndex's father.
|
||||
VNode* newNodePtr = this->createNode(is_leaf);
|
||||
#ifdef DEBUG
|
||||
#ifdef DEBUG_VSTREE
|
||||
cout<<"new node file line: "<<newNodePtr->getFileLine()<<endl;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
|
||||
//NOTICE:R/W more than 4G
|
||||
|
||||
//TODO: in the multi-threaded case, to ensure the vstree and its cache stay correct, maybe lock the whole vstree!
|
||||
//(at one time, only one thread can query/update the vstree)
|
||||
|
||||
class VSTree
|
||||
{
|
||||
friend class VNode;
|
||||
|
@ -25,7 +28,7 @@ public:
|
|||
~VSTree();
|
||||
int getHeight()const;
|
||||
//build the VSTree from the _entity_signature_file.
|
||||
bool buildTree(std::string _entity_signature_file);
|
||||
bool buildTree(std::string _entity_signature_file, int _cache_size = -1);
|
||||
bool deleteTree();
|
||||
|
||||
//if the tree is empty
|
||||
|
@ -47,7 +50,7 @@ public:
|
|||
//save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path.
|
||||
bool saveTree();
|
||||
//load tree from tree_info_file_path and tree_node_file_path files.
|
||||
bool loadTree();
|
||||
bool loadTree(int _cache_size = -1);
|
||||
//get the tree's root node pointer.
|
||||
VNode* getRoot();
|
||||
//get the node pointer by its file line.
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,5 @@
|
|||
INSERT DATA
|
||||
{
|
||||
<http://www.founder/102> <http://www.founder.20.link:52> <http://www.founder/106> .
|
||||
<http://www.founder/102> <http://www.founder> <http://www.founder/73> .
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
DELETE DATA { <http://www.founder/102> <http://www.founder.20.link:52> <http://www.founder/106> . }
|
|
@ -0,0 +1 @@
|
|||
select ?subject ?predict ?object WHERE { ?subject <http://www.founder.20.link:52> ?object; ?predict ?object . }
|
|
@ -0,0 +1 @@
|
|||
DELETE WHERE { <http://www.founder/101> ?predict ?object . }
|
|
@ -0,0 +1 @@
|
|||
select ?predict where {<http://www.founder/102> ?predict <http://www.founder/73> .}
|
|
@ -0,0 +1,5 @@
|
|||
select ?subject ?predict ?object where
|
||||
{
|
||||
<http://www.founder/102> <http://www.founder.20.link:52> ?object.
|
||||
?subject ?predict ?object.
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
select ?subject ?predict ?object where {?subject <http://www.founder.20.link:52> <http://www.founder/106>; ?predict ?object . }
|
|
@ -0,0 +1 @@
|
|||
DELETE WHERE { ?subject <http://www.founder.20.link:52> ?objcet. }
|
|
@ -0,0 +1 @@
|
|||
select ?a WHERE { <a> <http://www.founder.20.link:52> <b> . }
|
|
@ -0,0 +1 @@
|
|||
select ?s where { <http://www.founder/100> <http://www.founder.20.attr:dmID> "22". }
|
|
@ -0,0 +1 @@
|
|||
select ?a WHERE { <a> <http://www.founder.20.link:52> <b> . }
|
30
makefile
30
makefile
|
@ -70,10 +70,11 @@ api_java = api/java/lib/GstoreJavaAPI.jar
|
|||
#sstreeobj = $(objdir)Tree.o $(objdir)Storage.o $(objdir)Node.o $(objdir)IntlNode.o $(objdir)LeafNode.o $(objdir)Heap.o
|
||||
sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SIIntlNode.o $(objdir)SILeafNode.o $(objdir)SIHeap.o
|
||||
istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o
|
||||
ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o
|
||||
|
||||
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj)
|
||||
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstreeobj)
|
||||
|
||||
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o
|
||||
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o
|
||||
|
||||
queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \
|
||||
$(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o
|
||||
|
@ -217,6 +218,26 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $
|
|||
$(CC) $(CFLAGS) KVstore/ISTree/heap/ISHeap.cpp -o $(objdir)ISHeap.o
|
||||
#objects in istree/ end
|
||||
|
||||
#objects in ivtree/ begin
|
||||
$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o $(objdir)VList.o
|
||||
$(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o
|
||||
|
||||
$(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o
|
||||
$(CC) $(CFLAGS) KVstore/IVTree/storage/IVStorage.cpp -o $(objdir)IVStorage.o $(def64IO)
|
||||
|
||||
$(objdir)IVNode.o: KVstore/IVTree/node/IVNode.cpp KVstore/IVTree/node/IVNode.h $(objdir)Util.o
|
||||
$(CC) $(CFLAGS) KVstore/IVTree/node/IVNode.cpp -o $(objdir)IVNode.o
|
||||
|
||||
$(objdir)IVIntlNode.o: KVstore/IVTree/node/IVIntlNode.cpp KVstore/IVTree/node/IVIntlNode.h
|
||||
$(CC) $(CFLAGS) KVstore/IVTree/node/IVIntlNode.cpp -o $(objdir)IVIntlNode.o
|
||||
|
||||
$(objdir)IVLeafNode.o: KVstore/IVTree/node/IVLeafNode.cpp KVstore/IVTree/node/IVLeafNode.h
|
||||
$(CC) $(CFLAGS) KVstore/IVTree/node/IVLeafNode.cpp -o $(objdir)IVLeafNode.o
|
||||
|
||||
$(objdir)IVHeap.o: KVstore/IVTree/heap/IVHeap.cpp KVstore/IVTree/heap/IVHeap.h $(objdir)Util.o
|
||||
$(CC) $(CFLAGS) KVstore/IVTree/heap/IVHeap.cpp -o $(objdir)IVHeap.o
|
||||
#objects in ivtree/ end
|
||||
|
||||
$(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h KVstore/Tree.h
|
||||
$(CC) $(CFLAGS) KVstore/KVstore.cpp $(inc) -o $(objdir)KVstore.o
|
||||
|
||||
|
@ -302,6 +323,9 @@ $(objdir)Triple.o: Util/Triple.cpp Util/Triple.h $(objdir)Util.o
|
|||
$(objdir)BloomFilter.o: Util/BloomFilter.cpp Util/BloomFilter.h $(objdir)Util.o
|
||||
$(CC) $(CFLAGS) Util/BloomFilter.cpp -o $(objdir)BloomFilter.o
|
||||
|
||||
$(objdir)VList.o: Util/VList.cpp Util/VList.h
|
||||
$(CC) $(CFLAGS) Util/VList.cpp -o $(objdir)VList.o
|
||||
|
||||
#objects in util/ end
|
||||
|
||||
|
||||
|
@ -408,7 +432,7 @@ dist: clean
|
|||
|
||||
tarball:
|
||||
tar -czvf devGstore.tar.gz api bin lib tools .debug .tmp .objs test docs data makefile \
|
||||
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex COVERAGE LICENSE
|
||||
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex COVERAGE
|
||||
|
||||
APIexample: $(api_cpp) $(api_java)
|
||||
$(MAKE) -C api/cpp/example
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"config": {
|
||||
"ghooks": {
|
||||
"commit-msg": "validate-commit-msg"
|
||||
}
|
||||
},
|
||||
|
||||
"scripts": {
|
||||
"changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0",
|
||||
"changelog": "conventional-changelog -p angular -i CHANGELOG.md -w"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"config": {
|
||||
"ghooks": {
|
||||
//"pre-commit": "gulp lint",
|
||||
"commit-msg": "validate-commit-msg",
|
||||
//"pre-push": "make test",
|
||||
//"post-merge": "npm install",
|
||||
//"post-rewrite": "npm install",
|
||||
}
|
||||
}
|
||||
"scripts": {
|
||||
"changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0",
|
||||
"changelog": "conventional-changelog -p angular -i CHANGELOG.md -w",
|
||||
}
|
||||
}
|
|
@ -0,0 +1,403 @@
|
|||
#include "../Util/Util.h"
|
||||
#include "KVstore.h"
|
||||
|
||||
using namespace std;
|
||||
const int maxn=10000000;
|
||||
KVstore testkv;
|
||||
|
||||
void generate_test_data()
|
||||
{
|
||||
FILE *data_file1=fopen("./data1","w");
|
||||
FILE *data_file2=fopen("./data2","w");
|
||||
fprintf(data_file1,"%d\n",maxn);
|
||||
fprintf(data_file2,"%d\n",maxn);
|
||||
for(int i=0;i<maxn;i++)
|
||||
{
|
||||
string str="";
|
||||
char tmp[3];
|
||||
tmp[1]=0;
|
||||
int k=i;
|
||||
for(int j=0;j<7;j++)
|
||||
{
|
||||
tmp[0]='a'+k%26;
|
||||
str.append(tmp);
|
||||
k/=26;
|
||||
}
|
||||
fprintf(data_file1,"%d %s\n",i,str.c_str());
|
||||
|
||||
k=i;
|
||||
int o=k%800;
|
||||
k/=800;
|
||||
int p=k%800;
|
||||
k/=800;
|
||||
int s=k%800;
|
||||
fprintf(data_file2,"%d %d %d\n",s,p,o);
|
||||
}
|
||||
fclose(data_file1);
|
||||
fclose(data_file2);
|
||||
}
|
||||
|
||||
//for checking memory
|
||||
// Parses the first decimal number found in a line such as
// "VmSize:   123456 kB\n" and returns it.
// Side effect kept from the original: when the line is at least 3 characters
// long, the character at position strlen(line)-3 is overwritten with '\0'.
// Returns -1 when line is NULL or contains no digit at all (the original
// scanned past the end of the buffer in that case, and wrote before the start
// of the buffer for lines shorter than 3 characters).
int parseLine(char* line){
	if (line == NULL) return -1;

	size_t len = strlen(line);
	const char* p = line;
	// stop at the terminator so a digit-less line cannot run off the buffer
	while (*p != '\0' && (*p < '0' || *p > '9')) p++;
	if (*p == '\0') return -1;

	// only truncate when the index len-3 actually exists
	if (len >= 3) line[len-3] = 0;
	return atoi(p);
}
|
||||
|
||||
int getMemkb(){ //Note: this value is in KB!
|
||||
FILE* file = fopen("/proc/self/status", "r");
|
||||
int result = -1;
|
||||
char line[128];
|
||||
|
||||
while (fgets(line, 128, file) != NULL){
|
||||
if (strncmp(line, "VmSize:", 7) == 0){
|
||||
result = parseLine(line);
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(file);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
//string2id
|
||||
void test_string2id_insert()
|
||||
{
|
||||
cout<< "testing string2id insert..." <<endl;
|
||||
testkv.open_entity2id(KVstore::CREATE_MODE);
|
||||
|
||||
FILE *data1=fopen("./data1","r");
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
int n,id;
|
||||
char str[10];
|
||||
fscanf(data1,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data1,"%d %s",&id,str);
|
||||
testkv.setIDByEntity(string(str),id);
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data1);
|
||||
testkv.close_entity2id();
|
||||
}
|
||||
|
||||
void test_string2id_delete()
|
||||
{
|
||||
cout<< "testing string2id delete..." <<endl;
|
||||
testkv.open_entity2id(KVstore::READ_WRITE_MODE);
|
||||
|
||||
FILE *data1=fopen("./data1","r");
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
int n,id;
|
||||
char str[10];
|
||||
fscanf(data1,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data1,"%d %s",&id,str);
|
||||
testkv.subIDByEntity(string(str));
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data1);
|
||||
testkv.close_entity2id();
|
||||
}
|
||||
|
||||
void test_string2id_search()
|
||||
{
|
||||
cout<< "testing string2id search..." <<endl;
|
||||
testkv.open_entity2id(KVstore::READ_WRITE_MODE);
|
||||
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data1=fopen("./data1","r");
|
||||
int n,id;
|
||||
char str[10];
|
||||
fscanf(data1,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data1,"%d %s",&id,str);
|
||||
testkv.getIDByEntity(string(str));
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data1);
|
||||
testkv.close_entity2id();
|
||||
}
|
||||
|
||||
|
||||
//id2string
|
||||
void test_id2string_insert()
|
||||
{
|
||||
cout<< "testing id2string insert..." <<endl;
|
||||
testkv.open_id2entity(KVstore::CREATE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data1=fopen("./data1","r");
|
||||
int n,id;
|
||||
char str[10];
|
||||
fscanf(data1,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data1,"%d %s",&id,str);
|
||||
testkv.setEntityByID(id,string(str));
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data1);
|
||||
testkv.close_id2entity();
|
||||
}
|
||||
|
||||
void test_id2string_delete()
|
||||
{
|
||||
cout<< "testing id2string delete..." <<endl;
|
||||
testkv.open_id2entity(KVstore::READ_WRITE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data1=fopen("./data1","r");
|
||||
int n,id;
|
||||
char str[10];
|
||||
fscanf(data1,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
testkv.subEntityByID(i);
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data1);
|
||||
testkv.close_id2entity();
|
||||
}
|
||||
|
||||
void test_id2string_search()
|
||||
{
|
||||
cout<< "testing id2string search..." <<endl;
|
||||
testkv.open_id2entity(KVstore::READ_WRITE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data1=fopen("./data1","r");
|
||||
int n,id;
|
||||
char str[10];
|
||||
fscanf(data1,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
testkv.getEntityByID(i);
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data1);
|
||||
testkv.close_id2entity();
|
||||
}
|
||||
|
||||
|
||||
//subID2values
|
||||
void test_subID2values_insert()
|
||||
{
|
||||
cout<< "testing subID2values insert..." <<endl;
|
||||
testkv.open_subID2values(KVstore::CREATE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data2=fopen("./data2","r");
|
||||
int n,s,p,o;
|
||||
fscanf(data2,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data2,"%d%d%d",&s,&p,&o);
|
||||
testkv.updateInsert_s2values(s,p,o);
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data2);
|
||||
testkv.close_subID2values();
|
||||
}
|
||||
|
||||
void test_subID2values_delete()
|
||||
{
|
||||
cout<< "testing subID2values delete..." <<endl;
|
||||
testkv.open_subID2values(KVstore::READ_WRITE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data2=fopen("./data2","r");
|
||||
int n,s,p,o;
|
||||
fscanf(data2,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data2,"%d%d%d",&s,&p,&o);
|
||||
testkv.updateRemove_s2values(s,p,o);
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data2);
|
||||
testkv.close_subID2values();
|
||||
}
|
||||
|
||||
void test_subID2values_get_s2p()
|
||||
{
|
||||
cout<< "testing subID2values get s2p..." <<endl;
|
||||
testkv.open_subID2values(KVstore::READ_WRITE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data2=fopen("./data2","r");
|
||||
int n,s,p,o;
|
||||
fscanf(data2,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data2,"%d%d%d",&s,&p,&o);
|
||||
int *plist=NULL;
|
||||
int len;
|
||||
testkv.getpreIDlistBysubID(s,plist,len);
|
||||
delete[] plist;
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data2);
|
||||
testkv.close_subID2values();
|
||||
}
|
||||
|
||||
void test_subID2values_get_s2o()
|
||||
{
|
||||
cout<< "testing subID2values get s2o..." <<endl;
|
||||
testkv.open_subID2values(KVstore::READ_WRITE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data2=fopen("./data2","r");
|
||||
int n,s,p,o;
|
||||
fscanf(data2,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data2,"%d%d%d",&s,&p,&o);
|
||||
int *olist;
|
||||
int len;
|
||||
testkv.getobjIDlistBysubID(s,olist,len);
|
||||
delete[] olist;
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data2);
|
||||
testkv.close_subID2values();
|
||||
}
|
||||
|
||||
void test_subID2values_get_sp2o()
|
||||
{
|
||||
cout<< "testing subID2values get sp2o..." <<endl;
|
||||
testkv.open_subID2values(KVstore::READ_WRITE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data2=fopen("./data2","r");
|
||||
int n,s,p,o;
|
||||
fscanf(data2,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data2,"%d%d%d",&s,&p,&o);
|
||||
int *olist;
|
||||
int len;
|
||||
testkv.getobjIDlistBysubIDpreID(s,p,olist,len);
|
||||
delete[] olist;
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data2);
|
||||
testkv.close_subID2values();
|
||||
}
|
||||
|
||||
void test_subID2values_get_s2po()
|
||||
{
|
||||
cout<< "testing subID2values get s2po..." <<endl;
|
||||
testkv.open_subID2values(KVstore::READ_WRITE_MODE);
|
||||
|
||||
timeval start_time,end_time;
|
||||
gettimeofday(&start_time,NULL);
|
||||
|
||||
FILE *data2=fopen("./data2","r");
|
||||
int n,s,p,o;
|
||||
fscanf(data2,"%d",&n);
|
||||
for(int i=0;i<n;i++)
|
||||
{
|
||||
fscanf(data2,"%d%d%d",&s,&p,&o);
|
||||
int *polist;
|
||||
int len;
|
||||
testkv.getpreIDobjIDlistBysubID(s,polist,len);
|
||||
delete[] polist;
|
||||
}
|
||||
gettimeofday(&end_time,NULL);
|
||||
cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
|
||||
cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
|
||||
|
||||
fclose(data2);
|
||||
testkv.close_subID2values();
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if(argc==2 && strcmp(argv[1],"generate")==0)
|
||||
generate_test_data();
|
||||
|
||||
//string2id
|
||||
test_string2id_insert();
|
||||
test_string2id_search();
|
||||
test_string2id_delete();
|
||||
|
||||
//id2string
|
||||
test_id2string_insert();
|
||||
test_id2string_search();
|
||||
test_id2string_delete();
|
||||
|
||||
//subID2values
|
||||
test_subID2values_insert();
|
||||
test_subID2values_get_s2p();
|
||||
test_subID2values_get_s2o();
|
||||
test_subID2values_get_sp2o();
|
||||
test_subID2values_get_s2po();
|
||||
test_subID2values_delete();
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue