feat: merge type branch;

value branch has already been merged into type;

by zengli
This commit is contained in:
bookug 2017-05-18 22:35:22 +08:00
commit 25e1c3ed10
97 changed files with 12330 additions and 4703 deletions

7
.gitignore vendored
View File

@ -91,3 +91,10 @@ tags
*.out
*.bak~
# queries
*.sql
*.sh
# modules
node_modules

File diff suppressed because it is too large Load Diff

View File

@ -28,13 +28,14 @@
class Database
{
public:
static const bool only_sub2idpre2id = true;
static const int internal = 100 * 1000;
void test();
void test_build_sig();
void test_join();
void printIDlist(int _i, int* _list, int _len, std::string _log);
void printPairList(int _i, int* _list, int _len, std::string _log);
//static const bool only_sub2idpre2id = true;
//static const int internal = 100 * 1000;
//void test();
//void test_build_sig();
//void test_join();
//void printIDlist(int _i, int* _list, int _len, std::string _log);
//void printPairList(int _i, int* _list, int _len, std::string _log);
//when encode EntitySig, one way uses STRING-hash, the other one uses ID-hash
//depending on this->encode_mode
@ -59,26 +60,30 @@ public:
bool insert(std::string _rdf_file);
bool remove(std::string _rdf_file);
/* name of this DB*/
//name of this DB
string getName();
/* root Path of this DB + sixTuplesFile */
//root Path of this DB + sixTuplesFile
string getSixTuplesFile();
/* root Path of this DB + signatureBFile */
//root Path of this DB + signatureBFile
string getSignatureBFile();
/* root Path of this DB + DBInfoFile */
//root Path of this DB + DBInfoFile
string getDBInfoFile();
//id tuples file
string getIDTuplesFile();
private:
string name;
string store_path;
bool is_active;
int triples_num;
int entity_num;
int sub_num;
int pre_num;
int literal_num;
TYPE_TRIPLE_NUM triples_num;
TYPE_ENTITY_LITERAL_ID entity_num;
TYPE_ENTITY_LITERAL_ID sub_num;
TYPE_PREDICATE_ID pre_num;
TYPE_ENTITY_LITERAL_ID literal_num;
int encode_mode;
@ -94,51 +99,59 @@ private:
//six tuples: <sub pre obj sid pid oid>
string six_tuples_file;
//B means binary
string signature_binary_file;
//id tuples file
string id_tuples_file;
//pre2num mapping
TNUM* pre2num;
TYPE_TRIPLE_NUM* pre2num;
//valid: check from minNumPID to maxNumPID
int maxNumPID, minNumPID;
TYPE_PREDICATE_ID maxNumPID, minNumPID;
void setPreMap();
//TODO: set the buffer capacity as dynamic according to the current memory usage
//string buffer
Buffer* entity_buffer;
//unsigned offset; //maybe let id start from an offset
unsigned entity_buffer_size;
Buffer* literal_buffer;
unsigned literal_buffer_size;
void setStringBuffer();
void warmUp();
//BETTER+TODO:add a predicate buffer for ?p query
//BETTER:add a predicate buffer for ?p query
//However, I think this is not necessary because ?p is rare and the p2xx tree is small enough
//triple num per group for insert/delete
//can not be too high, otherwise the heap will overflow
static const int GROUP_SIZE = 1000;
//manage the ID allocate and garbage
static const int START_ID_NUM = 0;
static const TYPE_ENTITY_LITERAL_ID START_ID_NUM = 0;
//static const int START_ID_NUM = 1000;
/////////////////////////////////////////////////////////////////////////////////
//NOTICE:error if >= LITERAL_FIRST_ID
string free_id_file_entity; //the first is limitID, then free id list
int limitID_entity; //the current maxium ID num(maybe not used so much)
TYPE_ENTITY_LITERAL_ID limitID_entity; //the current maxium ID num(maybe not used so much)
BlockInfo* freelist_entity; //free id list, reuse BlockInfo for Storage class
int allocEntityID();
void freeEntityID(int _id);
TYPE_ENTITY_LITERAL_ID allocEntityID();
void freeEntityID(TYPE_ENTITY_LITERAL_ID _id);
/////////////////////////////////////////////////////////////////////////////////
//NOTICE:error if >= 2*LITERAL_FIRST_ID
string free_id_file_literal;
int limitID_literal;
TYPE_ENTITY_LITERAL_ID limitID_literal;
BlockInfo* freelist_literal;
int allocLiteralID();
void freeLiteralID(int _id);
TYPE_ENTITY_LITERAL_ID allocLiteralID();
void freeLiteralID(TYPE_ENTITY_LITERAL_ID _id);
/////////////////////////////////////////////////////////////////////////////////
//NOTICE:error if >= 2*LITERAL_FIRST_ID
string free_id_file_predicate;
int limitID_predicate;
TYPE_PREDICATE_ID limitID_predicate;
BlockInfo* freelist_predicate;
int allocPredicateID();
void freePredicateID(int _id);
TYPE_PREDICATE_ID allocPredicateID();
void freePredicateID(TYPE_PREDICATE_ID _id);
/////////////////////////////////////////////////////////////////////////////////
void initIDinfo(); //initialize the members
void resetIDinfo(); //reset the id info for build
@ -158,11 +171,11 @@ private:
//encode Triple into Object EntityBitSet
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
bool calculateEntityBitSet(int _entity_id, EntityBitSet & _bitset);
bool calculateEntityBitSet(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet & _bitset);
//check whether the relative 3-tuples exist
//usually, through sp2olist
bool exist_triple(int _sub_id, int _pre_id, int _obj_id);
bool exist_triple(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
//* _rdf_file denotes the path of the RDF file, which stores the rdf data
//* there are many steps in this function, each one corresponds to a sub-function
@ -174,24 +187,25 @@ private:
//* 4. build: objID2subIDlist, <objIDpreID>2subIDlist objID2<preIDsubID>list
//encodeRDF_new invoke new rdfParser to solve task 1 & 2 in one time scan.
bool encodeRDF_new(const string _rdf_file);
void build_s2xx(int**);
void build_o2xx(int**);
void build_p2xx(int**);
void readIDTuples(ID_TUPLE*& _p_id_tuples);
void build_s2xx(ID_TUPLE*);
void build_o2xx(ID_TUPLE*);
void build_p2xx(ID_TUPLE*);
//insert and delete, notice that modify is not needed here
//we can read from file or use sparql syntax
bool insertTriple(const TripleWithObjType& _triple, vector<int>* _vertices = NULL, vector<int>* _predicates = NULL);
bool removeTriple(const TripleWithObjType& _triple, vector<int>* _vertices = NULL, vector<int>* _predicates = NULL);
bool insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vertices = NULL, vector<unsigned>* _predicates = NULL);
bool removeTriple(const TripleWithObjType& _triple, vector<unsigned>* _vertices = NULL, vector<unsigned>* _predicates = NULL);
//NOTICE:one by one is too costly, sort and insert/delete at a time will be better
int insert(const TripleWithObjType* _triples, int _triple_num);
unsigned insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num);
//bool insert(const vector<TripleWithObjType>& _triples, vector<int>& _vertices, vector<int>& _predicates);
int remove(const TripleWithObjType* _triples, int _triple_num);
unsigned remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num);
//bool remove(const vector<TripleWithObjType>& _triples, vector<int>& _vertices, vector<int>& _predicates);
bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max);
bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max);
bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file);
//bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, TYPE_TRIPLE_NUM _id_tuples_max);
bool objIDIsEntityID(int _id);
bool objIDIsEntityID(TYPE_ENTITY_LITERAL_ID _id);
//* join on the vector of CandidateList, available after retrieve from the VSTREE
//* and store the result in _result_set

View File

@ -16,7 +16,7 @@ Join::Join()
this->result_list = NULL;
}
Join::Join(KVstore* _kvstore, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal)
Join::Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal)
{
this->kvstore = _kvstore;
this->result_list = NULL;
@ -103,7 +103,8 @@ Join::score_node(int var)
continue;
}
//CHECK:if the pre id is valid (0<=p<limit_predicateID)
int pid = this->basic_query->getEdgePreID(var, i);
TYPE_PREDICATE_ID pid = this->basic_query->getEdgePreID(var, i);
//DEBUG: if TYPE_PREDICATE_ID is changed to unsigned
if(pid < 0 || pid >= this->limitID_predicate)
{
continue;
@ -123,7 +124,7 @@ Join::score_node(int var)
}
int
Join::judge(int _smallest, int _biggest)
Join::judge(unsigned _smallest, unsigned _biggest)
{
return 0; //DEBUG:remove when index_join is ok
//BETTER?:use appropriate method according to size and structure
@ -132,7 +133,8 @@ Join::judge(int _smallest, int _biggest)
//BETTER:how to guess the size of can_lists
double size = (_smallest + _biggest) / 2.0;
double ans = Join::PARAM_DENSE * dense - size / Join::PARAM_SIZE;
if (ans > Join::JUDGE_LIMIT)
double limit = 1.0 / (double)Join::JUDGE_LIMIT;
if (ans > limit)
return 0; //multi_join method
else
return 1; //index_join method
@ -321,7 +323,9 @@ Join::pre_var_handler()
#ifdef DEBUG_JOIN
//cout << sub_name << endl << triple.predicate << endl << obj_name << endl;
#endif
int sub_id = -1, obj_id = -1, var1 = -1, var2 = -1;
TYPE_ENTITY_LITERAL_ID sub_id, obj_id;
sub_id = obj_id = INVALID_ENTITY_LITERAL_ID;
int var1 = -1, var2 = -1;
if (sub_name[0] != '?')
{
@ -339,7 +343,8 @@ Join::pre_var_handler()
if (obj_name[0] != '?')
{
obj_id = this->kvstore->getIDByEntity(obj_name);
if (obj_id == -1)
//if (obj_id == -1)
if (obj_id == INVALID_ENTITY_LITERAL_ID)
obj_id = this->kvstore->getIDByLiteral(obj_name);
}
else
@ -355,10 +360,11 @@ Join::pre_var_handler()
cout<<"subid: "<<sub_id<<" objid: "<<obj_id<<endl;
#endif
int* id_list = NULL;
int id_list_len = 0;
unsigned* id_list = NULL;
unsigned id_list_len = 0;
//two vars in query
if (sub_id == -1 && obj_id == -1)
if (sub_id == INVALID_ENTITY_LITERAL_ID && obj_id == INVALID_ENTITY_LITERAL_ID)
//if (sub_id == -1 && obj_id == -1)
{
if (var1 == -1 && var2 == -1)
{
@ -407,15 +413,15 @@ Join::pre_var_handler()
}
}
//two constants in query
else if (sub_id != -1 && obj_id != -1)
else if (sub_id != INVALID_ENTITY_LITERAL_ID && obj_id != INVALID_ENTITY_LITERAL_ID)
{
//just use so2p in query graph to find predicates
//this->kvstore->getpreIDlistBysubIDobjID(sub_id, obj_id, id_list, id_list_len);
int sid = sub_id, oid = obj_id;
TYPE_ENTITY_LITERAL_ID sid = sub_id, oid = obj_id;
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len, true);
}
//sub is var while obj is constant
else if (sub_id == -1 && obj_id != -1)
else if (sub_id == INVALID_ENTITY_LITERAL_ID && obj_id != INVALID_ENTITY_LITERAL_ID)
{
if (var1 == -1)
{
@ -424,12 +430,12 @@ Join::pre_var_handler()
else
{
this->kvstore->getpreIDlistBysubIDobjID((*it)[this->id2pos[var1]], obj_id, id_list, id_list_len, true);
int sid = (*it)[this->id2pos[var1]], oid = obj_id;
TYPE_ENTITY_LITERAL_ID sid = (*it)[this->id2pos[var1]], oid = obj_id;
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len, true);
}
}
//sub is constant while obj is var
else if (sub_id != -1 && obj_id == -1)
else if (sub_id != INVALID_ENTITY_LITERAL_ID && obj_id == INVALID_ENTITY_LITERAL_ID)
{
if (var2 == -1)
{
@ -439,7 +445,7 @@ Join::pre_var_handler()
{
//NOTICE:no need to add literals here because they are added in add_literal_candidate using s2o
//this->kvstore->getpreIDlistBysubIDobjID(sub_id, (*it)[this->id2pos[var2]], id_list, id_list_len);
int sid = sub_id, oid = (*it)[this->id2pos[var2]];
TYPE_ENTITY_LITERAL_ID sid = sub_id, oid = (*it)[this->id2pos[var2]];
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len, true);
}
}
@ -465,7 +471,7 @@ Join::pre_var_handler()
else
{
#ifdef DEBUG_JOIN
for(int k = 0; k < valid_ans.size(); ++k)
for(unsigned k = 0; k < valid_ans.size(); ++k)
cout << this->kvstore->getPredicateByID(valid_ans[k])<<" ";
cout<<endl;
#endif
@ -478,7 +484,7 @@ Join::pre_var_handler()
//
//NOTICE: we add all here(select/not) because they maybe needed by generating satellites
//we need to copy only the selected ones in copyToResult
int size = valid_ans.size();
unsigned size = valid_ans.size();
//BETTER:only add pre vars which are selected or linked with satellite
if (size > 0)
@ -489,7 +495,7 @@ Join::pre_var_handler()
//continue;
//}
it->push_back(valid_ans[0]);
int begin = 1;
unsigned begin = 1;
if (!if_new_start && size > 1)
{
this->add_new_to_results(it, valid_ans[1]);
@ -499,7 +505,7 @@ Join::pre_var_handler()
this->new_start--;
begin = 2;
}
for (int j = begin; j < size; ++j)
for (unsigned j = begin; j < size; ++j)
{
this->add_new_to_results(it, valid_ans[j]);
}
@ -543,7 +549,7 @@ Join::copyToResult()
cout << "core var num: " << core_var_num << " select var num: " << select_var_num << endl;
#endif
this->record_len = select_var_num + selected_pre_var_num;
this->record = new int[this->record_len];
this->record = new unsigned[this->record_len];
for (TableIterator it = this->current_table.begin(); it != this->current_table.end(); ++it)
{
@ -581,7 +587,7 @@ Join::copyToResult()
for (i = 0; i < core_var_num; ++i)
{
int id = this->pos2id[i];
int ele = (*it)[i];
unsigned ele = (*it)[i];
int degree = this->basic_query->getVarDegree(id);
for (int j = 0; j < degree; ++j)
{
@ -591,12 +597,12 @@ Join::copyToResult()
#ifdef DEBUG_JOIN
//cout << "to generate "<<id2<<endl;
#endif
int* idlist = NULL;
int idlist_len = 0;
unsigned* idlist = NULL;
unsigned idlist_len = 0;
int triple_id = this->basic_query->getEdgeID(id, j);
Triple triple = this->basic_query->getTriple(triple_id);
int preid = this->basic_query->getEdgePreID(id, j);
TYPE_PREDICATE_ID preid = this->basic_query->getEdgePreID(id, j);
if (preid == -2) //?p
{
string predicate = triple.predicate;
@ -606,7 +612,7 @@ Join::copyToResult()
preid = (*it)[this->id2pos[pre_var_id+this->var_num]];
//}
}
else if (preid == -1)
else if (preid == -1) //INVALID_PREDICATE_ID
{
//ERROR
}
@ -686,17 +692,17 @@ Join::cartesian(int pos, int end)
{
if (pos == end)
{
int* new_record = new int[this->record_len];
memcpy(new_record, this->record, sizeof(int) * this->record_len);
unsigned* new_record = new unsigned[this->record_len];
memcpy(new_record, this->record, sizeof(unsigned) * this->record_len);
this->result_list->push_back(new_record);
return;
}
int size = this->satellites[pos].idlist_len;
unsigned size = this->satellites[pos].idlist_len;
int id = this->satellites[pos].id;
int vpos = this->basic_query->getSelectedVarPosition(id);
int* list = this->satellites[pos].idlist;
for (int i = 0; i < size; ++i)
unsigned* list = this->satellites[pos].idlist;
for (unsigned i = 0; i < size; ++i)
{
this->record[vpos] = list[i];
this->cartesian(pos + 1, end);
@ -740,13 +746,13 @@ Join::toStartJoin()
for (int j = 0; j < var_degree; ++j)
{
//int neighbor_id = this->basic_query->getEdgeNeighborID(var_id, j);
int predicate_id = this->basic_query->getEdgePreID(var_id, j);
TYPE_PREDICATE_ID predicate_id = this->basic_query->getEdgePreID(var_id, j);
int triple_id = this->basic_query->getEdgeID(var_id, j);
Triple triple = this->basic_query->getTriple(triple_id);
string neighbor_name = triple.subject;
IDList this_edge_literal_list;
int* object_list = NULL;
int object_list_len = 0;
unsigned* object_list = NULL;
unsigned object_list_len = 0;
if (predicate_id >= 0)
{
@ -782,9 +788,9 @@ Join::toStartJoin()
{
cout<<"Special Case: star graph whose pres are all var"<<endl;
//get all literals in this db
for(int i = 0; i < this->limitID_literal; ++i)
for(TYPE_ENTITY_LITERAL_ID i = 0; i < this->limitID_literal; ++i)
{
int id = i + Util::LITERAL_FIRST_ID;
TYPE_ENTITY_LITERAL_ID id = i + Util::LITERAL_FIRST_ID;
string literal = this->kvstore->getLiteralByID(id);
if(literal == "")
{
@ -815,10 +821,38 @@ Join::join()
//the smallest candidate list size of the not-satellite vars
int id = this->basic_query->getVarID_FirstProcessWhenJoin();
int smallest = this->basic_query->getCandidateSize(id);
unsigned smallest = 0;
if(id >= 0)
{
smallest = this->basic_query->getCandidateSize(id);
}
else
{
cout<<"error in join() - id < 0"<<endl;
return false;
}
if(!this->is_literal_var(id) && smallest == 0)
{
cout<<"join() - already empty"<<endl;
return false; //empty result
int biggest = this->basic_query->getVarID_MaxCandidateList();
}
int id_max = this->basic_query->getVarID_MaxCandidateList();
unsigned biggest = 0;
if(id_max >= 0)
{
biggest = this->basic_query->getCandidateSize(id_max);
}
else
{
cout<<"error in join() - id < 0"<<endl;
return false;
}
if(!this->is_literal_var(id_max) && biggest == 0)
{
cout<<"join() - already empty"<<endl;
return false; //empty result
}
int method = this->judge(smallest, biggest);
bool ret = true;
@ -911,7 +945,7 @@ Join::is_literal_var(int _id)
//===================================================================================================
void
Join::add_new_to_results(TableIterator it, int id)
Join::add_new_to_results(TableIterator it, unsigned id)
{
//NTC:already have one more in *it if need to push back
RecordType tmp(*it);
@ -920,7 +954,7 @@ Join::add_new_to_results(TableIterator it, int id)
}
void
Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_list, int id_list_len, bool _is_literal)
Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal)
{
if (valid_ans_list == NULL)
{
@ -928,7 +962,7 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_lis
//valid_ans_list.unionList(_can_list);
if (_is_literal)
{
int entity_len = 0;
unsigned entity_len = 0;
while (true)
{
if (entity_len == id_list_len || Util::is_literal_ele(id_list[entity_len]))
@ -951,6 +985,11 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_lis
}
}
//TODO: multiple lists intersect, how about sort and intersect from small to big?
//but this needs to generate all first, I think sorting by pre2num is better!
//
//TODO: set the entity_literal border in kvstore, and intersect entity part and literal part respectively
//NOTICE: consider two directions according to table1 size and table2 size
//1. -> add ID mapping record for the first linking column, whole(offset, size) zengli
//2. <- join using inverted index for each column, offset and size for each column, hulin
@ -961,7 +1000,7 @@ Join::update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_lis
//However, the case is really rare in our test(the reason may be that the web graph is always very sparse)
//If we add a buffer for this case, will cause worse performance
bool
Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_size, int _id, bool _is_literal)
Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal)
{
if(_can_list_size == 0 && !_is_literal)
{
@ -1020,14 +1059,14 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
#ifdef DEBUG_JOIN
cout << "edge exists!" << endl;
#endif
int ele = *it1;
unsigned ele = *it1;
bool exist_constant_pre = false;
bool s2o_pre_var = false;
bool o2s_pre_var = false;
for(RecordIterator it2 = edge_index.begin(); it2 != edge_index.end(); ++it2)
for(vector<int>::iterator it2 = edge_index.begin(); it2 != edge_index.end(); ++it2)
{
int edge_type = this->basic_query->getEdgeType(_id, *it2);
int pre_id = this->basic_query->getEdgePreID(_id, *it2);
TYPE_PREDICATE_ID pre_id = this->basic_query->getEdgePreID(_id, *it2);
if (pre_id == -2) //predicate var
{
@ -1058,8 +1097,8 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
break;
}
int* id_list;
int id_list_len;
unsigned* id_list;
unsigned id_list_len;
if (edge_type == Util::EDGE_IN)
{
#ifdef DEBUG_JOIN
@ -1113,8 +1152,8 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
//all pres are variable, so use s2o or o2s to add
if(s2o_pre_var)
{
int* id_list2;
int id_list2_len;
unsigned* id_list2;
unsigned id_list2_len;
this->kvstore->getobjIDlistBysubID(ele, id_list2, id_list2_len, true);
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal);
delete[] id_list2;
@ -1126,8 +1165,8 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
}
if(o2s_pre_var)
{
int* id_list2;
int id_list2_len;
unsigned* id_list2;
unsigned id_list2_len;
this->kvstore->getsubIDlistByobjID(ele, id_list2, id_list2_len, true);
update_answer_list(valid_ans_list, _can_list, id_list2, id_list2_len, _is_literal);
delete[] id_list2;
@ -1145,10 +1184,10 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
cout << "this record is matched!!" << endl;
#endif
found = true;
int size = valid_ans_list->size();
unsigned size = valid_ans_list->size();
it0->push_back((*valid_ans_list)[0]);
int begin = 1;
unsigned begin = 1;
if (!if_new_start && size > 1)
{
this->add_new_to_results(it0, (*valid_ans_list)[1]);
@ -1159,7 +1198,7 @@ Join::join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_s
begin = 2;
}
for (int i = begin; i < size; ++i)
for (unsigned i = begin; i < size; ++i)
{
//WARN+NOTICE:this strategy may cause that duplicates are not together!
this->add_new_to_results(it0, (*valid_ans_list)[i]);
@ -1222,13 +1261,13 @@ Join::multi_join()
//this->filterBySatellites(this->start_id);
IDList& start_table = this->basic_query->getCandidateList(this->start_id);
int start_size = this->basic_query->getCandidateSize(this->start_id);
unsigned start_size = this->basic_query->getCandidateSize(this->start_id);
#ifdef DEBUG_JOIN
cout << "the start size " << start_size << endl;
#endif
for (int i = 0; i < start_size; ++i)
for (unsigned i = 0; i < start_size; ++i)
{
int ele = start_table.getID(i);
unsigned ele = start_table.getID(i);
RecordType record(1, ele);
this->current_table.push_back(record);
//this->table_row_new.push_back(false);
@ -1277,7 +1316,7 @@ Join::multi_join()
//int* tmp_id_list;
//int tmp_id_list_len;
IDList& can_list = this->basic_query->getCandidateList(id2);
int can_list_size = can_list.size();
unsigned can_list_size = can_list.size();
for (int i = 0; i < this->id_pos; ++i)
{
@ -1333,7 +1372,7 @@ Join::multi_join()
for (int i = 0; i < this->id_pos; ++i)
{
vector<int> edge_index = edges[i];
for(RecordIterator it = edge_index.begin(); it != edge_index.end(); ++it)
for(vector<int>::iterator it = edge_index.begin(); it != edge_index.end(); ++it)
{
int edge_id = this->basic_query->getEdgeID(id2, *it);
dealed_triple[edge_id] = true;
@ -1459,9 +1498,10 @@ Join::constant_edge_filter(int _var_i)
this->dealed_triple[triple_id] = true;
}
int pre_id = this->basic_query->getEdgePreID(_var_i, j);
int lit_id = (this->kvstore)->getIDByEntity(neighbor_name);
if (lit_id == -1)
TYPE_PREDICATE_ID pre_id = this->basic_query->getEdgePreID(_var_i, j);
TYPE_ENTITY_LITERAL_ID lit_id = (this->kvstore)->getIDByEntity(neighbor_name);
//if (lit_id == -1)
if (lit_id == INVALID_ENTITY_LITERAL_ID)
{
lit_id = (this->kvstore)->getIDByLiteral(neighbor_name);
}
@ -1477,8 +1517,8 @@ Join::constant_edge_filter(int _var_i)
// Util::logging(_ss.str());
// }
int id_list_len = 0;
int* id_list = NULL;
unsigned id_list_len = 0;
unsigned* id_list = NULL;
if (pre_id >= 0)
{
if (edge_type == Util::EDGE_OUT)
@ -1531,7 +1571,7 @@ Join::constant_edge_filter(int _var_i)
if (id_list_len == 0)
{
_list.clear();
delete[]id_list;
delete[] id_list;
return false;
}
// cout << "\t\t can:" << can_list.to_str() << endl;
@ -1598,7 +1638,7 @@ Join::add_literal_candidate()
for (int j = 0; j < var_degree; j++)
{
int neighbor_id = this->basic_query->getEdgeNeighborID(var_id, j);
int predicate_id = this->basic_query->getEdgePreID(var_id, j);
TYPE_PREDICATE_ID predicate_id = this->basic_query->getEdgePreID(var_id, j);
int triple_id = this->basic_query->getEdgeID(var_id, j);
Triple triple = this->basic_query->getTriple(triple_id);
string neighbor_name = triple.subject;
@ -1607,19 +1647,21 @@ Join::add_literal_candidate()
// if the neighbor of this edge is an entity, we can add all literals which have an exact predicate edge linking to this entity.
if (neighbor_id == -1)
{
int subject_id = (this->kvstore)->getIDByEntity(neighbor_name);
int* object_list = NULL;
int object_list_len = 0;
TYPE_ENTITY_LITERAL_ID subject_id = (this->kvstore)->getIDByEntity(neighbor_name);
unsigned* object_list = NULL;
unsigned object_list_len = 0;
if (predicate_id >= 0)
{
(this->kvstore)->getobjIDlistBysubIDpreID(subject_id, predicate_id, object_list, object_list_len, true);
}
else if (predicate_id == -2)
{
this->kvstore->getobjIDlistBysubID(subject_id, object_list, object_list_len, true);
}
//NOTICE:only literals should be unioned
this_edge_literal_list.unionList(object_list, object_list_len, true);
delete[]object_list;
delete[] object_list;
}
// if the neighbor of this edge is variable, then the neighbor variable can not have any literal results,
// we should add literals when join these two variables, see the Database::join function for details.
@ -1732,7 +1774,7 @@ Join::preFilter(int _var)
//if size is very large, the cost is high and not many can be filtered!
//(keep state for each one-degree node, if considered)
IDList& cans = this->basic_query->getCandidateList(_var);
int size = this->basic_query->getCandidateSize(_var);
unsigned size = this->basic_query->getCandidateSize(_var);
//result if already empty for non-literal variable
if (size == 0)
@ -1745,8 +1787,8 @@ Join::preFilter(int _var)
int var_degree = this->basic_query->getVarDegree(_var);
//NOTICE:maybe several same predicates
set<int> in_edge_pre_id;
set<int> out_edge_pre_id;
set<TYPE_PREDICATE_ID> in_edge_pre_id;
set<TYPE_PREDICATE_ID> out_edge_pre_id;
for (int i = 0; i < var_degree; i++)
{
@ -1772,18 +1814,20 @@ Join::preFilter(int _var)
//else
//cout << "need to filter: " << neighbor_name << endl;
int pre_id = this->basic_query->getEdgePreID(_var, i);
TYPE_PREDICATE_ID pre_id = this->basic_query->getEdgePreID(_var, i);
//WARN+BETTER:invalid(should be discarded in Query) or ?p(should not be considered here)
if (pre_id < 0)
{
continue;
}
//TODO+BETTER: is any pre really used? do we need to loosen the restrictions?
//size:m<n; time:mlgn < n-m
//The former time is computed because the m should be small if we select this p, tending to use binary-search
//when doing intersectList operation(mlgn < m+n).
//The latter time is computed due to the unnecessary copy cost if not using this p
TNUM border = size / (Util::logarithm(2, size) + 1);
TYPE_TRIPLE_NUM border = size / (Util::logarithm(2, size) + 1);
//not use inefficient pre to filter
if(this->dealed_triple[triple_id] || this->pre2num[pre_id] > border)
{
@ -1810,9 +1854,9 @@ Join::preFilter(int _var)
}
//NOTICE:use p2s here, use s2p in only_pre_filter_after_join because pres there are not efficient
set<int>::iterator it;
int* list = NULL;
int len = 0;
set<TYPE_PREDICATE_ID>::iterator it;
unsigned* list = NULL;
unsigned len = 0;
for(it = in_edge_pre_id.begin(); it != in_edge_pre_id.end(); ++it)
{
this->kvstore->getobjIDlistBypreID(*it, list, len, true);
@ -1852,8 +1896,9 @@ Join::only_pre_filter_after_join()
//cout<<"var: "<<this->basic_query->getVarName(var_id)<<endl;
//get all the only predicate filter edges for this variable.
vector<int> in_edge_pre_id;
vector<int> out_edge_pre_id;
vector<TYPE_PREDICATE_ID> in_edge_pre_id;
vector<TYPE_PREDICATE_ID> out_edge_pre_id;
for (int i = 0; i < var_degree; i++)
{
//cout<<"var linking edge: "<<i<<endl;
@ -1892,7 +1937,7 @@ Join::only_pre_filter_after_join()
//else
//cout << "need to filter: " << neighbor_name << endl;
int pre_id = this->basic_query->getEdgePreID(var_id, i);
TYPE_PREDICATE_ID pre_id = this->basic_query->getEdgePreID(var_id, i);
if (pre_id < 0)
{
continue;
@ -1916,9 +1961,9 @@ Join::only_pre_filter_after_join()
for (TableIterator it = this->current_table.begin(); it != this->current_table.end();)
{
int entity_id = (*it)[this->id2pos[var_id]];
int* pair_list = NULL;
int pair_len = 0;
TYPE_ENTITY_LITERAL_ID entity_id = (*it)[this->id2pos[var_id]];
unsigned* pair_list = NULL;
unsigned pair_len = 0;
bool exist_preid = true;
//NOTICE: four ways to judge if the predicates exist
@ -1934,12 +1979,14 @@ Join::only_pre_filter_after_join()
//(this->kvstore)->getpreIDsubIDlistByobjID(entity_id, pair_list, pair_len);
(this->kvstore)->getpreIDlistByobjID(entity_id, pair_list, pair_len, true);
for (vector<int>::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++)
for (vector<TYPE_PREDICATE_ID>::iterator itr_pre = in_edge_pre_id.begin(); itr_pre != in_edge_pre_id.end(); itr_pre++)
{
int pre_id = (*itr_pre);
TYPE_PREDICATE_ID pre_id = (*itr_pre);
//exist_preid = Util::bsearch_preid_uporder(pre_id, pair_list, pair_len);
if (Util::bsearch_int_uporder(pre_id, pair_list, pair_len) == -1)
if (Util::bsearch_int_uporder(pre_id, pair_list, pair_len) == INVALID)
{
exist_preid = false;
}
if (!exist_preid)
{
break;
@ -1952,12 +1999,14 @@ Join::only_pre_filter_after_join()
//(this->kvstore)->getpreIDobjIDlistBysubID(entity_id, pair_list, pair_len);
(this->kvstore)->getpreIDlistBysubID(entity_id, pair_list, pair_len, true);
for (vector<int>::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++)
for (vector<TYPE_PREDICATE_ID>::iterator itr_pre = out_edge_pre_id.begin(); itr_pre != out_edge_pre_id.end(); itr_pre++)
{
int pre_id = (*itr_pre);
TYPE_PREDICATE_ID pre_id = (*itr_pre);
//exist_preid = Util::bsearch_preid_uporder(pre_id, pair_list, pair_len);
if (Util::bsearch_int_uporder(pre_id, pair_list, pair_len) == -1)
if (Util::bsearch_int_uporder(pre_id, pair_list, pair_len) == INVALID)
{
exist_preid = false;
}
if (!exist_preid)
{
break;

View File

@ -15,23 +15,23 @@
#include "../KVstore/KVstore.h"
#include "../Util/Util.h"
typedef vector<int> RecordType;
typedef vector<int>::iterator RecordIterator;
typedef vector<unsigned> RecordType;
typedef vector<unsigned>::iterator RecordIterator;
typedef list<RecordType> TableType;
typedef list<RecordType>::iterator TableIterator;
typedef list<RecordType>::reverse_iterator TableReverseIterator;
//typedef list< vector<int> > TableType;
//typedef list< vector<int> >::iterator TableIterator;
//typedef list< vector<int> >::reverse_iterator TableReverseIterator;
typedef vector< vector<int*> > IdLists;
typedef vector< vector<int> > IdListsLen;
//typedef vector< vector<int*> > IdLists;
//typedef vector< vector<int> > IdListsLen;
typedef struct Satellite
{
int id;
int* idlist;
int idlist_len;
Satellite(int _id, int* _idlist, int _idlist_len)
unsigned* idlist;
unsigned idlist_len;
Satellite(int _id, unsigned* _idlist, unsigned _idlist_len)
{
this->id = _id;
this->idlist = _idlist;
@ -47,16 +47,21 @@ private:
int var_num;
BasicQuery* basic_query;
KVstore* kvstore;
TNUM* pre2num;
int limitID_predicate;
int limitID_literal;
TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
//used by score_node for parameters
static const unsigned PARAM_DEGREE = 1;
static const unsigned PARAM_SIZE = 1000000;
static const unsigned PARAM_PRE = 10000;
static const unsigned PARAM_DENSE = 1;
static const double JUDGE_LIMIT = 0.5;
static const int LIMIT_CANDIDATE_LIST_SIZE = 1000;
static const unsigned JUDGE_LIMIT = 2;
//NOTICE+DEBUG: please use constexpr below instead of the phrase above(constexpr is supported in C++11)
//http://www.cnblogs.com/wanyuanchun/p/4041080.html
//constexpr static const double JUDGE_LIMIT = 0.5;
static const unsigned LIMIT_CANDIDATE_LIST_SIZE = 1000;
//BETTER?:predefine size to avoid copy cost
TableType current_table;
TableIterator new_start; //keep to end() as default
@ -69,10 +74,10 @@ private:
bool* dealed_triple;
stack<int> mystack;
vector<int*>* result_list;
vector<unsigned*>* result_list;
vector<Satellite> satellites;
int* record;
int record_len;
unsigned* record;
unsigned record_len;
void init(BasicQuery* _basic_query);
void clear();
@ -81,7 +86,7 @@ private:
//judge which method should be used according to
//the size of candidates and structure of the query graph
int judge(int _smallest, int _biggest);
int judge(unsigned _smallest, unsigned _biggest);
//select the start point and search order
void select();
@ -108,20 +113,20 @@ private:
//functions for help
//copy/add to the end of current_table and set true
void add_new_to_results(TableIterator it, int id);
void add_new_to_results(TableIterator it, unsigned id);
//void set_results_old(list<bool>::iterator it);
int choose_next_node(int id);
bool is_literal_var(int id);
bool is_literal_ele(int _id);
//bool is_literal_ele(int _id);
void copyToResult();
//BETTER?:change these params to members in class
void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, int _can_list_size);
void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, int* id_list, int id_list_len, bool _is_literal);
bool join_two(vector< vector<int> >& _edges, IDList& _can_list, int _can_list_size, int _id, bool _is_literal);
//void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, unsigned _can_list_size);
void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal);
bool join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal);
bool multi_join();
//NOTICE:this is only used to join a BasicQuery
@ -129,7 +134,7 @@ private:
public:
Join();
Join(KVstore* _kvstore, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal);
Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal);
//these functions can be called by Database
bool join_sparql(SPARQLquery& _sparql_query);
bool join_basic(BasicQuery* _basic_query);

View File

@ -18,7 +18,7 @@ Strategy::Strategy()
//this->prepare_handler();
}
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal)
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal)
{
this->method = 0;
this->kvstore = _kvstore;
@ -58,7 +58,7 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
{
this->method = -1;
vector<int*>& result_list = (*iter)->getResultList();
vector<unsigned*>& result_list = (*iter)->getResultList();
//int select_var_num = (*iter)->getSelectVarNum();
//the num of vars needing to be joined, i.e. selectVarNum if only one triple
int varNum = (*iter)->getVarNum();
@ -157,7 +157,7 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
}
void
Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list, ResultFilter* _result_filter)
Strategy::handler0(BasicQuery* _bq, vector<unsigned*>& _result_list, ResultFilter* _result_filter)
{
//long before_filter = Util::get_cur_time();
cout << "this BasicQuery use query strategy 0" << endl;
@ -215,7 +215,7 @@ Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list, ResultFilter* _r
}
void
Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
Strategy::handler1(BasicQuery* _bq, vector<unsigned*>& _result_list)
{
long before_filter = Util::get_cur_time();
cout << "this BasicQuery use query strategy 1" << endl;
@ -223,14 +223,15 @@ Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
char edge_type = _bq->getEdgeType(0, 0);
int triple_id = _bq->getEdgeID(0, 0);
Triple triple = _bq->getTriple(triple_id);
int pre_id = _bq->getEdgePreID(0, 0);
int* id_list = NULL;
int id_list_len = 0;
TYPE_PREDICATE_ID pre_id = _bq->getEdgePreID(0, 0);
unsigned* id_list = NULL;
unsigned id_list_len = 0;
if (edge_type == Util::EDGE_OUT)
{
//cout<<"edge out!!!"<<endl;
int nid = (this->kvstore)->getIDByEntity(triple.object);
if (nid == -1)
TYPE_ENTITY_LITERAL_ID nid = (this->kvstore)->getIDByEntity(triple.object);
//if (nid == -1)
if (nid == INVALID_ENTITY_LITERAL_ID)
{
nid = (this->kvstore)->getIDByLiteral(triple.object);
}
@ -246,9 +247,9 @@ Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
cout << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
_result_list.clear();
//cout<<"now to copy result to list"<<endl;
for (int i = 0; i < id_list_len; ++i)
for (unsigned i = 0; i < id_list_len; ++i)
{
int* record = new int[1]; //only this var is selected
unsigned* record = new unsigned[1]; //only this var is selected
record[0] = id_list[i];
//cout<<this->kvstore->getEntityByID(record[0])<<endl;
_result_list.push_back(record);
@ -260,20 +261,20 @@ Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
}
void
Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
Strategy::handler2(BasicQuery* _bq, vector<unsigned*>& _result_list)
{
long before_filter = Util::get_cur_time();
cout << "this BasicQuery use query strategy 2" << endl;
int triple_id = _bq->getEdgeID(0, 0);
Triple triple = _bq->getTriple(triple_id);
int pre_id = _bq->getEdgePreID(0, 0);
TYPE_PREDICATE_ID pre_id = _bq->getEdgePreID(0, 0);
//NOTICE:it is ok for var1 or var2 to be -1, i.e. not encoded
int var1_id = _bq->getIDByVarName(triple.subject);
int var2_id = _bq->getIDByVarName(triple.object);
int* id_list = NULL;
int id_list_len = 0;
unsigned* id_list = NULL;
unsigned id_list_len = 0;
if (var1_id == 0) //subject var selected
{
//use p2s directly
@ -291,9 +292,9 @@ Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
long after_filter = Util::get_cur_time();
cout << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
_result_list.clear();
for (int i = 0; i < id_list_len; ++i)
for (unsigned i = 0; i < id_list_len; ++i)
{
int* record = new int[1]; //only one var
unsigned* record = new unsigned[1]; //only one var
record[0] = id_list[i];
_result_list.push_back(record);
}
@ -304,15 +305,15 @@ Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
}
void
Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
Strategy::handler3(BasicQuery* _bq, vector<unsigned*>& _result_list)
{
long before_filter = Util::get_cur_time();
cout << "this BasicQuery use query strategy 3" << endl;
int triple_id = _bq->getEdgeID(0, 0);
Triple triple = _bq->getTriple(triple_id);
int pre_id = _bq->getEdgePreID(0, 0);
int* id_list = NULL;
int id_list_len = 0;
TYPE_PREDICATE_ID pre_id = _bq->getEdgePreID(0, 0);
unsigned* id_list = NULL;
unsigned id_list_len = 0;
_result_list.clear();
this->kvstore->getsubIDobjIDlistBypreID(pre_id, id_list, id_list_len);
@ -328,9 +329,9 @@ Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
long after_filter = Util::get_cur_time();
cout << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
for (int i = 0; i < id_list_len; i += 2)
for (unsigned i = 0; i < id_list_len; i += 2)
{
int* record = new int[2]; //2 vars and selected
unsigned* record = new unsigned[2]; //2 vars and selected
record[var1_id] = id_list[i];
record[var2_id] = id_list[i + 1];
_result_list.push_back(record);
@ -343,7 +344,7 @@ Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
}
void
Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
Strategy::handler4(BasicQuery* _bq, vector<unsigned*>& _result_list)
{
cout<<"Special Case: consider pre var in this triple"<<endl;
int varNum = _bq->getVarNum();
@ -354,8 +355,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
int selected_var_num = _bq->getSelectVarNum();
Triple triple = _bq->getTriple(0);
int pvpos = _bq->getSelectedPreVarPosition(triple.predicate);
int* id_list = NULL;
int id_list_len = 0;
unsigned* id_list = NULL;
unsigned id_list_len = 0;
_result_list.clear();
//cout<<"total num: "<<total_num <<endl;
@ -369,9 +371,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
cout<<"predicate: "<<triple.predicate<<" "<<pvpos<<endl;
//very special case, to find all triples, select ?s (?p) ?o where { ?s ?p ?o . }
//filter and join is too costly, should enum all predicates and use p2so
for(int i = 0; i < this->limitID_predicate; ++i)
for(TYPE_PREDICATE_ID i = 0; i < this->limitID_predicate; ++i)
{
int pid = i;
TYPE_PREDICATE_ID pid = i;
this->kvstore->getsubIDobjIDlistBypreID(pid, id_list, id_list_len);
int rsize = selected_var_num;
if(selected_pre_var_num == 1)
@ -380,9 +382,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
}
//always place s/o before p in result list
for (int j = 0; j < id_list_len; j += 2)
for (unsigned j = 0; j < id_list_len; j += 2)
{
int* record = new int[rsize];
unsigned* record = new unsigned[rsize];
//check the s/o var if selected, need to ensure the placement order
if(ovpos >= 0)
{
@ -409,14 +411,15 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
int vpos = -1;
if(triple.subject[0] != '?') //constant
{
int sid = (this->kvstore)->getIDByEntity(triple.subject);
TYPE_ENTITY_LITERAL_ID sid = (this->kvstore)->getIDByEntity(triple.subject);
this->kvstore->getpreIDobjIDlistBysubID(sid, id_list, id_list_len);
vpos = _bq->getSelectedVarPosition(triple.object);
}
else if(triple.object[0] != '?') //constant
{
int oid = (this->kvstore)->getIDByEntity(triple.object);
if (oid == -1)
TYPE_ENTITY_LITERAL_ID oid = (this->kvstore)->getIDByEntity(triple.object);
//if (oid == -1)
if (oid == INVALID_ENTITY_LITERAL_ID)
{
oid = (this->kvstore)->getIDByLiteral(triple.object);
}
@ -430,9 +433,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
rsize++;
}
//always place s/o before p in result list
for (int i = 0; i < id_list_len; i += 2)
for (unsigned i = 0; i < id_list_len; i += 2)
{
int* record = new int[rsize];
unsigned* record = new unsigned[rsize];
if(vpos >= 0)
{
record[vpos] = id_list[i + 1]; //for the s/o var
@ -448,8 +451,8 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
{
cout<<"Special Case 3"<<endl;
//just use so2p
int sid = (this->kvstore)->getIDByEntity(triple.subject);
int oid = (this->kvstore)->getIDByEntity(triple.object);
unsigned sid = (this->kvstore)->getIDByEntity(triple.subject);
unsigned oid = (this->kvstore)->getIDByEntity(triple.object);
if (oid == -1)
{
oid = (this->kvstore)->getIDByLiteral(triple.object);
@ -457,9 +460,9 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);
//copy to result list
for (int i = 0; i < id_list_len; ++i)
for (unsigned i = 0; i < id_list_len; ++i)
{
int* record = new int[1];
unsigned* record = new unsigned[1];
record[0] = id_list[i];
_result_list.push_back(record);
}
@ -471,38 +474,42 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
//TODO:if any constants in a query are not found in kvstore, then this BGP should end to speed up the processing
void
Strategy::handler5(BasicQuery* _bq, vector<int*>& _result_list)
Strategy::handler5(BasicQuery* _bq, vector<unsigned*>& _result_list)
{
cout<<"Special Case: consider constant triple"<<endl;
Triple triple = _bq->getTriple(0);
_result_list.clear();
int subid = this->kvstore->getIDByEntity(triple.subject);
if(subid == -1) //not found
TYPE_ENTITY_LITERAL_ID subid = this->kvstore->getIDByEntity(triple.subject);
//if(subid == -1) //not found
if(subid == INVALID_ENTITY_LITERAL_ID) //not found
{
return;
}
int preid = this->kvstore->getIDByPredicate(triple.predicate);
if(preid == -1) //not found
TYPE_PREDICATE_ID preid = this->kvstore->getIDByPredicate(triple.predicate);
//if(preid == -1) //not found
if(preid == INVALID_PREDICATE_ID) //not found
{
return;
}
int objid = this->kvstore->getIDByEntity(triple.object);
if(objid == -1)
TYPE_ENTITY_LITERAL_ID objid = this->kvstore->getIDByEntity(triple.object);
//if(objid == -1)
if(objid == INVALID_ENTITY_LITERAL_ID)
{
objid = this->kvstore->getIDByLiteral(triple.object);
}
if(objid == -1)
//if(objid == -1)
if(objid == INVALID_ENTITY_LITERAL_ID)
{
return;
}
int* id_list = NULL;
int id_list_len = 0;
unsigned* id_list = NULL;
unsigned id_list_len = 0;
(this->kvstore)->getobjIDlistBysubIDpreID(subid, preid, id_list, id_list_len);
if (Util::bsearch_int_uporder(objid, id_list, id_list_len) != -1)
if (Util::bsearch_int_uporder(objid, id_list, id_list_len) != INVALID)
{
int* record = new int[3];
unsigned* record = new unsigned[3];
record[0] = subid;
record[1] = preid;
record[2] = objid;

View File

@ -23,7 +23,7 @@ class Strategy
{
public:
Strategy();
Strategy(KVstore*, VSTree*, TNUM*, int, int);
Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID);
~Strategy();
//select efficient strategy to do the sparql query
bool handle(SPARQLquery&, ResultFilter* _result_filter = NULL);
@ -32,22 +32,25 @@ private:
int method;
KVstore* kvstore;
VSTree* vstree;
TNUM* pre2num;
int limitID_predicate;
int limitID_literal;
TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
void handler0(BasicQuery*, vector<int*>&, ResultFilter* _result_filter = NULL);
void handler1(BasicQuery*, vector<int*>&);
void handler2(BasicQuery*, vector<int*>&);
void handler3(BasicQuery*, vector<int*>&);
void handler4(BasicQuery*, vector<int*>&);
void handler5(BasicQuery*, vector<int*>&);
//NOTICE: even if the ID type is int, using unsigned in the answer is neither a problem nor a waste
//(because -1, -2 or other invalid IDs can never appear in an answer)
void handler0(BasicQuery*, vector<unsigned*>&, ResultFilter* _result_filter = NULL);
void handler1(BasicQuery*, vector<unsigned*>&);
void handler2(BasicQuery*, vector<unsigned*>&);
void handler3(BasicQuery*, vector<unsigned*>&);
void handler4(BasicQuery*, vector<unsigned*>&);
void handler5(BasicQuery*, vector<unsigned*>&);
//QueryHandler *dispatch;
//void prepare_handler();
};
//function pointer array
static const unsigned QUERY_HANDLER_NUM = 4;
typedef void (Strategy::*QueryHandler[QUERY_HANDLER_NUM]) (BasicQuery*, vector<int*>&);
typedef void (Strategy::*QueryHandler[QUERY_HANDLER_NUM]) (BasicQuery*, vector<unsigned*>&);
//QueryHandler dispatch;
#endif //_DATABASE_STRATEGY_H

View File

@ -20,7 +20,7 @@ ISTree::ISTree()
TSM = NULL;
storepath = "";
filename = "";
transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
//transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
this->stream = NULL;
this->request = 0;
}
@ -37,10 +37,10 @@ ISTree::ISTree(string _storepath, string _filename, string _mode, unsigned long
this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
else
this->root = NULL;
this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
//this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
this->stream = NULL;
this->request = 0;
}
@ -51,30 +51,30 @@ ISTree::getFilePath()
return storepath + "/" + filename;
}
void //WARN: not check _str and _len
ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
{
if (_index > 2)
return;
/*
if(_str == NULL || _len == 0)
{
printf("error in CopyToTransfer: empty string\n");
return;
}
*/
//unsigned length = _bstr->getLen();
unsigned length = _len;
if (length + 1 > this->transfer_size[_index])
{
transfer[_index].release();
transfer[_index].setStr((char*)malloc(length + 1));
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
}
memcpy(this->transfer[_index].getStr(), _str, length);
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
this->transfer[_index].setLen(length);
}
//void //WARN: not check _str and _len
//ISTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
//{
//if (_index > 2)
//return;
//[>
//if(_str == NULL || _len == 0)
//{
//printf("error in CopyToTransfer: empty string\n");
//return;
//}
//*/
////unsigned length = _bstr->getLen();
//unsigned length = _len;
//if (length + 1 > this->transfer_size[_index])
//{
//transfer[_index].release();
//transfer[_index].setStr((char*)malloc(length + 1));
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
//}
//memcpy(this->transfer[_index].getStr(), _str, length);
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
//this->transfer[_index].setLen(length);
//}
unsigned
ISTree::getHeight() const
@ -104,41 +104,46 @@ ISTree::prepare(ISNode* _np)
}
bool
ISTree::search(int _key, char*& _str, int& _len)
ISTree::search(unsigned _key, char*& _str, unsigned& _len)
{
if (_key < 0)
{
printf("error in ISTree-search: empty string\n");
return false;
}
//DEBUG
//if (_key < 0)
//{
//printf("error in ISTree-search: empty string\n");
//return false;
//}
this->request = 0;
int store;
ISNode* ret = this->find(_key, &store, false);
//cout<<"to find the position: "<<store<<endl;
if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
{
return false;
}
const Bstr* val = ret->getValue(store);
this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
_str = this->transfer[0].getStr();
_len = this->transfer[0].getLen();
//this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
//_str = this->transfer[0].getStr();
//_len = this->transfer[0].getLen();
_str = val->getStr();
_len = val->getLen();
this->TSM->request(request);
return true;
}
bool
ISTree::insert(int _key, const char* _str, unsigned _len)
ISTree::insert(unsigned _key, char* _str, unsigned _len)
{
if (_key < 0)
{
printf("error in ISTree-insert: empty string\n");
return false;
}
//if (_key < 0)
//{
//printf("error in ISTree-insert: empty string\n");
//return false;
//}
this->CopyToTransfer(_str, _len, 2);
const Bstr* val = &(this->transfer[2]);
//this->CopyToTransfer(_str, _len, 2);
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
ISNode* ret;
if (this->root == NULL) //tree is empty
@ -222,29 +227,33 @@ ISTree::insert(int _key, const char* _str, unsigned _len)
else
{
p->addKey(_key, i);
p->addValue(val, i, true);
p->addValue(_str, _len, i, true);
p->addNum();
request += val->getLen();
request += _len;
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
//_key->clear();
//_value->clear();
}
this->TSM->request(request);
//if(_key == 0)
//{
//cout<<"the 0th element is: "<<_str[0]<<endl;
//}
return !ifexist; //QUERY(which case:return false)
}
bool
ISTree::modify(int _key, const char* _str, unsigned _len)
ISTree::modify(unsigned _key, char* _str, unsigned _len)
{
if (_key < 0)
{
printf("error in ISTree-modify: empty string\n");
return false;
}
//if (_key < 0)
//{
//printf("error in ISTree-modify: empty string\n");
//return false;
//}
this->CopyToTransfer(_str, _len, 2); //not check value
const Bstr* val = &(this->transfer[2]);
//this->CopyToTransfer(_str, _len, 2); //not check value
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
int store;
ISNode* ret = this->find(_key, &store, true);
@ -255,22 +264,23 @@ ISTree::modify(int _key, const char* _str, unsigned _len)
}
//cout<<"ISTree::modify() - key is found, now to remove"<<endl;
unsigned len = ret->getValue(store)->getLen();
ret->setValue(val, store, true);
ret->setValue(_str, _len, store, true);
//cout<<"value reset"<<endl;
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
//request += (val->getLen() - len);
this->request = val->getLen();
this->request = _len;
this->request -= len;
ret->setDirty();
//cout<<"to request"<<endl;
this->TSM->request(request);
//cout<<"memory requested"<<endl;
return true;
}
//this function is useful for search and modify, and range-query
ISNode* //return the first key's position that >= *_key
ISTree::find(int _key, int* _store, bool ifmodify)
ISTree::find(unsigned _key, int* _store, bool ifmodify)
{ //to assign value for this->bstr, function shouldn't be const!
if (this->root == NULL)
return NULL; //ISTree Is Empty
@ -300,6 +310,7 @@ ISTree::find(int _key, int* _store, bool ifmodify)
*_store = -1; //Not Found
else
*_store = i;
return p;
}
@ -311,13 +322,14 @@ ISTree::find(unsigned _len, const char* _str, int* store) const
*/
bool
ISTree::remove(int _key)
ISTree::remove(unsigned _key)
{
if (_key < 0)
{
printf("error in ISTree-remove: empty string\n");
return false;
}
//DEBUG
//if (_key < 0)
//{
//printf("error in ISTree-remove: empty string\n");
//return false;
//}
this->request = 0;
ISNode* ret;
@ -443,7 +455,7 @@ ISTree::resetStream()
}
bool //special case: not exist, one-edge-case
ISTree::range_query(int _key1, int _key2)
ISTree::range_query(unsigned _key1, unsigned _key2)
{ //the range is: *_key1 <= x < *_key2
//if(_key1 <0 && _key2 <0)
//return false;
@ -516,7 +528,7 @@ ISTree::range_query(int _key1, int _key2)
delete this->stream;
this->stream = NULL;
}
vector<int> keys;
vector<unsigned> keys;
vector<bool> desc;
this->stream = new Stream(keys, desc, ansNum, 1, false);
@ -570,6 +582,7 @@ ISTree::release(ISNode* _np) const
return;
}
int cnt = _np->getNum();
//WARN: do not change cnt to an unsigned type here (cnt >= 0 would then always hold, causing an endless loop)
for (; cnt >= 0; --cnt)
release(_np->getChild(cnt));
delete _np;
@ -655,3 +668,4 @@ ISTree::print(string s)
else;
#endif
}

View File

@ -3,7 +3,7 @@
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:44
# Description: struct and interface of the B+ tree
# Description: ID2string, including id2entity, id2literal and id2predicate
=============================================================================*/
#ifndef _KVSTORE_ISTREE_ISTREE_H
@ -19,7 +19,7 @@
class ISTree
{
protected:
unsigned int height; //0 indicates an empty tree
unsigned height; //0 indicates an empty tree
ISNode* root;
ISNode* leaves_head; //the head of LeafNode-list
ISNode* leaves_tail; //the tail of LeafNode-list
@ -36,8 +36,8 @@ protected:
//so lock is a must. Add lock to transfer is better than to add
//lock to every key/value. However, modify requires a lock for a
//key/value, and multiple search for different keys are ok!!!
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
unsigned transfer_size[3];
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
//unsigned transfer_size[3];
//tree's operations should be atom(if read nodes)
//sum the request and send to ISStorage at last
@ -49,25 +49,25 @@ protected:
std::string filename; //ok for user to change
/* some private functions */
std::string getFilePath(); //in UNIX system
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
void release(ISNode* _np) const;
public:
ISTree(); //always need to initial transfer
ISTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
unsigned int getHeight() const;
unsigned getHeight() const;
void setHeight(unsigned _h);
ISNode* getRoot() const;
//void setRoot(Node* _root);
//insert, search, remove, set
bool search(int _key, char*& _str, int& _len);
bool insert(int _key, const char* _str, unsigned _len);
bool modify(int _key, const char* _str, unsigned _len);
ISNode* find(int _key, int* store, bool ifmodify);
bool remove(int _key);
bool search(unsigned _key, char*& _str, unsigned& _len);
bool insert(unsigned _key, char* _str, unsigned _len);
bool modify(unsigned _key, char* _str, unsigned _len);
ISNode* find(unsigned _key, int* store, bool ifmodify);
bool remove(unsigned _key);
const Bstr* getRangeValue();
void resetStream();
bool range_query(int _key1, int _key2);
bool range_query(unsigned _key1, unsigned _key2);
bool save();
~ISTree();
void print(std::string s); //DEBUG(print the tree)
@ -77,3 +77,4 @@ public:
//After saved, it's ok to continue operations on tree!
#endif

View File

@ -39,3 +39,4 @@ public:
};
#endif

View File

@ -131,7 +131,7 @@ ISIntlNode::split(ISNode* _father, int _index)
p->addNum();
}
p->addChild(this->childs[i], k);
int tp = this->keys[MIN_KEY_NUM];
unsigned tp = this->keys[MIN_KEY_NUM];
this->setNum(MIN_KEY_NUM);
_father->addKey(tp, _index);
_father->addChild(p, _index + 1); //DEBUG(check the index)
@ -177,7 +177,7 @@ ISIntlNode::coalesce(ISNode* _father, int _index)
}
}
int tmp = 0;
unsigned tmp = 0;
switch (ccase)
{
case 1: //union right to this

View File

@ -46,3 +46,4 @@ public:
};
#endif

View File

@ -82,6 +82,7 @@ ISLeafNode::getValue(int _index) const
int num = this->getNum();
if (_index < 0 || _index >= num)
{
//cout<<"null in getValue: "<<_index<<endl;
//print(string("error in getValue: Invalid index ") + Util::int2string(_index));
return NULL;
}
@ -125,13 +126,53 @@ ISLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
return true;
}
bool
ISLeafNode::setValue(char* _str, unsigned _len, int _index, bool ifcopy)
{
	//Overwrite the value held in slot _index with (_str, _len).
	//Returns false, leaving the node untouched, when _index is outside [0, num).
	//The old content of the slot is released before _str/_len are handed to setStr/setLen.
	//ifcopy is not read by this overload.
	const bool in_range = (_index >= 0) && (_index < this->getNum());
	if (!in_range)
	{
		return false;
	}

	this->values[_index].release(); //NOTICE: only used in modify
	this->values[_index].setStr(_str);
	this->values[_index].setLen(_len);
	return true;
}
bool
ISLeafNode::addValue(char* _str, unsigned _len, int _index, bool ifcopy)
{
	//Insert (_str, _len) as a new value at slot _index; _index == num (append) is accepted.
	//Returns false when _index is outside [0, num].
	//Values at _index and after are first moved one slot towards the end.
	//This function does not call addNum(); ifcopy is not read by this overload.
	int total = this->getNum();
	if (_index >= 0 && _index <= total)
	{
		//walk from the tail so that no slot is overwritten before it has been moved
		for (int pos = total; pos > _index; --pos)
		{
			this->values[pos] = this->values[pos - 1];
		}
		this->values[_index].setStr(_str);
		this->values[_index].setLen(_len);
		return true;
	}
	return false;
}
bool
ISLeafNode::subValue(int _index, bool ifdel)
{
int num = this->getNum();
if (_index < 0 || _index >= num)
{
print(string("error in subValue: Invalid index ") + Util::int2string(_index));
//print(string("error in subValue: Invalid index ") + Util::int2string(_index));
return false;
}
int i;
@ -181,7 +222,7 @@ ISLeafNode::split(ISNode* _father, int _index)
p->addValue(this->values + i, k);
p->addNum();
}
int tp = this->keys[MIN_KEY_NUM];
unsigned tp = this->keys[MIN_KEY_NUM];
this->setNum(MIN_KEY_NUM);
_father->addKey(tp, _index);
_father->addChild(p, _index + 1); //DEBUG(check the index)
@ -226,7 +267,7 @@ ISLeafNode::coalesce(ISNode* _father, int _index)
}
}
int tmp = 0;
unsigned tmp = 0;
switch (ccase)
{
case 1: //union right to this
@ -283,8 +324,8 @@ ISLeafNode::coalesce(ISNode* _father, int _index)
p->subNum();
break;
default:
print("error in coalesce: Invalid case!");
//printf("error in coalesce: Invalid case!");
//print("error in coalesce: Invalid case!");
cout<<"error in coalesce: Invalid case!"<<endl;
}
_father->setDirty();
p->setDirty();
@ -374,3 +415,4 @@ ISLeafNode::print(string s)
else;
#endif
}

View File

@ -27,6 +27,7 @@ public:
void Normal();
ISNode* getPrev() const;
ISNode* getNext() const;
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index, bool ifcopy = false);
bool addValue(const Bstr* _value, int _index, bool ifcopy = false);
@ -34,6 +35,10 @@ public:
void setPrev(ISNode* _prev);
void setNext(ISNode* _next);
unsigned getSize() const;
bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false);
ISNode* split(ISNode* _father, int _index);
ISNode* coalesce(ISNode* _father, int _index);
void release();
@ -48,3 +53,4 @@ public:
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
#endif

View File

@ -203,7 +203,7 @@ ISNode::setFlag(unsigned _flag)
this->flag = _flag;
}
int
unsigned
ISNode::getKey(int _index) const
{
int num = this->getNum();
@ -211,14 +211,15 @@ ISNode::getKey(int _index) const
{
//print(string("error in getKey: Invalid index ") + Util::int2string(_index));
printf("error in getKey: Invalid index\n");
return -1;
//return -1;
return INVALID;
}
else
return this->keys[_index];
}
bool
ISNode::setKey(int _key, int _index)
ISNode::setKey(unsigned _key, int _index)
{
int num = this->getNum();
if (_index < 0 || _index >= num)
@ -231,7 +232,7 @@ ISNode::setKey(int _key, int _index)
}
bool
ISNode::addKey(int _key, int _index)
ISNode::addKey(unsigned _key, int _index)
{
int num = this->getNum();
if (_index < 0 || _index > num)
@ -264,7 +265,7 @@ ISNode::subKey(int _index)
}
int
ISNode::searchKey_less(int _key) const
ISNode::searchKey_less(unsigned _key) const
{
int num = this->getNum();
//for(i = 0; i < num; ++i)
@ -290,7 +291,7 @@ ISNode::searchKey_less(int _key) const
}
int
ISNode::searchKey_equal(int _key) const
ISNode::searchKey_equal(unsigned _key) const
{
int num = this->getNum();
//for(i = 0; i < num; ++i)
@ -305,7 +306,7 @@ ISNode::searchKey_equal(int _key) const
}
int
ISNode::searchKey_lessEqual(int _key) const
ISNode::searchKey_lessEqual(unsigned _key) const
{
//int num = this->getNum();
//for(i = 0; i < num; ++i)
@ -318,3 +319,4 @@ ISNode::searchKey_lessEqual(int _key) const
else
return ret;
}

View File

@ -63,15 +63,15 @@ public:
void setStore(unsigned _store);
unsigned getFlag() const;
void setFlag(unsigned _flag);
int getKey(int _index) const; //need to check the index
bool setKey(int _key, int _index);
bool addKey(int _key, int _index);
unsigned getKey(int _index) const; //need to check the index
bool setKey(unsigned _key, int _index);
bool addKey(unsigned _key, int _index);
bool subKey(int _index);
//several binary key search utilities
int searchKey_less(int _key) const;
int searchKey_equal(int _key) const;
int searchKey_lessEqual(int _key) const;
int searchKey_less(unsigned _key) const;
int searchKey_equal(unsigned _key) const;
int searchKey_lessEqual(unsigned _key) const;
//virtual functions: polymorphic
virtual ISNode* getChild(int _index) const { return NULL; };
@ -80,12 +80,18 @@ public:
virtual bool subChild(int _index) { return true; };
virtual ISNode* getPrev() const { return NULL; };
virtual ISNode* getNext() const { return NULL; };
virtual const Bstr* getValue(int _index) const { return NULL; };
virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
virtual bool subValue(int _index, bool ifdel = false) { return true; };
virtual void setPrev(ISNode* _prev) {};
virtual void setNext(ISNode* _next) {};
virtual bool setValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
virtual bool addValue(char* _str, unsigned _len, int _index, bool ifcopy = false) { return true; };
//pure virtual function
virtual void Virtual() = 0;
virtual void Normal() = 0;
virtual unsigned getSize() const = 0; //return all memory owned
@ -111,3 +117,4 @@ public:
*/
#endif

View File

@ -69,7 +69,7 @@ ISStorage::ISStorage(string& _filepath, string& _mode, unsigned* _height, unsign
else //_mode == "open"
{
//read basic information
int rootnum;
unsigned rootnum;
char c;
fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
fread(&rootnum, sizeof(unsigned), 1, this->treefp);
@ -274,7 +274,8 @@ ISStorage::readNode(ISNode* _np, long long* _request)
fseek(treefp, 4 * (num + 1), SEEK_CUR);
//to read all keys
int tmp = -1;
unsigned tmp = INVALID;
//int tmp = -1;
for (i = 0; i < num; ++i)
{
fread(&tmp, sizeof(int), 1, treefp);
@ -384,7 +385,8 @@ ISStorage::writeNode(ISNode* _np)
}
}
int tmp = 0;
//int tmp = 0;
unsigned tmp = INVALID;
//to write all keys
for (i = 0; i < num; ++i)
{
@ -397,7 +399,13 @@ ISStorage::writeNode(ISNode* _np)
{
//to write all values
for (i = 0; i < num; ++i)
{
this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock);
//if(_np->getKey(0) == 0)
//{
//cout<<"the 0th value: "<<_np->getValue(i)->getStr()[0]<<endl;
//}
}
}
fseek(treefp, Address(blocknum), SEEK_SET);
if (SpecialBlock)
@ -408,6 +416,7 @@ ISStorage::writeNode(ISNode* _np)
//NOTICE:we may store the dirty bit into the tree file, but that is ok
//Each time we read the tree file to construct a node, we always set the drity bit to 0
_np->delDirty();
return true;
}
@ -419,7 +428,8 @@ ISStorage::readBstr(Bstr* _bp, unsigned* _next)
fread(&len, sizeof(unsigned), 1, this->treefp);
this->ReadAlign(_next);
//this->request(len);
char* s = (char*)malloc(len);
//char* s = (char*)malloc(len);
char* s = new char[len];
_bp->setLen(len);
for (i = 0; i + 4 < len; i += 4)
{
@ -437,6 +447,7 @@ ISStorage::readBstr(Bstr* _bp, unsigned* _next)
fseek(treefp, j, SEEK_CUR);
this->ReadAlign(_next);
_bp->setStr(s);
return true;
}
@ -675,3 +686,4 @@ ISStorage::print(string s)
fputs("\n", Util::debug_kvstore);
#endif
}

View File

@ -43,6 +43,8 @@ private:
//Because the bstr' size is controlled, so is the node.
unsigned long long freemem; //free memory to use, non-negative
//unsigned long long time; //QUERY(achieving an old-swap startegy?)
//QUERY: should this be long long? (otherwise will be different in 32-bit and 64-bit machine)
long Address(unsigned _blocknum) const;
unsigned Blocknum(long address) const;
unsigned AllocBlock();
@ -70,3 +72,4 @@ public:
};
#endif

702
KVstore/IVTree/IVTree.cpp Normal file
View File

@ -0,0 +1,702 @@
/*=============================================================================
# Filename: IVTree.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:45
# Description: achieve functions in IVTree.h
=============================================================================*/
#include "IVTree.h"
using namespace std;
//Default constructor: produces an empty tree that is attached to nothing.
//No storage manager, value list or stream is created; every pointer is NULL.
IVTree::IVTree()
{
	this->root = NULL;
	this->leaves_head = NULL;
	this->leaves_tail = NULL;
	this->height = 0;
	this->mode = "";
	this->storepath = "";
	this->filename = "";
	this->TSM = NULL;
	this->value_list = NULL;
	this->stream = NULL;
	this->request = 0;
}
//Build a tree bound to the file _storepath/_filename.
//_mode: "open" loads the existing tree skeleton through preRead();
//       any other mode starts with an empty tree.
//_buffer_size: forwarded to the storage manager.
//A VList is created on "<filepath>_vlist" and handed to the storage manager.
IVTree::IVTree(string _storepath, string _filename, string _mode, unsigned long long _buffer_size)
{
	storepath = _storepath;
	filename = _filename;
	this->height = 0;
	this->mode = string(_mode);
	//start from a fully defined empty state: previously leaves_head and
	//leaves_tail were left uninitialized when the mode was not "open"
	this->root = NULL;
	this->leaves_head = NULL;
	this->leaves_tail = NULL;
	string filepath = this->getFilePath();
	string vlist_file = filepath + "_vlist";
	this->value_list = new VList(vlist_file, this->mode, 1<<30);
	TSM = new IVStorage(filepath, this->mode, &this->height, _buffer_size, this->value_list);
	if (this->mode == "open")
		this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
	this->stream = NULL;
	this->request = 0;
}
//Return "<storepath>/<filename>", the path of the tree file.
string
IVTree::getFilePath()
{
	string path = storepath;
	path += "/";
	path += filename;
	return path;
}
//void //WARN: not check _str and _len
//IVTree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
//{
//if (_index > 2)
//return;
//[>
//if(_str == NULL || _len == 0)
//{
//printf("error in CopyToTransfer: empty string\n");
//return;
//}
//*/
////unsigned length = _bstr->getLen();
//unsigned length = _len;
//if (length + 1 > this->transfer_size[_index])
//{
//transfer[_index].release();
//transfer[_index].setStr((char*)malloc(length + 1));
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
//}
//memcpy(this->transfer[_index].getStr(), _str, length);
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
//this->transfer[_index].setLen(length);
//}
//Current height of the tree (0 means the tree is empty).
unsigned
IVTree::getHeight() const
{
	const unsigned h = this->height;
	return h;
}
void
IVTree::setHeight(unsigned _h)
{
this->height = _h;
}
//Root node of the tree, or NULL when the tree is empty.
IVNode*
IVTree::getRoot() const
{
	IVNode* top = this->root;
	return top;
}
//Make sure _np is loaded: if it is not in memory, ask the storage manager
//to read it. readNode receives &request so the cost can be summed there.
void
IVTree::prepare(IVNode* _np)
{
	if (_np->inMem())
		return;
	this->TSM->readNode(_np, &request); //readNode deal with request
}
//Look up _key.
//On success _str/_len receive the value (read from the value list when it is
//a long list) and true is returned; on failure they are left untouched and
//false is returned.
bool
IVTree::search(unsigned _key, char*& _str, unsigned& _len)
{
	this->request = 0;
	int store;
	IVNode* ret = this->find(_key, &store, false);
	if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
	{
		//find() may have loaded nodes even though the key is absent; report
		//that memory instead of silently dropping it
		//(request stays 0 when nothing was read, e.g. for an empty tree)
		if (this->request != 0)
			this->TSM->request(this->request);
		return false;
	}

	ret->getValue(this->value_list, store, _str, _len);

	this->TSM->request(request);
	return true;
}
//Insert (_key, value) where the value is the buffer _str of _len bytes.
//Returns true when the key was inserted, false when it already existed
//(the existing value is kept). Full nodes met on the way down are split
//before descending, so the tree shape may change even when false is returned.
//Memory consumed is summed in this->request and sent to TSM once at the end.
bool
IVTree::insert(unsigned _key, char* _str, unsigned _len)
{
//if (_key < 0)
//{
//printf("error in IVTree-insert: empty string\n");
//return false;
//}
//this->CopyToTransfer(_str, _len, 2);
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
IVNode* ret;
if (this->root == NULL) //tree is empty
{
//the first leaf is root, head and tail of the leaf list at the same time
leaves_tail = leaves_head = root = new IVLeafNode;
request += IVNode::LEAF_SIZE;
this->height = 1;
root->setHeight(1); //add to heap later
}
//this->prepare(this->root); //root must be in-mem
//a full root is split under a fresh internal node, which becomes the new root
if (root->getNum() == IVNode::MAX_KEY_NUM)
{
IVNode* father = new IVIntlNode;
request += IVNode::INTL_SIZE;
father->addChild(root, 0);
ret = root->split(father, 0);
//ret is the new right sibling; if it is the last leaf it becomes the tail
if (ret->isLeaf() && ret->getNext() == NULL)
this->leaves_tail = ret;
if (ret->isLeaf())
request += IVNode::LEAF_SIZE;
else
request += IVNode::INTL_SIZE;
this->height++; //height rises only when root splits
//WARN: height area in Node: 4 bit!
father->setHeight(this->height); //add to heap later
this->TSM->updateHeap(ret, ret->getRank(), false);
this->root = father;
}
IVNode* p = this->root;
IVNode* q;
int i;
//descend to the target leaf, splitting every full child before entering it
while (!p->isLeaf())
{
//j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
//NOTICE: using binary search is better here
i = p->searchKey_less(_key);
q = p->getChild(i);
this->prepare(q);
if (q->getNum() == IVNode::MAX_KEY_NUM)
{
ret = q->split(p, i);
if (ret->isLeaf() && ret->getNext() == NULL)
this->leaves_tail = ret;
if (ret->isLeaf())
request += IVNode::LEAF_SIZE;
else
request += IVNode::INTL_SIZE;
//BETTER: in loop may update multiple times
this->TSM->updateHeap(ret, ret->getRank(), false);
this->TSM->updateHeap(q, q->getRank(), true);
this->TSM->updateHeap(p, p->getRank(), true);
//the separator pushed up by the split now sits at p's position i:
//smaller keys go to the left half (q), the others to the right half (ret)
if (_key < p->getKey(i))
p = q;
else
p = ret;
}
else
{
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
p = q;
}
}
//j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
//p is now the leaf; i is the position given by searchKey_less
i = p->searchKey_less(_key);
//insert existing key is ok, but not inserted in
//however, the tree-shape may change due to possible split in former code
bool ifexist = false;
//the key already exists if the entry just before position i equals _key
if (i > 0 && _key == p->getKey(i - 1))
ifexist = true;
else
{
p->addKey(_key, i);
p->addValue(this->value_list, i, _str, _len, true);
p->addNum();
//NOTICE: if this is a vlist, then it will be freed, and should not be included in the request memory
if(!VList::isLongList(_len))
{
request += _len;
}
//request += val->getLen();
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
//_key->clear();
//_value->clear();
}
this->TSM->request(request);
return !ifexist; //false only when the key was already present
}
bool
IVTree::modify(unsigned _key, char* _str, unsigned _len)
{
//if (_key < 0)
//{
//printf("error in IVTree-modify: empty string\n");
//return false;
//}
//this->CopyToTransfer(_str, _len, 2); //not check value
//const Bstr* val = &(this->transfer[2]);
this->request = 0;
int store;
IVNode* ret = this->find(_key, &store, true);
if (ret == NULL || store == -1 || _key != ret->getKey(store)) //tree is empty or not found
{
cerr << "tree is empty or not found" << endl;
return false;
}
//cout<<"IVTree::modify() - key is found, now to remove"<<endl;
//NOTICE+DEBUG: if this value is a long list, then it is not saved in memory, here should return 0 in Bstr
unsigned len = ret->getValue(store)->getLen();
if(ret->getValue(store)->isBstrLongList())
{
len = 0;
}
ret->setValue(this->value_list, store, _str, _len, true);
//ret->setValue(val, store, true);
//cout<<"value reset"<<endl;
//cout<<"newlen: "<<val->getLen()<<" oldlen: "<<len<<endl;
//request += (val->getLen() - len);
if(!VList::isLongList(_len))
{
this->request += _len;
}
//this->request = val->getLen();
this->request -= len;
ret->setDirty();
//cout<<"to request"<<endl;
this->TSM->request(request);
//cout<<"memory requested"<<endl;
return true;
}
//Shared descent used by search, modify and range queries.
//Returns the leaf that should hold _key (NULL for an empty tree, in which
//case *_store is not written). *_store receives the position reported by
//searchKey_lessEqual, or -1 when that position equals the leaf's key count.
//With ifmodify set, every internal node on the path is marked dirty.
IVNode*
IVTree::find(unsigned _key, int* _store, bool ifmodify)
{
	if (this->root == NULL)
		return NULL; //IVTree Is Empty

	IVNode* cur = root;
	for (; !cur->isLeaf(); )
	{
		if (ifmodify)
			cur->setDirty();
		int child_pos = cur->searchKey_less(_key);
		cur = cur->getChild(child_pos);
		this->prepare(cur);
	}

	int key_num = cur->getNum();
	int pos = cur->searchKey_lessEqual(_key);
	if (pos != key_num)
		*_store = pos;
	else
		*_store = -1; //Not Found
	return cur;
}
/*
Node*
IVTree::find(unsigned _len, const char* _str, int* store) const
{
}
*/
//Remove _key from the tree.
//Returns true when the key was found and removed, false when the tree is
//empty or the key does not exist. On the way down, every child that has
//fewer than MIN_CHILD_NUM keys is first refilled through coalesce(), so the
//tree shape may change even when the key is absent.
bool
IVTree::remove(unsigned _key)
{
//if (_key < 0)
//{
//printf("error in IVTree-remove: empty string\n");
//return false;
//}
this->request = 0;
IVNode* ret;
if (this->root == NULL) //tree is empty
return false;
IVNode* p = this->root;
IVNode* q;
int i, j;
while (!p->isLeaf())
{
j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(_key);
q = p->getChild(i);
this->prepare(q);
if (q->getNum() < IVNode::MIN_CHILD_NUM) //==MIN_KEY_NUM
{
//coalesce() reads the neighbours of q, so load them first
if (i > 0)
this->prepare(p->getChild(i - 1));
if (i < j)
this->prepare(p->getChild(i + 1));
//ret is non-NULL when a neighbour was merged into q and became empty
ret = q->coalesce(p, i);
if (ret != NULL)
this->TSM->updateHeap(ret, 0, true);//non-sense node
this->TSM->updateHeap(q, q->getRank(), true);
if (q->isLeaf())
{
//a merge may have made q the first or last leaf
if (q->getPrev() == NULL)
this->leaves_head = q;
if (q->getNext() == NULL)
this->leaves_tail = q;
}
if (p->getNum() == 0) //root shrinks
{
//this->leaves_head = q;
this->root = q;
this->TSM->updateHeap(p, 0, true); //instead of delete p
this->height--;
}
}
else
p->setDirty();
//NOTE(review): this call is outside the if/else above, so it also runs
//right after the "root shrinks" branch gave p rank 0 - confirm intended
this->TSM->updateHeap(p, p->getRank(), true);
p = q;
}
bool flag = false;
//j = p->getNum(); //LeafNode(maybe root)
//for(i = 0; i < j; ++i)
// if(bstr == *(p->getKey(i)))
// {
// request -= p->getKey(i)->getLen();
// request -= p->getValue(i)->getLen();
// p->subKey(i, true); //to release
// p->subValue(i, true); //to release
// p->subNum();
// if(p->getNum() == 0) //root leaf 0 key
// {
// this->root = NULL;
// this->leaves_head = NULL;
// this->leaves_tail = NULL;
// this->height = 0;
// this->TSM->updateHeap(p, 0, true); //instead of delete p
// }
// p->setDirty();
// flag = true;
// break;
// }
i = p->searchKey_equal(_key);
//WARN+NOTICE:here must check, because the key to remove maybe not exist
//(the check below treats a result equal to the key count as "not found")
if (i != (int)p->getNum())
{
//only values kept in memory are given back; long lists are not counted
if(!p->getValue(i)->isBstrLongList())
{
request -= p->getValue(i)->getLen();
}
p->subKey(i); //to release
p->subValue(this->value_list, i, true); //to release
p->subNum();
if (p->getNum() == 0) //root leaf 0 key
{
//the last key is gone: the tree becomes empty
this->root = NULL;
this->leaves_head = NULL;
this->leaves_tail = NULL;
this->height = 0;
this->TSM->updateHeap(p, 0, true); //instead of delete p
}
p->setDirty();
flag = true;
}
this->TSM->request(request);
return flag; //i == j, not found
}
//Fetch the next record of the last range query.
//Returns NULL (after a message on stderr) when no query result exists or the
//stream has been read to the end. The returned record belongs to the stream:
//the caller must not free it.
const Bstr*
IVTree::getRangeValue()
{
	Stream* results = this->stream;
	if (results == NULL)
	{
		fprintf(stderr, "IVTree::getRangeValue(): no results now!\n");
		return NULL;
	}
	if (results->isEnd())
	{
		fprintf(stderr, "IVTree::getRangeValue(): read till end now!\n");
		return NULL;
	}
	//NOTICE: Bstr[] with a single element, used as Bstr*
	return results->read();
}
//Call setEnd() on the current result stream; complain on stderr when no
//range query has produced a stream yet.
void
IVTree::resetStream()
{
	if (this->stream != NULL)
	{
		this->stream->setEnd();
		return;
	}
	fprintf(stderr, "no results now!\n");
}
//TODO: change to using value list, getValue() maybe not get real long list
//Collect the values of all keys x with _key1 <= x < _key2 into this->stream
//(read them back with getRangeValue()).
//Returns false when the tree is empty or no leaf position matches the range,
//true otherwise (the stream may then still hold zero records).
//NOTE(review): both keys are unsigned, so the ">= 0" tests below are always
//true and the one-edge branches (leaves_head / leaves_tail) are unreachable;
//they are kept until a sentinel for "open edge" is agreed on.
bool //special case: not exist, one-edge-case
IVTree::range_query(unsigned _key1, unsigned _key2)
{
	int store1, store2;
	IVNode *p1, *p2;
	if (_key1 >= 0)
	{
		request = 0;
		p1 = this->find(_key1, &store1, false);
		if (p1 == NULL || store1 == -1)
		{
			//report what find() loaded before giving up
			if (request != 0)
				this->TSM->request(request);
			return false; //no element
		}
		this->TSM->request(request);
	}
	else
	{
		p1 = this->leaves_head;
		store1 = 0;
	}
	if (_key2 >= 0)
	{ //QUERY: another strategy is to getnext and compare every time to tell end
		request = 0;
		p2 = this->find(_key2, &store2, false);
		if (p2 == NULL)
			return false;
		else if (store2 == -1)
			store2 = p2->getNum();
		else if (store2 == 0)
		{
			//the range ends before the first key of this leaf: step back
			p2 = p2->getPrev();
			if (p2 == NULL)
			{
				if (request != 0)
					this->TSM->request(request);
				return false; //no element
			}
			store2 = p2->getNum();
		}
		this->TSM->request(request);
	}
	else
	{
		p2 = this->leaves_tail;
		store2 = p2->getNum();
	}

	IVNode* p = p1;
	unsigned i, l, r;
	//get the num of answers first, not need to prepare the node
	unsigned ansNum = 0;
	while (true)
	{
		if (p == p1)
			l = store1;
		else
			l = 0;
		if (p == p2)
			r = store2;
		else
			r = p->getNum();
		//an empty or inverted slice contributes nothing (r - l would wrap)
		if (r > l)
			ansNum += (r - l);
		if (p == p2)
			break;
		p = p->getNext();
		//p2 lies before p1 (e.g. after the getPrev() step above, or an
		//inverted range): the leaf list ends without reaching p2, so there
		//is nothing to return instead of dereferencing NULL
		if (p == NULL)
			return false;
	}

	if (this->stream != NULL)
	{
		delete this->stream;
		this->stream = NULL;
	}
	vector<unsigned> keys;
	vector<bool> desc;
	this->stream = new Stream(keys, desc, ansNum, 1, false);

	p = p1;
	while (p != NULL)
	{
		request = 0;
		this->prepare(p);
		if (p == p1)
			l = store1;
		else
			l = 0;
		if (p == p2)
			r = store2;
		else
			r = p->getNum();
		for (i = l; i < r; ++i)
		{
			//NOTICE:Bstr* in an array, used as Bstr[]
			//DEBUG+TODO: if long list?? clean
			this->stream->write(p->getValue(i));
		}
		this->TSM->request(request);
		if (p == p2)
			break;
		p = p->getNext();
	}
	this->stream->setEnd();
	return true;
}
//Write the whole tree to disk through the storage manager.
//Returns true when writeTree() reports success.
bool
IVTree::save()
{
#ifdef DEBUG_KVSTORE
	printf("now to save tree!\n");
#endif
	return TSM->writeTree(this->root) ? true : false;
}
//Recursively delete the subtree rooted at _np (NULL is ignored).
void
IVTree::release(IVNode* _np) const
{
	if (_np == NULL)
		return;
	if (!_np->isLeaf())
	{
		//a node with n keys has children at indices 0..n; visit n down to 0
		for (int idx = _np->getNum(); idx >= 0; --idx)
			release(_np->getChild(idx));
	}
	delete _np;
}
//Destructor: frees the value list, the range-query stream and the storage
//manager, then recursively deletes every node reachable from root.
//NOTE(review): value_list is deleted before TSM although TSM was constructed
//with a pointer to it - confirm IVStorage's destructor never uses it.
IVTree::~IVTree()
{
delete this->value_list;
delete this->stream; //maybe NULL
delete TSM;
#ifdef DEBUG_KVSTORE
printf("already empty the buffer, now to delete all nodes in tree!\n");
#endif
//recursively delete each Node
release(root);
}
//Debug dump, compiled only when DEBUG_KVSTORE is defined.
//s selects what is written to Util::debug_kvstore after the header:
//  "tree"/"TREE"     every node, children before their parent
//  "leaves"/"LEAVES" every leaf, following the leaf list
//  "check tree"      reserved (not implemented)
//The lower-case form prints nodes with "node", the upper-case one with "NODE".
void
IVTree::print(string s)
{
#ifdef DEBUG_KVSTORE
	fputs(Util::showtime().c_str(), Util::debug_kvstore);
	fputs("Class IVTree\n", Util::debug_kvstore);
	fputs("Message: ", Util::debug_kvstore);
	fputs(s.c_str(), Util::debug_kvstore);
	fputs("\n", Util::debug_kvstore);
	//height is unsigned, so it has to be printed with %u (was %d)
	fprintf(Util::debug_kvstore, "Height: %u\n", this->height);
	if (s == "tree" || s == "TREE")
	{
		if (this->root == NULL)
		{
			fputs("Null IVTree\n", Util::debug_kvstore);
			return;
		}
		//explicit stack, one slot per level: ns holds the node, ni the index
		//of the next child of that node still to be visited
		IVNode** ns = new IVNode*[this->height];
		int* ni = new int[this->height];
		IVNode* np;
		int i, pos = 0;
		ns[pos] = this->root;
		ni[pos] = this->root->getNum();
		pos++;
		while (pos > 0)
		{
			np = ns[pos - 1];
			i = ni[pos - 1];
			this->prepare(np);
			if (np->isLeaf() || i < 0) //LeafNode or ready IntlNode
			{ //child-num ranges: 0~num
				if (s == "tree")
					np->print("node");
				else
					np->print("NODE"); //print full node-information
				pos--;
				continue;
			}
			else
			{
				ns[pos] = np->getChild(i);
				ni[pos - 1]--;
				ni[pos] = ns[pos]->getNum();
				pos++;
			}
		}
		delete[] ns;
		delete[] ni;
	}
	else if (s == "LEAVES" || s == "leaves")
	{
		IVNode* np;
		for (np = this->leaves_head; np != NULL; np = np->getNext())
		{
			this->prepare(np);
			if (s == "leaves")
				np->print("node");
			else
				np->print("NODE");
		}
	}
	else if (s == "check tree")
	{
		//check the tree, if satisfy B+ definition
		//TODO
	}
#endif
}

98
KVstore/IVTree/IVTree.h Normal file
View File

@ -0,0 +1,98 @@
/*=============================================================================
# Filename: IVTree.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:44
# Description: ID2valueList, including s2po, p2so and o2ps
=============================================================================*/
#ifndef _KVSTORE_IVTREE_IVTREE_H
#define _KVSTORE_IVTREE_IVTREE_H
#include "../../Util/Util.h"
#include "../../Util/Stream.h"
#include "../../Util/VList.h"
#include "node/IVNode.h"
#include "node/IVIntlNode.h"
#include "node/IVLeafNode.h"
#include "storage/IVStorage.h"
//TODO: for a long list, do not read it in eagerly, only when it is needed
//the memory is kept with the node and updated with the node
//NOTICE: when releasing a node, its value list may be NULL
//value bstr: unsigned=address, NULL
//BETTER?: build a new block store for long lists??
//NOTICE: we do not need to use a transfer bstr here, in neither direction:
//when inserting/querying, we do not release the value in kvstore
//B+ tree mapping an unsigned key to a byte string value.
//Nodes are loaded on demand through IVStorage (TSM); values for which
//VList::isLongList() holds are stored through the separate VList.
//NOTE(review): the class owns raw pointers and declares a destructor but no
//copy constructor / assignment operator - copying an IVTree looks unsafe.
class IVTree
{
protected:
unsigned height; //0 indicates an empty tree
IVNode* root;
IVNode* leaves_head; //the head of LeafNode-list
IVNode* leaves_tail; //the tail of LeafNode-list
std::string mode; //BETTER(to use enum)
IVStorage* TSM; //Tree-Storage-Manage
//result stream of the last range_query(), read through getRangeValue()
//BETTER:multiple stream maybe needed:)
Stream* stream;
//always alloc one more byte than length, then user can add a '\0'
//to get a real string, instead of new and copy
//other operations will be harmful to search, so store value in
//transfer temporally, while length adjusted.
//TODO: in multi-user case, multiple-search will cause problem,
//so lock is a must. Add lock to transfer is better than to add
//lock to every key/value. However, modify requires a lock for a
//key/value, and multiple search for different keys are ok!!!
//Bstr transfer;
//unsigned transfer_size;
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
//unsigned transfer_size[3];
//tree's operations should be atom(if read nodes)
//sum the request and send to IVStorage at last
//ensure that all nodes operated are in memory
long long request;
//load _np through TSM if it is not in memory
void prepare(IVNode* _np);
std::string storepath;
std::string filename; //ok for user to change
/* some private functions */
std::string getFilePath(); //in UNIX system
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
//void CopyToTransfer(const char* _str, unsigned _len);
//recursively delete the subtree rooted at _np
void release(IVNode* _np) const;
//very long value list are stored in a separate file(with large block)
//
//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
//<10%: 5000000~100M bytes
VList* value_list;
public:
IVTree(); //empty, unattached tree: every pointer member is NULL
//_mode "open" loads an existing tree file, any other mode starts empty
IVTree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
unsigned getHeight() const;
void setHeight(unsigned _h);
IVNode* getRoot() const;
//void setRoot(Node* _root);
//insert, search, remove, set
//search: true and _str/_len filled when _key exists, false otherwise
bool search(unsigned _key, char*& _str, unsigned& _len);
//insert: false when _key already exists (the stored value is kept)
bool insert(unsigned _key, char* _str, unsigned _len);
//modify: false when _key does not exist
bool modify(unsigned _key, char* _str, unsigned _len);
//find: leaf that should hold _key (NULL if empty); *store is the position or -1
IVNode* find(unsigned _key, int* store, bool ifmodify);
//remove: false when the tree is empty or _key does not exist
bool remove(unsigned _key);
//next record of the last range query, NULL when none is left
const Bstr* getRangeValue();
void resetStream();
//collect the values of keys in [_key1, _key2) into the stream
bool range_query(unsigned _key1, unsigned _key2);
//write the whole tree to disk
bool save();
~IVTree();
void print(std::string s); //DEBUG(print the tree)
};
//NOTICE: need to save tree manually before delete, otherwise will cause problem.
//(problem range between two extremes: not-modified, totally-modified)
//After saved, it's ok to continue operations on tree!
#endif

View File

@ -0,0 +1,186 @@
/*=============================================================================
# Filename: IVHeap.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:37
# Description: achieve functions in IVHeap.h
=============================================================================*/
#include "IVHeap.h"
using namespace std;
//Default constructor: an empty heap without any storage.
IVHeap::IVHeap()
{
	this->heap = NULL;
	this->size = 0;
	this->length = 0;
}
//Create an empty heap with room for _size node pointers.
//Logs through print() and exits the process if the array pointer is NULL.
//NOTE(review): a plain new[] reports failure by throwing, so the NULL test
//below looks unreachable - confirm before relying on it.
IVHeap::IVHeap(unsigned _size)
{
	this->size = _size;
	this->length = 0;
	this->heap = new IVNode*[this->size];
	if (this->heap != NULL)
		return;
	this->print("error in IVHeap: Allocation fail!");
	exit(1);
}
//Return the node at the top of the heap (index 0), or NULL when it is empty.
IVNode*
IVHeap::getTop() const
{
	if (this->length == 0)
		return NULL;
	return this->heap[0];
}
//Number of nodes currently stored in the heap.
unsigned
IVHeap::getLen() const
{
	const unsigned used = this->length;
	return used;
}
//Capacity of the underlying array, in node pointers.
unsigned
IVHeap::getSize() const
{
	const unsigned capacity = this->size;
	return capacity;
}
//True when the heap holds no node.
bool
IVHeap::isEmpty() const
{
	return !(this->length != 0);
}
//Insert _np and restore the min-heap order on getRank() by sifting it up.
//The array is doubled when full (a heap of capacity 0 grows to 1).
//Always returns true; if the allocation fails, operator new[] throws.
bool
IVHeap::insert(IVNode* _np)
{
	if (this->length == this->size) //when full, reallocate
	{
		//the array is created with new[] and freed with delete[], so it must
		//not go through realloc(); grow it with new[] and copy instead
		unsigned new_size = (this->size == 0) ? 1 : 2 * this->size;
		IVNode** grown = new IVNode*[new_size];
		for (unsigned k = 0; k < this->length; ++k)
			grown[k] = this->heap[k];
		delete[] this->heap; //deleting NULL is harmless
		this->heap = grown;
		this->size = new_size;
	}
	//sift up: move parents with a larger rank down into the hole
	unsigned i = this->length, j;
	while (i != 0)
	{
		j = (i - 1) / 2;
		if (_np->getRank() >= this->heap[j]->getRank())
			break;
		heap[i] = heap[j];
		i = j;
	}
	this->heap[i] = _np;
	this->length++;
	return true;
}
bool
IVHeap::remove()
{
if (this->length == 0)
{
print("error in remove: remove from empty heap!");
return false;
}
//Node* tp = this->heap[0];
this->length--;
if (this->length == 0)
return true;
IVNode* xp = this->heap[this->length];
unsigned i = 0, j = 1;
while (j < this->length)
{
if (j < this->length - 1 && this->heap[j]->getRank() > this->heap[j + 1]->getRank())
j++;
if (xp->getRank() <= this->heap[j]->getRank())
break;
this->heap[i] = this->heap[j];
i = j;
j = 2 * i + 1;
}
this->heap[i] = xp;
return true;
}
//Re-position _np after its rank changed.
//_flag == true : the rank may have decreased, move the node up;
//_flag == false: the rank may have increased, move the node down.
//Returns false when _np is not stored in the heap, true otherwise.
//The node is located with a linear scan.
bool
IVHeap::modify(IVNode* _np, bool _flag) //control direction
{
	//search and adjust
	unsigned i, j;
	for (i = 0; i < this->length; ++i)
		if (this->heap[i] == _np)
			break;
	//not in the heap: adjusting from index 'length' would write past the
	//valid elements and lose a node
	if (i == this->length)
		return false;
	if (_flag == true) //move up
	{
		while (i != 0)
		{
			j = (i - 1) / 2;
			if (_np->getRank() < heap[j]->getRank())
			{
				heap[i] = heap[j];
				heap[j] = _np;
				i = j;
			}
			else
				break;
		}
	}
	else //move down
	{
		j = 2 * i + 1;
		while (j < this->length)
		{
			//choose the child with the smaller rank
			if (j < this->length - 1 && heap[j]->getRank() > heap[j + 1]->getRank())
				j++;
			if (heap[j]->getRank() < _np->getRank())
			{
				heap[i] = heap[j];
				heap[j] = _np;
				i = j;
				//continue with the children of the new position; without
				//this the loop kept comparing inside the same level
				j = 2 * i + 1;
			}
			else
				break;
		}
	}
	return true;
}
//Free the pointer array; the nodes it referenced are not deleted here.
IVHeap::~IVHeap()
{
	delete[] this->heap;
	this->heap = NULL;
	this->size = 0;
	this->length = 0;
}
//Debug hook that receives a message s.
//The body is empty whether or not DEBUG_KVSTORE is defined, so the call
//currently does nothing.
void
IVHeap::print(string s)
{
#ifdef DEBUG_KVSTORE
#endif
}

View File

@ -0,0 +1,41 @@
/*=============================================================================
# Filename: IVHeap.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:37
# Description: set and deal of IVNode*s in memory
=============================================================================*/
#ifndef _KVSTORE_IVTREE_HEAP_IVHEAP_H
#define _KVSTORE_IVTREE_HEAP_IVHEAP_H
#include "../../../Util/Util.h"
#include "../node/IVNode.h"
/* add, sub, modify: all can be done within O(log n) using the adjust function */
//QUERY: when modified, finding the right position costs O(n). How about keeping only the smallest?
//(add O(1), sub O(2n), modify O(n))
//TODO: to solve this problem, use another hash: (pointer, pos), to find the right position of
//a given p in O(lg n) time
//Binary min-heap of IVNode pointers ordered by IVNode::getRank().
//The heap stores pointers only; its destructor does not delete the nodes.
class IVHeap
{
private:
IVNode** heap; //dynamic array
unsigned length; //valid elements num
unsigned size; //max-size of heap
public:
IVHeap(); //empty heap without storage
IVHeap(unsigned _size); //empty heap with room for _size pointers
IVNode* getTop() const; //return the top element
unsigned getLen() const; //number of stored nodes
unsigned getSize() const; //current capacity
bool isEmpty() const;
bool insert(IVNode* _np); //insert and adjust
bool remove(); //remove top and adjust
bool modify(IVNode* _np, bool _flag); //search modified element and adjust (_flag: true=move up, false=move down)
~IVHeap();
void print(std::string s); //DEBUG
};
#endif

View File

@ -0,0 +1,293 @@
/*=============================================================================
# Filename: IVIntlNode.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: achieve functions in IVIntlNode.h
=============================================================================*/
#include "IVIntlNode.h"
using namespace std;
/*
void
IVIntlNode::AllocChilds()
{
childs = (Node**)malloc(sizeof(Node*) * MAX_CHILD_NUM);
}
*/
//Default constructor: every child slot starts as NULL.
IVIntlNode::IVIntlNode()
{
	//childs has MAX_CHILD_NUM + 1 slots; clear the whole array, not just the
	//first MAX_CHILD_NUM entries
	memset(childs, 0, sizeof(childs));
}
//Constructor taking a "virtual" flag: every child slot starts as NULL.
//NOTE(review): isVirtual is not used here and no base constructor is named,
//so the base class's default constructor runs - confirm this is intended.
IVIntlNode::IVIntlNode(bool isVirtual)
{
	//childs has MAX_CHILD_NUM + 1 slots; clear the whole array, not just the
	//first MAX_CHILD_NUM entries
	memset(childs, 0, sizeof(childs));
}
/*
IVIntlNode::IntlNode(Storage* TSM) //QUERY
{
TSM->readNode(this, Storage::OVER);
}
*/
void
IVIntlNode::Virtual()
{
//this->FreeKeys();
this->release();
this->delMem();
}
void
IVIntlNode::Normal()
{
this->AllocKeys();
this->setMem();
}
//Return the child at _index, or NULL when _index is outside 0..num
//(a node with num keys has num+1 children).
IVNode*
IVIntlNode::getChild(int _index) const
{
	const int key_num = this->getNum();
	if (_index >= 0 && _index <= key_num)
		return childs[_index];
	return NULL;
}
//Overwrite the child pointer at _index (valid range 0..num).
//Returns false after an error message when the index is invalid.
bool
IVIntlNode::setChild(IVNode* _child, int _index)
{
	const int key_num = this->getNum();
	const bool in_range = (_index >= 0) && (_index <= key_num);
	if (!in_range)
	{
		print(string("error in setChild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	childs[_index] = _child;
	return true;
}
//Insert _child at _index (valid range 0..num+1), shifting the children at
//_index..num one slot to the right. The key count is not changed here.
//Returns false after an error message when the index is invalid.
bool
IVIntlNode::addChild(IVNode* _child, int _index)
{
	const int key_num = this->getNum();
	const bool in_range = (_index >= 0) && (_index <= key_num + 1);
	if (!in_range)
	{
		print(string("error in addChild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	//open a gap at _index, walking from the last child backwards
	int src = key_num;
	while (src >= _index)
	{
		childs[src + 1] = childs[src];
		--src;
	}
	childs[_index] = _child;
	return true;
}
//Drop the child at _index (valid range 0..num) by shifting the following
//children one slot to the left. The key count is not changed here.
//Returns false after an error message when the index is invalid.
bool
IVIntlNode::subChild(int _index)
{
	const int key_num = this->getNum();
	const bool in_range = (_index >= 0) && (_index <= key_num);
	if (!in_range)
	{
		print(string("error in subchild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	int dst = _index;
	while (dst < key_num)
	{
		childs[dst] = childs[dst + 1];
		++dst;
	}
	return true;
}
//Memory owned by an internal node: always the constant INTL_SIZE.
unsigned
IVIntlNode::getSize() const
{
	const unsigned owned = INTL_SIZE;
	return owned;
}
//Split this node under _father, where this node is _father's child _index.
//The keys/children from position MIN_CHILD_NUM on move to a new right
//sibling, the key at MIN_KEY_NUM is pushed up into _father at _index, and
//this node keeps the first MIN_KEY_NUM keys.
//Returns the new right sibling; all three nodes are marked dirty.
IVNode*
IVIntlNode::split(IVNode* _father, int _index)
{
	int num = this->getNum();
	IVNode* p = new IVIntlNode; //right child
	p->setHeight(this->getHeight());
	int i, k;
	for (i = MIN_CHILD_NUM, k = 0; i < num; ++i, ++k)
	{
		p->addKey(this->keys[i], k);
		p->addChild(this->childs[i], k);
		p->addNum();
	}
	//one more child than keys
	p->addChild(this->childs[i], k);
	//keys are unsigned: keep the promoted key in an unsigned as well instead
	//of round-tripping it through int
	unsigned tp = this->keys[MIN_KEY_NUM];
	this->setNum(MIN_KEY_NUM);
	_father->addKey(tp, _index);
	_father->addChild(p, _index + 1); //DEBUG(check the index)
	_father->addNum();
	_father->setDirty();
	p->setDirty();
	this->setDirty();
	return p;
}
//Refill this under-full node, which is child _index of _father, from a
//neighbour. The case is chosen as follows:
//  1: right neighbour has no spare key  -> merge the right neighbour into this
//  2: right neighbour has spare keys    -> borrow its first key/child
//  3: only a left neighbour, no spare   -> merge the left neighbour into this
//  4: left neighbour has spare keys     -> borrow its last key/child
//     (used when the right neighbour is missing or has no spare key)
//Returns the emptied neighbour for the merge cases 1 and 3, NULL otherwise.
IVNode*
IVIntlNode::coalesce(IVNode* _father, int _index)
{
	int i, j = _father->getNum(), k = 0; //BETTER: unsigned?
	//stays NULL only when this node has no neighbour at all (invalid case)
	IVNode* p = NULL;
	int ccase = 0;
	if (_index < j) //the right neighbor
	{
		p = _father->getChild(_index + 1);
		k = p->getNum();
		if ((unsigned)k > MIN_KEY_NUM)
			ccase = 2;
		else //==MIN_KEY_NUM
			ccase = 1;
	}
	if (_index > 0) //the left neighbor
	{
		IVNode* tp = _father->getChild(_index - 1);
		unsigned tk = tp->getNum();
		if (ccase < 2)
		{
			if (ccase == 0)
				ccase = 3;
			if (tk > MIN_KEY_NUM)
				ccase = 4;
		}
		if (ccase > 2)
		{
			p = tp;
			k = tk;
		}
	}

	unsigned tmp = 0;
	switch (ccase)
	{
	case 1: //union right to this
		//pull the separator down, then append all keys/children of p
		this->addKey(_father->getKey(_index), this->getNum());
		this->addNum();
		for (i = 0; i < k; ++i)
		{
			this->addKey(p->getKey(i), this->getNum());
			this->addChild(p->getChild(i), this->getNum());
			this->addNum();
		}
		this->setChild(p->getChild(i), this->getNum());
		_father->subKey(_index);
		_father->subChild(_index + 1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 2: //move one from right
		this->addKey(_father->getKey(_index), this->getNum());
		_father->setKey(p->getKey(0), _index);
		p->subKey(0);
		this->addChild(p->getChild(0), this->getNum() + 1);
		p->subChild(0);
		this->addNum();
		p->subNum();
		break;
	case 3: //union left to this
		//pull the separator down, then prepend all keys/children of p
		this->addKey(_father->getKey(_index - 1), 0);
		this->addNum();
		for (i = k; i > 0; --i)
		{
			int t = i - 1;
			this->addKey(p->getKey(t), 0);
			this->addChild(p->getChild(i), 0);
			this->addNum();
		}
		this->addChild(p->getChild(0), 0);
		_father->subKey(_index - 1);
		_father->subChild(_index - 1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 4: //move one from left
		tmp = p->getKey(k - 1);
		p->subKey(k - 1);
		this->addKey(_father->getKey(_index - 1), 0);
		_father->setKey(tmp, _index - 1);
		this->addChild(p->getChild(k), 0);
		p->subChild(k);
		this->addNum();
		p->subNum();
		break;
	default:
		print("error in coalesce: Invalid case!");
	}
	_father->setDirty();
	//in the invalid case there is no neighbour: do not touch an unset pointer
	if (p != NULL)
		p->setDirty();
	this->setDirty();
	if (ccase == 1 || ccase == 3)
		return p;
	else
		return NULL;
}
//Free the key array, but only while the node is marked as in memory.
void
IVIntlNode::release()
{
	if (this->inMem())
		delete[] keys; //this will release all!!!
}
//Destructor: frees the keys through release(); the childs array is a plain
//member and needs no explicit free.
IVIntlNode::~IVIntlNode()
{
	this->release();
}
//Debug dump, compiled only when DEBUG_KVSTORE is defined: writes a header
//with the message s, and for "node"/"NODE" the store, num and flag fields.
//"check node" is reserved (not implemented).
void
IVIntlNode::print(string s)
{
#ifdef DEBUG_KVSTORE
	int num = this->getNum();
	fputs(Util::showtime().c_str(), Util::debug_kvstore);
	fputs("Class IVIntlNode\n", Util::debug_kvstore);
	fputs("Message: ", Util::debug_kvstore);
	fputs(s.c_str(), Util::debug_kvstore);
	fputs("\n", Util::debug_kvstore);
	const bool dump_node = (s == "node" || s == "NODE");
	if (dump_node)
		fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
	//TODO(check node, if satisfy B+ definition) for s == "check node"
#endif
}

View File

@ -0,0 +1,48 @@
/*=============================================================================
# Filename: IVIntlNode.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: the internal-node of a B+ tree
=============================================================================*/
#ifndef _KVSTORE_IVTREE_NODE_IVINTLNODE_H
#define _KVSTORE_IVTREE_NODE_IVINTLNODE_H
#include "IVNode.h"
//Internal (non-leaf) node of the IV B+ tree: a node with num keys has
//num+1 child pointers.
class IVIntlNode : public IVNode
{
protected:
//one slot more than MAX_CHILD_NUM
IVNode* childs[MAX_CHILD_NUM + 1];
//Node** childs;
//void AllocChilds();
public:
IVIntlNode();
IVIntlNode(bool isVirtual);
//IntlNode(Storage* TSM);
void Virtual(); //release the keys and clear the in-memory mark
void Normal(); //allocate the keys and set the in-memory mark
IVNode* getChild(int _index) const; //NULL when _index is outside 0..num
bool setChild(IVNode* _child, int _index); //overwrite slot _index
bool addChild(IVNode* _child, int _index); //insert at _index, shifting right
bool subChild(int _index); //remove slot _index, shifting left
unsigned getSize() const; //always INTL_SIZE
IVNode* split(IVNode* _father, int _index); //returns the new right sibling
IVNode* coalesce(IVNode* _father, int _index); //returns the emptied neighbour or NULL
void release(); //free the keys if the node is in memory
~IVIntlNode();
void print(std::string s); //DEBUG
/*non-sense functions: polymorphic
Node* getPrev() const;
Node* getNext() const;
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index);
bool addValue(const Bstr* _value, int _index);
bool subValue(int _index);
void setPrev(Node* _prev);
void setNext(Node* _next);
*/
};
#endif

View File

@ -0,0 +1,538 @@
/*=============================================================================
# Filename: IVLeafNode.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: ahieve functions in IVLeafNode.h
=============================================================================*/
#include "IVLeafNode.h"
using namespace std;
void
IVLeafNode::AllocValues()
{
values = new Bstr[MAX_KEY_NUM];
}
/*
void
IVLeafNode::FreeValues()
{
delete[] values;
}
*/
//Default constructor: an unlinked leaf with its value array allocated.
IVLeafNode::IVLeafNode()
{
	this->prev = NULL;
	this->next = NULL;
	this->flag |= NF_IL; //leaf flag
	this->AllocValues();
}
//Build an unlinked leaf. With isVirtual set, the value array is not
//allocated (Normal() allocates it later).
IVLeafNode::IVLeafNode(bool isVirtual)
{
	flag |= NF_IL; //leaf flag
	prev = next = NULL;
	//never leave the pointer indeterminate: a virtual leaf has no values yet
	values = NULL;
	if (!isVirtual)
		AllocValues();
}
/*
IVLeafNode::LeafNode(Storage* TSM)
{
AllocValues();
TSM->readNode(this, Storage::OVER);
}
*/
void
IVLeafNode::Virtual()
{
//this->FreeKeys();
//this->FreeValues();
this->release();
this->delMem();
}
void
IVLeafNode::Normal()
{
this->AllocKeys();
this->AllocValues();
this->setMem();
}
//Previous leaf in the leaf list (NULL for the first leaf).
IVNode*
IVLeafNode::getPrev() const
{
	return this->prev;
}
//Next leaf in the leaf list (NULL for the last leaf).
IVNode*
IVLeafNode::getNext() const
{
	return this->next;
}
//Return a pointer to the value slot at _index, or NULL when _index is
//outside 0..num-1.
const Bstr*
IVLeafNode::getValue(int _index) const
{
	const int key_num = this->getNum();
	if (_index >= 0 && _index < key_num)
		return this->values + _index;
	return NULL;
}
bool
IVLeafNode::setValue(const Bstr* _value, int _index, bool _ifcopy)
{
int num = this->getNum();
if (_index < 0 || _index >= num)
{
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
return false;
}
this->values[_index].release(); //NOTICE: only used in modify
if(_ifcopy)
{
this->values[_index].copy(_value);
}
else
{
this->values[_index] = *_value;
}
return true;
}
//Fetch the value at _index as a raw buffer.
//@param _vlist  long-list store, consulted only when the slot is a long list
//@param _index  slot to read, must be in [0, getNum())
//@param _str    out: for a long list, whatever VList::readValue() produces;
//               otherwise the slot's own string pointer (not a copy)
//@param _len    out: length belonging to _str
//@return false when _index is out of range (outputs untouched), true otherwise
bool
IVLeafNode::getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const
{
    int num = this->getNum();
    if (_index < 0 || _index >= num)
    {
        //print(string("error in getValue: Invalid index ") + Util::int2string(_index));
        //the function returns bool, so report failure as false rather than NULL
        return false;
    }

    //read long list
    if (this->values[_index].isBstrLongList())
    {
#ifdef DEBUG_VLIST
        cout<<"this is a vlist in get()"<<endl;
#endif
        //for a long list the length field holds the block number inside the VList
        unsigned block_num = this->values[_index].getLen();
        _vlist->readValue(block_num, _str, _len);
    }
    else
    {
        _str = this->values[_index].getStr();
        _len = this->values[_index].getLen();
    }

    return true;
}
//Overwrite the value at _index with the buffer (_str, _len).
//The old value is dropped first: a long list is removed from _vlist, any other
//value is released. A new value that VList::isLongList() classifies as long is
//written into _vlist, the slot keeps only the returned block number (string
//pointer NULL) and _str is deleted here; otherwise the slot takes over the
//_str pointer as is, without copying.
//@param ifcopy  unused
//@return false (after logging) when _index is out of range, true otherwise
bool
IVLeafNode::setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
{
    const int count = this->getNum();
    if (!(_index >= 0 && _index < count))
    {
        print(string("error in setValue: Invalid index ") + Util::int2string(_index));
        return false;
    }

    Bstr& slot = this->values[_index];

    //get rid of the previous content
    if (!slot.isBstrLongList())
    {
        slot.release(); //NOTICE: only used in modify
    }
    else
    {
#ifdef DEBUG_VLIST
        cout<<"this is a vlist in set()"<<endl;
#endif
        const unsigned old_block = slot.getLen();
        _vlist->removeValue(old_block);
    }

    //no copy is made here: the caller must keep the memory behind _str alive
    if (!VList::isLongList(_len))
    {
        slot.setStr(_str);
        slot.setLen(_len);
        return true;
    }

    const unsigned new_block = _vlist->writeValue(_str, _len);
    slot.setStr(NULL);
    slot.setLen(new_block);
    //NOTICE: we need to free the long list value
    delete[] _str;
    return true;
}
//Insert the buffer (_str, _len) as a new value at _index, shifting the values
//at and after _index one slot to the right. The key count is NOT changed here;
//the caller is expected to call addNum() separately.
//A value that VList::isLongList() classifies as long is written into _vlist,
//the slot keeps only the returned block number and _str is deleted here;
//otherwise the slot takes over the _str pointer without copying.
//@param ifcopy  unused
//@return false (after logging) when _index is outside [0, getNum()] or the
//        node already holds MAX_KEY_NUM values; true otherwise
bool
IVLeafNode::addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy)
{
    int num = this->getNum();
    if (_index < 0 || _index > num)
    {
        print(string("error in addValue: Invalid index ") + Util::int2string(_index));
        return false;
    }
    //the array has exactly MAX_KEY_NUM slots: shifting a full node would write
    //values[MAX_KEY_NUM], one past the end
    if ((unsigned)num >= MAX_KEY_NUM)
    {
        print("error in addValue: node is full!");
        return false;
    }

    for (int i = num - 1; i >= _index; --i)
        this->values[i + 1] = this->values[i];

    if (VList::isLongList(_len))
    {
#ifdef DEBUG_VLIST
        cout<<"this is a vlist in add()"<<endl;
#endif
        unsigned block_num = _vlist->writeValue(_str, _len);
        this->values[_index].setStr(NULL);
        this->values[_index].setLen(block_num);
        //NOTICE: we need to free the long list value
        delete[] _str;
    }
    else
    {
        this->values[_index].setStr(_str);
        this->values[_index].setLen(_len);
    }

    return true;
}
//Remove the value at _index and close the gap by shifting later values left.
//A long-list value is always removed from _vlist; any other value is released
//only when ifdel is true. The key count is not changed here.
//@return false (after logging) when _index is out of range, true otherwise
bool
IVLeafNode::subValue(VList* _vlist, int _index, bool ifdel)
{
    const int count = this->getNum();
    if (!(_index >= 0 && _index < count))
    {
        print(string("error in subValue: Invalid index ") + Util::int2string(_index));
        return false;
    }

    Bstr& victim = this->values[_index];
    if (victim.isBstrLongList())
    {
        const unsigned block_num = victim.getLen();
        _vlist->removeValue(block_num);
    }
    else if (ifdel)
    {
        victim.release();
    }

    for (int pos = _index + 1; pos < count; ++pos)
        this->values[pos - 1] = this->values[pos];

    return true;
}
bool
IVLeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
{
int num = this->getNum();
if (_index < 0 || _index > num)
{
print(string("error in addValue: Invalid index ") + Util::int2string(_index));
return false;
}
int i;
for (i = num - 1; i >= _index; --i)
this->values[i + 1] = this->values[i];
if (ifcopy)
this->values[_index].copy(_value);
else
this->values[_index] = *_value;
return true;
}
//Remove the value at _index and shift the later values one slot to the left.
//The removed value is released only when ifdel is true. The key count is not
//changed here.
//@return false (after logging) when _index is out of range, true otherwise
bool
IVLeafNode::subValue(int _index, bool ifdel)
{
    const int count = this->getNum();
    if (!(_index >= 0 && _index < count))
    {
        print(string("error in subValue: Invalid index ") + Util::int2string(_index));
        return false;
    }

    if (ifdel)
    {
        this->values[_index].release();
    }

    for (int pos = _index + 1; pos < count; ++pos)
    {
        this->values[pos - 1] = this->values[pos];
    }
    return true;
}
//Set the previous leaf in the leaf list.
void
IVLeafNode::setPrev(IVNode* _prev)
{
    prev = _prev;
}
//Set the next leaf in the leaf list.
void
IVLeafNode::setNext(IVNode* _next)
{
    next = _next;
}
unsigned
IVLeafNode::getSize() const
{
unsigned sum = LEAF_SIZE, num = this->getNum(), i;
for (i = 0; i < num; ++i)
{
sum += values[i].getLen();
}
return sum;
}
//Split this leaf: the entries from index MIN_KEY_NUM onwards move to a newly
//allocated right sibling, which is linked into the leaf list directly after
//this node and registered in _father.
//@param _father  parent node; receives the separator key at _index and the
//                new sibling as child _index + 1
//@param _index   position of this node among the children of _father
//@return the new right sibling
//
//Values are handed over with the default ifcopy == false, i.e. by plain Bstr
//assignment; this node only lowers its key count afterwards.
IVNode*
IVLeafNode::split(IVNode* _father, int _index)
{
    int num = this->getNum();
    IVNode* p = new IVLeafNode; //right child
    p->setHeight(this->getHeight()); //NOTICE: assign height for new node

    //insert p between this node and its old right neighbour
    IVNode* old_next = this->next;
    p->setNext(old_next);
    this->setNext(p);
    p->setPrev(this);
    //the old right neighbour must point back to p; otherwise its prev keeps
    //referring to this node and a later coalesce that reads it would unlink p
    if (old_next != NULL)
        old_next->setPrev(p);

    int i, k;
    for (i = MIN_KEY_NUM, k = 0; i < num; ++i, ++k)
    {
        p->addKey(this->keys[i], k);
        p->addValue(this->values + i, k);
        p->addNum();
    }

    //keys are unsigned, so the separator is kept unsigned as well
    unsigned tp = this->keys[MIN_KEY_NUM];
    this->setNum(MIN_KEY_NUM);
    _father->addKey(tp, _index);
    _father->addChild(p, _index + 1); //DEBUG(check the index)
    _father->addNum();

    _father->setDirty();
    p->setDirty();
    this->setDirty();
    return p;
}
//Refill this leaf from a sibling under _father, either by merging the whole
//sibling into this node or by borrowing a single entry.
//@param _father  parent of this node
//@param _index   position of this node among the children of _father
//@return the sibling that was merged into this node (cases 1 and 3; its key
//        count is reset to 0 and it is not deleted here), or NULL when only
//        one entry was moved or no case applied
//
//Case selection (ccase):
//  1  right sibling has at most MIN_KEY_NUM keys -> merge right into this
//  2  right sibling has more than MIN_KEY_NUM    -> borrow its first entry
//  3  only a left sibling exists, with at most MIN_KEY_NUM keys -> merge left
//  4  left sibling has more than MIN_KEY_NUM (and case 2 did not apply)
//     -> borrow its last entry
IVNode*
IVLeafNode::coalesce(IVNode* _father, int _index)
{ //add a key or coalesce a neighbor to this
    int i, j = _father->getNum(), k = 0; //BETTER: unsigned?
    IVNode* p = NULL;
    int ccase = 0;

    if (_index < j) //the right neighbor
    {
        p = _father->getChild(_index + 1);
        k = p->getNum();
        if ((unsigned)k > MIN_KEY_NUM)
            ccase = 2;
        else //==MIN_KEY_NUM
            ccase = 1;
    }
    if (_index > 0) //the left neighbor
    {
        IVNode* tp = _father->getChild(_index - 1);
        unsigned tk = tp->getNum();
        if (ccase < 2)
        {
            if (ccase == 0)
                ccase = 3;
            if (tk > MIN_KEY_NUM)
                ccase = 4;
        }
        if (ccase > 2)
        {
            p = tp;
            k = tk;
        }
    }

    //keys are unsigned, so the temporary key is kept unsigned as well
    unsigned tmp = 0;
    switch (ccase)
    {
    case 1: //union right to this
        for (i = 0; i < k; ++i)
        {
            this->addKey(p->getKey(i), this->getNum());
            this->addValue(p->getValue(i), this->getNum());
            this->addNum();
        }
        _father->subKey(_index);
        _father->subChild(_index + 1);
        _father->subNum();
        this->next = p->getNext();
        if (this->next != NULL)
            this->next->setPrev(this);
        p->setNum(0); //NOTICE: adjust num before delete!
        //delete p;
        break;
    case 2: //move one from right
        this->addKey(p->getKey(0), this->getNum());
        _father->setKey(p->getKey(1), _index);
        p->subKey(0);
        this->addValue(p->getValue(0), this->getNum());
        p->subValue(0);
        this->addNum();
        p->subNum();
        break;
    case 3: //union left to this
        //BETTER: move all keys/etc one time
        for (i = k; i > 0; --i)
        {
            int t = i - 1;
            this->addKey(p->getKey(t), 0);
            this->addValue(p->getValue(t), 0);
            this->addNum();
        }
        _father->subKey(_index - 1);
        _father->subChild(_index - 1);
        _father->subNum();
        this->prev = p->getPrev();
        if (this->prev != NULL) //else: leaves-list
            this->prev->setNext(this);
        p->setNum(0);
        //delete p;
        break;
    case 4: //move one from left
        tmp = p->getKey(k - 1);
        p->subKey(k - 1);
        this->addKey(tmp, 0);
        _father->setKey(tmp, _index - 1);
        this->addValue(p->getValue(k - 1), 0);
        p->subValue(k - 1);
        this->addNum();
        p->subNum();
        break;
    default:
        print("error in coalesce: Invalid case!");
        //printf("error in coalesce: Invalid case!");
    }

    _father->setDirty();
    //p stays NULL when no sibling was selected (the default case above)
    if (p != NULL)
        p->setDirty();
    this->setDirty();

    if (ccase == 1 || ccase == 3)
        return p;
    else
        return NULL;
}
//Free the key and value arrays of an in-memory leaf; does nothing when the
//node is not flagged as in memory. Slots at and beyond the current key count
//are clear()ed before the arrays are deleted.
//NOTE(review): clear() presumably detaches the slot without freeing, so that
//stale entries left behind by split/coalesce are not freed twice -- confirm
//against Bstr.
void
IVLeafNode::release()
{
    if (this->inMem())
    {
        /*
        for(int i = 0; i < num; ++i)
        {
        keys[i].release();
        values[i].release();
        }
        */
        for (unsigned idx = this->getNum(); idx < MAX_KEY_NUM; ++idx)
            values[idx].clear();

        delete[] keys;
        delete[] values;
    }
}
//Destructor: frees the arrays through release(), which is a no-op for a node
//that is not in memory.
IVLeafNode::~IVLeafNode()
{
    this->release();
}
//Debug output into Util::debug_kvstore; compiled to an empty body unless
//DEBUG_KVSTORE is defined. After a common header the mode string s selects:
//  "NODE"        store/num/flag, prev/next and every stored value
//  "node"        store/num/flag and prev/next only
//  "check node"  the "node" output plus a verdict whether the key count lies
//                in [MIN_KEY_NUM, MAX_KEY_NUM] and the keys strictly increase
//Any other string is only echoed as the message.
void
IVLeafNode::print(string s)
{
#ifdef DEBUG_KVSTORE
    const unsigned count = this->getNum();

    fputs(Util::showtime().c_str(), Util::debug_kvstore);
    fputs("Class IVLeafNode\n", Util::debug_kvstore);
    fputs("Message: ", Util::debug_kvstore);
    fputs(s.c_str(), Util::debug_kvstore);
    fputs("\n", Util::debug_kvstore);

    if (s == "NODE" || s == "node")
    {
        fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, count, this->flag);
        fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next);
        if (s == "NODE")
        {
            for (unsigned idx = 0; idx < count; ++idx)
            {
                //this->keys[i].print("BSTR");
                this->values[idx].print("BSTR");
            }
        }
    }
    else if (s == "check node")
    {
        //check the node, if satisfy B+ definition
        bool sane = (count >= MIN_KEY_NUM) && (count <= MAX_KEY_NUM);
        if (sane)
        {
            unsigned idx = 1;
            while (idx < count && keys[idx] > keys[idx - 1])
                ++idx;
            //stopping early means two neighbouring keys are out of order
            if (idx < count)
                sane = false;
        }

        this->print("node");
        if (sane)
            fprintf(Util::debug_kvstore, "This node is good\n");
        else
            fprintf(Util::debug_kvstore, "This node is bad\n");
    }
#endif
}

View File

@ -0,0 +1,58 @@
/*=============================================================================
# Filename: IVLeafNode.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:39
# Description: the leaf-node of a B+ tree
=============================================================================*/
#ifndef _KVSTORE_IVTREE_NODE_IVLEAFNODE_H
#define _KVSTORE_IVTREE_NODE_IVLEAFNODE_H
#include "IVNode.h"
//Leaf node of the IV B+ tree: besides the keys inherited from IVNode it owns
//an array of values (one Bstr per key) and is chained to its neighbouring
//leaves through prev/next.
class IVLeafNode : public IVNode
{
protected:
IVNode* prev; //LeafNode
IVNode* next;
//value array, allocated by AllocValues() with MAX_KEY_NUM slots
Bstr* values;
void AllocValues();
//void FreeValues();
public:
//in-memory leaf with allocated arrays
IVLeafNode();
//isVirtual == true: the value array is not allocated
IVLeafNode(bool isVirtual);
//LeafNode(Storage* TSM);
//Virtual(): release the arrays and clear the in-memory flag
void Virtual();
//Normal(): allocate the arrays again and set the in-memory flag
void Normal();
IVNode* getPrev() const;
IVNode* getNext() const;
//pointer into the value array, NULL for an invalid index
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index, bool _ifcopy=false);
//the VList overloads additionally handle long-list values, which live in the
//VList while the slot only keeps a block number
bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const;
bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false);
bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false);
bool subValue(VList* _vlist, int _index, bool ifdel = false);
//addValue/subValue shift the array but leave the key count to the caller
bool addValue(const Bstr* _val, int _index, bool ifcopy = false);
bool subValue(int _index, bool ifdel = false);
void setPrev(IVNode* _prev);
void setNext(IVNode* _next);
//LEAF_SIZE plus the getLen() of every stored value
unsigned getSize() const;
//split(): move the upper part of the entries to a new right sibling
IVNode* split(IVNode* _father, int _index);
//coalesce(): merge with, or borrow one entry from, a sibling under _father
IVNode* coalesce(IVNode* _father, int _index);
//free the key and value arrays if the node is in memory
void release();
~IVLeafNode();
void print(std::string s); //DEBUG
//the child operations below are intentionally not overridden here, so a leaf
//falls back to the defaults of IVNode
/*non-sense virtual function
Node* getChild(int _index) const;
bool addChild(Node* _child, int _index);
bool subChild(int _index);
*/
};
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
#endif

View File

@ -0,0 +1,320 @@
/*=============================================================================
# Filename: IVNode.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:39
# Description: achieve functions in IVNode.h
=============================================================================*/
#include "IVNode.h"
using namespace std;
void
IVNode::AllocKeys()
{
keys = new unsigned[MAX_KEY_NUM];
}
/*
void
IVNode::FreeKeys()
{
delete[] keys;
}
*/
//Build an in-memory node: store address 0, only the in-memory flag set, key
//array allocated.
IVNode::IVNode()
{
    this->store = 0;
    this->flag = NF_IM;
    this->AllocKeys();
}
//Build either a virtual node (no key array, in-memory flag clear) or a normal
//in-memory node.
//@param isVirtual  true: allocate nothing; false: same as the default constructor
IVNode::IVNode(bool isVirtual)
{
    store = flag = 0;
    //give the pointer a defined value so that a virtual node never carries an
    //indeterminate key pointer
    keys = NULL;
    if (!isVirtual)
    {
        flag |= NF_IM;
        AllocKeys();
    }
}
/*
IVNode::Node(Storage* TSM)
{
AllocKeys();
TSM->readIVNode(this, Storage::OVER);
}
*/
//True when the leaf bit (NF_IL) is set in the flag word.
bool
IVNode::isLeaf() const
{
    return (this->flag & NF_IL) != 0;
}
//True when the dirty bit (NF_ID) is set in the flag word.
bool
IVNode::isDirty() const
{
    return (this->flag & NF_ID) != 0;
}
void
IVNode::setDirty()
{
this->flag |= NF_ID;
}
void
IVNode::delDirty()
{
this->flag &= ~NF_ID;
}
//True when the in-memory bit (NF_IM) is set in the flag word.
bool
IVNode::inMem() const
{
    return (this->flag & NF_IM) != 0;
}
void
IVNode::setMem()
{
this->flag |= NF_IM;
}
void
IVNode::delMem()
{
this->flag &= ~NF_IM;
}
/*
bool
IVNode::isVirtual() const
{
return this->flag & NF_IV;
}
void
IVNode::setVirtual()
{
this->flag |= NF_IV;
}
void
IVNode::delVirtual()
{
this->flag &= ~NF_IV;
}
*/
//Return the part of the flag word selected by NF_RK (the low 24 bits).
unsigned
IVNode::getRank() const
{
    const unsigned rank = this->flag & NF_RK;
    return rank;
}
//Replace the rank area (the bits selected by NF_RK) of the flag word.
//@param _rank  new rank; only its NF_RK bits are stored
//
//The value is masked before it is merged: an unmasked OR would let any higher
//bit of _rank overwrite the leaf and in-memory flags that sit above the rank
//area.
void
IVNode::setRank(unsigned _rank)
{
    this->flag &= ~NF_RK;
    this->flag |= (_rank & NF_RK);
}
//Return the height stored in the NF_HT field (bits 20..23) of the flag word.
unsigned
IVNode::getHeight() const
{
    const unsigned field = this->flag & NF_HT;
    return field >> 20;
}
//Store _h in the height field (NF_HT, bits 20..23) of the flag word.
//@param _h  new height; only the low 4 bits fit into the field
//
//The shifted value is masked with NF_HT so that a height that does not fit
//cannot spill into the flag bits above the field. getHeight() already reads
//the field through the same mask, so the value read back is unchanged.
void
IVNode::setHeight(unsigned _h)
{
    this->flag &= ~NF_HT;
    this->flag |= ((_h << 20) & NF_HT);
}
//Return the key count stored in the NF_KN field (bits 12..18) of the flag word.
unsigned
IVNode::getNum() const
{
    const unsigned field = this->flag & NF_KN;
    return field >> 12;
}
//Store _num as the key count in the NF_KN field of the flag word.
//@return false (after logging) when _num is negative or above MAX_KEY_NUM,
//        true otherwise
bool
IVNode::setNum(int _num)
{
    const bool in_range = (_num >= 0) && ((unsigned)_num <= MAX_KEY_NUM);
    if (!in_range)
    {
        print(string("error in setNum: Invalid num ") + Util::int2string(_num));
        return false;
    }
    this->flag = (this->flag & ~NF_KN) | (_num << 12);
    return true;
}
//Increase the key count by one.
//@return false (after logging) when the count would exceed MAX_KEY_NUM
bool
IVNode::addNum()
{
    const unsigned next_count = this->getNum() + 1;
    if (next_count > MAX_KEY_NUM)
    {
        print("error in addNum: Invalid!");
        return false;
    }
    //the count lives in bits 12 and up, so one key is 1 << 12
    this->flag += (1u << 12);
    return true;
}
//Decrease the key count by one.
//@return false (after logging) when the count is already zero
bool
IVNode::subNum()
{
    if (this->getNum() == 0)
    {
        print("error in subNum: Invalid!");
        return false;
    }
    //the count lives in bits 12 and up, so one key is 1 << 12
    this->flag -= (1u << 12);
    return true;
}
unsigned
IVNode::getStore() const
{
return this->store;
}
void
IVNode::setStore(unsigned _store)
{
this->store = _store;
}
unsigned
IVNode::getFlag() const
{
return flag;
}
void
IVNode::setFlag(unsigned _flag)
{
this->flag = _flag;
}
//Return the key at _index.
//For an index outside [0, getNum()) a message is printed to stdout and -1
//converted to unsigned (the largest unsigned value) is returned.
unsigned
IVNode::getKey(int _index) const
{
    const int count = this->getNum();
    if (_index >= 0 && _index < count)
        return this->keys[_index];

    //print(string("error in getKey: Invalid index ") + Util::int2string(_index));
    printf("error in getKey: Invalid index\n");
    return -1;
}
//Overwrite the key at _index.
//@return false (after logging) when _index is outside [0, getNum()), true otherwise
bool
IVNode::setKey(unsigned _key, int _index)
{
    const int count = this->getNum();
    if (!(_index >= 0 && _index < count))
    {
        print(string("error in setKey: Invalid index ") + Util::int2string(_index));
        return false;
    }
    this->keys[_index] = _key;
    return true;
}
bool
IVNode::addKey(unsigned _key, int _index)
{
int num = this->getNum();
if (_index < 0 || _index > num)
{
print(string("error in addKey: Invalid index ") + Util::int2string(_index));
return false;
}
int i;
//NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!!
//however. tree operations ensure that: when node is full, not add but split first!
for (i = num - 1; i >= _index; --i)
keys[i + 1] = keys[i];
keys[_index] = _key;
return true;
}
//Remove the key at _index by shifting the later keys one slot to the left.
//The key count is not changed here.
//@return false (after logging) when _index is outside [0, getNum()), true otherwise
bool
IVNode::subKey(int _index)
{
    const int count = this->getNum();
    if (!(_index >= 0 && _index < count))
    {
        print(string("error in subKey: Invalid index ") + Util::int2string(_index));
        return false;
    }
    for (int pos = _index + 1; pos < count; ++pos)
        keys[pos - 1] = keys[pos];
    return true;
}
//Binary search over the keys of this node.
//@return the index of the first key that is greater than _key, or getNum()
//        when no key is greater (the keys are expected to be in ascending order)
int
IVNode::searchKey_less(unsigned _key) const
{
    const int count = this->getNum();
    int lo = 0;
    int hi = count - 1;
    while (lo <= hi)
    {
        const int mid = (lo + hi) / 2;
        if (!(this->keys[mid] > _key))
        {
            //keys[mid] <= _key: the answer lies to the right of mid
            lo = mid + 1;
            continue;
        }
        //keys[mid] > _key: mid itself is the answer once the range has collapsed
        if (mid == lo)
            break;
        hi = mid;
    }
    return lo;
}
//Look up _key exactly.
//@return the index of _key when it is present, getNum() otherwise
int
IVNode::searchKey_equal(unsigned _key) const
{
    const int not_found = this->getNum();
    //the slot just before the first greater key is the only possible match
    const int upper = this->searchKey_less(_key);
    const bool hit = (upper > 0) && (this->keys[upper - 1] == _key);
    return hit ? upper - 1 : not_found;
}
//@return the index of _key when it is present, otherwise the index of the
//        first key greater than _key (getNum() when there is none)
int
IVNode::searchKey_lessEqual(unsigned _key) const
{
    const int upper = this->searchKey_less(_key);
    if (upper <= 0)
        return upper;
    if (this->keys[upper - 1] != _key)
        return upper;
    return upper - 1;
}

View File

@ -0,0 +1,123 @@
/*=============================================================================
# Filename: IVNode.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:38
# Description: basic Node class, father of IVIntlNode and IVLeafNode
=============================================================================*/
#ifndef _KVSTORE_IVTREE_NODE_IVNODE_H
#define _KVSTORE_IVTREE_NODE_IVNODE_H
#include "../../../Util/Util.h"
#include "../../../Util/Bstr.h"
#include "../../../Util/VList.h"
//Abstract base class of the IV B+ tree nodes (IVIntlNode and IVLeafNode).
//It owns the key array and packs the node state into one 32-bit flag word:
//  bit 31      NF_IL  node is a leaf
//  bit 29      NF_IM  node content is in memory (not virtual)
//  bits 20-23  NF_HT  height
//  bit 19      NF_ID  node is dirty
//  bits 12-18  NF_KN  number of keys
//  bits 0-23   NF_RK  rank area (it overlaps the height, dirty and key-count bits)
class IVNode //abstract basic class
{
public:
    static const unsigned DEGREE = 2 * 63; //the degree of B+ tree
    static const unsigned MAX_CHILD_NUM = DEGREE;
    static const unsigned MIN_CHILD_NUM = DEGREE >> 1;
    static const unsigned MAX_KEY_NUM = MAX_CHILD_NUM - 1; //max key-num
    static const unsigned MIN_KEY_NUM = MIN_CHILD_NUM - 1; //min key-num
    /* different flags for tree nodes (32-bit); the rank sits in the low bits so it needs no shifting */
    static const unsigned NF_IL = 0x80000000; //is leaf
    static const unsigned NF_ID = 0x00080000; //is dirty, in rank-area
    static const unsigned NF_IM = 0x20000000; //in memory, not virtual
    //static const unsigned NF_IV = 0x10000000; //is virtual
    static const unsigned NF_RK = 0x00ffffff; //select-rank, in Storage
    static const unsigned NF_HT = 0xf00000; //height area in rank
    static const unsigned NF_KN = 0x07f000; //NOTICE: decided by DEGREE
    //fixed per-node sizes: the key array, plus the Bstr array for a leaf
    static const unsigned INTL_SIZE = sizeof(int) * MAX_KEY_NUM;
    static const unsigned LEAF_SIZE = INTL_SIZE + sizeof(Bstr) * MAX_KEY_NUM;

protected:
    unsigned store; //store address, the block index
    unsigned flag; //NF_RK, NF_IL, NF_ID, NF_IV, property
    //int num; //total keys num
    //Node* father; //point to father-node, which must be IntlNode
    unsigned* keys; //key array, MAX_KEY_NUM entries once allocated
    void AllocKeys();
    //void FreeKeys();

public:
    //in-memory node with an allocated key array
    IVNode();
    //isVirtual == true: no key array, in-memory flag clear
    IVNode(bool isVirtual);

    //single-bit flag accessors
    bool isLeaf() const;
    bool isDirty() const;
    void setDirty();
    void delDirty();
    bool inMem() const;
    void setMem();
    void delMem();
    //bool isVirtual() const;
    //void setVirtual();
    //void delVirtual();

    //bit-field accessors on the flag word
    unsigned getRank() const;
    void setRank(unsigned _rank);
    unsigned getHeight() const;
    void setHeight(unsigned _h);
    unsigned getNum() const;
    bool setNum(int _num); //false when _num is outside [0, MAX_KEY_NUM]
    bool addNum();
    bool subNum();
    unsigned getStore() const;
    void setStore(unsigned _store);
    unsigned getFlag() const;
    void setFlag(unsigned _flag);

    //key array operations; addKey/subKey shift the array but do not touch the key count
    unsigned getKey(int _index) const; //need to check the index
    bool setKey(unsigned _key, int _index);
    bool addKey(unsigned _key, int _index);
    bool subKey(int _index);

    //several binary key search utilities
    int searchKey_less(unsigned _key) const; //index of the first key > _key
    int searchKey_equal(unsigned _key) const; //index of _key, or getNum() when absent
    int searchKey_lessEqual(unsigned _key) const; //index of _key, else of the first key > _key

    //virtual functions: polymorphic
    //the defaults below do nothing: getters return NULL or false, mutators
    //report success; the subclasses override the ones that apply to them
    virtual IVNode* getChild(int _index) const { return NULL; };
    virtual bool setChild(IVNode* _child, int _index) { return true; };
    virtual bool addChild(IVNode* _child, int _index) { return true; };
    virtual bool subChild(int _index) { return true; };
    virtual IVNode* getPrev() const { return NULL; };
    virtual IVNode* getNext() const { return NULL; };
    virtual const Bstr* getValue(int _index) const { return NULL; };
    virtual bool setValue(const Bstr* _value, int _index, bool _ifcopy=false) { return true; };
    //returns bool, so the "nothing found" default is false (it used to be spelled NULL)
    virtual bool getValue(VList* _vlist, int _index, char*& _str, unsigned& _len) const { return false; };
    virtual bool setValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; };
    virtual bool addValue(VList* _vlist, int _index, char* _str, unsigned _len, bool ifcopy = false) { return true; };
    virtual bool subValue(VList* _vlist, int _index, bool ifdel = false) { return true; };
    virtual bool addValue(const Bstr* _val, int _index, bool ifcopy = false) { return true; };
    virtual bool subValue(int _index, bool ifdel = false) { return true; };
    virtual void setPrev(IVNode* _prev) {};
    virtual void setNext(IVNode* _next) {};

    //pure virtual functions
    virtual void Virtual() = 0;
    virtual void Normal() = 0;
    virtual unsigned getSize() const = 0; //return all memory owned
    virtual IVNode* split(IVNode* _father, int _index) = 0;
    virtual IVNode* coalesce(IVNode* _father, int _index) = 0;
    virtual void release() = 0; //release the node, only remain necessary information
    virtual ~IVNode() {};
    virtual void print(std::string s) = 0; //DEBUG(print the Node)
};
/*NOTICE(operations in release())
*To save memory, we could keep only store and flag (plus childs for an internal node).
*However, in that way the childs' pointers may change, and using Node** or Node*& does not
*help either, because the pointer variable itself may die.
*Another way is to release only the dynamic memory and store the whole node, reading the
*Bstrs only to rebuild it. In this way the nodes' pointers do not change and the operations
*are simpler, while a bit more memory is consumed. Because the Bstrs consume the most memory,
*and memory-disk swapping is the most time-consuming thing, this seems to be the better way.
*WARN: while a node is in memory and not deleted, its basic content always exists! If there
*are really too many nodes, this will cause a disaster, because we can't swap them out until
*the tree is closed!
*To solve this problem, there should be two types of release function: one to release the
*Bstrs, and one to release the whole node (the pointer becomes invalid and the node must be rebuilt).
*/
#endif

View File

@ -0,0 +1,738 @@
/*=============================================================================
# Filename: IVStorage.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:43
# Description: achieve functions in IVStorage.h
=============================================================================*/
#include "IVStorage.h"
using namespace std;
//Default constructor: an empty storage that is attached to no file, with the
//default buffer size and no free list, heap or value list.
IVStorage::IVStorage()
{ //not use ../logs/, notice the location of program
    cur_block_num = SET_BLOCK_NUM;
    filepath = "";
    freelist = NULL;
    treefp = NULL;
    //treeheight is a pointer member as well; give it a defined value like the
    //other pointers instead of leaving it indeterminate
    treeheight = NULL;
    max_buffer_size = Util::MAX_BUFFER_SIZE;
    heap_size = max_buffer_size / IVNode::INTL_SIZE;
    freemem = max_buffer_size;
    minheap = NULL;
    this->value_list = NULL;
}
//Attach the storage to the tree file _filepath.
//@param _filepath     path of the tree file
//@param _mode         "build": create/truncate the file and write a fresh
//                     header and an all-free block bitmap; "open": read the
//                     header and rebuild the free list from the bitmap
//@param _height       where the tree height lives; filled from the file in "open" mode
//@param _buffer_size  memory budget of this storage
//@param _vlist        store for long-list values
//
//On an invalid mode or a failed fopen() an error is logged and the constructor
//returns early. All members are initialised before the first possible return,
//so such an object holds NULL pointers and consistent sizes instead of
//indeterminate values.
//After "open" the file position is at the start of the root block, or at
//offset 0 when the stored root number is 0; preRead() relies on this.
IVStorage::IVStorage(string& _filepath, string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist)
{
    cur_block_num = SET_BLOCK_NUM; //initialize
    this->filepath = _filepath;
    this->treefp = NULL;
    this->freelist = NULL;
    this->minheap = NULL;
    this->treeheight = _height; //originally set to 0
    this->value_list = _vlist;
    this->max_buffer_size = _buffer_size;
    this->heap_size = this->max_buffer_size / IVNode::INTL_SIZE;
    this->freemem = this->max_buffer_size;

    if (_mode == string("build"))
        treefp = fopen(_filepath.c_str(), "w+b");
    else if (_mode == string("open"))
        treefp = fopen(_filepath.c_str(), "r+b");
    else
    {
        print(string("error in IVStorage: Invalid mode ") + _mode);
        return;
    }
    if (treefp == NULL)
    {
        print(string("error in IVStorage: Open error ") + _filepath);
        return;
    }

    this->freelist = new BlockInfo; //null-head
    unsigned i, j, k; //j = (SuperNum-1)*BLOCK_SIZE
    BlockInfo* bp;
    if (_mode == "build")
    { //write basic information
        i = 0;
        fwrite(&i, sizeof(unsigned), 1, this->treefp); //height
        fwrite(&i, sizeof(unsigned), 1, this->treefp); //rootnum
        fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp); //current block num
        fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
        bp = this->freelist;
        j = cur_block_num / 8;
        //one bitmap byte per 8 blocks; every block starts out free
        for (i = 0; i < j; ++i)
        {
            fputc(0, this->treefp);
            for (k = 0; k < 8; ++k)
            {
                bp->next = new BlockInfo(i * 8 + k + 1, NULL);
                bp = bp->next;
            }
        }
    }
    else //_mode == "open"
    {
        //read basic information
        unsigned rootnum;
        char c;
        fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
        fread(&rootnum, sizeof(unsigned), 1, this->treefp);
        fread(&cur_block_num, sizeof(unsigned), 1, this->treefp);
        fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
        bp = this->freelist;
        j = cur_block_num / 8;
        for (i = 0; i < j; ++i)
        {
            c = fgetc(treefp);
            for (k = 0; k < 8; ++k)
            {
                //a zero bit marks a free block; bit k of byte i stands for
                //block i * 8 + (7 - k) + 1
                if ((c & (1 << k)) == 0)
                {
                    bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL);
                    bp = bp->next;
                }
            }
        }
        fseek(treefp, Address(rootnum), SEEK_SET);
        //treefp is now ahead of root-block
    }
    this->minheap = new IVHeap(this->heap_size);
}
//Build the skeleton of the whole tree from the file: every node is created as
//a virtual node by createNode(), children are attached to their parents, the
//leaves are chained in visiting order, and finally only the root is loaded
//into memory through readNode().
//@param _root         out: the root node, NULL for an empty tree
//@param _leaves_head  out: the leaf reached by always following child 0
//@param _leaves_tail  out: the leaf reached by always following the last child
//@return always true (see the TODO below)
//Precondition: the file position is where the "open" constructor left it, at
//the start of the root block, or at offset 0 when there is no root.
bool
IVStorage::preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail) //pre-read and build whole tree
{ //set root(in memory) and leaves_head
//TODO: false when exceed memory
_leaves_tail = _leaves_head = _root = NULL;
if (ftell(this->treefp) == 0) //root is null
{
return true;
}
unsigned next, store, j, pos = 0;
unsigned h = *this->treeheight;
IVNode* p;
//read root node
this->createNode(p);
_root = p;
//the word after the flag is the number of the node's next block
fread(&next, sizeof(unsigned), 1, treefp);
//use stack to achieve
//explicit traversal stack with one frame per tree level (h frames)
//NOTE(review): arrays sized by a runtime value are a compiler extension in
//C++, and h comes straight from the file header -- a height smaller than the
//real depth would overflow these arrays
long address[h]; //current address
unsigned used[h]; //used child num
unsigned total[h]; //total child num
unsigned block[h]; //next block num
IVNode* nodes[h];
address[pos] = ftell(treefp);
used[pos] = 0;
total[pos] = p->getNum() + 1;
block[pos] = next;
nodes[pos] = p;
pos++;
//most recently finished leaf, used to chain the leaves left to right
IVNode* prev = NULL;
while (pos > 0)
{
j = pos - 1;
if (nodes[j]->isLeaf() || used[j] == total[j]) //LeafNode or ready IntlNode
{
if (nodes[j]->isLeaf())
{
if (prev != NULL)
{
prev->setNext(nodes[j]);
nodes[j]->setPrev(prev);
}
prev = nodes[j];
}
pos--;
continue;
}
//resume reading the child list of the node on top of the stack
fseek(this->treefp, address[j], SEEK_SET);
fread(&store, sizeof(unsigned), 1, treefp);
//hop to the node's next block if the read ended on a block boundary
this->ReadAlign(block + j);
address[j] = ftell(treefp);
//descend: create the child from its first block and push it
fseek(treefp, Address(store), SEEK_SET);
this->createNode(p);
nodes[j]->setChild(p, used[j]);
used[j]++;
fread(&next, sizeof(unsigned), 1, treefp);
address[pos] = ftell(treefp);
used[pos] = 0;
total[pos] = p->getNum() + 1;
block[pos] = next;
nodes[pos] = p;
pos++;
}
//set leaves and read root, which is always kept in-mem
p = _root;
while (!p->isLeaf())
{
p = p->getChild(0);
}
_leaves_head = p;
p = _root;
while (!p->isLeaf())
{
p = p->getChild(p->getNum());
}
_leaves_tail = p;
//load the root content and charge its size to the memory budget
long long memory = 0;
this->readNode(_root, &memory);
this->request(memory);
return true;
}
//Translate a block number into a byte offset inside the tree file.
//@return 0 for block number 0, -1 for a number above cur_block_num, otherwise
//        (SuperNum + _blocknum - 1) * BLOCK_SIZE
long //8-byte in 64-bit machine
IVStorage::Address(unsigned _blocknum) const //BETTER: inline function
{
    if (_blocknum == 0)
        return 0;
    if (_blocknum > cur_block_num)
    {
        //print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum));
        return -1; //address should be non-negative
    }
    //NOTICE: long must be used explicitly here
    const long block_index = (long)(this->SuperNum + _blocknum - 1);
    return block_index * (long)BLOCK_SIZE;
}
//Inverse of Address(): the number of the block that contains the byte offset
//address.
unsigned
IVStorage::Blocknum(long address) const
{
    const unsigned blocknum = (address / BLOCK_SIZE) + 1 - this->SuperNum;
    return blocknum;
}
unsigned
IVStorage::AllocBlock()
{
BlockInfo* p = this->freelist->next;
if (p == NULL)
{
for (unsigned i = 0; i < SET_BLOCK_INC; ++i)
{
cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM
this->FreeBlock(cur_block_num);
}
p = this->freelist->next;
}
unsigned t = p->num;
this->freelist->next = p->next;
delete p;
return t;
}
void
IVStorage::FreeBlock(unsigned _blocknum)
{ //QUERY: head-sub and tail-add will be better?
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
this->freelist->next = bp;
}
//NOTICE: all reads are aligned to 4 bytes (including strings)
//a string may span several blocks
//If the file position sits exactly on a block boundary, continue in the next
//block of the node.
//@param _next  in: number of the block to continue in; out: the number read
//              from the first word of that block
void
IVStorage::ReadAlign(unsigned* _next)
{
    const bool at_boundary = (ftell(treefp) % BLOCK_SIZE == 0);
    if (!at_boundary)
        return;
    fseek(treefp, Address(*_next), SEEK_SET);
    fread(_next, sizeof(unsigned), 1, treefp);
}
//If the file position sits exactly on a block boundary, allocate a new block,
//write its number into the link field of the current block and continue
//writing 4 bytes into the new block.
//@param _curnum        in: current block; out: the newly allocated block
//@param _SpecialBlock  true while the current block is the first block of a
//                      node, whose link field lies 4 bytes into the block;
//                      reset to false once that block has been linked
void
IVStorage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
{
    if (ftell(treefp) % BLOCK_SIZE != 0)
        return;

    const unsigned fresh = this->AllocBlock();
    fseek(treefp, Address(*_curnum), SEEK_SET);
    if (_SpecialBlock)
    {
        fseek(treefp, 4, SEEK_CUR);
        _SpecialBlock = false;
    }
    fwrite(&fresh, sizeof(unsigned), 1, treefp);
    fseek(treefp, Address(fresh) + 4, SEEK_SET);
    *_curnum = fresh;
}
//Load the content (keys, and for a leaf the values) of a virtual node from the
//file and make it a normal in-memory node.
//@param _np       node to load; its store address and key count must be set
//@param _request  in/out: increased by the memory the node now occupies
//                 (INTL_SIZE or LEAF_SIZE plus the length of every value that
//                 is not a long list)
//@return false when _np is NULL or already in memory, true otherwise
bool
IVStorage::readNode(IVNode* _np, long long* _request)
{
if (_np == NULL || _np->inMem())
return false; //can't read or needn't
fseek(treefp, Address(_np->getStore()), SEEK_SET);
bool flag = _np->isLeaf();
unsigned next;
unsigned i, num = _np->getNum();
Bstr bstr;
//skip the flag word, then read the number of the node's next block
fseek(treefp, 4, SEEK_CUR);
fread(&next, sizeof(unsigned), 1, treefp);
//read data, use readBstr...
//fread(treefp, "%u", &num);
//_np->setNum(num);
if (flag)
*_request += IVNode::LEAF_SIZE;
else
*_request += IVNode::INTL_SIZE;
//allocate the arrays and set the in-memory flag
_np->Normal();
//an internal node stores num + 1 child block numbers before its keys; skip them
//NOTE(review): this skip is a plain seek within the current block, unlike the
//aligned reads below -- confirm that the child list cannot cross a block boundary
if (!flag)
fseek(treefp, 4 * (num + 1), SEEK_CUR);
//to read all keys
//int tmp = -1;
unsigned tmp = INVALID;
for (i = 0; i < num; ++i)
{
fread(&tmp, sizeof(int), 1, treefp);
this->ReadAlign(&next);
_np->setKey(tmp, i);
}
if (flag)
{
//to read all values
for (i = 0; i < num; ++i)
{
this->readBstr(&bstr, &next);
//if not long list value
if(bstr.getStr() != NULL)
{
*_request += bstr.getLen();
}
//default ifcopy == false: the slot receives bstr by plain assignment
_np->setValue(&bstr, i);
}
}
//_np->setFlag((_np->getFlag() & ~Node::NF_IV & ~Node::NF_ID) | Node::NF_IM);
//_np->delVirtual();
_np->delDirty();
//_np->setMem();
this->updateHeap(_np, _np->getRank(), false);
//NOTE(review): clear() presumably only resets the temporary, so that its
//destructor does not free the buffer handed to the last slot -- confirm
bstr.clear();
return true;
}
//Create a virtual node (not in memory) from the flag word at the current file
//position. The word decides whether a leaf or an internal node is built; it is
//then installed as the node's flag with the dirty and in-memory bits cleared,
//and the block that contains the word becomes the node's store address.
//@param _np  out: the new node
//@return always true
bool
IVStorage::createNode(IVNode*& _np) //create virtual nodes, not in-mem
{
    /*
    if(ftell(this->treefp)== 0) //null root
    {
    _np = NULL;
    return false;
    }
    */
    unsigned t; //QUERY: maybe next-flag... will be better-storage?
    fread(&t, sizeof(unsigned), 1, treefp);

    const bool is_leaf = (t & IVNode::NF_IL) > 0; //WARN: according to setting
    if (!is_leaf)
    {
        //this->request(sizeof(IntlNode));
        _np = new IVIntlNode(true);
    }
    else
    {
        //this->request(sizeof(LeafNode));
        _np = new IVLeafNode(true);
    }

    _np->setFlag(t);
    _np->delDirty();
    _np->delMem();
    //the flag word was just consumed, so step 4 bytes back to find its block
    _np->setStore(Blocknum(ftell(treefp) - 4));
    return true;
}
//BETTER: Does SpecialBlock really needed? why can't we place next before flag??
//
//NOTICE: root num begins from 1, if root num is 0, then it is invalid, i.e. the tree is NULL
//(and ftell(root address) will be 0 either)
//Write an in-memory node back to the file.
//The blocks the node occupied so far are returned to the free list first; the
//node is then written into freshly allocated blocks: flag word, link field,
//child block numbers (internal node only), keys, values (leaf only).
//@return false when there is nothing to write (NULL, not in memory, or a node
//        with rank > 0 that is not dirty), true otherwise -- including the
//        case of an empty node, whose old blocks are freed but nothing is written
bool
IVStorage::writeNode(IVNode* _np)
{
if (_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty()))
return false; //not need to write back
unsigned num = _np->getNum(), i;
//SpecialBlock: still inside the node's first block, whose link field lies
//4 bytes into the block (behind the flag word)
bool flag = _np->isLeaf(), SpecialBlock = true;
/*
if(!flag)
{
for(i = 0; i <= num; ++i)
if(_np->getChild(i)->isDirty())
return false; //NOTICE: all childs must be clean!
}
*/
//to release original blocks
unsigned store = _np->getStore(), next;
//if first store is 0, meaning a new node
fseek(this->treefp, Address(store) + 4, SEEK_SET);
fread(&next, sizeof(unsigned), 1, treefp);
//follow the chain of blocks; in every block after the first one the link is
//the first word of the block
while (store != 0)
{
this->FreeBlock(store);
store = next;
fseek(treefp, Address(store), SEEK_SET);
fread(&next, sizeof(unsigned), 1, treefp);
}
if (num == 0)
return true; //node is empty!
unsigned t;
//write Node information
unsigned blocknum = this->AllocBlock();
_np->setStore(blocknum);
long address = this->Address(blocknum);
fseek(this->treefp, address, SEEK_SET);
t = _np->getFlag();
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
//leave room for the link field; it is filled in by WriteAlign() or at the end
fseek(treefp, 4, SEEK_CUR);
if (!flag)
{
//an internal node stores the block numbers of its num + 1 children
for (i = 0; i <= num; ++i)
{
t = _np->getChild(i)->getStore();
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
this->WriteAlign(&blocknum, SpecialBlock);
}
}
//int tmp = 0;
unsigned tmp = INVALID;
//to write all keys
for (i = 0; i < num; ++i)
{
tmp = _np->getKey(i);
fwrite(&tmp, sizeof(int), 1, treefp);
this->WriteAlign(&blocknum, SpecialBlock);
}
if (flag)
{
//to write all values
for (i = 0; i < num; ++i)
{
this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock);
}
}
//terminate the chain: write 0 into the link field of the last block used
fseek(treefp, Address(blocknum), SEEK_SET);
if (SpecialBlock)
fseek(treefp, 4, SEEK_CUR);
t = 0;
fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block
//_np->setFlag(_np->getFlag() & ~Node::NF_ID);
//NOTICE: we may store the dirty bit into the tree file, but that is ok
//Each time we read the tree file to construct a node, we always set the dirty bit to 0
_np->delDirty();
return true;
}
//Read one value from the current file position into *_bp.
//A stored length of 0 marks a long-list value: the following word is kept as
//the length of *_bp and the string pointer is set to NULL. Otherwise a buffer
//of the stored length is allocated with new[], filled (hopping to the next
//block after every 4-byte step that ends on a block boundary) and handed to
//*_bp; padding up to the next multiple of 4 bytes is skipped.
//@param _next  in/out: number of the node's next block, maintained by ReadAlign()
//@return always true
bool
IVStorage::readBstr(Bstr* _bp, unsigned* _next)
{
    unsigned len;
    fread(&len, sizeof(unsigned), 1, this->treefp);
    this->ReadAlign(_next);

    //NOTICE: if this is a long list as value
    if (len == 0)
    {
        unsigned addr = 0;
        fread(&addr, sizeof(unsigned), 1, this->treefp);
#ifdef DEBUG_VLIST
        cout<<"read a vlist in IVStorage - addr: "<<addr<<endl;
#endif
        _bp->setLen(addr);
        _bp->setStr(NULL);
        this->ReadAlign(_next);
        return true;
    }

    //NOTICE: we use new for all, consistent with Bstr and KVstore
    char* buffer = new char[len];
    _bp->setLen(len);

    //full 4-byte words first, except for the final 1..4 bytes
    unsigned done = 0;
    while (done + 4 < len)
    {
        fread(buffer + done, sizeof(char), 4, treefp);
        this->ReadAlign(_next);
        done += 4;
    }
    //the tail is read byte by byte
    for (; done < len; ++done)
    {
        fread(buffer + done, sizeof(char), 1, treefp); //BETTER
    }

    //skip the padding that rounds the string up to a multiple of 4 bytes
    const unsigned pad = (4 - len % 4) % 4;
    fseek(treefp, pad, SEEK_CUR);
    this->ReadAlign(_next);

    _bp->setStr(buffer);
    return true;
}
//Write one value at the current file position, the counterpart of readBstr().
//A value whose string pointer is NULL is a long list: a 0 word is written as
//marker, followed by the value's length field (the block number inside the
//VList). Any other value is written as its length followed by the bytes,
//with the position advanced to the next multiple of 4 bytes.
//@param _curnum        in/out: current block, maintained by WriteAlign()
//@param _SpecialBlock  in/out: passed through to WriteAlign()
//@return always true
bool
IVStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
{
    unsigned len = _bp->getLen();

    //NOTICE: to write long list value
    if (_bp->getStr() == NULL)
    {
        unsigned marker = 0;
        fwrite(&marker, sizeof(unsigned), 1, treefp);
        this->WriteAlign(_curnum, _SpecialBlock);
        //then this is the real block num
        fwrite(&len, sizeof(unsigned), 1, treefp);
#ifdef DEBUG_VLIST
        cout<<"to write a vlist in IVStorage::writeBstr() - blocknum: "<<len<<endl;
#endif
        this->WriteAlign(_curnum, _SpecialBlock);
        return true;
    }

    fwrite(&len, sizeof(unsigned), 1, treefp);
    this->WriteAlign(_curnum, _SpecialBlock);

    char* data = _bp->getStr();
    //full 4-byte words first, except for the final 1..4 bytes
    unsigned done = 0;
    while (done + 4 < len)
    {
        fwrite(data + done, sizeof(char), 4, treefp);
        this->WriteAlign(_curnum, _SpecialBlock);
        done += 4;
    }
    //the tail is written byte by byte
    for (; done < len; ++done)
    {
        fwrite(data + done, sizeof(char), 1, treefp);
    }

    //advance over the padding that rounds the string up to a multiple of 4 bytes
    const unsigned pad = (4 - len % 4) % 4;
    fseek(treefp, pad, SEEK_CUR);
    this->WriteAlign(_curnum, _SpecialBlock);

    return true;
}
//Write the tree back to the tree file: the height at offset 0, the nodes reached from
//_root (children before their parent), the root's store number and the current block
//count at offset 4, and finally the block-usage bitmap starting at offset BLOCK_SIZE.
//_root may be NULL (empty tree); then 0 is written as the root number.
//Always returns true; no I/O result is checked.
//NOTICE: treefp is NOT closed here, the fclose at the end is commented out.
bool
IVStorage::writeTree(IVNode* _root) //write the whole tree back (treefp is left open)
{
//the tree height goes to the very beginning of the file
fseek(this->treefp, 0, SEEK_SET);
fwrite(this->treeheight, sizeof(unsigned), 1, treefp);
//delete all nonsense-node in heap, otherwise will waste storage permanently
IVNode* p;
while (1)
{ //all non-sense nodes will be in-head-area, due to minimal rank
p = minheap->getTop();
if (p == NULL) //heap is empty, only when root==NULL
break;
if (p->getRank() == 0) //indicate non-sense node
{
//take it out of the heap, write it, then free the in-memory object
this->minheap->remove();
this->writeNode(p);
delete p;
}
else
break;
}
unsigned i, j, t;
//QUERY: another way to write all nodes back is to print out all nodes in heap
//but this method will cause no node in heap any more, while operations may be
//after tree-saving. Which method is better?
//write nodes recursively using stack, including root-num
if (_root != NULL)
{
//NOTE: this p shadows the p declared above
IVNode* p = _root;
unsigned h = *this->treeheight, pos = 0;
//explicit stack, one slot per tree level: ns[k] is the node on level k of the
//current path, ni[k] is the index of the next child of ns[k] to visit
//NOTE(review): runtime-sized arrays are a compiler extension in C++, not standard
IVNode* ns[h];
int ni[h];
ns[pos] = p;
ni[pos] = p->getNum();
pos++;
while (pos > 0)
{
j = pos - 1;
p = ns[j];
if (p->isLeaf() || ni[j] < 0) //leaf or all childs are ready
{
//post-order: a node is written only after all of its children
this->writeNode(p);
pos--;
continue;
}
//descend into child ni[j]; children are visited from index getNum() down to 0
ns[pos] = p->getChild(ni[j]);
ni[pos] = ns[pos]->getNum();
pos++;
ni[j]--;
}
t = _root->getStore();
}
else
t = 0;
//offset 4: root num, immediately followed by the current blocks num
fseek(this->treefp, 4, SEEK_SET);
fwrite(&t, sizeof(unsigned), 1, treefp); //write the root num
fwrite(&cur_block_num, sizeof(unsigned), 1, treefp);//write current blocks num
//the block bitmap starts at offset BLOCK_SIZE, one bit per block
fseek(treefp, BLOCK_SIZE, SEEK_SET);
j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
//reset to 1 first
for (i = 0; i < j; ++i)
{
fputc(0xff, treefp);
}
char c;
//then clear the bit of every block recorded in the free list
BlockInfo* bp = this->freelist->next;
while (bp != NULL)
{
//if not-use then set 0, aligned to byte!
#ifdef DEBUG_KVSTORE
if (bp->num > cur_block_num)
{
printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
exit(1);
}
#endif
//block num is 1-based here: byte index is (num-1)/8, bit is counted from the high end
j = bp->num - 1;
i = j / 8;
j = 7 - j % 8;
//read-modify-write of a single bitmap byte
fseek(treefp, BLOCK_SIZE + i, SEEK_SET);
c = fgetc(treefp);
fseek(treefp, -1, SEEK_CUR);
fputc(c & ~(1 << j), treefp);
bp = bp->next;
}
//fclose(this->treefp);
return true;
}
//Give node _np the rank _rank and keep the buffer heap consistent with it.
//_inheap == false: the node is not in the heap yet, so it is inserted.
//_inheap == true:  the node is re-positioned with modify(); the flag passed to
//                  modify() is true when the rank decreased and false when it
//                  increased. Nothing is done to the heap if the rank is unchanged.
void
IVStorage::updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const
{
	if (!_inheap) //not in heap, to add
	{
		_np->setRank(_rank);
		this->minheap->insert(_np);
		return;
	}

	//already in heap, to modify
	const unsigned old_rank = _np->getRank();
	_np->setRank(_rank);
	if (old_rank != _rank)
	{
		const bool rank_decreased = old_rank > _rank;
		this->minheap->modify(_np, rank_decreased);
	}
}
//Account for a memory request of _needmem bytes against the buffer budget.
//A positive value consumes free memory, a negative value releases it.
//If the free memory is not enough, handler() is asked to swap nodes out for the
//missing amount; when that fails an error is logged and false is returned without
//touching freemem. Otherwise freemem is adjusted and true is returned.
bool
IVStorage::request(long long _needmem) //aligned to byte
{ //NOTICE: <0 means release
	const bool shortage = _needmem > 0 && this->freemem < static_cast<unsigned long long>(_needmem);
	if (shortage && !this->handler(_needmem - freemem)) //disaster in buffer memory
	{
		print(string("error in request: out of buffer-mem, now to exit"));
		//exit(1);
		return false;
	}

	this->freemem -= _needmem;
	return true;
}
bool
IVStorage::handler(unsigned long long _needmem) //>0
{
//cout<<"swap happen"<<endl;
IVNode* p;
unsigned long long size;
//if(_needmem < SET_BUFFER_SIZE) //to recover to SET_BUFFER_SIZE buffer
// _needmem = SET_BUFFER_SIZE;
//cout<<"IVStorage::handler() - now to loop to release nodes"<<endl;
while (1)
{
p = this->minheap->getTop();
//cout<<"get heap top"<<endl;
if (p == NULL)
{
cout << "the heap top is null" << endl;
return false; //can't satisfy or can't recover to SET_BUFFER_SIZE
}
this->minheap->remove();
//cout<<"node removed in heap"<<endl;
size = p->getSize();
this->freemem += size;
this->writeNode(p);
//cout<<"node write back"<<endl;
if (p->getNum() > 0)
p->Virtual();
else
delete p; //non-sense node
//cout<<"node memory released"<<endl;
if (_needmem > size)
{
//cout<<"reduce the request"<<endl;
_needmem -= size;
}
else
{
//cout<<"ok to break"<<endl;
break;
}
}
//cout<<"IVStorage::handler() -- finished"<<endl;
return true;
}
//Release everything owned by the storage: the free-block list, the buffer heap
//and the tree file handle.
//The file is closed only when it is actually open: calling fclose() on a null
//stream is undefined behaviour, which matters if the tree file was never opened.
IVStorage::~IVStorage()
{
	//release heap and freelist...
#ifdef DEBUG_KVSTORE
	printf("now to release the kvstore!\n");
#endif
	//walk the singly linked free list, saving the successor before each delete
	BlockInfo* bp = this->freelist;
	while (bp != NULL)
	{
		BlockInfo* next = bp->next;
		delete bp;
		bp = next;
	}
#ifdef DEBUG_KVSTORE
	printf("already empty the freelist!\n");
#endif
	delete this->minheap;
#ifdef DEBUG_KVSTORE
	printf("already empty the buffer heap!\n");
#endif
	if (this->treefp != NULL)
	{
		fclose(this->treefp);
	}
	//#ifdef DEBUG_KVSTORE
	//NOTICE:there is more than one tree, so the shared debug file is not closed here
	//fclose(Util::debug_kvstore);
	//Util::debug_kvstore = NULL;
	//#endif
}
//Debug helper: append a time-stamped message tagged with this class name to the
//kvstore debug log. Compiles to an empty function unless DEBUG_KVSTORE is defined.
void
IVStorage::print(string s)
{
#ifdef DEBUG_KVSTORE
	FILE* const sink = Util::debug_kvstore;
	fputs(Util::showtime().c_str(), sink);
	fputs("Class IVStorage\n" "Message: ", sink);
	fputs(s.c_str(), sink);
	fputs("\n", sink);
#endif
}

View File

@ -0,0 +1,84 @@
/*=============================================================================
# Filename: IVStorage.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:43
# Description: swap between memory and disk, achieving system-like method
=============================================================================*/
#ifndef _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
#define _KVSTORE_IVTREE_STORAGE_IVSTORAGE_H
#include "../../../Util/VList.h"
#include "../node/IVIntlNode.h"
#include "../node/IVLeafNode.h"
#include "../heap/IVHeap.h"
//It controls read, write, swap
//Storage manager of an IVTree: it reads and writes nodes and Bstr values on the
//tree file, tracks the buffer memory budget and swaps nodes out when it is exceeded.
class IVStorage
{
public:
static const unsigned BLOCK_SIZE = Util::STORAGE_BLOCK_SIZE; //fixed size of disk-block
//there are 18 B+Tree indexes and one vstree index, so set 3G buffer size
//static const unsigned long long MAX_BUFFER_SIZE = Util::MAX_BUFFER_SIZE; //max buffer size
//static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size
//static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE / IVNode::INTL_SIZE;
//DEBUG: maybe need to set larger, now the file size is 64G at most
static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num
//below two constants: must can be exactly divided by 8
static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
//presumably the number of leading blocks reserved for the header and the block bitmap
//(one bit per block) - TODO confirm against the constructor
static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;
//static const unsigned TRANSFER_CAPACITY = BLOCK_SIZE;
//enum ReadType { OVER = 0, EXPAND, NORMAL };
private:
unsigned long long max_buffer_size;
unsigned heap_size;
unsigned cur_block_num; //current blocks num, written into the file header by writeTree()
std::string filepath;
unsigned* treeheight; //points to the tree height; writeTree() stores the pointed value at file offset 0
BlockInfo* freelist; //linked list of BlockInfo; its entries are cleared in the bitmap by writeTree()
FILE* treefp; //file: tree nodes
IVHeap* minheap; //heap of Nodes's pointer, sorted in NF_RK
//very long value list are stored in a separate file(with large block)
//
//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
//<10%: 5000000~100M bytes
VList* value_list;
//NOTICE: freemem's type is unsigned long long here, due to large memory in server.
//However, needmem in handler() and request() is ok to be int/unsigned.
//Because the bstr' size is controlled, so is the node.
unsigned long long freemem; //free memory to use, non-negative
//unsigned long long time; //QUERY(achieving an old-swap strategy?)
long Address(unsigned _blocknum) const;
unsigned Blocknum(long address) const;
unsigned AllocBlock();
void FreeBlock(unsigned _blocknum);
void ReadAlign(unsigned* _next);
void WriteAlign(unsigned* _next, bool& _SpecialBlock);
public:
IVStorage();
IVStorage(std::string& _filepath, std::string& _mode, unsigned* _height, unsigned long long _buffer_size, VList* _vlist); //create a fixed-size file or open an existing one
bool preRead(IVNode*& _root, IVNode*& _leaves_head, IVNode*& _leaves_tail); //read and build all nodes, only root in memory
bool readNode(IVNode* _np, long long* _request); //read, if virtual
bool createNode(IVNode*& _np); //use fp to create a new node
//NOTICE(if children and child not exist, build children's Nodes)
bool writeNode(IVNode* _np);
//read one Bstr at the current file position; a stored length of 0 marks a long value list
bool readBstr(Bstr* _bp, unsigned* _next);
//write one Bstr at the current file position; a NULL string marks a long value list
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock);
//write height, nodes, root num, blocks num and block bitmap; does not close the file
bool writeTree(IVNode* _np);
//set the node's rank and insert it into (or re-position it in) the heap
void updateHeap(IVNode* _np, unsigned _rank, bool _inheap) const;
bool request(long long _needmem); //deal with memory request
bool handler(unsigned long long _needmem); //swap some nodes out
//bool update(); //update InMem Node's rank, with clock
~IVStorage();
void print(std::string s); //DEBUG
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,21 @@
#define _KVSTORE_KVSTORE_H
#include "../Util/Util.h"
#include "../Util/VList.h"
#include "Tree.h"
//TODO: is it needed to keep a length in Bstr?? especially for IVTree?
//add a length: sizeof bstr from 8 to 16(4 -> 8 for alignment)
//add a \0 in tail: only add 1 char
//QUERY: but to count the length each time maybe very costly?
//No, because triple num is stored in char* now!!!! we do not need to save it again
//TODO: entity_border in s2values list is not needed!!! not waste memory here
//
//QUERY: but to implement vlist, we need a unsigned flag
//What is more, we need to store the string in disk, how can we store it if without the length?
//unsigned type stored as chars, maybe will have '\0'
In memory, we do not know where the oidlist ends without the original length (but the triple num will answer this!)
class KVstore
{
public:
@ -27,36 +40,36 @@ public:
//===============================================================================
//including IN-neighbor & OUT-neighbor
int getEntityDegree(int _entity_id) const;
int getEntityInDegree(int _entity_id) const;
int getEntityOutDegree(int _entity_id) const;
unsigned getEntityDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const;
unsigned getEntityInDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const;
unsigned getEntityOutDegree(TYPE_ENTITY_LITERAL_ID _entity_id) const;
int getLiteralDegree(int _literal_id) const;
int getPredicateDegree(int _predicate_id) const;
unsigned getLiteralDegree(TYPE_ENTITY_LITERAL_ID _literal_id) const;
unsigned getPredicateDegree(TYPE_PREDICATE_ID _predicate_id) const;
int getSubjectPredicateDegree(int _subid, int _preid) const;
int getObjectPredicateDegree(int _objid, int _preid) const;
unsigned getSubjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid) const;
unsigned getObjectPredicateDegree(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid) const;
//===============================================================================
//Before calling these functions, we are sure that the triples doesn't exist.
bool updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id);
bool updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id);
bool updateTupleslist_insert(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
bool updateTupleslist_remove(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
bool updateInsert_s2values(int _sub_id, int _pre_id, int _obj_id);
bool updateRemove_s2values(int _sub_id, int _pre_id, int _obj_id);
bool updateInsert_s2values(int _subid, const std::vector<int>& _pidoidlist);
bool updateRemove_s2values(int _subid, const std::vector<int>& _pidoidlist);
bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
bool updateInsert_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector<unsigned>& _pidoidlist);
bool updateRemove_s2values(TYPE_ENTITY_LITERAL_ID _subid, const std::vector<unsigned>& _pidoidlist);
bool updateInsert_o2values(int _sub_id, int _pre_id, int _obj_id);
bool updateRemove_o2values(int _sub_id, int _pre_id, int _obj_id);
bool updateInsert_o2values(int _objid, const std::vector<int>& _pidsidlist);
bool updateRemove_o2values(int _objid, const std::vector<int>& _pidsidlist);
bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
bool updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector<unsigned>& _pidsidlist);
bool updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector<unsigned>& _pidsidlist);
bool updateInsert_p2values(int _sub_id, int _pre_id, int _obj_id);
bool updateRemove_p2values(int _sub_id, int _pre_id, int _obj_id);
bool updateInsert_p2values(int _preid, const std::vector<int>& _sidoidlist);
bool updateRemove_p2values(int _preid, const std::vector<int>& _sidoidlist);
bool updateInsert_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
bool updateRemove_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id);
bool updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector<unsigned>& _sidoidlist);
bool updateRemove_p2values(TYPE_PREDICATE_ID _preid, const std::vector<unsigned>& _sidoidlist);
//===============================================================================
@ -64,74 +77,74 @@ public:
bool open_entity2id(int _mode);
bool close_entity2id();
bool subIDByEntity(std::string _entity);
int getIDByEntity(std::string _entity) const;
bool setIDByEntity(std::string _entity, int _id);
TYPE_ENTITY_LITERAL_ID getIDByEntity(std::string _entity) const;
bool setIDByEntity(std::string _entity, TYPE_ENTITY_LITERAL_ID _id);
//for id2entity
bool open_id2entity(int _mode);
bool close_id2entity();
bool subEntityByID(int _id);
std::string getEntityByID(int _id) const;
bool setEntityByID(int _id, std::string _entity);
bool subEntityByID(TYPE_ENTITY_LITERAL_ID _id);
std::string getEntityByID(TYPE_ENTITY_LITERAL_ID _id) const;
bool setEntityByID(TYPE_ENTITY_LITERAL_ID _id, std::string _entity);
//for predicate2id
bool open_predicate2id(int _mode);
bool close_predicate2id();
bool subIDByPredicate(std::string _predicate);
int getIDByPredicate(std::string _predicate) const;
bool setIDByPredicate(std::string _predicate, int _id);
TYPE_PREDICATE_ID getIDByPredicate(std::string _predicate) const;
bool setIDByPredicate(std::string _predicate, TYPE_PREDICATE_ID _id);
//for id2predicate
bool open_id2predicate(int _mode);
bool close_id2predicate();
bool subPredicateByID(int _id);
std::string getPredicateByID(int _id) const;
bool setPredicateByID(int _id, std::string _predicate);
bool subPredicateByID(TYPE_PREDICATE_ID _id);
std::string getPredicateByID(TYPE_PREDICATE_ID _id) const;
bool setPredicateByID(TYPE_PREDICATE_ID _id, std::string _predicate);
//for literal2id
bool open_literal2id(int _mode);
bool close_literal2id();
bool subIDByLiteral(std::string _literal);
int getIDByLiteral(std::string _literal) const;
bool setIDByLiteral(std::string _literal, int _id);
TYPE_ENTITY_LITERAL_ID getIDByLiteral(std::string _literal) const;
bool setIDByLiteral(std::string _literal, TYPE_ENTITY_LITERAL_ID _id);
//for id2literal
bool open_id2literal(int _mode);
bool close_id2literal();
bool subLiteralByID(int _id);
std::string getLiteralByID(int _id) const;
bool setLiteralByID(int _id, std::string _literal);
bool subLiteralByID(TYPE_ENTITY_LITERAL_ID _id);
std::string getLiteralByID(TYPE_ENTITY_LITERAL_ID _id) const;
bool setLiteralByID(TYPE_ENTITY_LITERAL_ID _id, std::string _literal);
//===============================================================================
//for subID2values
bool open_subID2values(int _mode);
bool close_subID2values();
bool build_subID2values(int** _p_id_tuples, int _triples_num);
bool getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len, bool _no_duplicate = false) const;
bool getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len, bool _no_duplicate = false) const;
bool getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate = false) const;
bool getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len, bool _no_duplicate = false) const;
bool build_subID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num);
bool getpreIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const;
bool getobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
bool getobjIDlistBysubIDpreID(TYPE_ENTITY_LITERAL_ID _subid, TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
bool getpreIDobjIDlistBysubID(TYPE_ENTITY_LITERAL_ID _subid, unsigned*& _preid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
//for objID2values
bool open_objID2values(int _mode);
bool close_objID2values();
bool build_objID2values(int** _p_id_tuples, int _triples_num);
bool getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len, bool _no_duplicate = false) const;
bool getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len, bool _no_duplicate = false) const;
bool getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate = false) const;
bool getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len, bool _no_duplicate = false) const;
bool build_objID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num);
bool getpreIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const;
bool getsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const;
bool getsubIDlistByobjIDpreID(TYPE_ENTITY_LITERAL_ID _objid, TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const;
bool getpreIDsubIDlistByobjID(TYPE_ENTITY_LITERAL_ID _objid, unsigned*& _preid_subidlist, unsigned& _list_len, bool _no_duplicate = false) const;
//for preID2values
bool open_preID2values(int _mode);
bool close_preID2values();
bool build_preID2values(int** _p_id_tuples, int _triples_num);
bool getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len, bool _no_duplicate = false) const;
bool getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len, bool _no_duplicate = false) const;
bool getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len, bool _no_duplicate = false) const;
bool build_preID2values(ID_TUPLE* _p_id_tuples, TYPE_TRIPLE_NUM _triples_num);
bool getsubIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subidlist, unsigned& _list_len, bool _no_duplicate = false) const;
bool getobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
bool getsubIDobjIDlistBypreID(TYPE_PREDICATE_ID _preid, unsigned*& _subid_objidlist, unsigned& _list_len, bool _no_duplicate = false) const;
//for so2p
bool getpreIDlistBysubIDobjID(int _subID, int _objID, int*& _preidlist, int& _list_len, bool _no_duplicate = false) const;
bool getpreIDlistBysubIDobjID(TYPE_ENTITY_LITERAL_ID _subID, TYPE_ENTITY_LITERAL_ID _objID, unsigned*& _preidlist, unsigned& _list_len, bool _no_duplicate = false) const;
private:
@ -164,9 +177,9 @@ private:
static unsigned short buffer_literal2id_query;
static unsigned short buffer_id2literal_query;
ISTree* subID2values;
ISTree* objID2values;
ISTree* preID2values;
IVTree* subID2values;
IVTree* objID2values;
IVTree* preID2values;
static std::string s_sID2values;
static std::string s_oID2values;
static std::string s_pID2values;
@ -181,27 +194,36 @@ private:
bool open(SITree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
bool open(ISTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
bool open(IVTree* & _p_btree, std::string _tree_name, int _mode, unsigned long long _buffer_size);
void flush(SITree* _p_btree);
void flush(ISTree* _p_btree);
void flush(IVTree* _p_btree);
bool addValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val);
bool addValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen);
bool addValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val);
bool addValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool addValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool setValueByKey(SITree* _p_btree, const char* _key, int _klen, int _val);
bool setValueByKey(ISTree* _p_btree, int _key, const char* _val, int _vlen);
bool setValueByKey(SITree* _p_btree, char* _key, unsigned _klen, unsigned _val);
bool setValueByKey(ISTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool setValueByKey(IVTree* _p_btree, unsigned _key, char* _val, unsigned _vlen);
bool getValueByKey(SITree* _p_btree, const char* _key, int _klen, int* _val) const;
bool getValueByKey(ISTree* _p_btree, int _key, char*& _val, int& _vlen) const;
bool getValueByKey(SITree* _p_btree, const char* _key, unsigned _klen, unsigned* _val) const;
bool getValueByKey(ISTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const;
bool getValueByKey(IVTree* _p_btree, unsigned _key, char*& _val, unsigned& _vlen) const;
int getIDByStr(SITree* _p_btree, const char* _key, int _klen) const;
bool removeKey(SITree* _p_btree, const char* _key, int _klen);
bool removeKey(ISTree* _p_btree, int _key);
static std::vector<int> intersect(const int* _list1, const int* _list2, int _len1, int _len2);
static int binarySearch(int key, const int* _list, int _list_len, int step = 1);
static bool isEntity(int id);
TYPE_ENTITY_LITERAL_ID getIDByStr(SITree* _p_btree, const char* _key, unsigned _klen) const;
bool removeKey(SITree* _p_btree, const char* _key, unsigned _klen);
bool removeKey(ISTree* _p_btree, unsigned _key);
bool removeKey(IVTree* _p_btree, unsigned _key);
static std::vector<unsigned> intersect(const unsigned* _list1, const unsigned* _list2, unsigned _len1, unsigned _len2);
static unsigned binarySearch(unsigned key, const unsigned* _list, unsigned _list_len, int step = 1);
static bool isEntity(TYPE_ENTITY_LITERAL_ID id);
};
#endif //_KVSTORE_KVSTORE_H

View File

@ -20,7 +20,7 @@ SITree::SITree()
TSM = NULL;
storepath = "";
filename = "";
transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
//transfer_size[0] = transfer_size[1] = transfer_size[2] = 0;
this->request = 0;
}
@ -36,10 +36,10 @@ SITree::SITree(string _storepath, string _filename, string _mode, unsigned long
this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
else
this->root = NULL;
this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
//this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
//this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialied to 1M
this->request = 0;
}
@ -49,30 +49,30 @@ SITree::getFilePath()
return storepath + "/" + filename;
}
void //WARN: not check _str and _len
SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
{
if (_index > 2)
return;
/*
if(_str == NULL || _len == 0)
{
printf("error in CopyToTransfer: empty string\n");
return;
}
*/
//unsigned length = _bstr->getLen();
unsigned length = _len;
if (length + 1 > this->transfer_size[_index])
{
transfer[_index].release();
transfer[_index].setStr((char*)malloc(length + 1));
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
}
memcpy(this->transfer[_index].getStr(), _str, length);
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
this->transfer[_index].setLen(length);
}
//void //WARN: not check _str and _len
//SITree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
//{
//if (_index > 2)
//return;
//[>
//if(_str == NULL || _len == 0)
//{
//printf("error in CopyToTransfer: empty string\n");
//return;
//}
//*/
////unsigned length = _bstr->getLen();
//unsigned length = _len;
//if (length + 1 > this->transfer_size[_index])
//{
//transfer[_index].release();
//transfer[_index].setStr((char*)malloc(length + 1));
//this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
//}
//memcpy(this->transfer[_index].getStr(), _str, length);
//this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
//this->transfer[_index].setLen(length);
//}
unsigned
SITree::getHeight() const
@ -102,40 +102,47 @@ SITree::prepare(SINode* _np)
}
bool
SITree::search(const char* _str, unsigned _len, int* _val)
SITree::search(const char* _str, unsigned _len, unsigned* _val)
{
if (_str == NULL || _len == 0)
{
printf("error in SITree-search: empty string\n");
*_val = -1;
//*_val = -1;
return false;
}
this->CopyToTransfer(_str, _len, 1);
//this->CopyToTransfer(_str, _len, 1);
request = 0;
Bstr bstr = this->transfer[1]; //not to modify its memory
//Bstr bstr = this->transfer[1]; //not to modify its memory
//Bstr bstr(_str, _len, true);
int store;
SINode* ret = this->find(&transfer[1], &store, false);
if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
SINode* ret = this->find(_str, _len, &store, false);
if (ret == NULL || store == -1) //tree is empty or not found
{
//bstr.clear();
return false;
}
const Bstr* tmp = ret->getKey(store);
if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found
{
bstr.clear();
return false;
}
*_val = ret->getValue(store);
this->TSM->request(request);
bstr.clear();
//bstr.clear();
return true;
}
bool
SITree::insert(const char* _str, unsigned _len, int _val)
SITree::insert(char* _str, unsigned _len, unsigned _val)
{
if (_str == NULL || _len == 0)
{
printf("error in SITree-insert: empty string\n");
return false;
}
this->CopyToTransfer(_str, _len, 1);
//this->CopyToTransfer(_str, _len, 1);
this->request = 0;
SINode* ret;
@ -170,8 +177,8 @@ SITree::insert(const char* _str, unsigned _len, int _val)
SINode* p = this->root;
SINode* q;
int i;
const Bstr* _key = &transfer[1];
Bstr bstr = *_key;
//const Bstr* _key = &transfer[1];
//Bstr bstr = *_key;
while (!p->isLeaf())
{
//j = p->getNum();
@ -179,7 +186,7 @@ SITree::insert(const char* _str, unsigned _len, int _val)
//if(bstr < *(p->getKey(i)))
//break;
//NOTICE: using binary search is better here
i = p->searchKey_less(bstr);
i = p->searchKey_less(_str, _len);
q = p->getChild(i);
this->prepare(q);
@ -196,7 +203,10 @@ SITree::insert(const char* _str, unsigned _len, int _val)
this->TSM->updateHeap(ret, ret->getRank(), false);
this->TSM->updateHeap(q, q->getRank(), true);
this->TSM->updateHeap(p, p->getRank(), true);
if (bstr < *(p->getKey(i)))
//if (bstr < *(p->getKey(i)))
const Bstr* tmp = p->getKey(i);
int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen());
if (cmp_res < 0)
p = q;
else
p = ret;
@ -212,63 +222,82 @@ SITree::insert(const char* _str, unsigned _len, int _val)
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
i = p->searchKey_less(_str, _len);
//insert existing key is ok, but not inserted in
//however, the tree-shape may change due to possible split in former code
bool ifexist = false;
if (i > 0 && bstr == *(p->getKey(i - 1)))
ifexist = true;
else
//if (i > 0 && bstr == *(p->getKey(i - 1)))
if (i > 0)
{
p->addKey(_key, i, true);
const Bstr* tmp = p->getKey(i-1);
int cmp_res = Util::compare(_str, _len, tmp->getStr(), tmp->getLen());
if(cmp_res == 0)
{
ifexist = true;
}
}
if(!ifexist)
{
p->addKey(_str, _len, i, true);
p->addValue(_val, i);
p->addNum();
request += _key->getLen();
request += _len;
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
}
this->TSM->request(request);
bstr.clear(); //NOTICE: must be cleared!
//bstr.clear(); //NOTICE: must be cleared!
return !ifexist; //QUERY(which case:return false)
}
bool
SITree::modify(const char* _str, unsigned _len, int _val)
SITree::modify(const char* _str, unsigned _len, unsigned _val)
{
if (_str == NULL || _len == 0)
{
printf("error in SITree-modify: empty string\n");
return false;
}
this->CopyToTransfer(_str, _len, 1);
//this->CopyToTransfer(_str, _len, 1);
this->request = 0;
const Bstr* _key = &transfer[1];
Bstr bstr = *_key;
//const Bstr* _key = &transfer[1];
//Bstr bstr = *_key;
int store;
SINode* ret = this->find(_key, &store, true);
if (ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
SINode* ret = this->find(_str, _len, &store, true);
if (ret == NULL || store == -1) //tree is empty or not found
{
bstr.clear();
//bstr.clear();
return false;
}
const Bstr* tmp = ret->getKey(store);
if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found
{
return false;
}
ret->setValue(_val, store);
ret->setDirty();
this->TSM->request(request);
bstr.clear();
//bstr.clear();
return true;
}
//this function is useful for search and modify, and range-query
SINode* //return the first key's position that >= *_key
SITree::find(const Bstr* _key, int* _store, bool ifmodify)
SITree::find(const char* _str, unsigned _len, int* _store, bool ifmodify)
{ //to assign value for this->bstr, function shouldn't be const!
if (this->root == NULL)
return NULL; //SITree Is Empty
SINode* p = root;
int i, j;
Bstr bstr = *_key; //local Bstr: multiple delete
//Bstr bstr = *_key; //local Bstr: multiple delete
while (!p->isLeaf())
{
if (ifmodify)
@ -277,7 +306,7 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify)
//for(i = 0; i < j; ++i) //BETTER(Binary-Search)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
i = p->searchKey_less(_str, _len);
p = p->getChild(i);
this->prepare(p);
@ -287,13 +316,15 @@ SITree::find(const Bstr* _key, int* _store, bool ifmodify)
//for(i = 0; i < j; ++i)
//if(bstr <= *(p->getKey(i)))
//break;
i = p->searchKey_lessEqual(bstr);
i = p->searchKey_lessEqual(_str, _len);
if (i == j)
*_store = -1; //Not Found
else
*_store = i;
bstr.clear();
//bstr.clear();
return p;
}
@ -312,24 +343,25 @@ SITree::remove(const char* _str, unsigned _len)
printf("error in SITree-remove: empty string\n");
return false;
}
this->CopyToTransfer(_str, _len, 1);
//this->CopyToTransfer(_str, _len, 1);
request = 0;
const Bstr* _key = &transfer[1];
//const Bstr* _key = &transfer[1];
SINode* ret;
if (this->root == NULL) //tree is empty
return false;
SINode* p = this->root;
SINode* q;
int i, j;
Bstr bstr = *_key;
//Bstr bstr = *_key;
while (!p->isLeaf())
{
j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
i = p->searchKey_less(_str, _len);
q = p->getChild(i);
this->prepare(q);
@ -343,6 +375,7 @@ SITree::remove(const char* _str, unsigned _len)
if (ret != NULL)
this->TSM->updateHeap(ret, 0, true);//non-sense node
this->TSM->updateHeap(q, q->getRank(), true);
if (q->isLeaf())
{
if (q->getPrev() == NULL)
@ -350,6 +383,7 @@ SITree::remove(const char* _str, unsigned _len)
if (q->getNext() == NULL)
this->leaves_tail = q;
}
if (p->getNum() == 0) //root shrinks
{
//this->leaves_head = q;
@ -365,7 +399,7 @@ SITree::remove(const char* _str, unsigned _len)
}
bool flag = false;
i = p->searchKey_equal(bstr);
i = p->searchKey_equal(_str, _len);
//WARN+NOTICE:here must check, because the key to remove maybe not exist
if (i != (int)p->getNum())
{
@ -386,7 +420,8 @@ SITree::remove(const char* _str, unsigned _len)
}
this->TSM->request(request);
bstr.clear();
//bstr.clear();
return flag; //i == j, not found
}
@ -496,3 +531,4 @@ SITree::print(string s)
else;
#endif
}

View File

@ -3,7 +3,7 @@
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:44
# Description: struct and interface of the B+ tree
# Description: string2ID, including entity2id, literal2id, predicate2id
=============================================================================*/
#ifndef _KVSTORE_SITREE_SITREE_H
@ -21,7 +21,7 @@
class SITree
{
private:
unsigned int height; //0 indicates an empty tree
unsigned height; //0 indicates an empty tree
SINode* root;
SINode* leaves_head; //the head of LeafNode-list
SINode* leaves_tail; //the tail of LeafNode-list
@ -36,13 +36,19 @@ private:
//so lock is a must. Add lock to transfer is better than to add
//lock to every key/value. However, modify requires a lock for a
//key/value, and multiple search for different keys are ok!!!
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
unsigned transfer_size[3];
//Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
//unsigned transfer_size[3];
//TODO: in all B+ trees, update operations should lock the whole tree, while search operations need not
//However, the transfer bstr may cause parallelism errors!!!!
//Why do we need the transfer? It is ok to pass the original string pointer back as the return value
//A problem is that the tree cannot be modified before the caller finishes (so a read-write lock is required)
std::string storepath;
std::string filename; //ok for user to change
/* some private functions */
std::string getFilePath(); //in UNIX system
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
//void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
void release(SINode* _np) const;
//tree's operations should be atom(if read nodes)
@ -54,14 +60,15 @@ private:
public:
SITree(); //always need to initial transfer
SITree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
unsigned int getHeight() const;
unsigned getHeight() const;
void setHeight(unsigned _h);
SINode* getRoot() const;
//insert, search, remove, set
bool search(const char* _str, unsigned _len, int* _val);
bool insert(const char* _str, unsigned _len, int _val);
bool modify(const char* _str, unsigned _len, int _val);
bool search(const char* _str, unsigned _len, unsigned* _val);
bool insert(char* _str, unsigned _len, unsigned _val);
bool modify(const char* _str, unsigned _len, unsigned _val);
SINode* find(const Bstr* _key, int* store, bool ifmodify);
SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify);
bool remove(const char* _str, unsigned _len);
bool save();
~SITree();
@ -72,3 +79,4 @@ public:
//After saved, it's ok to continue operations on tree!
#endif

View File

@ -184,3 +184,4 @@ SIHeap::print(string s)
#ifdef DEBUG_KVSTORE
#endif
}

View File

@ -39,3 +39,4 @@ public:
};
#endif

View File

@ -75,6 +75,7 @@ SIIntlNode::setChild(SINode* _child, int _index)
return false;
}
this->childs[_index] = _child;
return true;
}
@ -91,6 +92,7 @@ SIIntlNode::addChild(SINode* _child, int _index)
for (i = num; i >= _index; --i) //DEBUG: right bounder!!!
childs[i + 1] = childs[i];
childs[_index] = _child;
return true;
}
@ -106,6 +108,7 @@ SIIntlNode::subChild(int _index)
int i;
for (i = _index; i < num; ++i) //DEBUG: right bounder!!!
childs[i] = childs[i + 1];
return true;
}
@ -115,6 +118,7 @@ SIIntlNode::getSize() const
unsigned sum = INTL_SIZE, num = this->getNum(), i;
for (i = 0; i < num; ++i)
sum += keys[i].getLen();
return sum;
}
@ -140,6 +144,7 @@ SIIntlNode::split(SINode* _father, int _index)
_father->setDirty();
p->setDirty();
this->setDirty();
return p;
}
@ -235,6 +240,7 @@ SIIntlNode::coalesce(SINode* _father, int _index)
print("error in coalesce: Invalid case!");
//printf("error in coalesce: Invalid case!");
}
_father->setDirty();
p->setDirty();
this->setDirty();
@ -291,3 +297,4 @@ SIIntlNode::print(string s)
else;
#endif
}

View File

@ -46,3 +46,4 @@ public:
};
#endif

View File

@ -13,7 +13,7 @@ using namespace std;
void
SILeafNode::AllocValues()
{
values = new int[MAX_KEY_NUM];
values = new unsigned[MAX_KEY_NUM];
}
/*
@ -76,7 +76,7 @@ SILeafNode::getNext() const
return next;
}
int
unsigned
SILeafNode::getValue(int _index) const
{
int num = this->getNum();
@ -90,7 +90,7 @@ SILeafNode::getValue(int _index) const
}
bool
SILeafNode::setValue(int _val, int _index)
SILeafNode::setValue(unsigned _val, int _index)
{
int num = this->getNum();
if (_index < 0 || _index >= num)
@ -99,11 +99,12 @@ SILeafNode::setValue(int _val, int _index)
return false;
}
this->values[_index] = _val;
return true;
}
bool
SILeafNode::addValue(int _val, int _index)
SILeafNode::addValue(unsigned _val, int _index)
{
int num = this->getNum();
if (_index < 0 || _index > num)
@ -115,6 +116,7 @@ SILeafNode::addValue(int _val, int _index)
for (i = num - 1; i >= _index; --i)
this->values[i + 1] = this->values[i];
this->values[_index] = _val;
return true;
}
@ -130,6 +132,7 @@ SILeafNode::subValue(int _index)
int i;
for (i = _index; i < num - 1; ++i)
this->values[i] = this->values[i + 1];
return true;
}
@ -180,6 +183,7 @@ SILeafNode::split(SINode* _father, int _index)
_father->setDirty();
p->setDirty();
this->setDirty();
return p;
}
@ -275,6 +279,7 @@ SILeafNode::coalesce(SINode* _father, int _index)
print("error in coalesce: Invalid case!");
//printf("error in coalesce: Invalid case!");
}
_father->setDirty();
p->setDirty();
this->setDirty();
@ -363,3 +368,4 @@ SILeafNode::print(string s)
else;
#endif
}

View File

@ -16,7 +16,7 @@ class SILeafNode : public SINode
protected:
SINode* prev; //LeafNode
SINode* next;
int* values;
unsigned* values;
void AllocValues();
//void FreeValues();
public:
@ -27,18 +27,24 @@ public:
void Normal();
SINode* getPrev() const;
SINode* getNext() const;
int getValue(int _index) const;
bool setValue(int _val, int _index);
bool addValue(int _val, int _index);
unsigned getValue(int _index) const;
bool setValue(unsigned _val, int _index);
bool addValue(unsigned _val, int _index);
bool subValue(int _index);
void setPrev(SINode* _prev);
void setNext(SINode* _next);
unsigned getSize() const;
SINode* split(SINode* _father, int _index);
SINode* coalesce(SINode* _father, int _index);
void release();
~SILeafNode();
void print(std::string s); //DEBUG
/*non-sense virtual function
Node* getChild(int _index) const;
bool addChild(Node* _child, int _index);
@ -48,3 +54,4 @@ public:
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
#endif

View File

@ -251,6 +251,28 @@ SINode::addKey(const Bstr* _key, int _index, bool ifcopy)
keys[_index].copy(_key);
else
keys[_index] = *_key;
return true;
}
//Insert the key given as a raw (pointer, length) pair at position _index.
//Keys currently at [_index, num) are moved one slot to the right first, then
//the slot at _index receives _str and _len through setStr/setLen.
//Returns false (after printing an error) when _index is outside [0, num].
//NOTE(review): the ifcopy argument is not consulted by this overload.
bool
SINode::addKey(char* _str, unsigned _len, int _index, bool ifcopy)
{
	const int count = this->getNum();
	const bool in_range = (_index >= 0 && _index <= count);
	if (!in_range)
	{
		print(string("error in addKey: Invalid index ") + Util::int2string(_index));
		return false;
	}

	//NOTICE: when count == MAX_KEY_NUM the shift below touches keys[MAX_KEY_NUM], which is not legal!!!
	//However, tree operations ensure that a full node is split first instead of receiving a new key.
	int pos = count;
	while (pos > _index)
	{
		keys[pos] = keys[pos - 1];
		--pos;
	}

	keys[_index].setStr(_str);
	keys[_index].setLen(_len);

	return true;
}
@ -268,6 +290,7 @@ SINode::subKey(int _index, bool ifdel)
keys[_index].release();
for (i = _index; i < num - 1; ++i)
keys[i] = keys[i + 1];
return true;
}
@ -294,6 +317,7 @@ SINode::searchKey_less(const Bstr& _bstr) const
low = mid + 1;
}
}
return low;
}
@ -326,3 +350,56 @@ SINode::searchKey_lessEqual(const Bstr& _bstr) const
else
return ret;
}
//Binary search over this node's keys for the raw string (_str, _len).
//Returns the smallest index whose key makes Util::compare(key, target) > 0,
//or the number of keys when no such key is found
//(assumes the keys are kept in ascending order under Util::compare).
int
SINode::searchKey_less(const char* _str, unsigned _len) const
{
	const int count = this->getNum();
	int lo = 0;
	int hi = count - 1;

	while (lo <= hi)
	{
		const int probe = (lo + hi) / 2;
		const bool key_is_greater =
			Util::compare(this->keys[probe].getStr(), this->keys[probe].getLen(), _str, _len) > 0;

		if (!key_is_greater)
		{
			//keys[probe] <= target: the answer lies strictly to the right
			lo = probe + 1;
			continue;
		}

		//keys[probe] > target: stop once the window cannot shrink any further
		if (probe == lo)
			break;
		hi = probe;
	}

	return lo;
}
//Look for a key equal to the raw string (_str, _len).
//Uses searchKey_less to get a position, then tests the key just before it.
//Returns that key's index when Util::compare reports equality;
//otherwise returns the number of keys (the "not found" marker).
int
SINode::searchKey_equal(const char* _str, unsigned _len) const
{
	const int count = this->getNum();
	const int upper = this->searchKey_less(_str, _len);

	//nothing precedes position 0, so there is no candidate to test
	if (upper <= 0)
		return count;

	const int cand = upper - 1;
	if (Util::compare(this->keys[cand].getStr(), this->keys[cand].getLen(), _str, _len) != 0)
		return count;

	return cand;
}
//Variant of searchKey_less that steps back onto an equal key.
//When the key just before the position returned by searchKey_less compares
//equal to (_str, _len), that key's index is returned; otherwise the
//position from searchKey_less is returned unchanged.
int
SINode::searchKey_lessEqual(const char* _str, unsigned _len) const
{
	const int upper = this->searchKey_less(_str, _len);

	if (upper <= 0)
		return upper;

	const int cand = upper - 1;
	const bool same_key =
		Util::compare(this->keys[cand].getStr(), this->keys[cand].getLen(), _str, _len) == 0;

	return same_key ? cand : upper;
}

View File

@ -30,7 +30,7 @@ public:
static const unsigned NF_HT = 0xf00000; //height area in rank
static const unsigned NF_KN = 0x07f000; //NOTICE: decided by DEGREE
static const unsigned INTL_SIZE = sizeof(Bstr) * MAX_KEY_NUM;
static const unsigned LEAF_SIZE = sizeof(int) * MAX_KEY_NUM + INTL_SIZE;
static const unsigned LEAF_SIZE = sizeof(unsigned) * MAX_KEY_NUM + INTL_SIZE;
protected:
unsigned store; //store address, the BLock index
unsigned flag; //NF_RK, NF_IL,NF_ID, NF_IV, propety
@ -64,9 +64,11 @@ public:
void setStore(unsigned _store);
unsigned getFlag() const;
void setFlag(unsigned _flag);
const Bstr* getKey(int _index) const; //need to check the index
bool setKey(const Bstr* _key, int _index, bool ifcopy = false);
bool addKey(const Bstr* _key, int _index, bool ifcopy = false);
bool addKey(char* _str, unsigned _len, int _index, bool ifcopy = false);
bool subKey(int _index, bool ifdel = false);
//several binary key search utilities
@ -74,19 +76,26 @@ public:
int searchKey_equal(const Bstr& _bstr) const;
int searchKey_lessEqual(const Bstr& _bstr) const;
int searchKey_less(const char* _str, unsigned _len) const;
int searchKey_equal(const char* _str, unsigned _len) const;
int searchKey_lessEqual(const char* _str, unsigned _len) const;
//virtual functions: polymorphic
//NOTICE: not pure-virtual, not required to be implemented again, can be used now
virtual SINode* getChild(int _index) const { return NULL; };
virtual bool setChild(SINode* _child, int _index) { return true; };
virtual bool addChild(SINode* _child, int _index) { return true; };
virtual bool subChild(int _index) { return true; };
virtual SINode* getPrev() const { return NULL; };
virtual SINode* getNext() const { return NULL; };
virtual int getValue(int _index) const { return -1; };
virtual bool setValue(int _val, int _index) { return true; };
virtual bool addValue(int _val, int _index) { return true; };
virtual unsigned getValue(int _index) const { return -1; };
virtual bool setValue(unsigned _val, int _index) { return true; };
virtual bool addValue(unsigned _val, int _index) { return true; };
virtual bool subValue(int _index) { return true; };
virtual void setPrev(SINode* _prev) {};
virtual void setNext(SINode* _next) {};
//NOTICE: pure-virtual, must to be implemented again in the sub-class
virtual void Virtual() = 0;
virtual void Normal() = 0;
virtual unsigned getSize() const = 0; //return all memory owned
@ -112,3 +121,4 @@ public:
*/
#endif

View File

@ -71,7 +71,8 @@ SIStorage::SIStorage(string& _filepath, string& _mode, unsigned* _height, unsign
else //_mode == "open"
{
//read basic information
int rootnum;
unsigned rootnum;
//int rootnum;
char c;
fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
fread(&rootnum, sizeof(unsigned), 1, this->treefp);
@ -214,6 +215,7 @@ SIStorage::AllocBlock()
unsigned t = p->num;
this->freelist->next = p->next;
delete p;
return t;
}
@ -286,10 +288,11 @@ SIStorage::readNode(SINode* _np, long long* _request)
if (flag)
{
//to read all values
int tmp = -1;
unsigned tmp = INVALID;
//int tmp = -1;
for (i = 0; i < num; ++i)
{
fread(&tmp, sizeof(int), 1, treefp);
fread(&tmp, sizeof(unsigned), 1, treefp);
this->ReadAlign(&next);
_np->setValue(tmp, i);
}
@ -300,6 +303,7 @@ SIStorage::readNode(SINode* _np, long long* _request)
//_np->setMem();
this->updateHeap(_np, _np->getRank(), false);
bstr.clear();
return true;
}
@ -335,6 +339,7 @@ SIStorage::createNode(SINode*& _np) //cretae virtual nodes, not in-mem
_np->delDirty();
_np->delMem();
_np->setStore(Blocknum(ftell(treefp) - 4));
return true;
}
@ -343,6 +348,7 @@ SIStorage::writeNode(SINode* _np)
{
if (_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty()))
return false; //not need to write back
unsigned num = _np->getNum(), i;
bool flag = _np->isLeaf(), SpecialBlock = true;
/*
@ -392,12 +398,13 @@ SIStorage::writeNode(SINode* _np)
if (flag)
{
int tmp = -1;
//int tmp = -1;
unsigned tmp = INVALID;
//to write all values
for (i = 0; i < num; ++i)
{
tmp = _np->getValue(i);
fwrite(&tmp, sizeof(int), 1, treefp);
fwrite(&tmp, sizeof(unsigned), 1, treefp);
this->WriteAlign(&blocknum, SpecialBlock);
}
}
@ -408,6 +415,7 @@ SIStorage::writeNode(SINode* _np)
fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block
//_np->setFlag(_np->getFlag() & ~Node::NF_ID);
_np->delDirty();
return true;
}
@ -419,7 +427,8 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next)
fread(&len, sizeof(unsigned), 1, this->treefp);
this->ReadAlign(_next);
//this->request(len);
char* s = (char*)malloc(len);
//char* s = (char*)malloc(len);
char* s = new char[len];
_bp->setLen(len);
for (i = 0; i + 4 < len; i += 4)
{
@ -437,6 +446,7 @@ SIStorage::readBstr(Bstr* _bp, unsigned* _next)
fseek(treefp, j, SEEK_CUR);
this->ReadAlign(_next);
_bp->setStr(s);
return true;
}
@ -462,6 +472,7 @@ SIStorage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
j = 4 - j;
fseek(treefp, j, SEEK_CUR);
this->WriteAlign(_curnum, _SpecialBlock);
return true;
}
@ -552,6 +563,7 @@ SIStorage::writeTree(SINode* _root) //write the whole tree back and close treefp
bp = bp->next;
}
//fclose(this->treefp);
return true;
}
@ -586,6 +598,7 @@ SIStorage::request(long long _needmem) //aligned to byte
return false;;
}
this->freemem -= _needmem;
return true;
}
@ -614,6 +627,7 @@ SIStorage::handler(unsigned long long _needmem) //>0
else
break;
}
return true;
}
@ -657,3 +671,4 @@ SIStorage::print(string s)
fputs("\n", Util::debug_kvstore);
#endif
}

View File

@ -13,6 +13,14 @@
#include "../node/SILeafNode.h"
#include "../heap/SIHeap.h"
//TODO: whether to use a heap or not is a big question
//For a single-query application, it seems that an LRU list like the one in VSTree is a better choice (not much cost in the buffer itself)
//But in the multiple-queries case, things may be different
//BETTER:
//add a heap position in each node, to speed up the node-pointer searching
//lower the number of heap updates: if the size is 128M, then each update is 27 at most
//if not updated in time, the heap may no longer be a heap; then why do we use a heap? why not a simple array?
//It controls read, write, swap
class SIStorage
{
@ -70,3 +78,4 @@ public:
};
#endif

View File

@ -2,3 +2,4 @@
#include "ISTree/ISTree.h"
#include "SITree/SITree.h"
#include "IVTree/IVTree.h"

View File

@ -7,9 +7,9 @@ int
main(int argc, char * argv[])
{
//chdir(dirname(argv[0]));
#ifdef DEBUG
//#ifdef DEBUG
Util util;
#endif
//#endif
cout << "argc: " << argc << "\t";
cout << "DB_store:" << argv[1] << "\t";

View File

@ -17,9 +17,9 @@ int
main(int argc, char * argv[])
{
//chdir(dirname(argv[0]));
#ifdef DEBUG
//#ifdef DEBUG
Util util;
#endif
//#endif
if(argc < 3) //./gbuild
{
//output help info here

View File

@ -12,9 +12,9 @@
int main(int argc, char * argv[])
{
//chdir(dirname(argv[0]));
#ifdef DEBUG
//#ifdef DEBUG
Util util;
#endif
//#endif
std::string ip = Socket::DEFAULT_SERVER_IP;
unsigned short port = Socket::DEFAULT_CONNECT_PORT;

View File

@ -122,9 +122,9 @@ main(int argc, char **argv)
//NOTICE:this is needed to ensure the file path is the work path
//chdir(dirname(argv[0]));
//NOTICE:this is needed to set several debug files
#ifdef DEBUG
//#ifdef DEBUG
Util util;
#endif
//#endif
db_home = Util::global_config["db_home"];

View File

@ -38,9 +38,9 @@ int
main(int argc, char * argv[])
{
//chdir(dirname(argv[0]));
#ifdef DEBUG
//#ifdef DEBUG
Util util;
#endif
//#endif
if (argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
{

View File

@ -11,9 +11,9 @@
using namespace std;
#define GSERVER_PORT_FILE "bin/.gserver_port"
#define GSERVER_PORT_SWAP "bin/.gserver_port.swap"
#define GSERVER_LOG "logs/gserver.log"
//#define GSERVER_PORT_FILE "bin/.gserver_port"
//#define GSERVER_PORT_SWAP "bin/.gserver_port.swap"
//#define GSERVER_LOG "logs/gserver.log"
bool isOnlyProcess(const char* argv0);
void checkSwap();
@ -22,9 +22,9 @@ bool stopServer();
int main(int argc, char* argv[])
{
#ifdef DEBUG
//#ifdef DEBUG
Util util;
#endif
//#endif
string mode;
if (argc == 1) {
@ -61,7 +61,7 @@ int main(int argc, char* argv[])
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
if (argc == 3) {
if (!Util::isValidPort(string(argv[2]))) {
cout << "Invalid port: " << argv[2] << endl;
cerr << "Invalid port: " << argv[2] << endl;
return -1;
}
else {
@ -70,9 +70,9 @@ int main(int argc, char* argv[])
}
}
if (!isOnlyProcess(argv[0])) {
ofstream out(GSERVER_PORT_SWAP, ios::out);
ofstream out(Util::gserver_port_swap.c_str());
if (!out) {
cout << "Failed to change port!" << endl;
cerr << "Failed to change port!" << endl;
return -1;
}
out << port;
@ -80,9 +80,9 @@ int main(int argc, char* argv[])
cout << "Port will be changed to " << port << " after the current server stops or restarts." << endl;
return 0;
}
ofstream out(GSERVER_PORT_FILE, ios::out);
ofstream out(Util::gserver_port_file.c_str());
if (!out) {
cout << "Failed to change port!" << endl;
cerr << "Failed to change port!" << endl;
return -1;
}
out << port;
@ -93,10 +93,15 @@ int main(int argc, char* argv[])
if (mode == "-s" || mode == "--start") {
if (!isOnlyProcess(argv[0])) {
cout << "gServer already running!" << endl;
cerr << "gServer already running!" << endl;
return -1;
}
if (startServer()) {
sleep(1);
if (isOnlyProcess(argv[0])) {
cerr << "Server stopped unexpectedly. Check for port conflicts!" << endl;
return -1;
}
return 0;
}
else {
@ -106,7 +111,7 @@ int main(int argc, char* argv[])
if (mode == "-t" || mode == "--stop") {
if (isOnlyProcess(argv[0])) {
cout << "gServer not running!" << endl;
cerr << "gServer not running!" << endl;
return -1;
}
if (stopServer()) {
@ -119,7 +124,7 @@ int main(int argc, char* argv[])
if (mode == "-r" || mode == "--restart") {
if (isOnlyProcess(argv[0])) {
cout << "gServer not running!" << endl;
cerr << "gServer not running!" << endl;
return -1;
}
if (!stopServer()) {
@ -133,14 +138,14 @@ int main(int argc, char* argv[])
if (mode == "-P" || mode == "--printport") {
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
ifstream in(GSERVER_PORT_FILE);
ifstream in(Util::gserver_port_file.c_str());
if (in) {
in >> port;
in.close();
}
cout << "Current connection port is " << port << '.' << endl;
unsigned short portSwap = 0;
ifstream inSwap(GSERVER_PORT_SWAP);
ifstream inSwap(Util::gserver_port_swap.c_str());
if (inSwap) {
inSwap >> portSwap;
inSwap.close();
@ -153,14 +158,14 @@ int main(int argc, char* argv[])
if (mode == "-k" || mode == "--kill") {
if (isOnlyProcess(argv[0])) {
cout << "No process to kill!" << endl;
cerr << "No process to kill!" << endl;
return -1;
}
execl("/usr/bin/killall", "killall", Util::getExactPath(argv[0]).c_str(), NULL);
return 0;
}
cout << "Invalid arguments! Input \"bin/gserver -h\" for help." << endl;
cerr << "Invalid arguments! Type \"bin/gserver -h\" for help." << endl;
return -1;
}
@ -169,38 +174,38 @@ bool isOnlyProcess(const char* argv0) {
}
void checkSwap() {
if (access(GSERVER_PORT_SWAP, 00) != 0) {
if (access(Util::gserver_port_swap.c_str(), 00) != 0) {
return;
}
ifstream in(GSERVER_PORT_SWAP, ios::in);
ifstream in(Util::gserver_port_swap.c_str());
if (!in) {
cout << "Failed in checkSwap(), port may not be changed." << endl;
cerr << "Failed in checkSwap(), port may not be changed." << endl;
return;
}
unsigned short port;
in >> port;
in.close();
ofstream out(GSERVER_PORT_FILE, ios::out);
ofstream out(Util::gserver_port_file.c_str());
if (!out) {
cout << "Failed in checkSwap(), port may not be changed." << endl;
cerr << "Failed in checkSwap(), port may not be changed." << endl;
return;
}
out << port;
out.close();
chmod(GSERVER_PORT_FILE, 0644);
string cmd = string("rm ") + GSERVER_PORT_SWAP;
chmod(Util::gserver_port_file.c_str(), 0644);
string cmd = string("rm ") + Util::gserver_port_swap;
system(cmd.c_str());
}
bool startServer() {
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
ifstream in(GSERVER_PORT_FILE, ios::in);
ifstream in(Util::gserver_port_file.c_str());
if (!in) {
ofstream out(GSERVER_PORT_FILE, ios::out);
ofstream out(Util::gserver_port_file.c_str());
if (out) {
out << port;
out.close();
chmod(GSERVER_PORT_FILE, 0644);
chmod(Util::gserver_port_file.c_str(), 0644);
}
}
else {
@ -215,47 +220,75 @@ bool startServer() {
if (!Util::dir_exist("logs")) {
Util::create_dir("logs");
}
freopen(GSERVER_LOG, "a", stdout);
freopen(GSERVER_LOG, "a", stderr);
Server server(port);
if (!server.createConnection()) {
cout << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl;
return false;
freopen(Util::gserver_log.c_str(), "a", stdout);
freopen(Util::gserver_log.c_str(), "a", stderr);
int status;
while (true) {
fpid = fork();
// child, main process
if (fpid == 0) {
Server server(port);
if (!server.createConnection()) {
cerr << Util::getTimeString() << "Failed to create connection at port " << port << '.' << endl;
return false;
}
cout << Util::getTimeString() << "Server started at port " << port << '.' << endl;
server.listen();
exit(0);
return true;
}
// parent, daemon process
else if (fpid > 0) {
waitpid(fpid, &status, 0);
if (WIFEXITED(status)) {
exit(0);
return true;
}
cerr << Util::getTimeString() << "Server stopped abnormally, restarting server..." << endl;
}
// fork failure
else {
cerr << Util::getTimeString() << "Failed to start server: deamon fork failure." << endl;
return false;
}
}
cout << Util::getTimeString() << "Server started at port " << port << '.' << endl;
server.listen();
exit(0);
return true;
}
// parent
else if (fpid > 0) {
cout << "Server started at port " << port << '.' << endl;
return true;
}
// fork failure
else {
cout << "Failed to start server at port " << port << '.' << endl;
cerr << "Failed to start server at port " << port << '.' << endl;
return false;
}
}
bool stopServer() {
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
ifstream in(GSERVER_PORT_FILE, ios::in);
ifstream in(Util::gserver_port_file.c_str());
if (in) {
in >> port;
in.close();
}
Socket socket;
if (!socket.create() || !socket.connect("127.0.0.1", port) || !socket.send("stop")) {
cout << "Failed to stop server at port " << port << '.' << endl;
cerr << "Failed to stop server at port " << port << '.' << endl;
return false;
}
string recv_msg;
socket.recv(recv_msg);
socket.close();
if (recv_msg != "server stopped.") {
cout << "Failed to stop server at port " << port << '.' << endl;
cerr << "Failed to stop server at port " << port << '.' << endl;
return false;
}
cout << "Server stopped at port " << port << '.' << endl;

View File

@ -12,9 +12,9 @@ int
main(int argc, char * argv[])
{
//chdir(dirname(argv[0]));
#ifdef DEBUG
//#ifdef DEBUG
Util util;
#endif
//#endif
cout << "argc: " << argc << "\t";
cout << "DB_store:" << argv[1] << "\t";

View File

@ -7,14 +7,26 @@
在使用gserver时不能在数据库没有unload时再用gbuild或其他命令修改数据库仅限于C/S模式
将IRC聊天放到gstore文档上freenode #gStore
storage中大量使用long类型文件大小也可能达到64G最好在64位机器上运行。
# 推广
必须建立一个官方网站可以展示下团队、demo需要建立社区/论坛并维护
另外要有桌面应用或者网页应用以可视化的方式操作数据库类似virtuoso和neo4j那种
server 118.89.115.42 gstore-pku.com
自己的网站可以用实验室的服务器gstore网站最好用云服务图个稳定
但用实验室主机,备案时是否更麻烦?得以企业为单位,而且解析是否更麻烦?
gstore网站中的demo应用的主体可以放在实验室主机上至少是gstore数据库应抽离出来但若实验室主机不开外网应如何而配置代理
demo应用全部外链具体服务放在实验室公开的主机上通过ip:port连接
考虑使用hbase结合云平台
---
论文新的join策略特殊的子图同态问题如何选择顺序
动态估价的评估函数要考虑方向性因为可能含literal变量对应的候选集大小不可靠只能单向。
但每条边总是含subject的所以每条边总是可以备选的。不过问题是literal变量应该先做还是后做
另一种过滤方式直接用key-value索引比如?x-?y-constant可能就比较适合如果用vstree先过滤出的候选集太大的话
考虑对线状查询或星形查询做特殊处理这里的形状仅指需要join的部分
比如?x-?y-constant, why not just use key-value to generate sequentially
但这种没有考虑到更远的约束,可能导致不少中间解是无效的,实际上线状图的拼接顺序也不一定是从两端开始
---
@ -79,13 +91,18 @@ http://blog.csdn.net/infoworld/article/details/8670951
要在单机支持到10亿triple最坏情况下最多有20亿entity和20亿literal目前的编号方式是不行的(int扩展为unsigned)
最好在单机100G内存上支持起freebase(2.5B triples)这个规模的数据集就像jena和virtuoso一样慢不要紧
同时将ID的编码改为unsigned无效标志-1改为最大值的宏, triple数目的类型也要改为unsigned
注意pre的ID还可以为-2或者对于pre仍然用int或者改函数的返回值为long long (还有一些没有用-1而是>=0)
type分支中query过程可能还有问题需要修改Query/里面的类型另外stringindex中也要修改分界线已经是20亿且非法不再是-1
remove signature.binary, 合并两个分支type value
vstree在build和query时可以用不同大小的缓存来加速build过程
---
将B+tree中叶节点的大的value分离出来新建一套缓存使用block机制标记length为0表示未读取
类型bstr的length问题也需要解决
类型bstr的length问题也需要解决(新建Istr类型)
如果把类型直接改成long long空间开销一下子就上升了一倍
解决方法对于ID2string仍然用char*和unsigned但对于s2xx p2xx o2xx应该用long long*和unsigned来表示这样最高可支持到40亿triple
注意在B+树中是以long long*的方式存但读出后应该全部换成unsigned*和unsigned搭配的方式(最长支持20亿个po对)
UBSTR: 类型bstr的length问题也需要解决 如果把类型直接改成long long空间开销一下子就上升了一倍
解决方法对于ID2string仍然用char*和unsigned但对于s2xx p2xx o2xx应该用unsigned long long*和unsigned来表示这样最高可支持到40亿triple
(其实这个不是特别必要很少会有这种情况我们处理的triple数目一般限制在20亿就算是type这种边po对数也就是跟entity数目持平很难达到5亿)
---
那么是否可以调整entity与literal的分界线如果entity数目一般都比literal数目多的话
直接把literal从大到小编号可在ID模块中指定顺序这样每个Database模块应该有自己独特的分界线其他模块用时也需要注意
@ -465,6 +482,8 @@ build db error if triple num > 500M
# BETTER
#### 添加数据访问层,数据范式和生成数据访问的源码
#### 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询返回空值
#### 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?)
@ -515,6 +534,8 @@ http://www.oschina.net/question/188977_58777
# ADVICE
#### 考虑利用hdfs或者hbase这样就可以利用各公司已有的数据库系统但这是否会和已有的内外存交换冲突
#### 数值型查询 实数域 [-bound, bound] 类型很难匹配,有必要单独编码么? 数据集中不应有范围 Query中编码过滤后还需验证
x>a, x<b, >=, <=, a<x<b, x=c
vstree中遇到"1237"^^<...integer>时不直接取字符串,而是转换为数值并编码
@ -599,3 +620,26 @@ Consider the use of Bloom Filter and FM-sketches
http://www.hprd.org/download/
## GIT USAGE
http://www.ruanyifeng.com/blog/2014/06/git_remote.html
https://git-scm.com/book/zh/v1/%E8%B5%B7%E6%AD%A5-%E5%88%9D%E6%AC%A1%E8%BF%90%E8%A1%8C-Git-%E5%89%8D%E7%9A%84%E9%85%8D%E7%BD%AE
#### how to commit a message
package.json
http://www.json.cn/
https://www.oschina.net/news/69705/git-commit-message-and-changelog-guide
https://sanwen8.cn/p/44eCof7.html
1. commit one by one; each commit should do just one thing
2. place an empty line between header and body, and between body and footer
3. the first letter of the header should be uppercase, and the header should not be too long, just a concise summary
FIX: ... ADD:... REF:... 代码重构 SUB:...
4. each line should not be too long; add your real name and the impact in the footer (e.g. whether it causes the code structure to change)

View File

@ -102,7 +102,7 @@ BasicQuery::getVarName(int _var)
}
// get triples number, also sentences number
int
unsigned
BasicQuery::getTripleNum()
{
return this->triple_vt.size();
@ -121,7 +121,8 @@ int BasicQuery::getEdgeNeighborID(int _var, int _i_th_edge)
}
// get the ID of the i-th edge of _var
int BasicQuery::getEdgePreID(int _var, int _i_th_edge)
TYPE_PREDICATE_ID
BasicQuery::getEdgePreID(int _var, int _i_th_edge)
{
return this->edge_pre_id[_var][_i_th_edge];
}
@ -177,20 +178,20 @@ BasicQuery::getCandidateList(int _var)
return candidate_list[_var];
}
int
unsigned
BasicQuery::getCandidateSize(int _var)
{
return this->candidate_list[_var].size();
}
// get the result list of _var in the query graph
vector<int*>&
vector<unsigned*>&
BasicQuery::getResultList()
{
return result_list;
}
vector<int*>*
vector<unsigned*>*
BasicQuery::getResultListPointer()
{
return &result_list;
@ -339,18 +340,20 @@ BasicQuery::setReady(int _var)
}
void
BasicQuery::updateSubSig(int _sub_var_id, int _pre_id, int _obj_id, int _line_id, int _obj_var_id)
BasicQuery::updateSubSig(int _sub_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id, int _line_id, int _obj_var_id)
{
cout<<"sub var id: "<<_sub_var_id<<endl;
// update var(sub)_signature according this triple
//bool obj_is_str = (_obj_id == -1) && (_obj.at(0) != '?');
//if(obj_is_str)
if(_obj_id >= 0)
if(_obj_id != INVALID_ENTITY_LITERAL_ID)
//if(_obj_id >= 0)
{
//Signature::encodeStr2Entity(_obj.c_str(), this->var_sig[_sub_id]);
Signature::encodeStr2Entity(this->var_sig[_sub_var_id], _obj_id, Util::EDGE_OUT);
}
//DEBUG: if the type of pre id is changed to unsigned, this will cause an error
if(_pre_id >= 0)
{
Signature::encodePredicate2Entity(this->var_sig[_sub_var_id], _pre_id, Util::EDGE_OUT);
@ -367,13 +370,14 @@ BasicQuery::updateSubSig(int _sub_var_id, int _pre_id, int _obj_id, int _line_id
}
void
BasicQuery::updateObjSig(int _obj_var_id, int _pre_id, int _sub_id, int _line_id, int _sub_var_id)
BasicQuery::updateObjSig(int _obj_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _sub_id, int _line_id, int _sub_var_id)
{
cout<<"obj var id: "<<_obj_var_id<<endl;
// update var(obj)_signature
//bool sub_is_str = (_sub_id == -1) && (_sub.at(0) != '?');
//if(sub_is_str)
if(_sub_id >= 0)
if(_sub_id != INVALID_ENTITY_LITERAL_ID)
//if(_sub_id >= 0)
{
//cout << "str2entity" << endl;
Signature::encodeStr2Entity(this->var_sig[_obj_var_id], _sub_id, Util::EDGE_IN);
@ -506,7 +510,8 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
string& pre = this->triple_vt[i].predicate;
string& obj = this->triple_vt[i].object;
int pre_id = -1; //not found
//int pre_id = -1; //not found
TYPE_PREDICATE_ID pre_id = INVALID_PREDICATE_ID; //not found
if(pre[0] == '?') //pre var
{
pre_id = -2; //mark that this is a pre var
@ -547,11 +552,12 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
bool sub_is_var = (sub_var_id != -1);
if(sub_is_var)
{
int obj_id = -1;
//int obj_id = -1;
TYPE_ENTITY_LITERAL_ID obj_id = INVALID_ENTITY_LITERAL_ID;
if(obj.at(0) != '?')
{
obj_id = _p_kvstore->getIDByEntity(obj);
if(obj_id == -1)
if(obj_id == INVALID_ENTITY_LITERAL_ID)
{
obj_id = _p_kvstore->getIDByLiteral(obj);
}
@ -572,7 +578,8 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
bool obj_is_var = (obj_var_id != -1);
if(obj_is_var)
{
int sub_id = -1;
//int sub_id = -1;
TYPE_ENTITY_LITERAL_ID sub_id = INVALID_ENTITY_LITERAL_ID;
if(sub.at(0) != '?')
{
sub_id = _p_kvstore->getIDByEntity(sub);
@ -998,16 +1005,17 @@ int
BasicQuery::getVarID_MinCandidateList()
{
int min_var = -1;
int min_size = Util::TRIPLE_NUM_MAX;
unsigned min_size = Util::TRIPLE_NUM_MAX;
for(int i = 0; i < this->graph_var_num; i ++)
{
int tmp_size = (this->candidate_list[i]).size();
unsigned tmp_size = (this->candidate_list[i]).size();
if(tmp_size < min_size)
{
min_var = i;
min_size = tmp_size;
}
}
return min_var;
}
@ -1015,16 +1023,17 @@ int
BasicQuery::getVarID_MaxCandidateList()
{
int max_var = -1;
int max_size = -1;
unsigned max_size = 0;
for(int i = 0; i < this->graph_var_num; i ++)
{
int tmp_size = (this->candidate_list[i]).size();
unsigned tmp_size = (this->candidate_list[i]).size();
if(tmp_size > max_size)
{
max_var = i;
max_size = tmp_size;
}
}
return max_var;
}
@ -1032,7 +1041,7 @@ int
BasicQuery::getVarID_FirstProcessWhenJoin()
{
int min_var = -1;
int min_size = Util::TRIPLE_NUM_MAX;
unsigned min_size = Util::TRIPLE_NUM_MAX;
//int min_var2 = -1;
//int min_size2 = Util::TRIPLE_NUM_MAX;
for(int i = 0; i < this->graph_var_num; ++i)
@ -1049,7 +1058,7 @@ BasicQuery::getVarID_FirstProcessWhenJoin()
else
cout<<"var "<<i<<" is ready!"<<endl;
int tmp_size = (this->candidate_list[i]).size();
unsigned tmp_size = (this->candidate_list[i]).size();
//if(this->isLiteralVariable(i))
//{
//if(tmp_size < min_size2)
@ -1112,10 +1121,12 @@ string BasicQuery::triple_str()
stringstream _ss;
_ss<<"Triple num:"<<this->getTripleNum()<<endl;
for (int i=0;i<getTripleNum();i++)
for (int i = 0; i < getTripleNum(); i++)
{
_ss<<(this->getTriple(i).toString())<<endl;
}
return _ss.str();
}

View File

@ -120,7 +120,7 @@ private:
int retrieve_var_num;
string* var_name;
IDList* candidate_list;
vector<int*> result_list;
vector<unsigned*> result_list;
int* var_degree;
//whether has added the variable's literal candidate
@ -162,8 +162,8 @@ private:
//void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
//void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
void updateSubSig(int _sub_var_id, int _pre_id, int _obj_id, int _line_id, int _obj_var_id);
void updateObjSig(int _obj_var_id, int _pre_id, int _sub_id, int _line_id, int _sub_var_id);
void updateSubSig(int _sub_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _obj_id, int _line_id, int _obj_var_id);
void updateObjSig(int _obj_var_id, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _sub_id, int _line_id, int _sub_var_id);
//infos for predicate variables
vector<PreVar> pre_var;
@ -175,8 +175,8 @@ private:
map<int, int> selected_var_position;
public:
static const int MAX_VAR_NUM = 10;
static const int MAX_PRE_VAR_NUM = 10;
static const int MAX_VAR_NUM = 20;
static const int MAX_PRE_VAR_NUM = 20;
static const char NOT_JUST_SELECT = 'a';
static const char SELECT_VAR = 's';
@ -203,7 +203,7 @@ public:
int getIDByVarName(const string& _name);
// get triples number, also sentences number
int getTripleNum();
unsigned getTripleNum();
//check if a normal var is in select
bool isVarSelected(const std::string& _name) const;
@ -220,7 +220,7 @@ public:
int getEdgeNeighborID(int _var, int _i_th_edge);
// get the preID of the i-th edge of _var
int getEdgePreID(int _var, int _i_th_edge);
TYPE_PREDICATE_ID getEdgePreID(int _var, int _i_th_edge);
// get the type of the i-th edge of _var
char getEdgeType(int _var, int _i_th_edge);
@ -236,11 +236,11 @@ public:
// get the candidate list of _var in the query graph
IDList& getCandidateList(int _var);
int getCandidateSize(int _var);
unsigned getCandidateSize(int _var);
// get the result list of _var in the query graph
vector<int*>& getResultList();
vector<int*>* getResultListPointer();
vector<unsigned*>& getResultList();
vector<unsigned*>* getResultListPointer();
// get the entity signature of _var in the query graph
const EntityBitSet& getEntitySignature(int _var);

View File

@ -1828,7 +1828,7 @@ void GeneralEvaluation::dfsJoinableResultGraph(int x, vector < pair<char, int> >
int varnum = (int)temp->results[0].var.varset.size();
vector<int*> &basicquery_result =this->sparql_query.getBasicQuery(blockid).getResultList();
vector<unsigned*> &basicquery_result =this->sparql_query.getBasicQuery(blockid).getResultList();
int basicquery_result_num = (int)basicquery_result.size();
temp->results[0].res.reserve(basicquery_result_num);
@ -2191,7 +2191,7 @@ void GeneralEvaluation::queryRewriteEncodeRetrieveJoin(int dep)
sub_temp->results[0].var = Varset(encode_varset[i]);
int varnum = (int)encode_varset[i].size();
vector<int*> &basicquery_result = this->expansion_evaluation_stack[dep].sparql_query.getBasicQuery(i).getResultList();
vector<unsigned*> &basicquery_result = this->expansion_evaluation_stack[dep].sparql_query.getBasicQuery(i).getResultList();
int basicquery_result_num = (int)basicquery_result.size();
sub_temp->results[0].res.reserve(basicquery_result_num);
@ -2356,7 +2356,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &result_str)
}
else
{
vector <int> keys;
vector <unsigned> keys;
vector <bool> desc;
for (int i = 0; i < (int)this->query_tree.getOrder().size(); i++)
{
@ -2552,7 +2552,7 @@ void GeneralEvaluation::releaseResultStack()
delete results_id;
}
void GeneralEvaluation::prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, int &update_triple_num)
void GeneralEvaluation::prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, unsigned &update_triple_num)
{
update_pattern.getVarset();
@ -2613,3 +2613,4 @@ void GeneralEvaluation::prepareUpdateTriple(QueryTree::GroupPattern &update_patt
}
}
}

View File

@ -34,16 +34,16 @@ class GeneralEvaluation
std::vector <Varset> sparql_query_varset;
VSTree *vstree;
KVstore *kvstore;
TNUM* pre2num;
int limitID_predicate;
int limitID_literal;
TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
StringIndex *stringindex;
Strategy strategy;
ResultFilter result_filter;
bool need_output_answer;
public:
//Stores the given pointers and ID limits in the corresponding members and
//sets need_output_answer to false; the body is empty.
//diff view: the first signature (TNUM*/int) is the pre-change line, the second
//(TYPE_TRIPLE_NUM*/TYPE_PREDICATE_ID/TYPE_ENTITY_LITERAL_ID) replaces it.
//NOTE(review): the initializer list names stringindex before pre2num, but the
//members are declared with stringindex after limitID_literal. Initialization
//follows declaration order; this is harmless here because the initializers are
//independent, but compilers may warn (-Wreorder).
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal):
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal):
vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), need_output_answer(false)
{
}
@ -239,7 +239,7 @@ class GeneralEvaluation
void getFinalResult(ResultSet &result_str);
void releaseResultStack();
void prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, int &update_triple_num);
void prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, unsigned &update_triple_num);
};
#endif // _QUERY_GENERALEVALUATION_H

View File

@ -16,38 +16,40 @@ IDList::IDList()
}
//Returns the _i-th id of the list. If _i is out of range, returns the
//"no such element" value: -1 in the int version, INVALID in the unsigned version.
//diff view: the int signature and "return -1;" are the pre-change lines.
int
IDList::getID(int _i)const
unsigned
IDList::getID(unsigned _i) const
{
//the only bounds check: _i must be strictly less than the current size
if (this->size() > _i)
{
return this->id_list[_i];
}
return -1;
//return -1;
return INVALID;
}
//Appends _id to the end of the list and always returns true.
//No duplicate check is made and no ordering is maintained here.
bool
IDList::addID(int _id)
IDList::addID(unsigned _id)
{
//a check for duplicate case will be more reliable
this->id_list.push_back(_id);
return true;
}
//Returns the number of ids currently stored (id_list.size()).
//diff view: the return type changes from int to unsigned.
int
IDList::size()const
unsigned
IDList::size() const
{
return this->id_list.size();
}
//Returns true when the list holds no ids.
//diff view: only the spacing before "const" changes.
bool
IDList::empty()const
IDList::empty() const
{
return this->id_list.size() == 0;
}
bool
IDList::isExistID(int _id)const
IDList::isExistID(unsigned _id) const
{
// naive implementation of searching(linear search).
// you can use binary search when the id list is sorted, if necessary.
@ -62,15 +64,14 @@ IDList::isExistID(int _id)const
return false;
}
//Returns a read-only pointer to the internal vector of ids.
//The pointer refers to this object's own member; no copy is made.
//diff view: the element type changes from int to unsigned.
const vector<int>*
IDList::getList()const
const vector<unsigned>*
IDList::getList() const
{
return &(this->id_list);
}
int&
IDList::operator[](const int& _i)
unsigned&
IDList::operator[](const unsigned& _i)
{
if (this->size() > _i)
{
@ -105,7 +106,7 @@ IDList::clear()
}
//Replaces the whole content of this list with a copy of _new_idlist.
//diff view: the element type of the parameter changes from int to unsigned.
void
IDList::copy(const vector<int>& _new_idlist)
IDList::copy(const vector<unsigned>& _new_idlist)
{
this->id_list = _new_idlist;
}
@ -116,8 +117,8 @@ IDList::copy(const IDList* _new_idlist)
this->id_list = *(_new_idlist->getList());
}
int
IDList::intersectList(const int* _id_list, int _list_len)
unsigned
IDList::intersectList(const unsigned* _id_list, unsigned _list_len)
{
if (_id_list == NULL || _list_len == 0)
{
@ -160,9 +161,9 @@ IDList::intersectList(const int* _id_list, int _list_len)
{
case 0:
{ //this bracket is needed if vars are defined in case
int id_i = 0;
int index_move_forward = 0;
vector<int>::iterator it = this->id_list.begin();
unsigned id_i = 0;
unsigned index_move_forward = 0;
vector<unsigned>::iterator it = this->id_list.begin();
while (it != (this->id_list).end())
{
int can_id = *it;
@ -186,16 +187,16 @@ IDList::intersectList(const int* _id_list, int _list_len)
it++;
}
remove_number = this->id_list.size() - index_move_forward;
vector<int>::iterator new_end = this->id_list.begin() + index_move_forward;
vector<unsigned>::iterator new_end = this->id_list.begin() + index_move_forward;
(this->id_list).erase(new_end, this->id_list.end());
break;
}
case 1:
{
vector<int> new_id_list;
for (int i = 0; i < _list_len; ++i)
vector<unsigned> new_id_list;
for (unsigned i = 0; i < _list_len; ++i)
{
if (Util::bsearch_vec_uporder(_id_list[i], this->getList()) != -1)
if (Util::bsearch_vec_uporder(_id_list[i], this->getList()) != INVALID)
new_id_list.push_back(_id_list[i]);
}
this->id_list = new_id_list;
@ -204,11 +205,11 @@ IDList::intersectList(const int* _id_list, int _list_len)
}
case 2:
{
vector<int> new_id_list;
int m = this->id_list.size(), i;
vector<unsigned> new_id_list;
unsigned m = this->id_list.size(), i;
for (i = 0; i < m; ++i)
{
if (Util::bsearch_int_uporder(this->id_list[i], _id_list, _list_len) != -1)
if (Util::bsearch_int_uporder(this->id_list[i], _id_list, _list_len) != INVALID)
new_id_list.push_back(this->id_list[i]);
}
this->id_list = new_id_list;
@ -223,25 +224,25 @@ IDList::intersectList(const int* _id_list, int _list_len)
return remove_number;
}
//Convenience overload: copies the ids of _id_list into a temporary array,
//forwards to intersectList(const unsigned*, unsigned) and returns that call's
//result (named remove_number here). The temporary array is freed before returning.
//diff view: each int line is the pre-change form of the unsigned line after it.
int
unsigned
IDList::intersectList(const IDList& _id_list)
{
// copy _id_list to the temp array first.
int temp_list_len = _id_list.size();
int* temp_list = new int[temp_list_len];
unsigned temp_list_len = _id_list.size();
unsigned* temp_list = new unsigned[temp_list_len];
//BETTER:not to copy, just achieve here
//NOTE(review): getList() already exposes the ids as a contiguous vector, so the
//copy could presumably be avoided by passing its storage directly - confirm.
for (int i = 0; i < temp_list_len; i++)
for (unsigned i = 0; i < temp_list_len; i++)
{
temp_list[i] = _id_list.getID(i);
}
int remove_number = this->intersectList(temp_list, temp_list_len);
delete[]temp_list;
unsigned remove_number = this->intersectList(temp_list, temp_list_len);
delete[] temp_list;
return remove_number;
}
int
IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
unsigned
IDList::unionList(const unsigned* _id_list, unsigned _list_len, bool only_literal)
{
if (_id_list == NULL || _list_len == 0)
return 0;
@ -249,23 +250,25 @@ IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
if (only_literal)
{
//NOTICE:this means that the original is no literals and we need to add from a list(containing entities/literals)
int k = 0;
unsigned k = 0;
//NOTICE:literal id > entity id; the list is ordered
for (; k < _list_len; ++k)
if (Util::is_literal_ele(_id_list[k]))
break;
//TODO+BETTER: speed up the process to find the first literal
for (; k < _list_len; ++k)
this->addID(_id_list[k]);
return _list_len - k;
}
// O(n)
int origin_size = (this->id_list).size();
int* temp_list = new int[origin_size + _list_len];
int temp_list_len = 0;
unsigned origin_size = (this->id_list).size();
unsigned* temp_list = new unsigned[origin_size + _list_len];
unsigned temp_list_len = 0;
// union
{
int i = 0, j = 0;
unsigned i = 0, j = 0;
while (i < origin_size && j < _list_len)
{
if (this->id_list[i] == _id_list[j])
@ -298,7 +301,7 @@ IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
}
}
int add_number = temp_list_len - origin_size;
unsigned add_number = temp_list_len - origin_size;
// update this IDList
this->clear();
@ -344,24 +347,24 @@ IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
*/
}
//Convenience overload: copies the ids of _id_list into a temporary array,
//forwards to unionList(const unsigned*, unsigned, bool) with the same
//only_literal flag and returns that call's result. The temporary array is
//freed before returning.
//diff view: each int line is the pre-change form of the unsigned line after it.
int
unsigned
IDList::unionList(const IDList& _id_list, bool only_literal)
{
// copy _id_list to the temp array first.
int temp_list_len = _id_list.size();
int* temp_list = new int[temp_list_len];
unsigned temp_list_len = _id_list.size();
unsigned* temp_list = new unsigned[temp_list_len];
//BETTER:not to copy, just achieve here
for (int i = 0; i < temp_list_len; i++)
for (unsigned i = 0; i < temp_list_len; i++)
{
temp_list[i] = _id_list.getID(i);
}
int ret = this->unionList(temp_list, temp_list_len, only_literal);
unsigned ret = this->unionList(temp_list, temp_list_len, only_literal);
delete[] temp_list;
return ret;
}
IDList*
IDList::intersect(const IDList& _id_list, const int* _list, int _len)
IDList::intersect(const IDList& _id_list, const unsigned* _list, unsigned _len)
{
IDList* p = new IDList;
//if (_list == NULL || _len == 0) //just copy _id_list
@ -379,7 +382,7 @@ IDList::intersect(const IDList& _id_list, const int* _list, int _len)
//compare n(k+1) and nklogn: k0 = log(n/2)2 requiring that n>2
//k<=k0 binary search; k>k0 intersect
int method = -1; //0: intersect 1: search in vector 2: search in int*
int n = _id_list.size();
unsigned n = _id_list.size();
double k = 0;
if (n < _len)
{
@ -407,10 +410,10 @@ IDList::intersect(const IDList& _id_list, const int* _list, int _len)
case 0:
{ //this bracket is needed if vars are defined in case
int id_i = 0;
int num = _id_list.size();
for (int i = 0; i < num; ++i)
unsigned num = _id_list.size();
for (unsigned i = 0; i < num; ++i)
{
int can_id = _id_list.getID(i);
unsigned can_id = _id_list.getID(i);
while ((id_i < _len) && (_list[id_i] < can_id))
{
id_i++;
@ -431,20 +434,20 @@ IDList::intersect(const IDList& _id_list, const int* _list, int _len)
}
case 1:
{
for (int i = 0; i < _len; ++i)
for (unsigned i = 0; i < _len; ++i)
{
if (Util::bsearch_vec_uporder(_list[i], _id_list.getList()) != -1)
if (Util::bsearch_vec_uporder(_list[i], _id_list.getList()) != INVALID)
p->addID(_list[i]);
}
break;
}
case 2:
{
int m = _id_list.size(), i;
unsigned m = _id_list.size(), i;
for (i = 0; i < m; ++i)
{
int t = _id_list.getID(i);
if (Util::bsearch_int_uporder(t, _list, _len) != -1)
unsigned t = _id_list.getID(i);
if (Util::bsearch_int_uporder(t, _list, _len) != INVALID)
p->addID(t);
}
break;
@ -457,15 +460,16 @@ IDList::intersect(const IDList& _id_list, const int* _list, int _len)
return p;
}
//Truncates the list: removes every element from index i to the end, so that
//exactly i elements remain. It does NOT remove a single element.
//diff view: the return type changes from int (always 0) to bool (always true).
//NOTE(review): i is not checked against the current size; an i larger than
//size() builds an iterator past end(), which vector::erase does not allow.
int
IDList::erase(int i)
bool
IDList::erase(unsigned i)
{
id_list.erase(id_list.begin() + i, id_list.end());
return 0;
return true;
}
//Looks up _key in this list by delegating to Util::bsearch_vec_uporder and
//returns that helper's result unchanged.
//The helper name and the comparisons against INVALID elsewhere in this file
//suggest an ascending-order binary search returning INVALID on a miss - confirm in Util.
int
IDList::bsearch_uporder(int _key)
unsigned
IDList::bsearch_uporder(unsigned _key)
{
return Util::bsearch_vec_uporder(_key, this->getList());
}

View File

@ -15,31 +15,31 @@ class IDList
{
//Interface of IDList, a list of ids stored in a std::vector.
//diff view: wherever an int-based declaration is followed by an unsigned-based
//twin, the int line is the pre-change form and the unsigned line replaces it.
public:
IDList();
int getID(int _i)const;
bool addID(int _id);
//element access by index, and append
unsigned getID(unsigned _i) const;
bool addID(unsigned _id);
//check whether _id exists in this IDList.
bool isExistID(int _id) const;
int size() const;
bool isExistID(unsigned _id) const;
unsigned size() const;
bool empty() const;
const std::vector<int>* getList()const;
int& operator[] (const int & _i);
//read-only access to the underlying vector, and mutable element access
const std::vector<unsigned>* getList()const;
unsigned& operator[] (const unsigned & _i);
std::string to_str();
int sort();
void clear();
void copy(const std::vector<int>& _new_idlist);
void copy(const std::vector<unsigned>& _new_idlist);
void copy(const IDList* _new_idlist);
// intersect/union _id_list to this IDList, note that the two list must be ordered before using these two functions.
int intersectList(const int* _id_list, int _list_len);
int intersectList(const IDList&);
int unionList(const int* _id_list, int _list_len, bool only_literal=false);
int unionList(const IDList&, bool only_literal=false);
int bsearch_uporder(int _key);
static IDList* intersect(const IDList&, const int*, int);
unsigned intersectList(const unsigned* _id_list, unsigned _list_len);
unsigned intersectList(const IDList&);
unsigned unionList(const unsigned* _id_list, unsigned _list_len, bool only_literal=false);
unsigned unionList(const IDList&, bool only_literal=false);
unsigned bsearch_uporder(unsigned _key);
//builds a new heap-allocated IDList; the caller presumably owns the result - confirm
static IDList* intersect(const IDList&, const unsigned*, unsigned);
private:
std::vector<int> id_list;
int erase(int i);
//the stored ids
std::vector<unsigned> id_list;
//removes the elements from index i to the end (see IDList.cpp)
bool erase(unsigned i);
};
#endif //_QUERY_IDLIST_H

View File

@ -23,7 +23,7 @@ void ResultFilter::changeResultHashTable(SPARQLquery &query, int value)
for (int i = 0; i < query.getBasicQueryNum(); i++)
{
BasicQuery &basicquery = query.getBasicQuery(i);
vector<int*> &basicquery_result =basicquery.getResultList();
vector<unsigned*> &basicquery_result =basicquery.getResultList();
int result_num = basicquery_result.size();
int var_num = basicquery.getVarNum();
@ -79,3 +79,4 @@ void ResultFilter::candFilterWithResultHashTable(BasicQuery &basicquery)
}
}
}

View File

@ -13,11 +13,13 @@
#include "SPARQLquery.h"
#include "../Util/Util.h"
//TODO: adjust the type to unsigned
class ResultFilter
{
private:
static const int MAX_SIZE = 1048576;
inline int hash(unsigned int x)
inline int hash(unsigned x)
{
x = (x + 0x7ed55d16) + (x << 12);
x = (x ^ 0xc761c23c) ^ (x >> 19);

View File

@ -27,7 +27,7 @@ ResultSet::~ResultSet()
delete[] this->var_name;
if (!this->useStream)
{
for(int i = 0; i < this->ansNum; i++)
for(unsigned i = 0; i < this->ansNum; i++)
{
delete[] this->answer[i];
}
@ -68,7 +68,7 @@ ResultSet::checkUseStream()
}
void
ResultSet::setOutputOffsetLimit(int _output_offset, int _output_limit)
ResultSet::setOutputOffsetLimit(unsigned _output_offset, unsigned _output_limit)
{
this->output_offset = _output_offset;
this->output_limit = _output_limit;
@ -89,7 +89,7 @@ ResultSet::setVar(const vector<string> & _var_names)
string
ResultSet::to_str()
{
int ans_num = max(this->ansNum - this->output_offset, 0);
unsigned ans_num = max((long long)this->ansNum - this->output_offset, (long long)0);
if (this->output_limit != -1)
ans_num = min(ans_num, this->output_limit);
if(ans_num == 0)
@ -111,7 +111,7 @@ ResultSet::to_str()
this->resetStream();
const Bstr* bp;
for(int i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
for(unsigned i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
{
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;
@ -161,7 +161,7 @@ ResultSet::to_JSON()
this->resetStream();
const Bstr* bp;
for(int i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
for(unsigned i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
{
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;
@ -234,7 +234,7 @@ ResultSet::output(FILE* _fp)
{
if (this->useStream)
{
int ans_num = max(this->ansNum - this->output_offset, 0);
unsigned ans_num = max((long long)this->ansNum - this->output_offset, (long long)0);
if (this->output_limit != -1)
ans_num = min(ans_num, this->output_limit);
if(ans_num == 0)
@ -251,7 +251,7 @@ ResultSet::output(FILE* _fp)
fprintf(_fp, "\n");
const Bstr* bp;
for(int i = 0; i < this->ansNum; i++)
for(unsigned i = 0; i < this->ansNum; i++)
{
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;
@ -275,7 +275,7 @@ ResultSet::output(FILE* _fp)
}
void
ResultSet::openStream(std::vector<int> &_keys, std::vector<bool> &_desc)
ResultSet::openStream(vector<unsigned> &_keys, vector<bool> &_desc)
{
if (this->useStream)
{

View File

@ -21,9 +21,9 @@ private:
public:
int select_var_num;
std::string* var_name;
int ansNum;
unsigned ansNum;
std::string** answer;
int output_offset, output_limit;
unsigned output_offset, output_limit;
ResultSet();
~ResultSet();
@ -31,7 +31,7 @@ public:
void setUseStream();
bool checkUseStream();
void setOutputOffsetLimit(int _output_offset, int _output_limit);
void setOutputOffsetLimit(unsigned _output_offset, unsigned _output_limit);
//convert to binary string
//Bstr* to_bstr();
@ -44,7 +44,7 @@ public:
void setVar(const std::vector<std::string> & _var_names);
//operations on private stream from caller
void openStream(std::vector<int> &_keys, std::vector<bool> &_desc);
void openStream(std::vector<unsigned> &_keys, std::vector<bool> &_desc);
void resetStream();
void writeToStream(std::string& _s);
const Bstr* getOneRecord();

View File

@ -36,3 +36,4 @@ class Varset
#endif // _QUERY_VARSET_H

View File

@ -13,10 +13,11 @@ using namespace std;
//Default constructor: clears every bit of the signature and marks the entry
//as having no entity id.
//diff view: "entity_id = -1" is the pre-change line; the updated code assigns
//INVALID_ENTITY_LITERAL_ID and keeps the old assignment as a comment.
SigEntry::SigEntry()
{
(this->sig).entityBitSet.reset();
this->entity_id = -1;
this->entity_id = INVALID_ENTITY_LITERAL_ID;
//this->entity_id = -1;
}
SigEntry::SigEntry(int _entity_id, EntityBitSet& _bitset)
SigEntry::SigEntry(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet& _bitset)
{
this->entity_id = _entity_id;
(this->sig).entityBitSet |= _bitset;
@ -28,7 +29,7 @@ SigEntry::SigEntry(const SigEntry& _sig_entry)
this->entity_id = _sig_entry.entity_id;
}
SigEntry::SigEntry(const EntitySig& _sig, int _entity_id)
SigEntry::SigEntry(const EntitySig& _sig, TYPE_ENTITY_LITERAL_ID _entity_id)
{
this->sig = _sig;
this->entity_id = _entity_id;
@ -40,7 +41,7 @@ SigEntry::getEntitySig() const
return this->sig;
}
int
TYPE_ENTITY_LITERAL_ID
SigEntry::getEntityId() const
{
return this->entity_id;

View File

@ -17,14 +17,14 @@ class SigEntry
private:
EntitySig sig;
//-1 if not in leaf node
int entity_id;
TYPE_ENTITY_LITERAL_ID entity_id;
public:
SigEntry();
SigEntry(int _entity_id, EntityBitSet& _bitset);
SigEntry(TYPE_ENTITY_LITERAL_ID _entity_id, EntityBitSet& _bitset);
SigEntry(const SigEntry& _sig_entry);
SigEntry(const EntitySig& sig, int _entity_id);
SigEntry(const EntitySig& sig, TYPE_ENTITY_LITERAL_ID _entity_id);
const EntitySig& getEntitySig() const;
int getEntityId() const;
TYPE_ENTITY_LITERAL_ID getEntityId() const;
int getSigCount() const;
SigEntry& operator=(const SigEntry _sig_entry);
SigEntry& operator|=(const SigEntry _sig_entry);

View File

@ -32,7 +32,7 @@ Signature::BitSet2str(const EntityBitSet& _bitset)
}
void
Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, int _pre_id, int _neighbor_id, const char _type)
Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type)
{
Signature::encodePredicate2Entity(_entity_bs, _pre_id, _type);
@ -47,29 +47,29 @@ Signature::encodeEdge2Entity(EntityBitSet& _entity_bs, int _pre_id, int _neighbo
}
//Sets one bit of _entity_bs that encodes the predicate _pre_id of an edge.
//The bit always lies at or after STR_SIG_LENGTH, i.e. in the part of the
//signature that follows the string (neighbor) bits.
//  _entity_bs : signature to update in place
//  _pre_id    : predicate id to encode
//  _type      : edge direction; Util::EDGE_OUT selects the second half of the intervals
//diff view: each int line is the pre-change form of the unsigned line after it;
//the "@ ..." line in the middle is a hunk header of the diff, not code.
void
Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, int _pre_id, const char _type)
Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, const char _type)
{
//NOTICE:this not used now
if (Signature::PREDICATE_ENCODE_METHOD == 0)
{
//WARN:change if need to use again, because the encoding method has changed now!
int pos = ((_pre_id + 10) % Signature::EDGE_SIG_LENGTH) + Signature::STR_SIG_LENGTH;
unsigned pos = ((_pre_id + 10) % Signature::EDGE_SIG_LENGTH) + Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
}
else
{
//NOTICE: in * maybe the int will overflow
//NOTICE: in * maybe the unsigned will overflow
//widen to long long so that id * 5003 below cannot overflow a 32-bit type
long long id = _pre_id;
//seed_num picks the interval: [0, HALF) for one direction, [HALF, 2*HALF) for EDGE_OUT
int seed_num = id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
unsigned seed_num = id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
if (_type == Util::EDGE_OUT)
{
seed_num += Signature::EDGE_SIG_INTERVAL_NUM_HALF;
}
//int primeSize = 5;
//int prime1[]={5003,5009,5011,5021,5023};
//int prime2[]={49943,49957,49991,49993,49999};
//unsigned primeSize = 5;
//unsigned prime1[]={5003,5009,5011,5021,5023};
//unsigned prime2[]={49943,49957,49991,49993,49999};
//NOTICE: more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
// when the data set is big enough, cutting down the size of candidate list should come up to our primary consideration.
@ -77,49 +77,49 @@ Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, int _pre_id, const c
// also, when the data set is small, hash conflicts can hardly happen.
// therefore, I think using 2 primes(set up two ones in bitset) is enough.
// --by hanshuo.
//int primeSize = 2;
//int prime1[] = {5003, 5011};
//int prime2[] = {49957, 49993};
//unsigned primeSize = 2;
//unsigned prime1[] = {5003, 5011};
//unsigned prime2[] = {49957, 49993};
//for(int i = 0; i < primeSize; i++)
//for(unsigned i = 0; i < primeSize; i++)
//{
//int seed = _pre_id * prime1[i] % prime2[i];
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//unsigned seed = _pre_id * prime1[i] % prime2[i];
//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//_entity_bs.set(pos);
//}
//active encoding: a single hash (5003 / 49957) gives the offset inside the chosen interval
int seed = id * 5003 % 49957;
int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
unsigned seed = id * 5003 % 49957;
unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
_entity_bs.set(pos);
}
}
//void
//Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
//Signature::encodePredicate2Edge(unsigned _pre_id, EdgeBitSet& _edge_bs)
//{
//if (Signature::PREDICATE_ENCODE_METHOD == 0)
//{
//int pos = (_pre_id + 10) % Signature::EDGE_SIG_LENGTH;
//unsigned pos = (_pre_id + 10) % Signature::EDGE_SIG_LENGTH;
//_edge_bs.set(pos);
//}
//else
//{
//int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
////int primeSize = 5;
////int prime1[]={5003,5009,5011,5021,5023};
////int prime2[]={49943,49957,49991,49993,49999};
//unsigned seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
////unsigned primeSize = 5;
////unsigned prime1[]={5003,5009,5011,5021,5023};
////unsigned prime2[]={49943,49957,49991,49993,49999};
////int primeSize = 2;
////int prime1[] = {5003,5011};
////int prime2[] = {49957,49993};
////unsigned primeSize = 2;
////unsigned prime1[] = {5003,5011};
////unsigned prime2[] = {49957,49993};
////for (int i = 0; i < primeSize; i++)
////for (unsigned i = 0; i < primeSize; i++)
////{
////int seed = _pre_id * prime1[i] % prime2[i];
////int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
////unsigned seed = _pre_id * prime1[i] % prime2[i];
////unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
////_edge_bs.set(pos);
////}
//int seed = _pre_id * 5003 % 49957;
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//unsigned seed = _pre_id * 5003 % 49957;
//unsigned pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//_edge_bs.set(pos);
//}
//}
@ -127,11 +127,11 @@ Signature::encodePredicate2Entity(EntityBitSet& _entity_bs, int _pre_id, const c
//NOTICE: no need to encode itself because only variable in query need to be filtered!
//So only consider all neighbors!
void
Signature::encodeStr2Entity(EntityBitSet& _entity_bs, int _neighbor_id, const char _type)
Signature::encodeStr2Entity(EntityBitSet& _entity_bs, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type)
{
//NOTICE: we assume the parameter is always valid(invalid args should not be passed here)
long long id = _neighbor_id;
//NOTICE: in * maybe the int will overflow
//NOTICE: in * maybe the unsigned will overflow
long long seed = id * 5003 % 49957;
seed = seed % Signature::STR_SIG_INTERVAL_BASE;
seed = seed + (id % Signature::STR_SIG_INTERVAL_NUM) * Signature::STR_SIG_INTERVAL_BASE;
@ -159,14 +159,14 @@ Signature::encodeStr2Entity(EntityBitSet& _entity_bs, int _neighbor_id, const ch
//_str is subject or object or literal
//if (strlen(_str) >0 && _str[0] == '?')
//return;
//int length = (int)strlen(_str);
//unsigned int hashKey = 0;
//unsigned int pos = 0;
//unsigned length = (unsigned)strlen(_str);
//unsigned hashKey = 0;
//unsigned pos = 0;
//char *str2 = (char*)calloc(length + 1, sizeof(char));
//strcpy(str2, _str);
//char *str = str2;
//unsigned base = Signature::STR_SIG_BASE * (Signature::HASH_NUM - 1);
//for (int i = Signature::HASH_NUM - 1; i >= 0; --i)
//for (unsigned i = Signature::HASH_NUM - 1; i >= 0; --i)
//{
//HashFunction hf = Util::hash[i];
//if (hf == NULL)
@ -198,7 +198,7 @@ Signature::encodeStr2Entity(EntityBitSet& _entity_bs, int _neighbor_id, const ch
}
//void
//Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs)
//Signature::encodeStrID2Entity(unsigned _str_id, EntityBitSet& _entity_bs)
//{
////NOT USED NOW
//}

View File

@ -24,13 +24,13 @@ public:
//static HashFunction hash[HashNum];
//must make sure: ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH
static const int STR_SIG_INTERVAL_NUM = 20;
//static const int STR_SIG_INTERVAL_NUM = 16;
static const int STR_SIG_INTERVAL_BASE = 10;
static const int STR_SIG_LITERAL = STR_SIG_INTERVAL_NUM * STR_SIG_INTERVAL_BASE;
static const int STR_SIG_ENTITY = STR_SIG_LITERAL * 2;
static const unsigned STR_SIG_INTERVAL_NUM = 20;
//static const unsigned STR_SIG_INTERVAL_NUM = 16;
static const unsigned STR_SIG_INTERVAL_BASE = 10;
static const unsigned STR_SIG_LITERAL = STR_SIG_INTERVAL_NUM * STR_SIG_INTERVAL_BASE;
static const unsigned STR_SIG_ENTITY = STR_SIG_LITERAL * 2;
//here we divide as entity neighbors and literal neighbors: ENTITY(in and out), LITERAL(only for out edges)
static const int STR_SIG_LENGTH = STR_SIG_ENTITY + STR_SIG_LITERAL; //600
static const unsigned STR_SIG_LENGTH = STR_SIG_ENTITY + STR_SIG_LITERAL; //600
//NOTICE: after vstree filter, all constant neighbors will be used again to do precise filtering
//however, only a few constant predicates will be used again for filtering later
@ -39,15 +39,15 @@ public:
//str filter is more important in VSTree than predicate, because
//a predicate may correspond to a lot of entities and predicate num is usually small
static const int EDGE_SIG_INTERVAL_NUM_HALF = 10; //in edge or out edge
//static const int EDGE_SIG_INTERVAL_NUM_HALF = 16; //in edge or out edge
static const int EDGE_SIG_INTERVAL_NUM = 2 * EDGE_SIG_INTERVAL_NUM_HALF;
static const int EDGE_SIG_INTERVAL_BASE = 10;
static const int EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //200
//static const int EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE;
static const unsigned EDGE_SIG_INTERVAL_NUM_HALF = 10; //in edge or out edge
//static const unsigned EDGE_SIG_INTERVAL_NUM_HALF = 16; //in edge or out edge
static const unsigned EDGE_SIG_INTERVAL_NUM = 2 * EDGE_SIG_INTERVAL_NUM_HALF;
static const unsigned EDGE_SIG_INTERVAL_BASE = 10;
static const unsigned EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //200
//static const unsigned EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE;
static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH; //1000
//static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH; //1000
//static const unsigned ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
//QUERY: the num of bitset must be based on 16, i.e. unsigned short? 1000 is not allowed
//but 800, 500 is ok
@ -58,12 +58,12 @@ public:
static std::string BitSet2str(const EntityBitSet& _bitset);
//NOTICE: there are two predicate encoding method now, see the encoding functions @Signature.cpp for details
const static int PREDICATE_ENCODE_METHOD = 1;
static void encodePredicate2Entity(EntityBitSet& _entity_bs, int _pre_id, const char _type);
static void encodeStr2Entity(EntityBitSet& _entity_bs, int _neighbor_id, const char _type);
static void encodeEdge2Entity(EntityBitSet& _entity_bs, int _pre_id, int _neighbor_id, const char _type);
//static void encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs);
//static void encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs);
const static unsigned PREDICATE_ENCODE_METHOD = 1;
static void encodePredicate2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, const char _type);
static void encodeStr2Entity(EntityBitSet& _entity_bs, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type);
static void encodeEdge2Entity(EntityBitSet& _entity_bs, TYPE_PREDICATE_ID _pre_id, TYPE_ENTITY_LITERAL_ID _neighbor_id, const char _type);
//static void encodeStrID2Entity(unsigned _str_id, EntityBitSet& _entity_bs);
//static void encodePredicate2Edge(unsigned _pre_id, EdgeBitSet& _edge_bs);
//Signature()
//{
@ -109,7 +109,7 @@ public:
bool operator!=(const EntitySig& _sig)const;
EntitySig& operator=(const EntitySig& _sig);
const EntityBitSet& getBitset()const;
void encode(const char * _str, int _pre_id);
void encode(const char * _str, TYPE_PREDICATE_ID _pre_id);
std::string to_str() const;
};

View File

@ -345,7 +345,7 @@ void StringIndex::trySequenceAccess()
this->predicate.trySequenceAccess();
}
void StringIndex::change(std::vector<int> &ids, KVstore &kv_store, bool is_entity_or_literal)
void StringIndex::change(std::vector<unsigned> &ids, KVstore &kv_store, bool is_entity_or_literal)
{
if (is_entity_or_literal)
{
@ -364,7 +364,7 @@ void StringIndex::change(std::vector<int> &ids, KVstore &kv_store, bool is_entit
}
}
void StringIndex::disable(std::vector<int> &ids, bool is_entity_or_literal)
void StringIndex::disable(std::vector<unsigned> &ids, bool is_entity_or_literal)
{
if (is_entity_or_literal)
{

View File

@ -12,6 +12,8 @@
#include "../KVstore/KVstore.h"
#include "../Util/Util.h"
//TODO: adjust the type
class StringIndexFile
{
public:
@ -124,8 +126,8 @@ class StringIndex
void addRequest(int id, std::string *str, bool is_entity_or_literal = true);
void trySequenceAccess();
void change(std::vector<int> &ids, KVstore &kv_store, bool is_entity_or_literal = true);
void disable(std::vector<int> &ids, bool is_entity_or_literal = true);
void change(std::vector<unsigned> &ids, KVstore &kv_store, bool is_entity_or_literal = true);
void disable(std::vector<unsigned> &ids, bool is_entity_or_literal = true);
};
#endif // _STRING_INDEX_H

View File

@ -17,13 +17,20 @@ Bstr::Bstr()
this->str = NULL;
}
Bstr::Bstr(const char* _str, unsigned _len)
Bstr::Bstr(const char* _str, unsigned _len, bool _nocopy)
{
//WARN: if need a string .please add '\0' in your own!
this->length = _len;
//DEBUG:if copy memory?
//this->str = _str; //not valid:const char* -> char*
this->str = (char*)malloc(_len);
//if(_nocopy)
//{
//this->str = _str; //not valid:const char* -> char*
//return;
//}
//NOTICE: we decide to use new/delete in global area
//this->str = (char*)malloc(_len);
this->str = new char[_len];
memcpy(this->str, _str, sizeof(char) * _len);
//this->str[_len]='\0';
}
@ -116,6 +123,15 @@ Bstr::operator != (const Bstr& _bstr)
unsigned
Bstr::getLen() const
{
//WARN: we should not include too complicate logic here!!!!
//NOTICE: this is for VList
//if(this->isBstrLongList())
////if(this->str == NULL)
//{
//return 0;
//}
return length;
}
@ -146,15 +162,18 @@ Bstr::copy(const Bstr* _bp)
this->length = _bp->getLen();
//DEBUG!!!
//cerr<<"bstr length: "<<this->length<<endl;
this->str = (char*)malloc(this->length);
memcpy(this->str, _bp->getStr(), this->length);
//this->str = (char*)malloc(this->length);
this->str = new char[this->length];
memcpy(this->str, _bp->getStr(), sizeof(char) * this->length);
}
void
Bstr::copy(const char* _str, unsigned _len)
{
this->length = _len;
this->str = (char*)malloc(this->length);
//this->str = (char*)malloc(this->length);
this->str = new char[this->length];
memcpy(this->str, _str, this->length);
}
@ -168,7 +187,8 @@ Bstr::clear()
void
Bstr::release()
{
free(this->str); //ok to be null, do nothing
//free(this->str); //ok to be null, do nothing
delete[] this->str;
clear();
}
@ -203,3 +223,9 @@ Bstr::print(string s) const
//#endif
}
bool
Bstr::isBstrLongList() const
{
	//a Bstr without an in-memory buffer (str is NULL) is reported as a long list
	const bool no_buffer = (NULL == this->str);
	return no_buffer;
}

View File

@ -18,13 +18,14 @@ class Bstr
{
private:
char* str; //pointers consume 8 byte in 64-bit system
//TODO: the length maybe not needed
unsigned length;
public:
Bstr();
//if copy memory, then use const char*, but slow
//else, can not use const char* -> char*
Bstr(const char* _str, unsigned _len);
Bstr(const char* _str, unsigned _len, bool _nocopy = false);
//Bstr(char* _str, unsigned _len);
Bstr(const Bstr& _bstr);
//Bstr& operate = (const Bstr& _bstr);
@ -47,6 +48,9 @@ public:
//int write(FILE* _fp);
~Bstr();
void print(std::string s) const; //DEBUG
//judge if this Bstr represents a long list value, whose content is not kept here but read each time on need
bool isBstrLongList() const;
};
#endif // _UTIL_BSTR_H

View File

@ -41,7 +41,7 @@ Stream::Stream()
this->init();
}
Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag)
Stream::Stream(std::vector<TYPE_ENTITY_LITERAL_ID>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag)
{
this->init();
#ifdef DEBUG_STREAM
@ -58,7 +58,8 @@ Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rown
this->record_size = new unsigned[this->colnum];
for(unsigned i = 0; i < this->colnum; ++i)
{
this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE));
char* tmptr = new char[Util::TRANSFER_SIZE];
this->record[i].setStr(tmptr);
this->record_size[i] = Util::TRANSFER_SIZE;
}
@ -148,7 +149,8 @@ Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx)
if(length + 1 > this->record_size[_idx])
{
this->record[_idx].release();
this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char)));
char* tmptr = new char[length+1];
this->record[_idx].setStr(tmptr);
this->record_size[_idx] = length + 1; //one more byte: convenient to add \0
}
@ -187,7 +189,8 @@ Stream::outputCache()
{
unsigned len;
fread(&len, sizeof(unsigned), 1, this->tempfp);
char* p = (char*)malloc(len * sizeof(char));
//char* p = (char*)malloc(len * sizeof(char));
char* p = new char[len];
fread(p, sizeof(char), len, this->tempfp);
bp[i].setLen(len);
bp[i].setStr(p);
@ -320,13 +323,16 @@ Stream::read()
//FILE* fp = (FILE*)(this->ans);
for(unsigned i = 0; i < this->colnum; ++i)
{
//BETTER:alloca and reuse the space in Bstr?
//BETTER:alloc and reuse the space in Bstr?
unsigned len;
fread(&len, sizeof(unsigned), 1, this->ansDisk);
char* s = (char*)calloc(len + 1, sizeof(char));
//char* s = (char*)calloc(len + 1, sizeof(char));
char* s = new char[len+1];
fread(s, sizeof(char), len, this->ansDisk);
s[len] = '\0';
this->copyToRecord(s, len, i);
free(s);
//free(s);
delete[] s;
}
}
this->xpos++;
@ -420,7 +426,9 @@ Stream::mergeSort()
#endif
break;
}
s = (char*)malloc(sizeof(char) * len);
//s = (char*)malloc(sizeof(char) * len);
s = new char[len];
fread(s, sizeof(char), len, tp);
bp[i].setLen(len);
bp[i].setStr(s);

View File

@ -38,17 +38,17 @@
struct ResultCmp
{
int result_len;
std::vector<int> keys;
std::vector<TYPE_ENTITY_LITERAL_ID> keys;
std::vector<bool> desc;
//ResultCmp(int _l):result_len(_l){}
ResultCmp()
{
this->result_len = 0;
}
ResultCmp(int _l, std::vector<int>& _keys, std::vector<bool> &_desc)
ResultCmp(int _l, std::vector<TYPE_ENTITY_LITERAL_ID>& _keys, std::vector<bool> &_desc)
{
this->result_len = _l;
this->keys = std::vector<int>(_keys);
this->keys = std::vector<TYPE_ENTITY_LITERAL_ID>(_keys);
this->desc = std::vector<bool>(_desc);
}
bool operator() (Bstr* const& a, Bstr* const& b)
@ -140,7 +140,7 @@ public:
static const unsigned BASE_MEMORY_LIMIT = 1 << 30;
Stream();
Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag);
Stream(std::vector<TYPE_ENTITY_LITERAL_ID>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag);
//read/write should be based on the unit of record

View File

@ -48,6 +48,10 @@ map<string, string> Util::global_config;
//==================================================================================================================
string Util::gserver_port_file = "bin/.gserver_port";
string Util::gserver_port_swap = "bin/.gserver_port.swap";
string Util::gserver_log = "logs/gserver.log";
//NOTICE:used in Database, Join and Strategy
//int Util::triple_num = 0;
//int Util::pre_num = 0;
@ -441,20 +445,27 @@ Util::memoryLeft()
}
bool
Util::is_literal_ele(int _id)
Util::is_literal_ele(TYPE_ENTITY_LITERAL_ID _id)
{
return _id >= Util::LITERAL_FIRST_ID;
}
bool
Util::is_entity_ele(TYPE_ENTITY_LITERAL_ID id)
{
	//IDs starting from LITERAL_FIRST_ID are not entity IDs
	if(id >= Util::LITERAL_FIRST_ID)
	{
		return false;
	}
	return true;
}
//NOTICE: require that the list is ordered
int
Util::removeDuplicate(int* _list, int _len)
unsigned
Util::removeDuplicate(unsigned* _list, unsigned _len)
{
if (_list == NULL || _len == 0) {
return 0;
}
int valid = 0, limit = _len - 1;
for(int i = 0; i < limit; ++i)
unsigned valid = 0, limit = _len - 1;
for(unsigned i = 0; i < limit; ++i)
{
if(_list[i] != _list[i+1])
{
@ -462,6 +473,7 @@ Util::removeDuplicate(int* _list, int _len)
}
}
_list[valid++] = _list[limit];
return valid;
}
@ -471,25 +483,47 @@ Util::cmp_int(const void* _i1, const void* _i2)
return *(int*)_i1 - *(int*)_i2;
}
void
Util::sort(int*& _id_list, int _list_len)
int
Util::cmp_unsigned(const void* _i1, const void* _i2)
{
qsort(_id_list, _list_len, sizeof(int), Util::cmp_int);
unsigned t1 = *(unsigned*)_i1;
unsigned t2 = *(unsigned*)_i2;
if(t1 > t2)
{
return 1;
}
else if(t1 == t2)
{
return 0;
}
else //t1 < t2
{
return -1;
}
}
int
Util::bsearch_int_uporder(int _key, const int* _array,int _array_num)
void
Util::sort(unsigned*& _id_list, unsigned _list_len)
{
qsort(_id_list, _list_len, sizeof(unsigned), Util::cmp_unsigned);
}
unsigned
Util::bsearch_int_uporder(unsigned _key, const unsigned* _array, unsigned _array_num)
{
if (_array_num == 0)
{
return -1;
//return -1;
return INVALID;
}
if (_array == NULL)
{
return -1;
//return -1;
return INVALID;
}
int _first = _array[0];
int _last = _array[_array_num - 1];
unsigned _first = _array[0];
unsigned _last = _array[_array_num - 1];
if (_last == _key)
{
@ -498,13 +532,14 @@ Util::bsearch_int_uporder(int _key, const int* _array,int _array_num)
if (_last < _key || _first > _key)
{
return -1;
//return -1;
return INVALID;
}
int low = 0;
int high = _array_num - 1;
unsigned low = 0;
unsigned high = _array_num - 1;
int mid;
unsigned mid;
while (low <= high)
{
mid = (high - low) / 2 + low;
@ -521,19 +556,22 @@ Util::bsearch_int_uporder(int _key, const int* _array,int _array_num)
low = mid + 1;
}
}
return -1;
//return -1;
return INVALID;
}
bool
Util::bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len)
Util::bsearch_preid_uporder(TYPE_PREDICATE_ID _preid, unsigned* _pair_idlist, unsigned _list_len)
{
if(_list_len == 0)
{
return false;
}
int pair_num = _list_len / 2;
int _first = _pair_idlist[2*0 + 0];
int _last = _pair_idlist[2*(pair_num-1) + 0];
//NOTICE: if list len > 0, then it must >= 2, so pair num >= 1
unsigned pair_num = _list_len / 2;
unsigned _first = _pair_idlist[2*0 + 0];
unsigned _last = _pair_idlist[2*(pair_num-1) + 0];
if(_preid == _last)
{
@ -546,9 +584,9 @@ Util::bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len)
return false;
}
int low = 0;
int high = pair_num - 1;
int mid;
unsigned low = 0;
unsigned high = pair_num - 1;
unsigned mid;
while(low <= high)
{
@ -571,17 +609,18 @@ Util::bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len)
return false;
}
int
Util::bsearch_vec_uporder(int _key, const vector<int>* _vec)
unsigned
Util::bsearch_vec_uporder(unsigned _key, const vector<unsigned>* _vec)
{
int tmp_size = _vec->size();
unsigned tmp_size = _vec->size();
if (tmp_size == 0)
{
return -1;
//return -1;
return INVALID;
}
int _first = (*_vec)[0];
int _last = (*_vec)[tmp_size - 1];
unsigned _first = (*_vec)[0];
unsigned _last = (*_vec)[tmp_size - 1];
if (_key == _last)
{
@ -591,12 +630,13 @@ Util::bsearch_vec_uporder(int _key, const vector<int>* _vec)
bool not_find = (_last < _key || _first > _key);
if (not_find)
{
return -1;
//return -1;
return INVALID;
}
int low = 0;
int high = tmp_size - 1;
int mid;
unsigned low = 0;
unsigned high = tmp_size - 1;
unsigned mid;
while (low <= high)
{
@ -615,17 +655,20 @@ Util::bsearch_vec_uporder(int _key, const vector<int>* _vec)
low = mid + 1;
}
}
return -1;
//return -1;
return INVALID;
}
string
Util::result_id_str(vector<int*>& _v, int _var_num)
Util::result_id_str(vector<unsigned*>& _v, int _var_num)
{
stringstream _ss;
for(unsigned i = 0; i < _v.size(); i ++)
unsigned size = _v.size();
for(unsigned i = 0; i < size; ++i)
{
int* _p_int = _v[i];
unsigned* _p_int = _v[i];
_ss << "[";
for(int j = 0; j < _var_num-1; j ++)
{
@ -637,10 +680,18 @@ Util::result_id_str(vector<int*>& _v, int _var_num)
return _ss.str();
}
bool
Util::dir_exist(const string _dir)
{
return (opendir(_dir.c_str()) != NULL);
DIR* dirptr = opendir(_dir.c_str());
if(dirptr != NULL)
{
closedir(dirptr);
return true;
}
return false;
}
bool
@ -1188,10 +1239,11 @@ Util::logarithm(double _a, double _b)
return -1.0;
}
void
Util::intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2)
Util::intersect(unsigned*& _id_list, unsigned& _id_list_len, const unsigned* _list1, unsigned _len1, const unsigned* _list2, unsigned _len2)
{
vector<int> res;
vector<unsigned> res;
//cout<<"intersect prevar: "<<_len1<<" "<<_len2<<endl;
if(_list1 == NULL || _len1 == 0 || _list2 == NULL || _len2 == 0)
{
@ -1206,7 +1258,7 @@ Util::intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1,
//compare n(k+1) and nklogn: k0 = log(n/2)2 requiring that n>2
//k<=k0 binary search; k>k0 intersect
int method = -1; //0: intersect 1: search in list1 2: search in list2
int n = _len1;
unsigned n = _len1;
double k = 0;
if(n < _len2)
{
@ -1232,11 +1284,11 @@ Util::intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1,
{
case 0:
{ //this bracket is needed if vars are defined in case
int id_i = 0;
int num = _len1;
for(int i = 0; i < num; ++i)
unsigned id_i = 0;
unsigned num = _len1;
for(unsigned i = 0; i < num; ++i)
{
int can_id = _list1[i];
unsigned can_id = _list1[i];
while((id_i < _len2) && (_list2[id_i] < can_id))
{
id_i ++;
@ -1257,20 +1309,20 @@ Util::intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1,
}
case 1:
{
for(int i = 0; i < _len2; ++i)
for(unsigned i = 0; i < _len2; ++i)
{
if(Util::bsearch_int_uporder(_list2[i], _list1, _len1) != -1)
if(Util::bsearch_int_uporder(_list2[i], _list1, _len1) != INVALID)
res.push_back(_list2[i]);
}
break;
}
case 2:
{
int m = _len1, i;
unsigned m = _len1, i;
for(i = 0; i < m; ++i)
{
int t = _list1[i];
if(Util::bsearch_int_uporder(t, _list2, _len2) != -1)
unsigned t = _list1[i];
if(Util::bsearch_int_uporder(t, _list2, _len2) != INVALID)
res.push_back(t);
}
break;
@ -1286,8 +1338,8 @@ Util::intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1,
_id_list = NULL;
}
else {
_id_list = new int[_id_list_len];
for (int i = 0; i < _id_list_len; ++i)
_id_list = new unsigned[_id_list_len];
for (unsigned i = 0; i < _id_list_len; ++i)
_id_list[i] = res[i];
}
delete[] _list1;
@ -1508,3 +1560,85 @@ Util::_pso_cmp(const void* _a, const void* _b)
return 0;
}
bool
Util::spo_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b)
{
	//lexicographic "less than" on (subject, predicate, object)
	if(a.subid == b.subid)
	{
		if(a.preid == b.preid)
		{
			//when all three are equal this yields false: neither tuple goes first
			return a.objid < b.objid;
		}
		return a.preid < b.preid;
	}
	return a.subid < b.subid;
}
bool
Util::ops_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b)
{
	//lexicographic "less than" on (object, predicate, subject)
	if(a.objid == b.objid)
	{
		if(a.preid == b.preid)
		{
			//when all three are equal this yields false: neither tuple goes first
			return a.subid < b.subid;
		}
		return a.preid < b.preid;
	}
	return a.objid < b.objid;
}
bool
Util::pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b)
{
	//lexicographic "less than" on (predicate, subject, object)
	if(a.preid == b.preid)
	{
		if(a.subid == b.subid)
		{
			//when all three are equal this yields false: neither tuple goes first
			return a.objid < b.objid;
		}
		return a.subid < b.subid;
	}
	return a.preid < b.preid;
}
void
Util::empty_file(const char* _fname)
{
	//NOTICE: mode "w" overwrites an existing file, or creates a new one, leaving it empty
	FILE* handle = fopen(_fname, "w");
	if(handle != NULL)
	{
		fclose(handle);
		return;
	}
	//the file could not be opened for writing
	printf("do empty file %s failed\n", _fname);
}

View File

@ -37,6 +37,7 @@ in the sparql query can point to the same node in data graph)
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <netinet/in.h>
@ -87,9 +88,10 @@ in the sparql query can point to the same node in data graph)
//#define DEBUG_STREAM
//#define DEBUG_PRECISE 1 all information
//#define DEBUG_KVSTORE 1 //in KVstore
#define DEBUG_VSTREE 1 //in Database
//#define DEBUG_VSTREE 1 //in Database
//#define DEBUG_LRUCACHE 1
//#define DEBUG_DATABASE 1 //in Database
//#define DEBUG_VLIST 1
//
//
@ -123,6 +125,12 @@ in the sparql query can point to the same node in data graph)
#endif
#endif
#ifdef DEBUG_VLIST
#ifndef DEBUG
#define DEBUG
#endif
#endif
#ifndef DEBUG
//#define DEBUG
#endif
@ -141,21 +149,46 @@ typedef unsigned(*HashFunction)(const char*);
//http://www.cppblog.com/aurain/archive/2010/07/06/119463.html
//http://blog.csdn.net/mycomputerxiaomei/article/details/7641221
//http://kb.cnblogs.com/page/189480/
//
//type for the triple num
//TODO:this should use unsigned (triple num may > 2500000000)
typedef int TNUM;
//type for entity/literal/predicate ID
typedef int ELPID;
//TODO:typedef several ID typesand new a ID module
//what is more, the str length and Block ID in kvstore
typedef unsigned PREDICATE_ID;
//type for the triple num
//NOTICE: this should use unsigned (triple num may > 2500000000)
typedef unsigned TYPE_TRIPLE_NUM;
//type for entity/literal ID
typedef unsigned TYPE_ENTITY_LITERAL_ID;
static const TYPE_ENTITY_LITERAL_ID INVALID_ENTITY_LITERAL_ID = UINT_MAX;
//static const TYPE_ENTITY_LITERAL_ID INVALID_ENTITY_LITERAL_ID = -1;
//#define INVALID_ENTITY_LITERAL_ID UINT_MAX
//type for predicate ID
typedef int TYPE_PREDICATE_ID;
static const TYPE_PREDICATE_ID INVALID_PREDICATE_ID = -1;
//static const TYPE_PREDICATE_ID INVALID_PREDICATE_ID = -1;
//#define INVALID_PREDICATE_ID -1
//TODO:typedef several ID types and new a ID module
//TODO:encode entity from low to high, encode literal from high to low(finally select the mid of space as border)
typedef unsigned ENTITY_LITERAL_ID;
typedef unsigned NODE_ID;
//TODO: what is more, the Block ID in kvstore
//typedef unsigned NODE_ID;
//can use `man limits.h` to see more
#define INVALID UINT_MAX
static const unsigned INVALID = UINT_MAX;
//static const int INVALID = -1;
//#define INVALID UINT_MAX
//NOTICE: always use unsigned for query result matrix
//
//NOTICE: if use define, the type is none
//One triple in ID form: the IDs of its subject, predicate and object.
//NOTE(review): preid is declared with TYPE_ENTITY_LITERAL_ID, not TYPE_PREDICATE_ID - confirm this is intended
typedef struct TYPE_ID_TUPLE
{
TYPE_ENTITY_LITERAL_ID subid;
TYPE_ENTITY_LITERAL_ID preid;
TYPE_ENTITY_LITERAL_ID objid;
}ID_TUPLE;
/******** all static&universal constants and functions ********/
class Util
@ -168,13 +201,16 @@ public:
static const unsigned MB = 1048576;
static const unsigned GB = 1073741824;
static const int TRIPLE_NUM_MAX = 1000*1000*1000;
//static const int TRIPLE_NUM_MAX = 1000*1000*1000;
static const TYPE_TRIPLE_NUM TRIPLE_NUM_MAX = INVALID;
static const char EDGE_IN = 'i';
static const char EDGE_OUT= 'o';
//In order to differentiate the sub-part and literal-part of object
//let subid begin with 0, while literalid begins with LITERAL_FIRST_ID
//used in Database and Join
static const int LITERAL_FIRST_ID = 1000*1000*1000;
static const int LITERAL_FIRST_ID = 2 * 1000*1000*1000;
//initial transfer buffer size in Tree/ and Stream/
static const unsigned TRANSFER_SIZE = 1 << 20; //1M
//NOTICE:the larger the faster, but need to care the memory usage(not use 1<<33, negative)
@ -204,11 +240,12 @@ public:
static int compIIpair(int _a1, int _b1, int _a2, int _b2);
static std::string showtime();
static int cmp_int(const void* _i1, const void* _i2);
static void sort(int*& _id_list, int _list_len);
static int bsearch_int_uporder(int _key, const int* _array,int _array_num);
static bool bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len);
static int bsearch_vec_uporder(int _key, const std::vector<int>* _vec);
static std::string result_id_str(std::vector<int*>& _v, int _var_num);
static int cmp_unsigned(const void* _i1, const void* _i2);
static void sort(unsigned*& _id_list, unsigned _list_len);
static unsigned bsearch_int_uporder(unsigned _key, const unsigned* _array, unsigned _array_num);
static bool bsearch_preid_uporder(TYPE_PREDICATE_ID _preid, unsigned* _pair_idlist, unsigned _list_len);
static unsigned bsearch_vec_uporder(unsigned _key, const std::vector<unsigned>* _vec);
static std::string result_id_str(std::vector<unsigned*>& _v, int _var_num);
static bool dir_exist(const std::string _dir);
static bool create_dir(const std:: string _dir);
static long get_cur_time();
@ -218,13 +255,17 @@ public:
static std::string getTimeString();
static std::string node2string(const char* _raw_str);
static bool is_literal_ele(int);
static int removeDuplicate(int*, int);
static bool is_literal_ele(TYPE_ENTITY_LITERAL_ID id);
static bool is_entity_ele(TYPE_ENTITY_LITERAL_ID id);
static unsigned removeDuplicate(unsigned*, unsigned);
static std::string getQueryFromFile(const char* _file_path);
static std::string getSystemOutput(std::string cmd);
static std::string getExactPath(const char* path);
static std::string getItemsFromDir(std::string path);
static void logging(std::string _str);
static void empty_file(const char* _fname);
// Below are some useful hash functions for string
static unsigned simpleHash(const char *_str);
@ -248,7 +289,7 @@ public:
static HashFunction hash[];
static double logarithm(double _a, double _b);
static void intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2);
static void intersect(unsigned*& _id_list, unsigned& _id_list_len, const unsigned* _list1, unsigned _len1, const unsigned* _list2, unsigned _len2);
static char* l_trim(char *szOutput, const char *szInput);
static char* r_trim(char *szOutput, const char *szInput);
@ -258,6 +299,9 @@ public:
Util();
~Util();
static std::string profile;
//NOTICE: this function must be called out of any Database to config the basic settings
//You can call it by Util util in the first of your main program
//Another way is to build a GstoreApplication program, and do this configure in the initialization of the application
static bool configure(); //read init.conf and set the parameters for this system
static bool config_setting();
static bool config_advanced();
@ -270,6 +314,10 @@ public:
static int _spo_cmp(const void* _a, const void* _b);
static int _ops_cmp(const void* _a, const void* _b);
static int _pso_cmp(const void* _a, const void* _b);
//sort functions for sort on ID_TUPLE
static bool spo_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
static bool ops_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
static bool pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
static std::string tmp_path;
// this are for debugging
@ -279,6 +327,10 @@ public:
static FILE* debug_database;
static FILE* debug_vstree;
static std::string gserver_port_file;
static std::string gserver_port_swap;
static std::string gserver_log;
private:
static bool isValidIPV4(std::string);

348
Util/VList.cpp Normal file
View File

@ -0,0 +1,348 @@
/*=============================================================================
# Filename: VList.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2017-03-27 15:47
# Description:
=============================================================================*/
#include "VList.h"
using namespace std;
bool
VList::isLongList(unsigned _len)
{
	//a value counts as "long" only when it is strictly larger than LENGTH_BORDER
	if (_len <= VList::LENGTH_BORDER)
	{
		return false;
	}
	return true;
}
//Build an empty VList that is not attached to any file.
VList::VList()
{ //not use ../logs/, notice the location of program
	cur_block_num = SET_BLOCK_NUM;
	filepath = "";
	freelist = NULL;
	//no file is opened here; keep the handle at a defined value instead of leaving it uninitialized
	valfp = NULL;
	max_buffer_size = Util::MAX_BUFFER_SIZE;
	freemem = max_buffer_size;
}
//Create (mode "build") or open (mode "open") the value-list file at _filepath
//and build the in-memory list of free block numbers.
//  _filepath    : path of the file that stores the long value lists
//  _mode        : "build" truncates/creates the file, "open" opens an existing one
//  _buffer_size : stored as max_buffer_size, also the initial value of freemem
//On an invalid mode or a failed open an error is printed and the object is left
//without a file (valfp == NULL) and without a free list (freelist == NULL).
VList::VList(string& _filepath, string& _mode, unsigned long long _buffer_size)
{
	//give every member a defined value first, so that the early returns below
	//never leave the object half-initialized
	this->cur_block_num = SET_BLOCK_NUM; //initialize
	this->filepath = _filepath;
	this->freelist = NULL;
	this->valfp = NULL;
	this->max_buffer_size = _buffer_size;
	this->freemem = this->max_buffer_size;

	const bool build = (_mode == string("build"));
	if (build)
		valfp = fopen(_filepath.c_str(), "w+b");
	else if (_mode == string("open"))
		valfp = fopen(_filepath.c_str(), "r+b");
	else
	{
		cout<<string("error in VList: Invalid mode ") + _mode<<endl;
		return;
	}
	if (valfp == NULL)
	{
		cout<<string("error in VList: Open error ") + _filepath<<endl;
		return;
	}

	this->freelist = new BlockInfo; //null-head

	//TODO: read/write by char is too slow, how about read all and deal , then clear?
	//
	//BETTER: how about assign IDs in a dynamic way?
	//limitID freelist
	//QUERY: can free id list consume very large memory??
	BlockInfo* bp = this->freelist;
	if (build)
	{ //write basic information
		fwrite(&cur_block_num, sizeof(unsigned), 1, this->valfp); //current block num
		//NOTICE: use a 1M block for a unsigned?? not ok!
		fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
		//one bitmap byte per 8 blocks; a fresh file has every block free (all bits 0)
		const unsigned bytes = cur_block_num / 8;
		for (unsigned i = 0; i < bytes; ++i)
		{
			fputc(0, this->valfp);
			for (unsigned k = 0; k < 8; ++k)
			{
				bp->next = new BlockInfo(i * 8 + k + 1, NULL);
				bp = bp->next;
			}
		}
	}
	else //_mode == "open"
	{
		//read basic information
		fread(&cur_block_num, sizeof(unsigned), 1, this->valfp);
		fseek(this->valfp, BLOCK_SIZE, SEEK_SET);
		const unsigned bytes = cur_block_num / 8;
		for (unsigned i = 0; i < bytes; ++i)
		{
			//fgetc returns int, so EOF can be told apart from a 0xff byte
			const int c = fgetc(valfp);
			if (c == EOF)
			{
				//nothing more to read: no further free blocks can be found
				break;
			}
			for (unsigned k = 0; k < 8; ++k)
			{
				//a zero bit marks a free block; bit k of byte i stands for block i*8 + (7-k) + 1
				if ((c & (1 << k)) == 0)
				{
					bp->next = new BlockInfo(i * 8 + 7 - k + 1, NULL);
					bp = bp->next;
				}
			}
		}
	}

	//NOTICE: we do not need to alloc the blocks for free block id list, AllocBlock is only for later blocks
}
long //8-byte in 64-bit machine
VList::Address(unsigned _blocknum) const //BETTER: inline function
{
	//block number 0 maps to the very beginning of the file
	if (_blocknum == 0)
	{
		return 0;
	}
	//block numbers beyond the current block count are rejected
	if (_blocknum > cur_block_num)
	{
		//print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum));
		return -1; //address should be non-negative
	}
	//NOTICE: here should explicitly use long, for both factors of the product
	const long index = (long)(this->SuperNum + _blocknum - 1);
	const long unit = (long)BLOCK_SIZE;
	return index * unit;
}
unsigned
VList::Blocknum(long address) const
{
	//undo the mapping of Address(): drop the in-block offset, then shift by the super blocks
	return 1 + (address / BLOCK_SIZE) - this->SuperNum;
}
unsigned
VList::AllocBlock()
{
BlockInfo* p = this->freelist->next;
if (p == NULL)
{
for (unsigned i = 0; i < SET_BLOCK_INC; ++i)
{
cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM
this->FreeBlock(cur_block_num);
}
p = this->freelist->next;
}
unsigned t = p->num;
this->freelist->next = p->next;
delete p;
return t;
}
void
VList::FreeBlock(unsigned _blocknum)
{ //QUERY: head-sub and tail-add will be better?
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
this->freelist->next = bp;
}
//NOTICE: all reads are aligned to 4 bytes (including a string)
//a string may span several blocks
//
//NOTICE: no buffer is used: read/write on need and update at once, so there is no need to write back at last
//NOTICE: the next block number is placed at the beginning of a block
//Called while reading a value: if the file position is exactly on a block boundary,
//continue in the block numbered *_next and load that block's leading unsigned into *_next.
//Does nothing when the position is not on a block boundary.
//  _next : in  - number of the block to continue with
//          out - the unsigned stored at the start of that block
void
VList::ReadAlign(unsigned* _next)
{
if (ftell(valfp) % BLOCK_SIZE == 0)
{
//jump to the start of the following block of this value
fseek(valfp, Address(*_next), SEEK_SET);
//a block starts with the number of its successor; the data follows right after it
fread(_next, sizeof(unsigned), 1, valfp);
}
}
//Called while writing a value: if the file position is exactly on a block boundary,
//allocate a new block, link it behind block *_curnum and continue writing in it.
//Does nothing when the position is not on a block boundary.
//  _curnum : in  - number of the block being written
//            out - number of the newly allocated block
void
VList::WriteAlign(unsigned* _curnum)
{
if (ftell(valfp) % BLOCK_SIZE == 0)
{
unsigned blocknum = this->AllocBlock();
//store the new block's number at the start of the current block
fseek(valfp, Address(*_curnum), SEEK_SET);
fwrite(&blocknum, sizeof(unsigned), 1, valfp);
//skip the leading 4 bytes of the new block, reserved for its own successor number
fseek(valfp, Address(blocknum) + 4, SEEK_SET);
*_curnum = blocknum;
}
}
//Read the value whose first block is _block_num.
//  _block_num : number of the first block of the value (as returned by writeValue)
//  _str       : out - buffer allocated with new char[] by readBstr; this function never frees it
//  _len       : out - number of bytes in _str
//Returns true on success. Returns false, with _str == NULL and _len == 0, when the
//block number is not usable or the head of the block can not be read.
bool
VList::readValue(unsigned _block_num, char*& _str, unsigned& _len)
{
#ifdef DEBUG_VLIST
	cout<<"to get value of block num: "<<_block_num<<endl;
#endif
	_str = NULL;
	_len = 0;

	//block 0 is never handed out for a value, and Address() answers -1 for numbers out of range
	const long pos = Address(_block_num);
	if (_block_num == 0 || pos < 0)
	{
		return false;
	}
	if (fseek(valfp, pos, SEEK_SET) != 0)
	{
		return false;
	}

	//a block begins with the number of the next block of the same value
	unsigned next = 0;
	if (fread(&next, sizeof(unsigned), 1, valfp) != 1)
	{
		return false;
	}

	this->readBstr(_str, _len, &next);
	return true;
}
unsigned
VList::writeValue(const char* _str, unsigned _len)
{
	//the first block of the value; its number is what the caller gets back
	const unsigned first = this->AllocBlock();
	//writeBstr updates this number whenever the value continues in another block
	unsigned current = first;

	//NOTICE: here we must skip the next position first
	const long data_begin = Address(current) + 4;
	fseek(valfp, data_begin, SEEK_SET);
	this->writeBstr(_str, _len, &current);

#ifdef DEBUG_VLIST
	cout<<"to write value - block num: "<<first<<endl;
#endif

	return first;
}
//Put every block of the value starting at _block_num back on the free list.
//The chain is followed through the next-block number stored at the start of
//each block and ends at number 0.
//Returns true when the whole chain was released. Returns false when a block
//number is out of range or its link can not be read; that block and the rest
//of the chain are then left untouched.
bool
VList::removeValue(unsigned _block_num)
{
	unsigned store = _block_num;
	while (store != 0)
	{
		//read the link before the block is released
		unsigned next = 0;
		const long pos = Address(store);
		if (pos < 0)
		{
			//not a block of this file: never put it on the free list
			return false;
		}
		if (fseek(valfp, pos, SEEK_SET) != 0
			|| fread(&next, sizeof(unsigned), 1, valfp) != 1)
		{
			return false;
		}

		this->FreeBlock(store);
		store = next;
	}

	return true;
}
//Read one length-prefixed value starting at the current file position.
//  _str  : out - buffer allocated here with new char[]; it is handed out and never freed in this function
//  _len  : out - length of the value in bytes
//  _next : number of the block that follows the current one; updated by ReadAlign on each block change
//Always returns true; the results of the fread calls are not checked.
bool
VList::readBstr(char*& _str, unsigned& _len, unsigned* _next)
{
//long address;
unsigned len, i, j;
//the value starts with its length
fread(&len, sizeof(unsigned), 1, this->valfp);
#ifdef DEBUG_VLIST
cout<<"the length of value: "<<len<<endl;
#endif
//the length field may have ended exactly on a block boundary
this->ReadAlign(_next);
//char* s = (char*)malloc(len);
char* s = new char[len];
_len = len;
//read 4 bytes at a time, moving to the next block whenever a boundary is reached;
//the condition i + 4 < len leaves the last 1..4 bytes to the loop below
for (i = 0; i + 4 < len; i += 4)
{
fread(s + i, sizeof(char), 4, valfp);
this->ReadAlign(_next);
}
//remaining bytes are read one by one, without any block change
while (i < len)
{
fread(s + i, sizeof(char), 1, valfp); //BETTER
i++;
}
//skip the padding that aligns the value to 4 bytes
j = len % 4;
if (j > 0)
j = 4 - j;
fseek(valfp, j, SEEK_CUR);
//NOTICE+DEBUG: I think no need to align here, later no data to read
//(if need to read, then fseek again to find a new value)
//this->ReadAlign(_next);
_str = s;
return true;
}
//Write one length-prefixed value starting at the current file position.
//  _str    : bytes to write
//  _len    : number of bytes in _str
//  _curnum : number of the block being written; updated by WriteAlign whenever a new block is started
//Always returns true; the results of the fwrite calls are not checked.
bool
VList::writeBstr(const char* _str, unsigned _len, unsigned* _curnum)
{
unsigned i, j, len = _len;
//the value starts with its length
fwrite(&len, sizeof(unsigned), 1, valfp);
//the length field may have ended exactly on a block boundary
this->WriteAlign(_curnum);
//cout<<"to write bstr, length: "<<len<<endl;
//BETTER: compute this need how many blocks first, then write a block a time
const char* s = _str;
//write 4 bytes at a time, starting a new block whenever a boundary is reached;
//the condition i + 4 < len leaves the last 1..4 bytes to the loop below
for (i = 0; i + 4 < len; i += 4)
{
fwrite(s + i, sizeof(char), 4, valfp);
this->WriteAlign(_curnum);
}
//remaining bytes are written one by one, without any block change
while (i < len)
{
fwrite(s + i, sizeof(char), 1, valfp);
i++;
}
//skip the padding that aligns the value to 4 bytes
j = len % 4;
if (j > 0)
j = 4 - j;
fseek(valfp, j, SEEK_CUR);
//NOTICE+DEBUG: I think no need to align here, later no data to write
//(if need to write, then fseek again to write a new value)
//this->WriteAlign(_curnum);
//end the chain: the last block used gets 0 as its next-block number
fseek(valfp, Address(*_curnum), SEEK_SET);
unsigned t = 0;
fwrite(&t, sizeof(unsigned), 1, valfp);
return true;
}
//Write the block count and the free-block bitmap back to the file,
//release the in-memory free list and close the file.
VList::~VList()
{
	//without an open file there is nothing to write back; the free list is only
	//allocated after the file has been opened, so there is nothing to release either
	if (this->valfp == NULL)
	{
		return;
	}

	//write the info back
	fseek(this->valfp, 0, SEEK_SET);
	fwrite(&cur_block_num, sizeof(unsigned), 1, valfp);//write current blocks num
	fseek(valfp, BLOCK_SIZE, SEEK_SET);
	const unsigned bytes = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
	for (unsigned i = 0; i < bytes; ++i)
	{
		//reset to 1 first
		fputc(0xff, valfp);
	}

	BlockInfo* bp = (this->freelist != NULL) ? this->freelist->next : NULL;
	while (bp != NULL)
	{
		//if not-use then set 0, aligned to byte!
#ifdef DEBUG_KVSTORE
		if (bp->num > cur_block_num)
		{
			printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
			exit(1);
		}
#endif
		//block n is bit 7 - (n-1) % 8 of bitmap byte (n-1) / 8
		const unsigned index = bp->num - 1;
		const unsigned byte = index / 8;
		const unsigned bit = 7 - index % 8;
		fseek(valfp, BLOCK_SIZE + byte, SEEK_SET);
		//fgetc returns int, so a failed read can be told apart from a 0xff byte
		const int c = fgetc(valfp);
		if (c != EOF)
		{
			//step back onto the byte just read and rewrite it with the bit cleared
			fseek(valfp, -1, SEEK_CUR);
			fputc(c & ~(1 << bit), valfp);
		}
		bp = bp->next;
	}

	//release the free list, including its null-head
	bp = this->freelist;
	while (bp != NULL)
	{
		BlockInfo* next = bp->next;
		delete bp;
		bp = next;
	}
	this->freelist = NULL;

	fclose(this->valfp);
	this->valfp = NULL;
}

87
Util/VList.h Normal file
View File

@ -0,0 +1,87 @@
/*=============================================================================
# Filename: VList.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2017-03-27 15:40
# Description:
=============================================================================*/
#ifndef _UTIL_VLIST_H
#define _UTIL_VLIST_H
#include "Util.h"
#include "Bstr.h"
//NOTICE: not keep long list in memory, read each time
//but when can you free the long list(kvstore should release it after parsing)
//
//CONSIDER: if to keep long list in memory, should adjust the bstr in memory:
//unsigned: 0 char*: an object (if in memory, if modified, length, content, block num)
//when reading a long list in a node, generate the object first, and the object will tell you whether
//the list is in memory or not
//BETTER: use two kind of blocks in two files, like 1M and 1G (split the block num into two parts)
//STRUCT: a long list real-address is the block ID in file2(only for long value lists, a list across several 1M blocks)
//tree-value Bstr: unsigned=the real address char*=NULL
//in disk:
//file1 is tree file, the long list is represented as: 0 real-address
//NOTICE: long list is not kept in memory for cache, it is read/updated each time on need!
//TODO: use fread/fwrite here instead of fgetc/fputc
//including other trees
//Stores long value lists in a separate file made of fixed-size blocks.
//A value occupies a chain of blocks; each block starts with the number of the
//next block of the chain (0 at the end), followed by data.
//NOTE(review): the class holds a FILE* and a heap-allocated free list but declares no copy
//constructor/assignment; copying an instance presumably ends in a double close/delete - confirm no caller copies it
class VList
{
public:
	//NOTICE:the border is 10^6, but the block is larger, 1M
	//this is not chosen intuitively, we make sure that using vlist is better: transferring time>random seek time(x/40M>0.006)
	//Also notice that if no modification on data, read a node is almost sequentially in normal IVTree
	//In VList, case is the same and using VList may bring another seek cost!(it is not easy to setup cache for IVTree due to data struct)
	static const unsigned LENGTH_BORDER = 1000000;
	//static const unsigned LENGTH_BORDER = 100;
	static const unsigned BLOCK_SIZE = 1 << 20; //fixed size of disk-block
	static const unsigned MAX_BLOCK_NUM = 1 << 23; //max block-num
	//below two constants: must can be exactly divided by 8
	static const unsigned SET_BLOCK_NUM = 1 << 3; //initial blocks num
	static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
	//used by Address()/Blocknum() as the offset, in blocks, between a block number and its file position
	static const unsigned SuperNum = MAX_BLOCK_NUM / (8 * BLOCK_SIZE) + 1;

private:
	unsigned long long max_buffer_size;
	//current number of blocks; written to the start of the file
	unsigned cur_block_num;
	std::string filepath;
	//list of free block numbers, headed by an empty node
	BlockInfo* freelist;
	//very long value list are stored in a separate file(with large block)
	//
	//NOTICE: according to the summary result, 90% value lists are just below 100 bytes
	//<10%: 5000000~100M bytes
	FILE* valfp;

	//NOTICE: freemem's type is long long here, due to large memory in server.
	//However, needmem in handler() and request() is ok to be int/unsigned.
	//Because the bstr' size is controlled, so is the node.
	unsigned long long freemem; //free memory to use, non-negative
	//unsigned long long time; //QUERY(achieving an old-swap startegy?)

	//file offset of a block; 0 for block 0, -1 for a number above cur_block_num
	long Address(unsigned _blocknum) const;
	//block number belonging to a file offset
	unsigned Blocknum(long address) const;
	//take a block number from the free list, adding new numbers when the list is empty
	unsigned AllocBlock();
	//put a block number on the free list
	void FreeBlock(unsigned _blocknum);
	//on a block boundary: continue reading in block *_next
	void ReadAlign(unsigned* _next);
	//on a block boundary: allocate a new block, link it behind *_curnum and continue writing there
	void WriteAlign(unsigned* _curnum);
	//read/write one length-prefixed value at the current file position
	bool readBstr(char*& _str, unsigned& _len, unsigned* _next);
	bool writeBstr(const char* _str, unsigned _len, unsigned* _curnum);

public:
	VList();
	VList(std::string& _filepath, std::string& _mode, unsigned long long _buffer_size);//create a fixed-size file or open an existence
	//read the value starting at _block_num; _str is allocated with new char[]
	bool readValue(unsigned _block_num, char*& _str, unsigned& _len);
	//store a value and return the number of its first block
	unsigned writeValue(const char* _str, unsigned _len);
	//put all blocks of the value starting at _block_num back on the free list
	bool removeValue(unsigned _block_num);
	~VList();

	//true if _len is larger than LENGTH_BORDER
	static bool isLongList(unsigned _len);
};
#endif

View File

@ -25,8 +25,10 @@ using namespace std;
//int LRUCache::DEFAULT_CAPACITY = 10000000;
int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000; //about 20G memory for vstree
//int LRUCache::DEFAULT_CAPACITY = 1000;
//TODO:10^6 is a good parameter, at most use 20G
//NOTICE:10^6 is a good parameter, at most use 20G
//NOTICE: it is ok to set it 4000000 when building!!! better to adjust according to the current memory usage
//also use 2000000 or smaller for query()
LRUCache::LRUCache(int _capacity)
{
//initialize the lock
@ -39,7 +41,7 @@ LRUCache::LRUCache(int _capacity)
cout << "LRUCache initial..." << endl;
this->capacity = _capacity > 0 ? _capacity : LRUCache::DEFAULT_CAPACITY;
// TODO+DEBUG:it seems that a minium size is required, for example, multiple path down(the height?)
//DEBUG:it seems that a minimum size is required, for example, multiple path down(the height?)
//at least 3*h
//
// we should guarantee the cache is big enough.

View File

@ -160,14 +160,18 @@ void VSTree::retrieve(SPARQLquery& _query)
//NOTICE:this can only be done by one thread
//build the VSTree from the _entity_signature_file.
bool
VSTree::buildTree(std::string _entry_file_path)
VSTree::buildTree(std::string _entry_file_path, int _cache_size)
{
Util::logging("IN VSTree::buildTree");
//NOTICE: entry buffer don't need to store all entities, just loop, read and deal
//not so much memory: 2 * 10^6 * (4+800/8) < 1G
// create the entry buffer and node buffer.
this->entry_buffer = new EntryBuffer(EntryBuffer::DEFAULT_CAPACITY);
//cout<<"entry buffer newed"<<endl;
this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY);
this->node_buffer = new LRUCache(_cache_size);
//this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY);
// create the root node.
//VNode* rootNodePtr = new VNode();
@ -643,10 +647,11 @@ VSTree::saveTree()
}
bool
VSTree::loadTree()
VSTree::loadTree(int _cache_size)
{
cout << "load VSTree..." << endl;
(this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY);
(this->node_buffer) = new LRUCache(_cache_size);
//(this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY);
cout<<"LRU cache built"<<endl;
bool flag = this->loadTreeInfo();
@ -929,7 +934,7 @@ VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode*
bool is_leaf = oldNodePtr->isLeaf();
// then create a new node to act as BEntryIndex's father.
VNode* newNodePtr = this->createNode(is_leaf);
#ifdef DEBUG
#ifdef DEBUG_VSTREE
cout<<"new node file line: "<<newNodePtr->getFileLine()<<endl;
#endif

View File

@ -17,6 +17,9 @@
//NOTICE:R/W more than 4G
//TODO: in multiple threads case, to ensure the vstree and cache is correct, maybe lock the whole vstree!
//(at one time, only one thread can query/update the vstree)
class VSTree
{
friend class VNode;
@ -25,7 +28,7 @@ public:
~VSTree();
int getHeight()const;
//build the VSTree from the _entity_signature_file.
bool buildTree(std::string _entity_signature_file);
bool buildTree(std::string _entity_signature_file, int _cache_size = -1);
bool deleteTree();
//if the tree is empty
@ -47,7 +50,7 @@ public:
//save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path.
bool saveTree();
//load tree from tree_info_file_path and tree_node_file_path files.
bool loadTree();
bool loadTree(int _cache_size = -1);
//get the tree's root node pointer.
VNode* getRoot();
//get the node pointer by its file line.

2000
data/bbug.nt Normal file

File diff suppressed because it is too large Load Diff

5
data/bbug0.sql Normal file
View File

@ -0,0 +1,5 @@
INSERT DATA
{
<http://www.founder/102> <http://www.founder.20.link:52> <http://www.founder/106> .
<http://www.founder/102> <http://www.founder> <http://www.founder/73> .
}

1
data/bbug0d.sql Normal file
View File

@ -0,0 +1 @@
DELETE DATA { <http://www.founder/102> <http://www.founder.20.link:52> <http://www.founder/106> . }

1
data/bbug1.sql Normal file
View File

@ -0,0 +1 @@
select ?subject ?predict ?object WHERE { ?subject <http://www.founder.20.link:52> ?object; ?predict ?object . }

1
data/bbug2.sql Normal file
View File

@ -0,0 +1 @@
DELETE WHERE { <http://www.founder/101> ?predict ?object . }

1
data/bbug3.sql Normal file
View File

@ -0,0 +1 @@
select ?predict where {<http://www.founder/102> ?predict <http://www.founder/73> .}

5
data/bbug4.sql Normal file
View File

@ -0,0 +1,5 @@
select ?subject ?predict ?object where
{
<http://www.founder/102> <http://www.founder.20.link:52> ?object.
?subject ?predict ?object.
}

1
data/bbug5.sql Normal file
View File

@ -0,0 +1 @@
select ?subject ?predict ?object where {?subject <http://www.founder.20.link:52> <http://www.founder/106>; ?predict ?object . }

1
data/bbug6.sql Normal file
View File

@ -0,0 +1 @@
DELETE WHERE { ?subject <http://www.founder.20.link:52> ?objcet. }

1
data/error.sql Normal file
View File

@ -0,0 +1 @@
select ?a WHERE { <a> <http://www.founder.20.link:52> <b> . }

1
data/exist.sql Normal file
View File

@ -0,0 +1 @@
select ?s where { <http://www.founder/100> <http://www.founder.20.attr:dmID> "22". }

1
data/fault.sql Normal file
View File

@ -0,0 +1 @@
select ?a WHERE { <a> <http://www.founder.20.link:52> <b> . }

View File

@ -70,10 +70,11 @@ api_java = api/java/lib/GstoreJavaAPI.jar
#sstreeobj = $(objdir)Tree.o $(objdir)Storage.o $(objdir)Node.o $(objdir)IntlNode.o $(objdir)LeafNode.o $(objdir)Heap.o
sitreeobj = $(objdir)SITree.o $(objdir)SIStorage.o $(objdir)SINode.o $(objdir)SIIntlNode.o $(objdir)SILeafNode.o $(objdir)SIHeap.o
istreeobj = $(objdir)ISTree.o $(objdir)ISStorage.o $(objdir)ISNode.o $(objdir)ISIntlNode.o $(objdir)ISLeafNode.o $(objdir)ISHeap.o
ivtreeobj = $(objdir)IVTree.o $(objdir)IVStorage.o $(objdir)IVNode.o $(objdir)IVIntlNode.o $(objdir)IVLeafNode.o $(objdir)IVHeap.o
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) #$(sstreeobj)
kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstreeobj)
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o
queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \
$(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o
@ -217,6 +218,26 @@ $(objdir)ISHeap.o: KVstore/ISTree/heap/ISHeap.cpp KVstore/ISTree/heap/ISHeap.h $
$(CC) $(CFLAGS) KVstore/ISTree/heap/ISHeap.cpp -o $(objdir)ISHeap.o
#objects in istree/ end
#objects in ivtree/ begin
$(objdir)IVTree.o: KVstore/IVTree/IVTree.cpp KVstore/IVTree/IVTree.h $(objdir)Stream.o $(objdir)VList.o
$(CC) $(CFLAGS) KVstore/IVTree/IVTree.cpp -o $(objdir)IVTree.o
$(objdir)IVStorage.o: KVstore/IVTree/storage/IVStorage.cpp KVstore/IVTree/storage/IVStorage.h $(objdir)Util.o
$(CC) $(CFLAGS) KVstore/IVTree/storage/IVStorage.cpp -o $(objdir)IVStorage.o $(def64IO)
$(objdir)IVNode.o: KVstore/IVTree/node/IVNode.cpp KVstore/IVTree/node/IVNode.h $(objdir)Util.o
$(CC) $(CFLAGS) KVstore/IVTree/node/IVNode.cpp -o $(objdir)IVNode.o
$(objdir)IVIntlNode.o: KVstore/IVTree/node/IVIntlNode.cpp KVstore/IVTree/node/IVIntlNode.h
$(CC) $(CFLAGS) KVstore/IVTree/node/IVIntlNode.cpp -o $(objdir)IVIntlNode.o
$(objdir)IVLeafNode.o: KVstore/IVTree/node/IVLeafNode.cpp KVstore/IVTree/node/IVLeafNode.h
$(CC) $(CFLAGS) KVstore/IVTree/node/IVLeafNode.cpp -o $(objdir)IVLeafNode.o
$(objdir)IVHeap.o: KVstore/IVTree/heap/IVHeap.cpp KVstore/IVTree/heap/IVHeap.h $(objdir)Util.o
$(CC) $(CFLAGS) KVstore/IVTree/heap/IVHeap.cpp -o $(objdir)IVHeap.o
#objects in ivtree/ end
$(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h KVstore/Tree.h
$(CC) $(CFLAGS) KVstore/KVstore.cpp $(inc) -o $(objdir)KVstore.o
@ -302,6 +323,9 @@ $(objdir)Triple.o: Util/Triple.cpp Util/Triple.h $(objdir)Util.o
$(objdir)BloomFilter.o: Util/BloomFilter.cpp Util/BloomFilter.h $(objdir)Util.o
$(CC) $(CFLAGS) Util/BloomFilter.cpp -o $(objdir)BloomFilter.o
$(objdir)VList.o: Util/VList.cpp Util/VList.h
$(CC) $(CFLAGS) Util/VList.cpp -o $(objdir)VList.o
#objects in util/ end
@ -408,7 +432,7 @@ dist: clean
tarball:
tar -czvf devGstore.tar.gz api bin lib tools .debug .tmp .objs test docs data makefile \
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex COVERAGE LICENSE
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex COVERAGE
APIexample: $(api_cpp) $(api_java)
$(MAKE) -C api/cpp/example

12
package.json Normal file
View File

@ -0,0 +1,12 @@
{
"config": {
"ghooks": {
"commit-msg": "validate-commit-msg"
}
},
"scripts": {
"changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0",
"changelog": "conventional-changelog -p angular -i CHANGELOG.md -w"
}
}

15
test/package.json Normal file
View File

@ -0,0 +1,15 @@
{
  "config": {
    "ghooks": {
      "commit-msg": "validate-commit-msg"
    }
  },
  "scripts": {
    "changelog-all": "conventional-changelog -p angular -i CHANGELOG.md -w -r 0",
    "changelog": "conventional-changelog -p angular -i CHANGELOG.md -w"
  }
}

403
test/test_kvstore.cpp Normal file
View File

@ -0,0 +1,403 @@
#include "../Util/Util.h"
#include "KVstore.h"
using namespace std;
//number of synthetic records generate_test_data() writes to each data file
//(the same value is also written as the first line of each file)
const int maxn=10000000;
//the single KVstore instance shared by every test function in this file
KVstore testkv;
//Write the two synthetic input files read by the tests in this file:
//  ./data1 : first line maxn, then maxn lines "<i> <7 lowercase letters>"
//  ./data2 : first line maxn, then maxn lines "<s> <p> <o>"
//If either file cannot be opened for writing, the problem is reported on stderr
//and the function returns without writing any record.
void generate_test_data()
{
    FILE *data_file1=fopen("./data1","w");
    if(data_file1==NULL)
    {
        fprintf(stderr,"generate_test_data: cannot open ./data1 for writing\n");
        return;
    }
    FILE *data_file2=fopen("./data2","w");
    if(data_file2==NULL)
    {
        fprintf(stderr,"generate_test_data: cannot open ./data2 for writing\n");
        fclose(data_file1);
        return;
    }

    fprintf(data_file1,"%d\n",maxn);
    fprintf(data_file2,"%d\n",maxn);
    for(int i=0;i<maxn;i++)
    {
        //encode i as 7 base-26 digits, least significant digit first, using 'a'..'z'
        char str[8];
        int k=i;
        for(int j=0;j<7;j++)
        {
            str[j]='a'+k%26;
            k/=26;
        }
        str[7]=0;
        fprintf(data_file1,"%d %s\n",i,str);

        //split i into three base-800 digits: o is the lowest digit, then p, then s
        k=i;
        int o=k%800;
        k/=800;
        int p=k%800;
        k/=800;
        int s=k%800;
        fprintf(data_file2,"%d %d %d\n",s,p,o);
    }
    fclose(data_file1);
    fclose(data_file2);
}
//for checking memory
//Extract the first decimal number from a text line such as "VmSize:   12345 kB\n".
//The last three characters of the line are cut off before the conversion, so the
//buffer passed in is modified in place.
//Returns the parsed number, or -1 when the line contains no digit at all.
int parseLine(char* line){
    size_t len = strlen(line);
    const char* p = line;
    //stop at the terminating NUL so a digit-free line cannot run the scan off the buffer
    while (*p != 0 && (*p <'0' || *p > '9')) p++;
    if (*p == 0) return -1;
    //drop the trailing suffix; lines shorter than 3 characters are left alone,
    //because truncating them would write before the start of the buffer
    if (len >= 3) line[len-3] = 0;
    return atoi(p);
}
//Return the number found on the "VmSize:" line of /proc/self/status, as extracted by parseLine().
//Note: this value is in KB!
//Returns -1 when the file cannot be opened or contains no "VmSize:" line.
int getMemkb(){
    FILE* file = fopen("/proc/self/status", "r");
    //the file may be missing; report "unknown" instead of handing NULL to fgets
    if (file == NULL) return -1;
    int result = -1;
    char line[128];
    while (fgets(line, sizeof(line), file) != NULL){
        if (strncmp(line, "VmSize:", 7) == 0){
            result = parseLine(line);
            break;
        }
    }
    fclose(file);
    return result;
}
//string2id
//Open entity2id in CREATE_MODE, call setIDByEntity() for every "<id> <string>" record
//of ./data1, then print the elapsed time and the value reported by getMemkb().
//If ./data1 cannot be opened the test is skipped with a message on stderr.
void test_string2id_insert()
{
    cout<< "testing string2id insert..." <<endl;
    testkv.open_entity2id(KVstore::CREATE_MODE);
    FILE *data1=fopen("./data1","r");
    if(data1==NULL)
    {
        fprintf(stderr,"cannot open ./data1, run with the \"generate\" argument first\n");
        testkv.close_entity2id();
        return;
    }
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    int n=0,id=0;
    char str[10];
    //an unreadable header means there are no records to process
    if(fscanf(data1,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //%9s keeps the token inside str[10]; stop at a truncated or malformed file
        if(fscanf(data1,"%d %9s",&id,str)!=2)
            break;
        testkv.setIDByEntity(string(str),id);
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data1);
    testkv.close_entity2id();
}
//Open entity2id in READ_WRITE_MODE, call subIDByEntity() for every string of ./data1,
//then print the elapsed time and the value reported by getMemkb().
//If ./data1 cannot be opened the test is skipped with a message on stderr.
void test_string2id_delete()
{
    cout<< "testing string2id delete..." <<endl;
    testkv.open_entity2id(KVstore::READ_WRITE_MODE);
    FILE *data1=fopen("./data1","r");
    if(data1==NULL)
    {
        fprintf(stderr,"cannot open ./data1, run with the \"generate\" argument first\n");
        testkv.close_entity2id();
        return;
    }
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    int n=0,id=0;
    char str[10];
    //an unreadable header means there are no records to process
    if(fscanf(data1,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //%9s keeps the token inside str[10]; stop at a truncated or malformed file
        if(fscanf(data1,"%d %9s",&id,str)!=2)
            break;
        testkv.subIDByEntity(string(str));
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data1);
    testkv.close_entity2id();
}
//Open entity2id in READ_WRITE_MODE, call getIDByEntity() for every string of ./data1,
//then print the elapsed time and the value reported by getMemkb().
//If ./data1 cannot be opened the test is skipped with a message on stderr.
void test_string2id_search()
{
    cout<< "testing string2id search..." <<endl;
    testkv.open_entity2id(KVstore::READ_WRITE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data1=fopen("./data1","r");
    if(data1==NULL)
    {
        fprintf(stderr,"cannot open ./data1, run with the \"generate\" argument first\n");
        testkv.close_entity2id();
        return;
    }
    int n=0,id=0;
    char str[10];
    //an unreadable header means there are no records to process
    if(fscanf(data1,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //%9s keeps the token inside str[10]; stop at a truncated or malformed file
        if(fscanf(data1,"%d %9s",&id,str)!=2)
            break;
        testkv.getIDByEntity(string(str));
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data1);
    testkv.close_entity2id();
}
//id2string
//Open id2entity in CREATE_MODE, call setEntityByID() for every "<id> <string>" record
//of ./data1, then print the elapsed time and the value reported by getMemkb().
//If ./data1 cannot be opened the test is skipped with a message on stderr.
void test_id2string_insert()
{
    cout<< "testing id2string insert..." <<endl;
    testkv.open_id2entity(KVstore::CREATE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data1=fopen("./data1","r");
    if(data1==NULL)
    {
        fprintf(stderr,"cannot open ./data1, run with the \"generate\" argument first\n");
        testkv.close_id2entity();
        return;
    }
    int n=0,id=0;
    char str[10];
    //an unreadable header means there are no records to process
    if(fscanf(data1,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //%9s keeps the token inside str[10]; stop at a truncated or malformed file
        if(fscanf(data1,"%d %9s",&id,str)!=2)
            break;
        testkv.setEntityByID(id,string(str));
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data1);
    testkv.close_id2entity();
}
//Open id2entity in READ_WRITE_MODE, read the record count n from the first line of ./data1
//and call subEntityByID(i) for i = 0..n-1, then print the elapsed time and the value
//reported by getMemkb().
//If ./data1 cannot be opened the test is skipped with a message on stderr.
void test_id2string_delete()
{
    cout<< "testing id2string delete..." <<endl;
    testkv.open_id2entity(KVstore::READ_WRITE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data1=fopen("./data1","r");
    if(data1==NULL)
    {
        fprintf(stderr,"cannot open ./data1, run with the \"generate\" argument first\n");
        testkv.close_id2entity();
        return;
    }
    int n=0;
    //only the count is needed: the ids are simply 0..n-1
    if(fscanf(data1,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        testkv.subEntityByID(i);
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data1);
    testkv.close_id2entity();
}
//Open id2entity in READ_WRITE_MODE, read the record count n from the first line of ./data1
//and call getEntityByID(i) for i = 0..n-1, then print the elapsed time and the value
//reported by getMemkb().
//If ./data1 cannot be opened the test is skipped with a message on stderr.
void test_id2string_search()
{
    cout<< "testing id2string search..." <<endl;
    testkv.open_id2entity(KVstore::READ_WRITE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data1=fopen("./data1","r");
    if(data1==NULL)
    {
        fprintf(stderr,"cannot open ./data1, run with the \"generate\" argument first\n");
        testkv.close_id2entity();
        return;
    }
    int n=0;
    //only the count is needed: the ids are simply 0..n-1
    if(fscanf(data1,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        testkv.getEntityByID(i);
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data1);
    testkv.close_id2entity();
}
//subID2values
//Open subID2values in CREATE_MODE, call updateInsert_s2values() for every "<s> <p> <o>"
//record of ./data2, then print the elapsed time and the value reported by getMemkb().
//If ./data2 cannot be opened the test is skipped with a message on stderr.
void test_subID2values_insert()
{
    cout<< "testing subID2values insert..." <<endl;
    testkv.open_subID2values(KVstore::CREATE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data2=fopen("./data2","r");
    if(data2==NULL)
    {
        fprintf(stderr,"cannot open ./data2, run with the \"generate\" argument first\n");
        testkv.close_subID2values();
        return;
    }
    int n=0,s=0,p=0,o=0;
    //an unreadable header means there are no records to process
    if(fscanf(data2,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //stop at a truncated or malformed file instead of reusing stale values
        if(fscanf(data2,"%d%d%d",&s,&p,&o)!=3)
            break;
        testkv.updateInsert_s2values(s,p,o);
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data2);
    testkv.close_subID2values();
}
//Open subID2values in READ_WRITE_MODE, call updateRemove_s2values() for every "<s> <p> <o>"
//record of ./data2, then print the elapsed time and the value reported by getMemkb().
//If ./data2 cannot be opened the test is skipped with a message on stderr.
void test_subID2values_delete()
{
    cout<< "testing subID2values delete..." <<endl;
    testkv.open_subID2values(KVstore::READ_WRITE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data2=fopen("./data2","r");
    if(data2==NULL)
    {
        fprintf(stderr,"cannot open ./data2, run with the \"generate\" argument first\n");
        testkv.close_subID2values();
        return;
    }
    int n=0,s=0,p=0,o=0;
    //an unreadable header means there are no records to process
    if(fscanf(data2,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //stop at a truncated or malformed file instead of reusing stale values
        if(fscanf(data2,"%d%d%d",&s,&p,&o)!=3)
            break;
        testkv.updateRemove_s2values(s,p,o);
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data2);
    testkv.close_subID2values();
}
//Open subID2values in READ_WRITE_MODE, call getpreIDlistBysubID() with the subject of every
//record of ./data2 (releasing each returned list), then print the elapsed time and the
//value reported by getMemkb().
//If ./data2 cannot be opened the test is skipped with a message on stderr.
void test_subID2values_get_s2p()
{
    cout<< "testing subID2values get s2p..." <<endl;
    testkv.open_subID2values(KVstore::READ_WRITE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data2=fopen("./data2","r");
    if(data2==NULL)
    {
        fprintf(stderr,"cannot open ./data2, run with the \"generate\" argument first\n");
        testkv.close_subID2values();
        return;
    }
    int n=0,s=0,p=0,o=0;
    //an unreadable header means there are no records to process
    if(fscanf(data2,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //stop at a truncated or malformed file instead of reusing stale values
        if(fscanf(data2,"%d%d%d",&s,&p,&o)!=3)
            break;
        int *plist=NULL;
        int len=0;
        testkv.getpreIDlistBysubID(s,plist,len);
        delete[] plist;
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data2);
    testkv.close_subID2values();
}
//Open subID2values in READ_WRITE_MODE, call getobjIDlistBysubID() with the subject of every
//record of ./data2 (releasing each returned list), then print the elapsed time and the
//value reported by getMemkb().
//If ./data2 cannot be opened the test is skipped with a message on stderr.
void test_subID2values_get_s2o()
{
    cout<< "testing subID2values get s2o..." <<endl;
    testkv.open_subID2values(KVstore::READ_WRITE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data2=fopen("./data2","r");
    if(data2==NULL)
    {
        fprintf(stderr,"cannot open ./data2, run with the \"generate\" argument first\n");
        testkv.close_subID2values();
        return;
    }
    int n=0,s=0,p=0,o=0;
    //an unreadable header means there are no records to process
    if(fscanf(data2,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //stop at a truncated or malformed file instead of reusing stale values
        if(fscanf(data2,"%d%d%d",&s,&p,&o)!=3)
            break;
        //start from NULL so delete[] is harmless if the lookup leaves the pointer untouched
        int *olist=NULL;
        int len=0;
        testkv.getobjIDlistBysubID(s,olist,len);
        delete[] olist;
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data2);
    testkv.close_subID2values();
}
//Open subID2values in READ_WRITE_MODE, call getobjIDlistBysubIDpreID() with the subject and
//predicate of every record of ./data2 (releasing each returned list), then print the
//elapsed time and the value reported by getMemkb().
//If ./data2 cannot be opened the test is skipped with a message on stderr.
void test_subID2values_get_sp2o()
{
    cout<< "testing subID2values get sp2o..." <<endl;
    testkv.open_subID2values(KVstore::READ_WRITE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data2=fopen("./data2","r");
    if(data2==NULL)
    {
        fprintf(stderr,"cannot open ./data2, run with the \"generate\" argument first\n");
        testkv.close_subID2values();
        return;
    }
    int n=0,s=0,p=0,o=0;
    //an unreadable header means there are no records to process
    if(fscanf(data2,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //stop at a truncated or malformed file instead of reusing stale values
        if(fscanf(data2,"%d%d%d",&s,&p,&o)!=3)
            break;
        //start from NULL so delete[] is harmless if the lookup leaves the pointer untouched
        int *olist=NULL;
        int len=0;
        testkv.getobjIDlistBysubIDpreID(s,p,olist,len);
        delete[] olist;
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data2);
    testkv.close_subID2values();
}
//Open subID2values in READ_WRITE_MODE, call getpreIDobjIDlistBysubID() with the subject of
//every record of ./data2 (releasing each returned list), then print the elapsed time and
//the value reported by getMemkb().
//If ./data2 cannot be opened the test is skipped with a message on stderr.
void test_subID2values_get_s2po()
{
    cout<< "testing subID2values get s2po..." <<endl;
    testkv.open_subID2values(KVstore::READ_WRITE_MODE);
    timeval start_time,end_time;
    gettimeofday(&start_time,NULL);
    FILE *data2=fopen("./data2","r");
    if(data2==NULL)
    {
        fprintf(stderr,"cannot open ./data2, run with the \"generate\" argument first\n");
        testkv.close_subID2values();
        return;
    }
    int n=0,s=0,p=0,o=0;
    //an unreadable header means there are no records to process
    if(fscanf(data2,"%d",&n)!=1)
        n=0;
    for(int i=0;i<n;i++)
    {
        //stop at a truncated or malformed file instead of reusing stale values
        if(fscanf(data2,"%d%d%d",&s,&p,&o)!=3)
            break;
        //start from NULL so delete[] is harmless if the lookup leaves the pointer untouched
        int *polist=NULL;
        int len=0;
        testkv.getpreIDobjIDlistBysubID(s,polist,len);
        delete[] polist;
    }
    gettimeofday(&end_time,NULL);
    cout<< "time(s) : " << end_time.tv_sec-start_time.tv_sec + (end_time.tv_usec-start_time.tv_usec)/1000000.0 <<endl;
    cout<< "memory(kb) : " << getMemkb() <<endl <<endl;
    fclose(data2);
    testkv.close_subID2values();
}
//Entry point: when the only argument is "generate", (re)create the data files first;
//afterwards every test case is run once, in the fixed order listed in the table below.
int main(int argc, char *argv[])
{
    const bool regenerate = (argc==2) && (strcmp(argv[1],"generate")==0);
    if(regenerate)
    {
        generate_test_data();
    }

    typedef void (*TestCase)();
    static const TestCase cases[] =
    {
        //string2id
        test_string2id_insert,
        test_string2id_search,
        test_string2id_delete,
        //id2string
        test_id2string_insert,
        test_id2string_search,
        test_id2string_delete,
        //subID2values
        test_subID2values_insert,
        test_subID2values_get_s2p,
        test_subID2values_get_s2o,
        test_subID2values_get_sp2o,
        test_subID2values_get_s2po,
        test_subID2values_delete
    };
    const unsigned total = sizeof(cases)/sizeof(cases[0]);
    for(unsigned idx=0; idx<total; ++idx)
    {
        cases[idx]();
    }
    return 0;
}
}