move the oldest gStore to the backup branch

This commit is contained in:
bookug 2016-09-16 15:19:28 +08:00
parent 1fce0907bb
commit 7bd3d5145c
553 changed files with 239894 additions and 35876 deletions

3
.debug/.gitignore vendored
View File

@ -1,3 +0,0 @@
*
!.gitignore

3
.objs/.gitignore vendored
View File

@ -1,3 +0,0 @@
*
!.gitignore

3
.tmp/.gitignore vendored
View File

@ -1,3 +0,0 @@
*
!.gitignore

38
Bstr/Bstr.cpp Normal file
View File

@ -0,0 +1,38 @@
/*
* Bstr.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#include "Bstr.h"
/*
 * Build a Bstr that owns a private, NUL-terminated copy of the first _len
 * bytes of _str.
 *
 * _str : source bytes; may contain embedded '\0' since the length is explicit.
 * _len : number of bytes to copy.
 *
 * A null _str or a non-positive _len produces an empty string (len == 0)
 * instead of an out-of-bounds memcpy / write at str[_len].
 */
Bstr::Bstr(const char* _str, const int _len)
{
    this->len = (_str != NULL && _len > 0) ? _len : 0;

    // one extra byte so the buffer can always be used as a C string
    this->str = new char[this->len + 1];
    if (this->len > 0)
    {
        memcpy(this->str, _str, sizeof(char) * this->len);
    }
    this->str[this->len] = '\0';
}
/*
 * Byte-wise (unsigned) lexicographic "greater than".
 * The common prefix is compared with memcmp; if it is equal, the longer
 * string is the greater one. Previously this was a stub that always
 * returned true, which made a > b and b > a hold at the same time.
 */
bool Bstr::operator > (const Bstr& _b_str)
{
    const int common = (this->len < _b_str.len) ? this->len : _b_str.len;
    if (common > 0)
    {
        const int order = memcmp(this->str, _b_str.str, sizeof(char) * common);
        if (order != 0)
        {
            return order > 0;
        }
    }
    return this->len > _b_str.len;
}
/*
 * Byte-wise (unsigned) lexicographic "less than".
 * The common prefix is compared with memcmp; if it is equal, the shorter
 * string is the smaller one. Previously this was a stub that always
 * returned true, which is not a valid strict ordering.
 */
bool Bstr::operator < (const Bstr& _b_str)
{
    const int common = (this->len < _b_str.len) ? this->len : _b_str.len;
    if (common > 0)
    {
        const int order = memcmp(this->str, _b_str.str, sizeof(char) * common);
        if (order != 0)
        {
            return order < 0;
        }
    }
    return this->len < _b_str.len;
}
/*
 * Content equality: two Bstr are equal when they have the same length and
 * the same bytes. Previously this was a stub that always returned true.
 */
bool Bstr::operator == (const Bstr& _b_str)
{
    if (this->len != _b_str.len)
    {
        return false;
    }
    if (this->len <= 0)
    {
        // two empty strings: nothing to compare
        return true;
    }
    return memcmp(this->str, _b_str.str, sizeof(char) * this->len) == 0;
}
// Placeholder: nothing is read from _fp and this object is left unchanged;
// the function unconditionally returns true.
// NOTE(review): no on-disk format is defined anywhere in this file -- implement before relying on it.
bool Bstr::read(FILE* _fp)
{
return true;
}
// Placeholder: nothing is written to _fp; the function unconditionally returns 0.
// NOTE(review): the meaning of the return value (bytes written? status?) is not
// established by this file -- confirm before implementing.
int Bstr::write(FILE* _fp)
{
return 0;
}

29
Bstr/Bstr.h Normal file
View File

@ -0,0 +1,29 @@
/*
* Bstr.h
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#ifndef BSTR_H_
#define BSTR_H_
#include<iostream>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
using namespace std;
/*
 * Bstr: a length-prefixed byte string that owns its buffer.
 *
 * The constructor allocates the buffer with new[]; the class therefore
 * needs a destructor and deep-copy semantics (rule of three), otherwise
 * every instance leaks and copies would share one buffer.
 */
class Bstr{
private:
    char* str;  // owned buffer of len+1 bytes, allocated with new[]
    int len;    // number of payload bytes (the trailing '\0' is not counted)
public:
    Bstr(const char* _str, const int _len);

    // deep copy: the new object gets its own buffer
    Bstr(const Bstr& _other)
    {
        this->len = _other.len;
        this->str = new char[_other.len + 1];
        memcpy(this->str, _other.str, sizeof(char) * (_other.len + 1));
    }

    // deep assignment; the new buffer is built first so a failed
    // allocation leaves this object untouched
    Bstr& operator = (const Bstr& _other)
    {
        if (this != &_other)
        {
            char* copy = new char[_other.len + 1];
            memcpy(copy, _other.str, sizeof(char) * (_other.len + 1));
            delete[] this->str;
            this->str = copy;
            this->len = _other.len;
        }
        return *this;
    }

    // release the buffer allocated by the constructors
    ~Bstr()
    {
        delete[] this->str;
    }

    bool operator > (const Bstr& _b_str);
    bool operator < (const Bstr& _b_str);
    bool operator == (const Bstr& _b_str);
    bool read(FILE* _fp);
    int write(FILE* _fp);
};
#endif /* BSTR_H_ */

File diff suppressed because it is too large Load Diff

2771
Database/Database.cpp~ Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,57 +1,73 @@
/*=============================================================================
# Filename: Database.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-23 14:20
# Description: originally written by liyouhuan, modified by zengli and chenjiaqi
=============================================================================*/
/*
* database.h
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#ifndef _DATABASE_DATABASE_H
#define _DATABASE_DATABASE_H
#ifndef DATABASE_H_
#define DATABASE_H_
#include<iostream>
#include<string.h>
using namespace std;
#include "../Util/Util.h"
#include "../Util/Triple.h"
#include "Join.h"
#include "../Query/IDList.h"
#include "../Query/ResultSet.h"
#include "../Query/SPARQLquery.h"
#include "../Query/BasicQuery.h"
#include "../Triple/Triple.h"
#include "../Signature/SigEntry.h"
#include "../KVstore/KVstore.h"
#include "../VSTree/VSTree.h"
#include "../Parser/DBparser.h"
#include "../Parser/RDFParser.h"
#include "../Parser/SparqlParser.h"
#include "../Query/GeneralEvaluation.h"
#include "../util/util.h"
#include<stdio.h>
#include<sys/time.h>
#include "fstream"
class Database{
class Database
{
public:
static const bool debug_1 = false;
static const bool debug_2 = false;
static const bool only_sub2idpre2id = true;
static const bool debug_vstree = false;
static const int internal = 100*1000;
static FILE* fp_debug;
static void log(std::string _str);
void test();
void test_build_sig();
void test_join();
void printIDlist(int _i, int* _list, int _len, std::string _log);
void printPairList(int _i, int* _list, int _len, std::string _log);
//when encode EntitySig, one way uses STRING-hash, the other one uses ID-hash
//depending on this->encode_mode
static const int TRIPLE_NUM_MAX = 1000*1000*1000;
/* In order to differentiate the sub-part and literal-part of object
* let subid begin with 0, while literalid begins with LITERAL_FIRST_ID */
static const int LITERAL_FIRST_ID = 1000*1000*1000;
/* when encode EntitySig, one way uses STRING-hash, the other one uses ID-hash
* depending on this->encode_mode */
static const int STRING_MODE = 1;
static const int ID_MODE = 2;
Database();
Database(std::string _name);
void release(FILE* fp0);
~Database();
bool load();
bool unload();
bool query(const string _query, ResultSet& _result_set, FILE* _fp = stdout);
bool query(const string _query, ResultSet& _result_set);
//1. if subject of _triple doesn't exist,
//then assign a new subid, and insert a new SigEntry
//2. assign new tuple_id to tuple, if predicate or object doesn't exist before too;
//3. if subject exist, update SigEntry, and update spo, ops... etc. if needed
/*
* 1. if subject of _triple doesn't exist,
* then assign a new subid, and insert a new SigEntry
* 2. assign new tuple_id to tuple, if predicate or object doesn't exist before too;
* 3. if subject exist, update SigEntry, and update spo, ops... etc. if needed
* 4.
* */
bool insert(const string& _insert_rdf_file);
bool remove(const string& _rdf_file);
@ -81,14 +97,13 @@ private:
VSTree* vstree;
KVstore* kvstore;
Join* join;
//metadata of this database: sub_num, pre_num, obj_num, literal_num, etc.
/* metadata of this database: sub_num, pre_num, obj_num, literal_num, etc. */
string db_info_file;
//six tuples: <sub pre obj sid pid oid>
/* six tuples: <sub pre obj sid pid oid> */
string six_tuples_file;
//B means binary
/* B means binary */
string signature_binary_file;
bool saveDBInfoFile();
@ -96,28 +111,30 @@ private:
string getStorePath();
//encode relative signature data of all Basic Graph Query, who union together into SPARQLquery
/* encode relative signature data of all Basic Graph Query, who union together into SPARQLquery */
void buildSparqlSignature(SPARQLquery & _sparql_q);
//encode Triple into Subject EntityBitSet
/* encode Triple into Subject EntityBitSet */
bool encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
//encode Triple into Object EntityBitSet
/* encode Triple into Object EntityBitSet */
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
bool calculateEntityBitSet(int _sub_id, EntityBitSet & _bitset);
//check whether the relative 3-tuples exist
//usually, through sp2olist
/* check whether the relative 3-tuples exist
* usually, through sp2olist */
bool exist_triple(int _sub_id, int _pre_id, int _obj_id);
//* _rdf_file denotes the path of the RDF file, where stores the rdf data
//* there are many step in this function, each one responds to an sub-function
//* 1. map sub2id and pre2id
//* 2. map literal2id and encode RDF data into signature,
//* storing in binary file: this->getSignatureBFile(), the order responds to subID
//* also, store six_tuples in file: this->getSixTuplesFile()
//* 3. build: subID2objIDlist, <subIDpreID>2objIDlist subID2<preIDobjID>list
//* 4. build: objID2subIDlist, <objIDpreID>2subIDlist objID2<preIDsubID>list
/*
* _rdf_file denotes the path of the RDF file, where stores the rdf data
* there are many step in this function, each one responds to an sub-function
* 1. map sub2id and pre2id
* 2. map literal2id and encode RDF data into signature,
* storing in binary file: this->getSignatureBFile(), the order responds to subID
* also, store six_tuples in file: this->getSixTuplesFile()
* 3. build: subID2objIDlist, <subIDpreID>2objIDlist subID2<preIDobjID>list
* 4. build: objID2subIDlist, <objIDpreID>2subIDlist objID2<preIDsubID>list
* */
//encodeRDF_new invoke new rdfParser to solve task 1 & 2 in one time scan.
bool encodeRDF(const string _rdf_file);
bool encodeRDF_new(const string _rdf_file);
@ -129,34 +146,43 @@ private:
bool sub2id_pre2id(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max);
bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max);
bool s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
bool o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max);
//NOTICE: below is the new one
//bool s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
bool s2p_s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
//bool s2p_s2o_s2po_sp2o_sp2n(int** _p_id_tuples, int _id_tuples_max);
bool o2p_o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max);
//bool o2p_o2s_o2ps_op2s_op2n(int** _p_id_tuples, int _id_tuples_max);
bool p2s_p2o_p2so(int** _p_id_tuples, int _id_tuples_max);
//bool p2s_p2o_p2so_p2n(int** _p_id_tuples, int _id_tuples_max);
bool so2p_s2o(int** _p_id_tuples, int _id_tuples_max);
bool s2o_sp2o_s2po(int** _p_id_tuples, int _id_tuples_max);
bool o2s_op2s_o2ps(int** _p_id_tuples, int _id_tuples_max);
static int _spo_cmp(const void* _a, const void* _b);
static int _ops_cmp(const void* _a, const void* _b);
static int _pso_cmp(const void* _a, const void* _b);
static int _sop_cmp(const void* _a, const void* _b);
bool objIDIsEntityID(int _id);
/*
* join on the vector of CandidateList, which is available after retrieval from the VSTree,
* and store the result in _result_set
* */
bool join(SPARQLquery& _sparql_query);
//* join on the vector of CandidateList, which is available after retrieval from the VSTree,
//* and store the result in _result_set
void filter_before_join(BasicQuery* basic_query);
void literal_edge_filter(BasicQuery* basic_query, int _var_i);
void preid_filter(BasicQuery* basic_query, int _var_i);
void only_pre_filter_after_join(BasicQuery* basic_query);
void add_literal_candidate(BasicQuery* basic_query);
bool join_basic(BasicQuery* _basic_query);
bool join(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type,
int _var_num, bool shouldAddLiteral, IDList& _can_list);
//bool join(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type, int _var_num, bool shouldAddLiteral, IDList& _can_list);
bool select(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type, int _var_num);
//bool select(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type, int _var_num);
void mapVarVec(vector<string> &a, vector<string> &b, vector<int> &mapvar, int &total_vars);
void mergeJoin(SPARQLquery::TempResult &a, SPARQLquery::TempResult &b, SPARQLquery::TempResult &r);
void mergeUnion(SPARQLquery::TempResult &a, SPARQLquery::TempResult &b, SPARQLquery::TempResult &r);
void mergeLeftOuterJoin(SPARQLquery::TempResult &a, SPARQLquery::TempResult &b, SPARQLquery::TempResult &r);
void doFilter(SPARQLquery::TempResult &a, SPARQLquery::FilterTree *ft, SPARQLquery::TempResult &r);
void getFilterStr(string &str , vector<int> &r, map<string, int> &dict, string &ftarg);
bool matchFilter(vector<int> &r,map<string, int> &dict, SPARQLquery::FilterTree *ft);
//get the final string result_set from SPARQLquery
int genEvaPlan(SPARQLquery::PatternGroup& pg, SPARQLquery& query, int id);
void doEvaPlan(SPARQLquery &query);
/* get the final string result_set from SPARQLquery */
bool getFinalResult(SPARQLquery& _sparql_q, ResultSet& _result_set);
};
#endif //_DATABASE_DATABASE_H
#endif /* DATABASE_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -1,238 +0,0 @@
/*=============================================================================
# Filename: Join.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-12-13 16:05
# Description: design join strategies and select/cost modules
=============================================================================*/
#ifndef _JOIN_JOIN_H
#define _JOIN_JOIN_H
#include "../Query/IDList.h"
#include "../Query/BasicQuery.h"
#include "../Query/SPARQLquery.h"
#include "../KVstore/KVstore.h"
#include "../Util/Util.h"
//BETTER?:place multi_join and index_join in separated files
// Aliases for the temporary result table used by Join (see Join::current_table).
// NOTE(review): presumably one RecordType is one row of matched ids -- confirm in Join.cpp.
typedef vector<int> RecordType;
typedef vector<int>::iterator RecordIterator;
// A table is a linked list of rows.
typedef list<RecordType> TableType;
typedef list<RecordType>::iterator TableIterator;
typedef list<RecordType>::reverse_iterator TableReverseIterator;
//typedef list< vector<int> > TableType;
//typedef list< vector<int> >::iterator TableIterator;
//typedef list< vector<int> >::reverse_iterator TableReverseIterator;
// Two-level containers of raw id arrays and of their lengths; they are passed
// together to Join::acquire_all_id_lists and new_join_with_multi_vars_prepared.
// NOTE(review): presumably IdListsLen[i][j] is the length of IdLists[i][j] -- confirm.
typedef vector< vector<int*> > IdLists;
typedef vector< vector<int> > IdListsLen;
// One candidate value stored in an IndexList, together with its links to
// items of other index lists.
typedef struct IndexItem
{
    int value;
    bool isValid;   //needed for final travelling

    //NOTICE: the size of vector is expected to be small
    //the order in vector must be same as in IndexList vector
    std::vector< std::list< std::list<struct IndexItem>::iterator > > travel;
    std::vector< std::set<int> > check;
    //map< int, list < list<struct IndexItem>::iterator > > links;  //direct next index list id and linking
    //map< int, set <int> > check;  //indirect previous index list id and verifying

    // default item: no value (-1) and marked invalid
    IndexItem() : value(-1), isValid(false)
    {
    }

    // item holding _val, marked valid
    IndexItem(int _val) : value(_val), isValid(true)
    {
    }
}IndexItem;
typedef struct IndexList
{
//int next;
//NOTICE:the list should be ordered at the beginning
list<IndexItem> candidates;
list<IndexItem>::iterator border; //used to divide valid and invalid area
bool prepared; //find and set all invalid eles restricted by subtree in travelling
int position; //current neighbor to travel
vector<int> travel_map; //the mapping between links position and IndexList id
vector<int> check_map; //the mapping between check position and IndexList id
IndexList()
{
//this->next = -1;
this->prepared = false;
this->position = 0;
}
bool end()
{
return this->position == (int)this->travel_map.size();
}
int next()
{
return this->travel_map[this->position++];
}
//NOTICE:we can not use binary-search in list, but this search method maybe slow
//BETTER?:adjust the list to binary-tree or other struture?
list<IndexItem>::iterator search(int _val)
{
for(list<IndexItem>::iterator it = this->candidates.begin(); it != this->border; ++it)
{
if(it->value == _val)
return it;
}
return this->border;
}
}IndexList;
// Plain record tying an id to an id array and that array's length.
// The pointer is stored as given; this struct neither copies nor frees it.
typedef struct Satellite
{
    int id;
    int* idlist;
    int idlist_len;

    Satellite(int _id, int* _idlist, int _idlist_len)
        : id(_id), idlist(_idlist), idlist_len(_idlist_len)
    {
    }
}Satellite;
// Shorthand for the containers used by IndexItem / IndexList:
// a list of items, its iterator, and a list of such iterators
// (the element type of IndexItem::travel).
typedef list<IndexItem> ItemList;
typedef list<IndexItem>::iterator ItemListIterator;
typedef list< list<struct IndexItem>::iterator > IteratorList;
//Database new Join and pass something like kvstore
// Join: joins the candidate lists of a BasicQuery (or of every BasicQuery of a
// SPARQLquery) into result records. Only join_sparql() and join_basic() are
// public; everything else is internal state and helper steps.
// Two join methods are declared below: multi_join() and index_join().
class Join
{
private:
int start_id;
int var_num;
//bool* dealed_triple;
// query currently being joined and the key-value store used for lookups
// NOTE(review): ownership of these pointers is not visible in this header -- confirm in Join.cpp
BasicQuery* basic_query;
KVstore* kvstore;
//used by score_node for parameters
static const unsigned PARAM_DEGREE = 1;
static const unsigned PARAM_SIZE = 100000;
static const unsigned PARAM_DENSE = 1;
// NOTE(review): an in-class initializer on a static const double is a compiler
// extension; ISO C++ only allows it for integral/enum types (or with constexpr).
static const double JUDGE_LIMIT = 0.5;
static const int LIMIT_CANDIDATE_LIST_SIZE = 1000;
//BETTER?:predefine size to avoid copy cost
TableType current_table;
TableIterator new_start; //keep to end() as default
//list<bool> table_row_new;
//keep the mapping for disordered ids in vector<int> table
int* id2pos;
int id_pos; //the num of id put into id2pos currently
int* pos2id;
bool* dealed_triple;
stack<int> mystack;
vector<int*>* result_list;
vector<Satellite> satellites;
int* record;
int record_len;
void init(BasicQuery* _basic_query);
void clear();
void add_id_pos_mapping(int _id);
void reset_id_pos_mapping();
//judge which method should be used according to
//the size of candidates and structure of querying graph
int judge(int _smallest, int _biggest);
//select the start point and search order
void select();
//score the cost to link two tables and the efficiency
//of filtering
//int score(List1, List2);
//score the node according to degree and size
double score_node(unsigned _degree, unsigned _size);
void toStartJoin();
bool filter_before_join();
bool constant_edge_filter(int _var_i);
void preid_filter(int _var_i);
bool only_pre_filter_after_join();
void add_literal_candidate();
bool pre_var_handler();
//bool filterBySatellites(int _var, int _ele);
bool filterBySatellites(int _var);
bool allFilterByPres();
void generateAllSatellites();
void cartesian(int pos, int end);
//functions for help
//copy/add to the end of current_table and set true
void add_new_to_results(TableIterator it, int id);
//void set_results_old(list<bool>::iterator it);
int choose_next_node(int id);
bool is_literal_var(int id);
bool is_literal_ele(int _id);
void copyToResult();
//BETTER?:change these params to members in class
void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, int _can_list_size);
bool if_prepare_idlist(int _can_list_size, bool _is_literal);
bool new_join_with_multi_vars_prepared(IdLists& _id_lists, IdListsLen& _id_lists_len, vector<int>& _edges, IDList& _can_list, int _can_list_size);
bool new_join_with_multi_vars_not_prepared(vector<int>& _edges, IDList& _can_list, int _can_list_size, int _id, bool _is_literal);
bool multi_join();
//================================================================================================
//The index join method saves the memory cost because 2m+2mn < 3mn,
//and time may be reduced if the pre-process is not too costly
//because we can reuse the links other than recompute in temporal table
//New struct is needed for node, i.e. list<bool, int, list<iterator> >,
//because we may have to delete, but how can we know if an iterator
//is valid if the one it points to is removed?(remove if the other is removed; using end())
//1. based on edges: process each time only in valid area(already
//matched with others, invalid is removed), and finally it must be
//all ok, just copy to result_list. We should select the edge order
//to better the efficiency, but how can we keep only a neighbor links
//set if we want to save memory?(ensure all can be linked later)
//2. based on points: search deeply like multi-index-join, only a
//neighbor links set is kept for a node(not every edge), so memory
//cost is low. Finally, travel around along valid iterator, copy...
IndexList* index_lists;
void buildIndexLists();
bool travel_init(int _lid);
bool index_link(int _nid, int _idx);
bool index_filter(int _nid, int _idx);
bool table_travel(int _id1, int _id2);
bool table_check(int _id1, int _id2);
bool index_travel_one();
bool index_travel_two();
bool index_travel();
bool index_join();
//NOTICE:this is only used to join a BasicQuery
bool join();
public:
Join();
// build a Join that uses _kvstore for its lookups
Join(KVstore* _kvstore);
//these functions can be called by Database
bool join_sparql(SPARQLquery& _sparql_query);
bool join_basic(BasicQuery* _basic_query);
~Join();
};
#endif //_JOIN_JOIN_H

View File

@ -1,363 +0,0 @@
/*=============================================================================
# Filename: Strategy.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-05-07 16:31
# Description: implement functions in Strategy.h
=============================================================================*/
#include "Strategy.h"
using namespace std;
// Default construction: method 0 selected, no key-value store and no VS-tree attached.
Strategy::Strategy()
    : method(0), kvstore(NULL), vstree(NULL)
{
    //this->prepare_handler();
}
// Construct with the key-value store and VS-tree to query; the pointers are
// stored as given and method starts at 0.
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree)
    : method(0), kvstore(_kvstore), vstree(_vstree)
{
    //this->prepare_handler();
}
// Nothing to release: kvstore and vstree are not deleted here, and the
// dispatch table that the commented-out line refers to is never allocated.
Strategy::~Strategy()
{
//delete[] this->dispatch;
}
//void
//Strategy::prepare_handler()
//{
//this->dispatch = new QueryHandler[Strategy::QUERY_HANDLER_NUM];
//this->dispatch[0] = Strategy::handler0;
//}
//NOTICE: 2-triple case ?s1 p1 c0 ?s2 p2 c0 is viewed as an unconnected graph
//however, this can be dealed due to several basicquery and linking
/*
 * Evaluate every BasicQuery contained in _query and fill its result list.
 *
 * With MULTI_INDEX defined, each BasicQuery is inspected and answered either
 * directly from the KVstore indexes (single-triple special cases) or through
 * one of handler0..handler3, selected via this->method.
 * Without MULTI_INDEX, the original pipeline is used: VSTree retrieval for the
 * whole SPARQLquery followed by Join::join_sparql.
 *
 * Always returns true.
 */
bool
Strategy::handle(SPARQLquery& _query)
{
#ifdef MULTI_INDEX
    Util::logging("IN Strategy::handle");

    vector<BasicQuery*>& queryList = _query.getBasicQueryVec();
    // enumerate each BasicQuery and choose how to answer it
    for(vector<BasicQuery*>::iterator iter = queryList.begin(); iter != queryList.end(); ++iter)
    {
        BasicQuery* bq = *iter;
        this->method = 0;

        vector<int*>& result_list = bq->getResultList();
        int varNum = bq->getVarNum();   //the num of vars needing to be joined
        int total_num = bq->getTotalVarNum();
        int pre_varNum = bq->getPreVarNum();
        int triple_num = bq->getTripleNum();

        // special case: a single triple whose predicate is a variable
        if(triple_num == 1 && pre_varNum == 1)
        {
            Triple triple = bq->getTriple(0);
            int* id_list = NULL;
            int id_list_len = 0;
            result_list.clear();

            if(total_num == 2)
            {
                //TODO:consider special case, select ?s (?p) ?o where { ?s ?p ?o . }
                //filter and join is too costly, should enum all predicates and use p2so
                //maybe the selected vars are ?s (?p) or ?o (?p)
                cerr << "not supported now!" << endl;
            }
            else if(total_num == 1)
            {
                //TODO:if just select s/o, use o2s/s2o
                //if only p is selected, use s2p or o2p
                //only if both s/o and p are selected, use s2po or o2ps
                if(triple.subject[0] != '?')  //constant
                {
                    int sid = (this->kvstore)->getIDByEntity(triple.subject);
                    this->kvstore->getpreIDobjIDlistBysubID(sid, id_list, id_list_len);
                }
                else if(triple.object[0] != '?')  //constant
                {
                    int oid = (this->kvstore)->getIDByEntity(triple.object);
                    if(oid == -1)
                    {
                        oid = (this->kvstore)->getIDByLiteral(triple.object);
                    }
                    this->kvstore->getpreIDsubIDlistByobjID(oid, id_list, id_list_len);
                }

                //always place s/o before p in result list
                //the list holds (pre, s/o) pairs; stop before a dangling last
                //element so id_list[i+1] never reads past the end
                for(int i = 0; i + 1 < id_list_len; i += 2)
                {
                    int* record = new int[2];    //2 vars selected
                    record[1] = id_list[i];      //for the pre var
                    record[0] = id_list[i+1];    //for the s/o var
                    result_list.push_back(record);
                }
            }
            else if(total_num == 0)  //only ?p
            {
                //just use so2p
                int sid = (this->kvstore)->getIDByEntity(triple.subject);
                int oid = (this->kvstore)->getIDByEntity(triple.object);
                if(oid == -1)
                {
                    oid = (this->kvstore)->getIDByLiteral(triple.object);
                }
                this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);

                //copy to result list
                for(int i = 0; i < id_list_len; ++i)
                {
                    int* record = new int[1];
                    record[0] = id_list[i];
                    result_list.push_back(record);
                }
            }

            delete[] id_list;
            continue;
        }

        if(pre_varNum == 0 && triple_num == 1)    //only one triple and no predicates
        {
            //only one variable and one triple: ?s pre obj or sub pre ?o
            if(total_num == 1)
            {
                this->method = 1;
            }
            //only two vars: ?s pre ?o
            else if(total_num == 2)
            {
                if(varNum == 1)   //the selected id should be 0
                {
                    this->method = 2;
                }
                else   //==2
                {
                    this->method = 3;
                }
            }
            //cerr << "this BasicQuery use query strategy 2" << endl;
            //cerr<<"Final result size: "<<(*iter)->getResultList().size()<<endl;
            //continue;
        }

        //QueryHandler dispatch;
        //dispatch[0] = handler0;
        switch(this->method)
        {
        case 0:
            this->handler0(bq, result_list);
            break;
        case 1:
            this->handler1(bq, result_list);
            break;
        case 2:
            this->handler2(bq, result_list);
            break;
        case 3:
            this->handler3(bq, result_list);
            break;
        default:
            cerr << "not support this method" << endl;
        }
        cerr<<"Final result size: "<<bq->getResultList().size()<<endl;
        //BETTER: use function pointer array in C++ class
    }
#else
    cerr << "this BasicQuery use original query strategy" << endl;
    long tv_handle = Util::get_cur_time();
    (this->vstree)->retrieve(_query);
    long tv_retrieve = Util::get_cur_time();
    cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;

    {
        // Strategy declares no `join` member, so use a local object; the inner
        // scope destroys it before the time is taken, as the old delete did.
        Join joiner(this->kvstore);
        joiner.join_sparql(_query);
    }

    long tv_join = Util::get_cur_time();
    cout << "after Join, used " << (tv_join - tv_retrieve) << "ms." << endl;
#endif
    Util::logging("OUT Strategy::handle");
    return true;
}
/*
 * Strategy 0 (general case): retrieve candidates for every variable that
 * needs it from the VSTree, then join them with Join::join_basic.
 *
 * _bq          : the basic query to evaluate.
 * _result_list : unused here; presumably the results are written through _bq
 *                by join_basic -- confirm in Join.cpp.
 */
void
Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list)
{
    cerr << "this BasicQuery use query strategy 0" << endl;

    //BETTER:not all vars in join filtered by vstree
    //(A)-B-c: B should by vstree, then by c, but A should be generated in join(first set A as not)
    //if A not in join, just filter B by pre
    //divided into star graphs, join core vertices, generate satellites
    //join should also start from a core vertex(neighbor can be constants or vars) if available
    //
    //QUERY: is there any case that a node should be retrieved by other index?(instead of vstree or generate when join)
    //
    //we had better treat 1-triple case(no ?p) as special, and then in other cases, core vertex exist(if connected)
    //However, if containing ?p and 1-triple, we should treat it also as a special case, or select a variable as core vertex
    //and retrieved (for example, ?s ?p o or s ?p ?o, generally no core vertex in these cases)

    long tv_handle = Util::get_cur_time();
    int varNum = _bq->getVarNum();  //the num of vars needing to be joined
    for(int i = 0; i < varNum; ++i)
    {
        if(_bq->if_need_retrieve(i) == false)
            continue;

        bool isLiteralVar = _bq->isLiteralVariable(i);
        const EntityBitSet& entityBitSet = _bq->getVarBitSet(i);
        IDList* idListPtr = &( _bq->getCandidateList(i) );
        this->vstree->retrieveEntity(entityBitSet, idListPtr);
        if(!isLiteralVar)
        {
            _bq->setReady(i);
        }
        //the basic query should end if one non-literal var has no candidates
        if(idListPtr->size() == 0 && !isLiteralVar)
        {
            break;
        }
    }

    //if(_bq->isReady(0))
    //cout<<"error: var 0 is ready?"<<endl;

    //TODO:end directly if one is empty!

    long tv_retrieve = Util::get_cur_time();
    cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;

    {
        // automatic object instead of new/delete: released even if join_basic throws,
        // and destroyed before the time is taken, as the old delete was
        Join joiner(this->kvstore);
        joiner.join_basic(_bq);
    }

    long tv_join = Util::get_cur_time();
    cout << "after Join, used " << (tv_join - tv_retrieve) << "ms." << endl;
}
/*
 * Strategy 1: one triple with a constant predicate and exactly one variable
 * (?s pre obj or sub pre ?o). The answer is read straight from the
 * (obj,pre)->sub or (sub,pre)->obj index and copied into _result_list as
 * one-element records.
 */
void
Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
{
    long before_filter = Util::get_cur_time();
    cerr << "this BasicQuery use query strategy 1" << endl;

    //int neighbor_id = (*_bq->getEdgeNeighborID(0, 0);  //constant, -1
    char edge_type = _bq->getEdgeType(0, 0);
    int triple_id = _bq->getEdgeID(0, 0);
    Triple triple = _bq->getTriple(triple_id);
    int pre_id = _bq->getEdgePreID(0, 0);

    int* ids = NULL;
    int ids_len = 0;
    if(edge_type != Util::EDGE_OUT)
    {
        //cerr<<"edge in!!!"<<endl;
        int sub_id = this->kvstore->getIDByEntity(triple.subject);
        this->kvstore->getobjIDlistBysubIDpreID(sub_id, pre_id, ids, ids_len);
    }
    else
    {
        //cerr<<"edge out!!!"<<endl;
        // the constant object may be an entity or, failing that, a literal
        int obj_id = (this->kvstore)->getIDByEntity(triple.object);
        if(obj_id == -1)
        {
            obj_id = (this->kvstore)->getIDByLiteral(triple.object);
        }
        this->kvstore->getsubIDlistByobjIDpreID(obj_id, pre_id, ids, ids_len);
    }

    long after_filter = Util::get_cur_time();
    cerr << "after filter, used " << (after_filter - before_filter) << "ms" << endl;

    _result_list.clear();
    //cerr<<"now to copy result to list"<<endl;
    int pos = 0;
    while(pos < ids_len)
    {
        int* single = new int[1];    //only this var is selected
        single[0] = ids[pos];
        //cerr<<this->kvstore->getEntityByID(record[0])<<endl;
        _result_list.push_back(single);
        ++pos;
    }

    long after_copy = Util::get_cur_time();
    cerr<<"after copy to result list: used "<<(after_copy-after_filter)<<" ms"<<endl;
    delete[] ids;
    cerr<<"Final result size: "<<_result_list.size()<<endl;
}
/*
 * Strategy 2: one triple "?s pre ?o" where only one of the two variables is
 * selected (the selected variable has id 0). The answer is read from the
 * pre->sub or pre->obj index and copied into _result_list as one-element
 * records. If neither variable has id 0 an error is reported and the result
 * list is left empty.
 */
void
Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
{
    long before_filter = Util::get_cur_time();
    cerr << "this BasicQuery use query strategy 2" << endl;

    int triple_id = _bq->getEdgeID(0, 0);
    Triple triple = _bq->getTriple(triple_id);
    int pre_id = _bq->getEdgePreID(0, 0);

    int var1_id = _bq->getIDByVarName(triple.subject);
    int var2_id = _bq->getIDByVarName(triple.object);

    int* id_list = NULL;
    int id_list_len = 0;
    if(var1_id == 0)   //subject var selected
    {
        //use p2s directly
        this->kvstore->getsubIDlistBypreID(pre_id, id_list, id_list_len);
    }
    else if(var2_id == 0)   //object var selected
    {
        //use p2o directly
        this->kvstore->getobjIDlistBypreID(pre_id, id_list, id_list_len);
    }
    else
    {
        // the message used to blame Database::handle(), which is not where this runs
        cerr << "ERROR in Strategy::handler2(): no selected var!"<<endl;
    }

    long after_filter = Util::get_cur_time();
    cerr << "after filter, used " << (after_filter - before_filter) << "ms" << endl;

    _result_list.clear();
    for(int i = 0; i < id_list_len; ++i)
    {
        int* record = new int[1];    //only one var
        record[0] = id_list[i];
        _result_list.push_back(record);
    }

    long after_copy = Util::get_cur_time();
    cerr<<"after copy to result list: used "<<(after_copy-after_filter)<<" ms"<<endl;
    delete[] id_list;   // delete[] on NULL is a no-op when no index was queried
    cerr<<"Final result size: "<<_result_list.size()<<endl;
}
/*
 * Strategy 3: one triple "?s pre ?o" with both variables selected. The
 * (sub, obj) pairs are read from the pre->(sub,obj) index and copied into
 * _result_list as two-element records, each id stored at the slot given by
 * its variable id.
 *
 * Each record has exactly two slots, so both variable ids must be 0 or 1;
 * otherwise an error is reported and the result list is left empty instead
 * of writing outside the record.
 */
void
Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
{
    long before_filter = Util::get_cur_time();
    cerr << "this BasicQuery use query strategy 3" << endl;

    int triple_id = _bq->getEdgeID(0, 0);
    Triple triple = _bq->getTriple(triple_id);
    int pre_id = _bq->getEdgePreID(0, 0);

    int* id_list = NULL;
    int id_list_len = 0;
    this->kvstore->getsubIDobjIDlistBypreID(pre_id, id_list, id_list_len);

    int var1_id = _bq->getIDByVarName(triple.subject);
    int var2_id = _bq->getIDByVarName(triple.object);

    long after_filter = Util::get_cur_time();
    cerr << "after filter, used " << (after_filter - before_filter) << "ms" << endl;

    _result_list.clear();

    if(var1_id < 0 || var1_id > 1 || var2_id < 0 || var2_id > 1)
    {
        cerr << "ERROR in Strategy::handler3(): var id out of range!" << endl;
        delete[] id_list;
        return;
    }

    // the list holds (sub, obj) pairs; stop before a dangling last element
    // so id_list[i+1] never reads past the end
    for(int i = 0; i + 1 < id_list_len; i += 2)
    {
        int* record = new int[2];    //2 vars and selected
        record[var1_id] = id_list[i];
        record[var2_id] = id_list[i+1];
        _result_list.push_back(record);
    }

    long after_copy = Util::get_cur_time();
    cerr<<"after copy to result list: used "<<(after_copy-after_filter)<<" ms"<<endl;
    delete[] id_list;
    cerr<<"Final result size: "<<_result_list.size()<<endl;
}

View File

@ -1,47 +0,0 @@
/*=============================================================================
# Filename: Strategy.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-05-07 16:28
# Description:
=============================================================================*/
#ifndef _DATABASE_STRATEGY_H
#define _DATABASE_STRATEGY_H
#include "../Util/Util.h"
#include "../Util/Triple.h"
#include "Join.h"
#include "../Query/IDList.h"
#include "../Query/SPARQLquery.h"
#include "../Query/BasicQuery.h"
#include "../KVstore/KVstore.h"
#include "../VSTree/VSTree.h"
// Strategy: picks how each BasicQuery of a SPARQLquery is evaluated and runs
// the chosen handler (see Strategy.cpp).
class Strategy
{
public:
Strategy();
// store the given KVstore / VSTree pointers for later queries;
// ~Strategy() in Strategy.cpp does not delete them
Strategy(KVstore*, VSTree*);
~Strategy();
//select efficient strategy to do the sparql query
bool handle(SPARQLquery&);
private:
// index of the handler chosen by handle(): 0..3 map to handler0..handler3
int method;
KVstore* kvstore;
VSTree* vstree;
// NOTE(review): the non-MULTI_INDEX branch of Strategy::handle refers to
// this->join, but no such member is declared here -- confirm.
// handler0: VSTree retrieval followed by Join::join_basic (general case)
void handler0(BasicQuery*, vector<int*>&);
// handler1: one triple with a single variable, answered from a KVstore index
void handler1(BasicQuery*, vector<int*>&);
// handler2: one triple "?s pre ?o" with one selected variable
void handler2(BasicQuery*, vector<int*>&);
// handler3: one triple "?s pre ?o" with both variables selected
void handler3(BasicQuery*, vector<int*>&);
//QueryHandler *dispatch;
//void prepare_handler();
};
static const unsigned QUERY_HANDLER_NUM = 4;
typedef void (Strategy::*QueryHandler[QUERY_HANDLER_NUM]) (BasicQuery*, vector<int*>&);
//QueryHandler dispatch;
#endif //_DATABASE_STRATEGY_H

View File

@ -15,7 +15,7 @@ string filePath_sID2s;
string filePath_o2sID;
string filePath_opID2sID;
FILE * _log_btree;
// 在中间结点中插入键
// 在中间结点中插入键
bool mItnlNode::Insert( mNode * pNode)
{
if(getCount() >= MAXNUM_KEY)
@ -32,7 +32,7 @@ bool mItnlNode::Insert( mNode * pNode)
printf("err in insert itnl\n");
system("pause"); exit(0);
}
// 在要插入的点是在最右端时要特殊处理, solved
// 在要插入的点是在最右端时要特殊处理, solved
for(int i = getCount() + 1; i > _ikey; i --)
{
this ->setElement(i, this ->getElement(i - 1) );
@ -50,13 +50,13 @@ bool mItnlNode::Insert( mNode * pNode)
return true;
}
// 在中间结点中删除键,以及该键后的指针
// 在中间结点中删除键,以及该键后的指针
int mItnlNode::Delete(const KeyType & _keytype)
{
int _index = -1;
int _ibegin = 1, _iend = getCount();
int _imiddle;
// 二分查找index
// 二分查找index
while(_ibegin < _iend)
{
_imiddle = (_ibegin + _iend) / 2;
@ -81,9 +81,9 @@ int mItnlNode::Delete(const KeyType & _keytype)
}
}
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
// 对于insert index = 1 的时候一定是整棵树的最右边!!!
// delete则不同
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
// 对于insert index = 1 的时候一定是整棵树的最右边!!!
// delete则不同
if(_index == 1 && this ->getFather() != NULL)
{
mItnlNode * itnl_father = (mItnlNode *)(this ->getFather() );
@ -91,7 +91,7 @@ int mItnlNode::Delete(const KeyType & _keytype)
KeyType & i_data = this ->getKey(2);
while(itnl_father != NULL)
{
// 此处同insert不同审慎其不同之处
// 此处同insert不同审慎其不同之处
int tmp_key = itnl_father ->iExist(f_data);
if(tmp_key < 1)
{
@ -142,7 +142,7 @@ KeyType & mItnlNode::Split(mItnlNode* pNode)
return (pNode ->getElement(1)).mKey;
}
// 结合结点,把指定中间结点的数据全部剪切到本中间结点
// 结合结点,把指定中间结点的数据全部剪切到本中间结点
bool mItnlNode::Combine(mItnlNode * pNode)
{
if(this ->getCount() + pNode ->getCount() > MAXNUM_KEY)
@ -156,7 +156,7 @@ bool mItnlNode::Combine(mItnlNode * pNode)
return true;
}
// 从另一结点移一个元素到本结点
// 从另一结点移一个元素到本结点
bool mItnlNode::MoveOneElement(mNode* pNode)
{
@ -164,9 +164,9 @@ bool mItnlNode::MoveOneElement(mNode* pNode)
return false;
}
// 清除叶子结点中的数据
// 清除叶子结点中的数据
// 在叶子结点中插入数据
// 在叶子结点中插入数据
bool mLeafNode::Insert(const mleafdata & _leafdata)
{
const KeyType & data = _leafdata.mData;
@ -179,9 +179,9 @@ bool mLeafNode::Insert(const mleafdata & _leafdata)
printf("err count too large\n");
return false;
}
// 返回i data 介于i - 1 与 i 之间, 要放在i 上
// 返回i data 介于i - 1 与 i 之间, 要放在i 上
int _i_insert = this ->iInsert(data);
// 还要考虑仅根节点是叶子节点的情况, 需要再加个条件
// 还要考虑仅根节点是叶子节点的情况, 需要再加个条件
if(_i_insert == 1 && this ->getFather() != NULL)
{
mItnlNode * _pFather = (mItnlNode *)(this ->getFather());
@ -196,7 +196,7 @@ bool mLeafNode::Insert(const mleafdata & _leafdata)
system("pause");
exit(0);
}
//同步使得内存位等其它位失效, 策略失策, 弥补之。。
//同步使得内存位等其它位失效, 策略失策, 弥补之。。
_pFather ->setKey(_ikey, _leafdata.mData);
_pFather ->setMemory(_ikey);
_pFather ->setModify();
@ -216,10 +216,10 @@ bool mLeafNode::Insert(const mleafdata & _leafdata)
}
/*
* key的下标 -1
*
* insert不同
* insert若出现最左端必然是整棵树的最左端
* key的下标 -1
*
* insert不同
* insert若出现最左端必然是整棵树的最左端
*/
int mLeafNode::Delete(KeyType & _keytype)
{
@ -249,9 +249,9 @@ int mLeafNode::Delete(KeyType & _keytype)
_ibegin = _imiddle;
}
}
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
// 对于insert index = 1 的时候一定是整棵树的最右边!!!
// delete则不同
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
// 对于insert index = 1 的时候一定是整棵树的最右边!!!
// delete则不同
if(_index == 1 && this ->getFather() != NULL)
{
mItnlNode * itnl_father = (mItnlNode *)(this ->getFather() );
@ -259,7 +259,7 @@ int mLeafNode::Delete(KeyType & _keytype)
KeyType & i_data = this ->getKey(2);
while(itnl_father != NULL)
{
// 此处同insert不同审慎其不同之处
// 此处同insert不同审慎其不同之处
int tmp_key = itnl_father ->iExist(f_data);
if(tmp_key < 1)
{
@ -292,7 +292,7 @@ int mLeafNode::Delete(KeyType & _keytype)
}
return -1;
}
//重载delete of leaf
//重载delete of leaf
int mLeafNode::Delete(KeyType & _keytype, char partval[], int & pvFlag)
{
int _index = -1;
@ -334,12 +334,12 @@ int mLeafNode::Delete(KeyType & _keytype, char partval[], int & pvFlag)
if(pvFlag == FLAG_NO_ZERO)
return _index;
//如果删除成功并且元素变为空, 则继续删除对应的key
//如果删除成功并且元素变为空, 则继续删除对应的key
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
// 对于insert index = 1 的时候一定是整棵树的最右边!!!
// delete则不同
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
// 对于insert index = 1 的时候一定是整棵树的最右边!!!
// delete则不同
if(_index == 1 && this ->getFather() != NULL)
{
mItnlNode * itnl_father = (mItnlNode *)(this ->getFather() );
@ -347,7 +347,7 @@ int mLeafNode::Delete(KeyType & _keytype, char partval[], int & pvFlag)
KeyType & i_data = this ->getKey(2);
while(itnl_father != NULL)
{
// 此处同insert不同审慎其不同之处
// 此处同insert不同审慎其不同之处
int tmp_key = itnl_father ->iExist(f_data);
if(tmp_key < 1)
{
@ -379,7 +379,7 @@ int mLeafNode::Delete(KeyType & _keytype, char partval[], int & pvFlag)
// 分裂叶子结点,把本叶子结点的后一半数据剪切到指定的叶子结点中
// 分裂叶子结点,把本叶子结点的后一半数据剪切到指定的叶子结点中
KeyType & mLeafNode::Split(mLeafNode * pNode)
{
for(int i = ORDER_V + 1; i <= MAXNUM_KEY; i ++)
@ -393,7 +393,7 @@ KeyType & mLeafNode::Split(mLeafNode * pNode)
return (pNode ->getElement(1)).mData;
}
// 结合结点,把指定叶子结点的数据全部剪切到本叶子结点
// 结合结点,把指定叶子结点的数据全部剪切到本叶子结点
bool mLeafNode::Combine(mLeafNode * pNode)
{
int this_count = this ->getCount();
@ -405,7 +405,7 @@ bool mLeafNode::Combine(mLeafNode * pNode)
}
return false;
}
// 查找对应的叶子结点
// 查找对应的叶子结点
mLeafNode* BPlusTree::SearchLeafNode(const KeyType & data)const
{
mNode * pNode = mRoot;
@ -415,15 +415,15 @@ mLeafNode* BPlusTree::SearchLeafNode(const KeyType & data)const
}
else// no check
{
/*
* while
*
*
* pNode break
* pNode指向叶子节点
*
* Search ,
*/
/*
* while
*
*
* pNode break
* pNode指向叶子节点
*
* Search ,
*/
// int _floor = 1;
while(pNode ->getType() == NODE_TYPE_INTERNAL)
{
@ -461,8 +461,8 @@ mLeafNode* BPlusTree::SearchLeafNode(const KeyType & data)const
}
return NULL;
}
// 在树中查找数据
bool BPlusTree::Search(KeyType & data, mleafdata & _ret)//增加一参数, 用于接收查找过程中进入的叶子节点
// 在树中查找数据
bool BPlusTree::Search(KeyType & data, mleafdata & _ret)//增加一参数, 用于接收查找过程中进入的叶子节点
{
mLeafNode * _pLeaf = SearchLeafNode(data);
int _ikey = _pLeaf ->iExist(data);
@ -521,7 +521,7 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
long long int _addr_newleaf = mblockQueue.Pop();
_pNewLeaf ->setAddrFB(_addr_newleaf);
// _key_tmp 也就将是_pnewleaf的第一个元素的key
// _key_tmp 也就将是_pnewleaf的第一个元素的key
// set modified in split;
if(! _pOldLeaf ->getModify())
{
@ -535,8 +535,8 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
if(_pFather == NULL)
{
// _pOldLeaf以前是根节点要把offset = 0 让出来
// 还存在占用外存链的情况下则需要释放外存链
// _pOldLeaf以前是根节点要把offset = 0 让出来
// 还存在占用外存链的情况下则需要释放外存链
// if(!preModified)
// {
// DelDisk(mfp, 0l, mblockQueue);
@ -545,7 +545,7 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
_pOldLeaf ->setAddrFB(_addr_new);
// setmodified in initial;
mItnlNode * _pItnl = new mItnlNode;
// 分配新的首地址
// 分配新的首地址
long long int _addr_root = 0;
long long int _addr_2 = _pNewLeaf ->getAddrFB();
long long int _addr_1 = _pOldLeaf ->getAddrFB();
@ -587,15 +587,15 @@ bool mLeafNode :: dupInsert(const mleafdata & _mleafdata, int _index_insert)
return true;
}
/* 删除某数据
* ~~
/* 删除某数据
* ~~
*/
bool BPlusTree::Delete(KeyType & data)
{
mLeafNode * _pOldLeaf = SearchLeafNode(data);
// for when _ikey = 1
//if ok , return the index of the deleted
//如果idelete是1的话则函数里会是否进行向上删除 考虑
//如果idelete是1的话则函数里会是否进行向上删除 考虑
int _idelete = _pOldLeaf ->Delete(data);
if(_idelete < 0) return false;
@ -607,13 +607,13 @@ bool BPlusTree::Delete(KeyType & data)
if(_pOldLeaf ->getCount() >= ORDER_V)
{
//如果idelete = 1 向上删除 不需要
//如果idelete = 1 向上删除 不需要
return true;
}
// count < 50%
int flag = FLAG_LEFT;
// 右兄弟优先
// 右兄弟优先
mLeafNode * _pBrother = (mLeafNode*)(_pOldLeaf ->getBrother(flag));
//brother > 50%
@ -661,13 +661,13 @@ bool BPlusTree::Delete(KeyType & data)
cout << "bug run" << endl;
return false;
}
//重载删除函数
//重载删除函数
bool BPlusTree :: Delete(KeyType & data, char PartVal[])
{
mLeafNode * _pOldLeaf = SearchLeafNode(data);
// for when _ikey = 1
//if ok , return the index of the deleted
//如果idelete是1的话则函数里会是否进行向上删除 考虑
// for when _ikey = 1
//if ok , return the index of the deleted
//如果idelete是1的话则函数里会是否进行向上删除 考虑
int pvFlag = FLAG_ZERO;
int _idelete = _pOldLeaf ->Delete(data, PartVal, pvFlag);
if(_idelete < 0) return false;
@ -681,13 +681,13 @@ bool BPlusTree :: Delete(KeyType & data, char PartVal[])
if(_pOldLeaf ->getCount() >= ORDER_V)
{
//如果idelete = 1 向上删除 不需要
//如果idelete = 1 向上删除 不需要
return true;
}
// count < 50%
int flag = FLAG_LEFT;
// 右兄弟优先
// 右兄弟优先
mLeafNode * _pBrother = (mLeafNode*)(_pOldLeaf ->getBrother(flag));
//brother > 50%
@ -740,7 +740,7 @@ void BPlusTree::Flush(){
this ->StoreTree();
this ->ClearTree();
}
// 清除整个树,删除所有结点
// 清除整个树,删除所有结点
void BPlusTree :: ClearTree()
{
queue<mNode *> pQueue[100];
@ -799,26 +799,26 @@ void BPlusTree :: ClearTree()
return;
}
// 检查树是否满足B+树的定义
// 检查树是否满足B+树的定义
//bool BPlusTree::CheckTree()
//{}
// 递归检查结点及其子树是否满足B+树的定义
// 递归检查结点及其子树是否满足B+树的定义
//bool BPlusTree::CheckNode(mNode* pNode)
//{}
// 打印整个树
// 打印整个树
//void BPlusTree::PrintTree(FILE * ifp)
//{}
// 打印某结点
// 打印某结点
//void BPlusTree::PrintNode(mNode* pNode, FILE * ifp)
//{}
//递归函数:插入键到中间结点
//key即为pNode中的首个key
//递归函数:插入键到中间结点
//key即为pNode中的首个key
bool BPlusTree::InsertItnlNode(mItnlNode* pNode, mNode* pSon)
{
if(pNode == NULL || pNode ->getType() == NODE_TYPE_LEAF)
@ -878,8 +878,8 @@ bool BPlusTree::InsertItnlNode(mItnlNode* pNode, mNode* pSon)
if(_pFather == NULL)
{
// 原offset = 0处块以及相应后续链接块先清除
// 判断是否需要删除的外存链
// 原offset = 0处块以及相应后续链接块先清除
// 判断是否需要删除的外存链
// if(!preModified)
// {
// DelDisk(mfp, 0l, mblockQueue);
@ -890,7 +890,7 @@ bool BPlusTree::InsertItnlNode(mItnlNode* pNode, mNode* pSon)
_pOldItnl ->setAddrFB(_addr_new);
// setmodified in mnode_initial;
mItnlNode * _pItnl = new mItnlNode;
//分配新的首地址
// 分配新的首地址
long long int _addr_root = 0l;
long long int _addr_1 = _pOldItnl ->getAddrFB();
long long int _addr_2 = _pNewItnl ->getAddrFB();
@ -921,11 +921,11 @@ bool BPlusTree::InsertItnlNode(mItnlNode* pNode, mNode* pSon)
return false;
}
/*
* key对应的元素并在delete函数中完成对父节点的修改
* > 50%
* key对应的元素并在delete函数中完成对父节点的修改
* > 50%
*
*/
// 递归函数:在中间结点中删除键
// 递归函数:在中间结点中删除键
bool BPlusTree::DeleteItnlNode(mItnlNode* pItnl, KeyType & key)
{
int _idelete = pItnl ->Delete(key);
@ -984,11 +984,11 @@ bool BPlusTree::DeleteItnlNode(mItnlNode* pItnl, KeyType & key)
cout << "run bug" << endl;
return false;
}
//保存树结构
//保存树结构
/*
*
* 1
* 2
*
* 1
* 2
*/
void BPlusTree :: StoreTree()
{
@ -1004,7 +1004,7 @@ void BPlusTree :: StoreTree()
{
pNode = pQueue.front();
pQueue.pop();
// 内有unmodify
// 内有unmodify
if(pNode->getModify()){
any = true;
}
@ -1026,7 +1026,7 @@ void BPlusTree :: StoreTree()
}
}
cout << endl;
char _queuefile[1024];
char _queuefile[50];
strcpy(_queuefile, mTreeName);
strcat(_queuefile, "_queue.btree");
this ->mblockQueue.WriteQueue(_queuefile);
@ -1039,7 +1039,7 @@ void BPlusTree :: StoreTree()
}
//
//打印树
//打印树
void BPlusTree :: PrintTree()
{
queue<mNode *> pQueue[100];
@ -1083,18 +1083,18 @@ void BPlusTree :: PrintTree()
}
/*
*
*
* type int
* count int
* count_block int
* blocklink mBlockLink
*
*
* type int
* count int
* count_block int
* blocklink mBlockLink
*
*
*
*
*/
mNode * ReadNode(FILE * fp, long long _addrfb)//每个新建的节点都有分配的或上层节点给予的首块存储区地址
mNode * ReadNode(FILE * fp, long long _addrfb)//每个新建的节点都有分配的或上层节点给予的首块存储区地址
{
fseek(fp, _addrfb, SEEK_SET);
int _type_tmp;
@ -1118,25 +1118,25 @@ mNode * ReadNode(FILE * fp, long long _addrfb)//每个新建的节点都有分
_pLeaf ->unModify();
_pLeaf ->setAddrFB(_addrfb);
fread(&(_pLeaf ->leafLink), sizeof(_pLeaf ->leafLink), 1, fp);
/*
* while cycle里
* for cycle
*
*/
/*
* while cycle里
* for cycle
*
*/
/*
*
*
*
* count值
*/
/*
* -1 len便 sizeleft 412 -1lensizeleft自身
* sizeleft    
* buffer及其size
*  buffer()
* buffer后回写长度及块链接 fp指针 & sizeleft
*/
/*
*
*
*
* count值
*/
/*
* -1 len便 sizeleft 412 -1lensizeleft自身
* sizeleft    
* buffer及其size
*  buffer()
* buffer后回写长度及块链接 fp指针 & sizeleft
*/
bool ReadIn = true;
while(_i_tmp <= _count_tmp)
{
@ -1199,7 +1199,7 @@ mNode * ReadNode(FILE * fp, long long _addrfb)//每个新建的节点都有分
mItnlNode * _pItnl = new mItnlNode;
_pItnl ->setCount(_count_tmp);
_pItnl ->unModify();
// 后加, 未知之前没有设地址的原因
// 后加, 未知之前没有设地址的原因
_pItnl ->setAddrFB(_addrfb);
while(_i_tmp <= _count_tmp)
{
@ -1262,7 +1262,7 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
fwrite(&(_count_tmp), size_count, 1, fp);
_size_left -= size_count;//--------------------------------int
long long int _addr_blocklink = ftell(fp); // 可能产生问题
long long int _addr_blocklink = ftell(fp); // 可能产生问题
fseek(fp, size_int + size_blocklink, SEEK_CUR);
_size_left -= size_int + size_blocklink;//------------------int + long_long_int*2
@ -1284,20 +1284,20 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
mBlockLink _blocklink;
for(; _i_tmp <= _count; _i_tmp ++)
{
/*
* true false;
* false则先返写块容元素个数 _ _addr_blocklink
* _size_left, mblocklink
*
*
* ,
*/
/*
* true false;
* false则先返写块容元素个数 _ _addr_blocklink
* _size_left, mblocklink
*
*
* ,
*/
int size_key = -1;
int size_val = -1;
int _tmp_i = -1;
bool_WriteIn = _pLeaf ->LeafData[_i_tmp].Write_mleafdata(fp, _size_left, size_key, size_val);
if(!bool_WriteIn) //确定不可能有单一元素超过4K 可能产生问题
if(!bool_WriteIn) //确定不可能有单一元素超过4K 可能产生问题
{
if(size_key + size_val > BLOCKSIZE - size_int*3 - size_lli*4
&& _size_left > size_key + size_int * 3 + 1)
@ -1324,9 +1324,9 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
// _curblockaddr = _mqueue.Pop();
_blocklink.mBlockLink_Initial();
_blocklink.preBlockAddr = _preblockaddr;
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
fseek(fp, _addr_blocklink, SEEK_SET);
nEle_inBlock ++;
@ -1335,7 +1335,7 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
fwrite(&_blocklink, size_blocklink, 1, fp);
fseek(fp, _curblockaddr, SEEK_SET);
_addr_blocklink = _curblockaddr;//调整回写偏移
_addr_blocklink = _curblockaddr;//调整回写偏移
fseek(fp, size_blocklink + size_int, SEEK_CUR);
//write back blocklink
@ -1349,16 +1349,16 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
_blocklink.mBlockLink_Initial();
_blocklink.preBlockAddr = _preblockaddr;
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
fseek(fp, _addr_blocklink, SEEK_SET);
fwrite(&_block_hold, size_int, 1, fp);
fwrite(&_blocklink, size_blocklink, 1, fp);
fseek(fp, _curblockaddr, SEEK_SET);
_addr_blocklink = _curblockaddr;//调整回写偏移
_addr_blocklink = _curblockaddr;//调整回写偏移
fseek(fp, size_blocklink + size_int, SEEK_CUR);
}
@ -1372,36 +1372,36 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
// _size_left = BLOCKSIZE - _size_buffer - size_int - size_blocklink;
_size_left = -1;
first_int = _size_buffer;
/*
* -1 len便 sizeleft 412 -1lensizeleft自身
* sizeleft    
* buffer及其size
*  buffer()
* buffer后回写长度及块链接 fp指针 & sizeleft
*/
/*
* -1 len便 sizeleft 412 -1lensizeleft自身
* sizeleft    
* buffer及其size
*  buffer()
* buffer后回写长度及块链接 fp指针 & sizeleft
*/
}
else
{
_i_tmp --;
_size_left = BLOCKSIZE; //回复块内容量
_size_left = BLOCKSIZE; //回复块内容量
_blocklink.mBlockLink_Initial();
_blocklink.preBlockAddr = _preblockaddr;
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
/*
*
*/
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
/*
*
*/
fseek(fp, _addr_blocklink, SEEK_SET);
int size_ele_inblock = sizeof(nEle_inBlock);
fwrite(&first_int, size_ele_inblock, 1, fp);
fwrite(&_blocklink, size_blocklink, 1, fp);
fseek(fp, _curblockaddr, SEEK_SET);
_addr_blocklink = _curblockaddr;//调整回写偏移
_addr_blocklink = _curblockaddr;//调整回写偏移
fseek(fp, size_ele_inblock + size_blocklink, SEEK_CUR);
_size_left -= size_ele_inblock + size_blocklink;//===========int + lli * 2
@ -1415,13 +1415,13 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
}// if write_in
}// for count
/*
*
*/
/*
*
*/
_blocklink.mBlockLink_Initial();
_blocklink.preBlockAddr = _preblockaddr;
_blocklink.nextBlockAddr = -1; // 上一块的下一块即为新块
_blocklink.nextBlockAddr = -1; // 上一块的下一块即为新块
fseek(fp, _addr_blocklink, SEEK_SET);
fwrite(&first_int, size_int, 1, fp);
fwrite(&_blocklink, size_blocklink, 1, fp);
@ -1440,37 +1440,37 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
mBlockLink _blocklink;
for(; _i_tmp <= _count; _i_tmp ++)
{
/*
* true false;
* false则先返写块容元素个数 _ _addr_blocklink
* _size_left, mblocklink
*
*
* ,
*/
/*
* true false;
* false则先返写块容元素个数 _ _addr_blocklink
* _size_left, mblocklink
*
*
* ,
*/
bool_WriteIn = _pItnl ->ItnlData[_i_tmp].Write_mitnldata(fp, _size_left);
if(!bool_WriteIn) //确定不可能有单一元素超过4K 可能产生问题
if(!bool_WriteIn) //确定不可能有单一元素超过4K 可能产生问题
{
_i_tmp --;
_size_left = BLOCKSIZE; //回复块内容量
_size_left = BLOCKSIZE; //回复块内容量
_blocklink.mBlockLink_Initial();
_blocklink.preBlockAddr = _preblockaddr;
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
/*
*
*/
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
/*
*
*/
fseek(fp, _addr_blocklink, SEEK_SET);
int size_ele_inblock = sizeof(nEle_inBlock);
fwrite(&nEle_inBlock, size_ele_inblock, 1, fp);
fwrite(&_blocklink, size_blocklink, 1, fp);
fseek(fp, _curblockaddr, SEEK_SET);
_addr_blocklink = _curblockaddr;//调整回写偏移
_addr_blocklink = _curblockaddr;//调整回写偏移
fseek(fp, size_ele_inblock + size_blocklink, SEEK_CUR);
_size_left -= size_ele_inblock + size_blocklink;
@ -1481,13 +1481,13 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
nEle_inBlock ++;
}
}
/*
*
*/
/*
*
*/
_blocklink.mBlockLink_Initial();
_blocklink.preBlockAddr = _preblockaddr;
_blocklink.nextBlockAddr = -1; // 上一块的下一块即为新块
_blocklink.nextBlockAddr = -1; // 上一块的下一块即为新块
fseek(fp, _addr_blocklink, SEEK_SET);
int size_ele_inblock = sizeof(nEle_inBlock);
fwrite(&nEle_inBlock, size_ele_inblock, 1, fp);
@ -1517,7 +1517,7 @@ void DelDisk(FILE * fp, long long int _addrfb, mQueue & _mqueue)
long long int BackAddr[1000] = {};
vector<long long int> BackVec;
// BackAddr[_i_tmp] = _addrfb;
// _i_tmp ++; 首块地址绝非要一起回收, 在delete实现之后首块地址在适时push的
// _i_tmp ++; 首块地址绝非要一起回收, 在delete实现之后首块地址在适时push的
while(_curblocklink.nextBlockAddr != -1)
{
{
@ -1582,7 +1582,7 @@ bool Delete_Key_PartVal(char keyStr[], int keyLen, char partVal[], BPlusTree * _
return dRet;
}
// 以下为四个对应的删除实现, 调用了上面两个函数;
// 以下为四个对应的删除实现, 调用了上面两个函数;
bool Delete_sID2sub(int _sID, BPlusTree * _p_sID2sub)
{
char * sid2str = new char[5];
@ -1741,9 +1741,9 @@ void mitnldata::Read_mitnldata(FILE * fp)
}
bool mitnldata::Write_mitnldata( FILE * fp, int & _size_left )
{
/*
* KeyType
*/
/*
* KeyType
*/
int size_lli = sizeof(long long int);
int size_mKey = mKey.WriteSize();
if(_size_left < size_lli + size_mKey) return false;
@ -1815,7 +1815,7 @@ BPlusTree::BPlusTree(const char * const_tree_name, const char * _build_or_open)
}
this ->Initial();
insert_count = 0;
char _tree_name[1024];
char _tree_name[1000];
memcpy(_tree_name, const_tree_name, (int)strlen(const_tree_name));
_tree_name[(int)strlen(const_tree_name)] = '\0';
int _key_chose;
@ -1843,7 +1843,7 @@ BPlusTree::BPlusTree(const char * const_tree_name, const char * _build_or_open)
mRoot = new mLeafNode;
mRoot ->setAddrFB( 0 );
printf("the tree called: %s\n", mTreeName);
char _treefile[1024];
char _treefile[500];
strcpy(_treefile, mTreeName);
strcat(_treefile, ".btree");
if((mfp = fopen(_treefile, "wb+")) == NULL)
@ -1874,7 +1874,7 @@ BPlusTree::BPlusTree(const char * const_tree_name, const char * _build_or_open)
break;
}
}
char _queuefile[1024];
char _queuefile[500];
strcpy(_queuefile, mTreeName);
strcat(_queuefile, "_queue.btree");
mblockQueue.ReadQueue(_queuefile);

View File

@ -25,10 +25,10 @@
#include<set>
#define BLOCKSIZE (1 << 14)/* 16K */
#define ORDER_V 128 /* 为简单起见把v固定为2实际的B+树v值应该是可配的 */
#define MAXNUM_KEY (ORDER_V * 2) /* 内部结点中最多键个数为2v ( 1~2v )*/
#define MAXNUM_POINTER (ORDER_V * 2 + 1) /* 内部结点中最多指向子树的指针个数为2v ( 1~2v )*/
#define MAXNUM_DATA (ORDER_V * 2 + 1) /* 结点中用作定义为2v ( 1~2v )*/
#define ORDER_V 128 /* 为简单起见把v固定为2实际的B+树v值应该是可配的 */
#define MAXNUM_KEY (ORDER_V * 2) /* 内部结点中最多键个数为2v ( 1~2v )*/
#define MAXNUM_POINTER (ORDER_V * 2 + 1) /* 内部结点中最多指向子树的指针个数为2v ( 1~2v )*/
#define MAXNUM_DATA (ORDER_V * 2 + 1) /* 结点中用作定义为2v ( 1~2v )*/
#define TERM_NUMBER 1
#define FLAG_LEFT 5
@ -49,8 +49,8 @@ using namespace std;
enum NODE_TYPE
{
NODE_TYPE_INTERNAL = 2, // 内部结点
NODE_TYPE_LEAF = 3, // 叶子结点
NODE_TYPE_INTERNAL = 2, // 内部结点
NODE_TYPE_LEAF = 3, // 叶子结点
};
enum eletype
{
@ -73,7 +73,7 @@ public:
bool is_AtMem;
char* sKey;
int iKey;
int mLenKey; //关于是否读入\n的试验
int mLenKey; //关于是否读入\n的试验
KeyType()
{
KeyType_Initial();
@ -179,9 +179,9 @@ public:
void ReadKey(FILE * fp);
void WriteKey(FILE * fp);
/*
*
*/
/*
*
*/
int WriteSize()
{
int size_mLenKey = sizeof(mLenKey);
@ -524,7 +524,7 @@ public:
{
Term[0][_tag] = '\0';
lenTerm[0] -= sizeof(int) + sizeof(char);
//只有一个元素时很特殊, 长度是减少4 其余减少5
//只有一个元素时很特殊, 长度是减少4 其余减少5
if(lenTerm[0] <= 0) flag = FLAG_ZERO;
else flag = FLAG_NO_ZERO;
return true;
@ -630,10 +630,9 @@ public:
class mQueue
{
public:
// static const int qLenth = 5243005; /* 5*1024*1024 */
static const int qLenth = 20971520; /* 20*1024*1024 */
static const int qLenth = 5243005;
int qUsed;
bool qAvailable[mQueue::qLenth];
bool qAvailable[mQueue::qLenth];/* 5*1024*1024 */
public:
mQueue()
@ -788,7 +787,7 @@ public:
mNode * _pBrother = NULL;
for(int i = 1; i <= _pFather ->getCount(); i ++)
{
//指针匹配
//指针匹配
if(_pFather ->getPointer(i) == this)
{
if(i == (_pFather ->getCount()) + 1)
@ -974,7 +973,7 @@ public:
}
}
//此函数需仔细考虑~~
//此函数需仔细考虑~~
int iExist(const KeyType &_keytype)
{
int _ibegin = 1, _iend = getCount();
@ -992,7 +991,7 @@ public:
}
if(_ibegin == _iend - 1) return -1;
// 后加可能有风险
// 后加可能有风险
if(ItnlData[_imiddle].mKey > _keytype)
{
@ -1130,17 +1129,17 @@ public:
printf(" == ");
}
// 插入键
// 最左端递归向上
bool Insert(mNode* _pmnode);
// 删除键
int Delete(const KeyType & _keytype);
// 分裂结点
KeyType & Split(mItnlNode* _mitnlnode);
// 结合结点
bool Combine(mItnlNode * _pmnode);
// 从另一结点移一个元素到本结点
bool MoveOneElement(mNode * _pmnode);
// 插入键
// 最左端递归向上
bool Insert(mNode* _pmnode);
// 删除键
int Delete(const KeyType & _keytype);
// 分裂结点
KeyType & Split(mItnlNode* _mitnlnode);
// 结合结点
bool Combine(mItnlNode * _pmnode);
// 从另一结点移一个元素到本结点
bool MoveOneElement(mNode * _pmnode);
};
@ -1434,7 +1433,7 @@ public:
}
}
// 此两个函数对叶节点无意义
// 此两个函数对叶节点无意义
mNode * getPointer(int _i)
{
return NULL;
@ -1480,7 +1479,7 @@ public:
}
return -1;
}
// 考虑cout = 0的情况
// 考虑cout = 0的情况
int iInsert(const KeyType & _keytype)
{
int _ibegin = 1, _iend = getCount();
@ -1555,19 +1554,19 @@ public:
// 插入数据
// 最左端递归向上
bool Insert(const mleafdata & _leafdata);
// 删除数据
int Delete(KeyType & _keytype);
//重载delete partval
int Delete(KeyType & _keytype, char partval[], int & pvFlag);
// 分裂结点
KeyType & Split(mLeafNode* _mpnode);
// 结合结点
bool Combine(mLeafNode* _mpnode);
// 重复插入
bool dupInsert(const mleafdata & _leafdata, int _index_insert);
// 插入数据
// 最左端递归向上
bool Insert(const mleafdata & _leafdata);
// 删除数据
int Delete(KeyType & _keytype);
//重载delete partval
int Delete(KeyType & _keytype, char partval[], int & pvFlag);
// 分裂结点
KeyType & Split(mLeafNode* _mpnode);
// 结合结点
bool Combine(mLeafNode* _mpnode);
// 重复插入
bool dupInsert(const mleafdata & _leafdata, int _index_insert);
};
extern mNode* ReadNode(FILE * fp, long long int _addr);
@ -1584,19 +1583,19 @@ extern bool Delete_obj2sID(char _obj_str[], int _del_sID, BPlusTree * _p_obj2sID
extern bool Delete_objpID2sID(char _obj_str[], int _pID, int _del_sID, BPlusTree * _p_objpID2sID);
/* B+树数据结构 */
/* B+树数据结构 */
class BPlusTree
{
public:
// 以下两个变量用于实现双向链表
mLeafNode* pmLeafHead; // 头结点
mLeafNode* pmLeafTail; // 尾结点
mNode * mRoot; // 根结点
// 以下两个变量用于实现双向链表
mLeafNode* pmLeafHead; // 头结点
mLeafNode* pmLeafTail; // 尾结点
mNode * mRoot; // 根结点
mQueue mblockQueue;
FILE * mfp;
char mTreeName[1024];
int mDepth; // 树的深度
char mTreeName[55];
int mDepth; // 树的深度
int insert_count;
void Initial();
@ -1606,33 +1605,33 @@ public:
void log(const char* _log)const;
void forcheck();
// 获取和设置根结点
mNode * getRoot() { return mRoot; }
// 获取和设置根结点
mNode * getRoot() { return mRoot; }
void setRoot(mNode * root) { mRoot = root; }
void Flush();
// 为插入而查找叶子结点
mLeafNode * SearchLeafNode(const KeyType & data)const;
//插入键到中间结点
bool InsertItnlNode(mItnlNode * pNode, mNode * pSon);
// 在中间结点中删除键
bool DeleteItnlNode(mItnlNode * pNode, KeyType & key);
// 查找指定的数据
bool Search(KeyType & data, mleafdata & _ret);
// 插入指定的数据
bool Insert(const mleafdata & _mleafdata);
// 删除指定的数据
bool Delete(KeyType & data);
// 重载删除函数
bool Delete(KeyType & data, char PartVal[]);
// 清除树
void ClearTree();
// 打印树
void PrintTree();
//读出根节点
void ReadRoot();
//保存树结构
void StoreTree();
void setRoot(mNode * root) { mRoot = root; }
void Flush();
// 为插入而查找叶子结点
mLeafNode * SearchLeafNode(const KeyType & data)const;
//插入键到中间结点
bool InsertItnlNode(mItnlNode * pNode, mNode * pSon);
// 在中间结点中删除键
bool DeleteItnlNode(mItnlNode * pNode, KeyType & key);
// 查找指定的数据
bool Search(KeyType & data, mleafdata & _ret);
// 插入指定的数据
bool Insert(const mleafdata & _mleafdata);
// 删除指定的数据
bool Delete(KeyType & data);
// 重载删除函数
bool Delete(KeyType & data, char PartVal[]);
// 清除树
void ClearTree();
// 打印树
void PrintTree();
//读出根节点
void ReadRoot();
//保存树结构
void StoreTree();
};
#endif /* CBTREE_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -1,23 +1,22 @@
/*=============================================================================
# Filename: KVstore.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-23 14:23
# Description:
=============================================================================*/
/*
* KVstore.h
*
* Created on: 2014-5-8
* Author: liyouhuan
*/
#ifndef _KVSTORE_KVSTORE_H
#define _KVSTORE_KVSTORE_H
#include "../Util/Util.h"
#include "tree/Tree.h"
//TODO:add debug instruction, control if using the so2p index and p2so index
//these are really costly
class KVstore
{
#ifndef KVSTORE_H_
#define KVSTORE_H_
#include<iostream>
#include<string.h>
#include<sys/stat.h>
#include<dirent.h>
#include"Btree.h"
using namespace std;
class KVstore{
public:
static const bool debug_mode = false;
static const bool test = false;
static const int READ_WRITE_MODE = 1;
static const int CREATE_MODE = 2;
@ -47,118 +46,73 @@ private:
bool remove_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id);
public:
//for entity2id
/* for entity2id */
bool open_entity2id(const int _mode);
int getIDByEntity(const std::string _entity);
bool setIDByEntity(const std::string _entity, int _id);
int getIDByEntity(const string _entity);
bool setIDByEntity(const string _entity, int _id);
//for id2entity
/* for id2entity */
bool open_id2entity(const int _mode);
std::string getEntityByID(int _id);
bool setEntityByID(int _id, std::string _entity);
string getEntityByID(int _id);
bool setEntityByID(int _id, string _entity);
//for predicate2id
/* for predicate2id */
bool open_predicate2id(const int _mode);
int getIDByPredicate(const std::string _predicate);
bool setIDByPredicate(const std::string _predicate, int _id);
int getIDByPredicate(const string _predicate);
bool setIDByPredicate(const string _predicate, int _id);
//for id2predicate
/* for id2predicate */
bool open_id2predicate(const int _mode);
std::string getPredicateByID(int _id);
bool setPredicateByID(const int _id, std::string _predicate);
string getPredicateByID(int _id);
bool setPredicateByID(const int _id, string _predicate);
//for id2literal
/* for id2literal */
bool open_id2literal(const int _mode);
std::string getLiteralByID(int _id);
bool setLiteralByID(const int _id, std::string _literal);
string getLiteralByID(int _id);
bool setLiteralByID(const int _id, string _literal);
//for literal2id
/* for literal2id */
bool open_literal2id(const int _mode);
int getIDByLiteral(std::string _literal);
bool setIDByLiteral(const std::string _literal, int _id);
int getIDByLiteral(string _literal);
bool setIDByLiteral(const string _literal, int _id);
//for subID 2 objIDlist
bool open_subID2objIDlist(const int _mode);
/* for subID 2 objIDlist */
bool open_subid2objidlist(const int _mode);
bool getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len);
bool setobjIDlistBysubID(int _subid, const int* _objidlist, int _list_len);
//for objID 2 subIDlist
bool open_objID2subIDlist(const int _mode);
/* for objID 2 subIDlist */
bool open_objid2subidlist(const int _mode);
bool getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len);
bool setsubIDlistByobjID(int _objid, const int* _subidlist, int _list_len);
//for subID&preID 2 objIDlist
/* for subID&preID 2 objIDlist */
bool open_subIDpreID2objIDlist(const int _mode);
bool getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len);
bool setobjIDlistBysubIDpreID(int _subid, int _preid, const int* _objidlist, int _list_len);
//for objID&preID 2 subIDlist
/* for objID&preID 2 subIDlist */
bool open_objIDpreID2subIDlist(const int _mode);
bool getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len);
bool setsubIDlistByobjIDpreID(int _objid, int _preid, const int* _subidlist, int _list_len);
//for subID 2 preID&objIDlist
/* for subID 2 preID&objIDlist */
bool open_subID2preIDobjIDlist(const int _mode);
bool getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len);
bool setpreIDobjIDlistBysubID(int _subid, const int* _preid_objidlist, int _list_len);
//for objID 2 preID&subIDlist
/* for objID 2 preID&subIDlist */
bool open_objID2preIDsubIDlist(const int _mode);
bool getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len);
bool setpreIDsubIDlistByobjID(int _objid, const int* _preid_subidlist, int _list_len);
//for subID 2 preIDlist
bool open_subID2preIDlist(const int _mode);
bool getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len);
bool setpreIDlistBysubID(int _subid, const int* _preidlist, int _list_len);
//for preID 2 subIDlist
bool open_preID2subIDlist(const int _mode);
bool getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len);
bool setsubIDlistBypreID(int _preid, const int* _subidlist, int _list_len);
//for objID 2 preIDlist
bool open_objID2preIDlist(const int _mode);
bool getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len);
bool setpreIDlistByobjID(int _objid, const int* _preidlist, int _list_len);
//for preID 2 objIDlist
bool open_preID2objIDlist(const int _mode);
bool getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len);
bool setobjIDlistBypreID(int _preid, const int* _objidlist, int _list_len);
//for subID&objID 2 preIDlist
bool open_subIDobjID2preIDlist(const int _mode);
bool getpreIDlistBysubIDobjID(int _subID, int _objID, int*& _preidlist, int& _list_len);
bool setpreIDlistBysubIDobjID(int _subID, int _objID, const int* _preidlist, int _list_len);
//for preID 2 subID&objIDlist
bool open_preID2subIDobjIDlist(const int _mode);
bool getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len);
bool setsubIDobjIDlistBypreID(int _preid, const int* _subid_objidlist, int _list_len);
//QUERY:is the below 3 indexes needed?
//In fact, p2so can compute the num of triples if dividing so_len by 2
//However, sometimes this can be very large and costly
//For example, the predicate is <rdf:type>
//for predicate 2 triple num
bool open_preID2num(const int _mode);
int getNumBypreID(int _preid);
bool setNumBypreID(int _preid, int _tripleNum);
//for subject&predicate 2 triple num
bool open_subIDpreID2num(const int _mode);
int getNumBysubIDpreID(int _subID, int _preID);
bool setNumBysubIDpreID(int _subID, int _preID, int _tripleNum);
//for object&predicate 2 triple num
bool open_objIDpreID2num(const int _mode);
int getNumByobjIDpreID(int _objid, int _preid);
bool setNumByobjIDpreID(int _objid, int _preid, int _tripleNum);
KVstore(std::string _store_path = ".");
/*
* _store_path denotes where to store the data
*/
KVstore(string _store_path = ".");
~KVstore();
void flush();
void release();
@ -166,74 +120,57 @@ public:
private:
std::string store_path;
//map entity to its id, and id to the entity
//s_entity2id is relative store file name
Tree* entity2id;
Tree* id2entity;
static std::string s_entity2id;
static std::string s_id2entity;
string store_path;
/*
*
* map entity to its id, and id to the entity
* s_entity2id is relative store file name
*/
Btree* entity2id;
Btree* id2entity;
static string s_entity2id;
static string s_id2entity;
Tree* predicate2id;
Tree* id2predicate;
static std::string s_predicate2id;
static std::string s_id2predicate;
Btree* predicate2id;
Btree* id2predicate;
static string s_predicate2id;
static string s_id2predicate;
Tree* literal2id;
Tree* id2literal;
static std::string s_literal2id;
static std::string s_id2literal;
Btree* literal2id;
Btree* id2literal;
static string s_literal2id;
static string s_id2literal;
Tree* subID2objIDlist;
Tree* objID2subIDlist;
static std::string s_sID2oIDlist;
static std::string s_oID2sIDlist;
Btree* subID2objIDlist;
Btree* objID2subIDlist;
static string s_sID2oIDlist;
static string s_oID2sIDlist;
//lack exist in update tuple
Tree* subIDpreID2objIDlist;
Tree* objIDpreID2subIDlist;
static std::string s_sIDpID2oIDlist;
static std::string s_oIDpID2sIDlist;
/* lack exist in update tuple */
Btree* subIDpreID2objIDlist;
Btree* objIDpreID2subIDlist;
static string s_sIDpID2oIDlist;
static string s_oIDpID2sIDlist;
Tree* subID2preIDobjIDlist;
Tree* objID2preIDsubIDlist;
static std::string s_sID2pIDoIDlist;
static std::string s_oID2pIDsIDlist;
Btree* subID2preIDobjIDlist;
Btree* objID2preIDsubIDlist;
static string s_sID2pIDoIDlist;
static string s_oID2pIDsIDlist;
Tree* subID2preIDlist;
Tree* preID2subIDlist;
static std::string s_sID2pIDlist;
static std::string s_pID2sIDlist;
Tree* objID2preIDlist;
Tree* preID2objIDlist;
static std::string s_oID2pIDlist;
static std::string s_pID2oIDlist;
Tree* subIDobjID2preIDlist;
Tree* preID2subIDobjIDlist;
static std::string s_sIDoID2pIDlist;
static std::string s_pID2sIDoIDlist;
Tree* preID2num;
Tree* subIDpreID2num;
Tree* objIDpreID2num;
static std::string s_pID2num;
static std::string s_sIDpID2num;
static std::string s_oIDpID2num;
void flush(Tree* _p_btree);
bool setValueByKey(Tree* _p_btree, const char* _key, int _klen, const char* _val, int _vlen);
bool getValueByKey(Tree* _p_btree, const char* _key, int _klen, char*& _val, int& _vlen);
int getIDByStr(Tree* _p_btree, const char* _key, int _klen);
bool removeKey(Tree* _p_btree, const char* _key, int _klen);
void flush(Btree* _p_btree);
void release(Btree* _p_btree);
bool setValueByKey(Btree* _p_btree, const char* _key, int _klen, const char* _val, int _vlen);
bool getValueByKey(Btree* _p_btree, const char* _key, int _klen, char*& _val, int& _vlen);
int getIDByStr(Btree* _p_btree, const char* _key, int _klen);
bool removeKey(Btree* _p_btree, const char* _key, int _klen);
/* Open a btree according the mode */
/* CREATE_MODE: build a new btree and delete if exist */
/* READ_WRITE_MODE: open a btree, btree must exist */
bool open(Tree* & _p_btree, const std::string _tree_name, const int _mode);
bool open(Btree* & _p_btree, const string _tree_name, const int _mode);
};
#endif //_KVSTORE_KVSTORE_H
#endif /* KVSTORE_H_ */

View File

@ -1,187 +0,0 @@
/*=============================================================================
# Filename: Heap.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:37
# Description: achieve functions in Heap.h
=============================================================================*/
#include "Heap.h"
using namespace std;
//Build an empty heap: no backing array, zero capacity, zero stored elements.
Heap::Heap()
	: heap(NULL), length(0), size(0)
{
}
//Build an empty heap whose backing array has room for _size node pointers.
//The array is created with new[] (element size is sizeof(Node*), not a fixed 4 or 8).
//If the array pointer turns out to be NULL an error is printed and the process exits.
Heap::Heap(unsigned _size)
{
	length = 0;
	size = _size;
	heap = new Node*[size];
	if(heap != NULL)
		return;
	this->print("error in Heap: Allocation fail!");
	exit(1);
}
//Return the element stored at the root (index 0), or NULL when the heap holds nothing.
Node*
Heap::getTop() const
{
	if(this->length == 0)
		return NULL;
	return this->heap[0];
}
unsigned
Heap::getLen() const
{
return this->length;
}
unsigned
Heap::getSize() const
{
return this->size;
}
bool
Heap::isEmpty() const
{
return this->length == 0;
}
//Insert _np into the min-heap (ordered by Node::getRank()) and sift it up to its place.
//When the array is full its capacity is doubled first (a capacity of 0 grows to 1).
//NOTICE: the array is created with new[] and freed with delete[], so it must never be
//passed to realloc(); a larger array is allocated and the pointers are copied over.
//Returns false, leaving the heap untouched, if the larger array cannot be allocated.
bool
Heap::insert(Node* _np)
{
	if(this->length == this->size)	//when full, grow the array
	{
		unsigned newsize = (this->size == 0) ? 1 : 2 * this->size;
		Node** bigger = NULL;
		try
		{
			bigger = new Node*[newsize];
		}
		catch(...)	//allocation failure is reported through the return value
		{
			bigger = NULL;
		}
		if(bigger == NULL)
		{
			print("error in isert: Reallocation fail!");
			return false;
		}
		for(unsigned t = 0; t < this->length; ++t)
			bigger[t] = this->heap[t];
		delete[] this->heap;
		this->heap = bigger;
		this->size = newsize;
	}
	//sift up: move larger-ranked ancestors down until the slot for _np is found
	unsigned i = this->length, j;
	while(i != 0)
	{
		j = (i-1)/2;	//parent index
		if(_np->getRank() >= this->heap[j]->getRank())
			break;
		heap[i] = heap[j];
		i = j;
	}
	this->heap[i] = _np;
	this->length++;
	return true;
}
bool
Heap::remove()
{
if(this->length == 0)
{
print("error in remove: remove from empty heap!");
return false;
}
//Node* tp = this->heap[0];
this->length--;
if(this->length == 0)
return true;
Node* xp = this->heap[this->length];
unsigned i = 0, j = 1;
while(j < this->length)
{
if(j < this->length-1 && this->heap[j]->getRank() > this->heap[j+1]->getRank())
j++;
if(xp->getRank() <= this->heap[j]->getRank())
break;
this->heap[i] = this->heap[j];
i = j;
j = 2 * i + 1;
}
this->heap[i] = xp;
return true;
}
//Re-position _np after its rank has changed.
//_flag == true: the rank got smaller, move the node up; otherwise move it down.
//The node is located by a linear scan of the array.
//Returns false if _np is not stored in the heap (nothing is changed in that case).
bool
Heap::modify(Node* _np, bool _flag)	//control direction
{
	//search and adjust
	unsigned i, j;
	for(i = 0; i < this->length; ++i)
		if(this->heap[i] == _np)
			break;
	if(i == this->length)
	{
		//not found: adjusting from index 'length' would read and write past the valid elements
		print("error in modify: node not in heap!");
		return false;
	}
	if(_flag == true)	//move up
	{
		while(i != 0)
		{
			j = (i-1)/2;
			if(_np->getRank() < heap[j]->getRank())
			{
				heap[i] = heap[j];
				heap[j] = _np;
				i = j;
			}
			else
				break;
		}
	}
	else	//move down
	{
		j = 2 * i + 1;
		while(j < this->length)
		{
			if(j < this->length - 1 && heap[j]->getRank() > heap[j+1]->getRank())
				j++;
			if(heap[j]->getRank() < _np->getRank())
			{
				heap[i] = heap[j];
				heap[j] = _np;
				i = j;
				j = 2 * i + 1;	//continue with the children of the new position
			}
			else
				break;
		}
	}
	return true;
}
//Free the pointer array (the nodes it points to are not deleted here) and reset the counters.
Heap::~Heap()
{
	delete[] heap;
	heap = NULL;
	size = 0;
	length = 0;
}
//Debug hook: receives a message s but currently does nothing with it.
//The DEBUG_KVSTORE section is an empty placeholder.
void
Heap::print(string s)
{
#ifdef DEBUG_KVSTORE
#endif
}

View File

@ -1,42 +0,0 @@
/*=============================================================================
# Filename: Heap.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:37
# Description: set and deal of Node*s in memory
=============================================================================*/
#ifndef _KVSTORE_HEAP_HEAP_H
#define _KVSTORE_HEAP_HEAP_H
#include "../../Util/Util.h"
#include "../node/Node.h"
/* add, sub, modify: all can be done within O(logn) using the adjust-function */
//QUERY: when an element is modified, finding its position consumes O(n). How about keeping only the smallest?
//(add O(1), sub O(2n), modify O(n))
//TODO: to solve this problem, use another hash: (pointer, pos), to find the position of
//a given pointer p in O(lgn) time
//Min-heap of Node pointers ordered by Node::getRank().
//The heap owns only the pointer array; the destructor does not delete the nodes.
class Heap
{
private:
	Node** heap;		//dynamic array
	unsigned length;	//valid elements num
	unsigned size;		//max-size of heap
	//The array is released in the destructor, so a member-wise copy would be freed
	//twice: copying is disabled (declared here, never defined).
	Heap(const Heap&);
	Heap& operator=(const Heap&);
public:
	Heap();					//empty heap without array
	Heap(unsigned _size);	//empty heap with room for _size elements
	Node* getTop() const;	//return the top element, NULL if empty
	unsigned getLen() const;
	unsigned getSize() const;
	bool isEmpty() const;
	bool insert(Node* _np);	//insert and adjust
	bool remove();			//remove top and adjust
	bool modify(Node* _np, bool _flag);	//search modified element and adjust (_flag: true = up)
	~Heap();
	void print(std::string s);	//DEBUG
};
#endif

View File

@ -1,294 +0,0 @@
/*=============================================================================
# Filename: IntlNode.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: achieve functions in IntlNode.h
=============================================================================*/
#include "IntlNode.h"
using namespace std;
/*
void
IntlNode::AllocChilds()
{
childs = (Node**)malloc(sizeof(Node*) * MAX_CHILD_NUM);
}
*/
//Build an internal node with every child slot cleared.
//childs has MAX_CHILD_NUM+1 entries, so the whole array is zeroed via sizeof(childs)
//rather than only the first MAX_CHILD_NUM slots.
IntlNode::IntlNode()
{
	memset(childs, 0, sizeof(childs));
	//this->AllocChilds();
}
//Build an internal node with every child slot cleared.
//NOTE(review): isVirtual is not used here and the default Node constructor runs, so keys
//are allocated either way; confirm this is intended.
//childs has MAX_CHILD_NUM+1 entries, so the whole array is zeroed via sizeof(childs).
IntlNode::IntlNode(bool isVirtual)	//call father-class's constructor automatically
{
	memset(childs, 0, sizeof(childs));
	//this->AllocChilds();
}
/*
IntlNode::IntlNode(Storage* TSM) //QUERY
{
TSM->readNode(this, Storage::OVER);
}
*/
void
IntlNode::Virtual()
{
//this->FreeKeys();
this->release();
this->delMem();
}
void
IntlNode::Normal()
{
this->AllocKeys();
this->setMem();
}
//Return the child pointer in slot _index, or NULL when _index is out of range.
//A node with num keys has num+1 childs, so the valid slots are 0..num.
Node*
IntlNode::getChild(int _index) const
{
	const int num = this->getNum();
	if(_index >= 0 && _index <= num)
		return childs[_index];
	return NULL;
}
//Overwrite the child pointer in slot _index (valid slots: 0..num).
//Prints an error and returns false when _index is out of range.
bool
IntlNode::setChild(Node* _child, int _index)
{
	const int last = this->getNum();
	const bool valid = (_index >= 0) && (_index <= last);
	if(!valid)
	{
		print(string("error in setChild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	childs[_index] = _child;
	return true;
}
//Insert _child at slot _index, shifting slots _index..num one place to the right.
//Accepts _index in 0..num+1; otherwise prints an error and returns false.
//The key count is not changed here.
bool
IntlNode::addChild(Node* _child, int _index)
{
	const int num = this->getNum();
	if(_index < 0 || _index > num+1)
	{
		print(string("error in addChild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	//walk from the right end so nothing is overwritten before it has been moved
	for(int pos = num + 1; pos > _index; --pos)
		childs[pos] = childs[pos-1];
	childs[_index] = _child;
	return true;
}
//Remove the child in slot _index by shifting slots _index+1..num one place to the left.
//Accepts _index in 0..num; otherwise prints an error and returns false.
//The key count is not changed here.
bool
IntlNode::subChild(int _index)
{
	const int num = this->getNum();
	if(_index < 0 || _index > num)
	{
		print(string("error in subchild: Invalid index ") + Util::int2string(_index));
		return false;
	}
	int pos = _index;
	while(pos < num)
	{
		childs[pos] = childs[pos+1];
		++pos;
	}
	return true;
}
unsigned
IntlNode::getSize() const
{
unsigned sum = INTL_SIZE, num = this->getNum(), i;
for(i = 0; i < num; ++i)
sum += keys[i].getLen();
return sum;
}
//Split this internal node in two and link the new right sibling into _father.
//_index is the slot of this node inside _father.
//Keys/childs from position MIN_CHILD_NUM on move to the new node p; this node keeps
//MIN_KEY_NUM keys; the key at position MIN_KEY_NUM is handed up to _father.
//All three nodes are marked dirty. Returns the new right sibling.
Node*
IntlNode::split(Node* _father, int _index)
{
int num = this->getNum();
Node* p = new IntlNode; //right child
p->setHeight(this->getHeight());
int i, k;
//p's key count grows by one per round, so each addKey/addChild appends at p's end
for(i = MIN_CHILD_NUM, k = 0; i < num; ++i, ++k)
{
p->addKey(this->keys+i, k);
p->addChild(this->childs[i], k);
p->addNum();
}
//one more child than keys: move the last child pointer (i == num here)
p->addChild(this->childs[i], k);
//tp points into this node's key array; it is taken before the count is cut down
const Bstr* tp = this->keys + MIN_KEY_NUM;
this->setNum(MIN_KEY_NUM);
//addKey is called without ifcopy, so the father's slot is assigned from *tp
_father->addKey(tp, _index);
_father->addChild(p, _index+1); //DEBUG(check the index)
_father->addNum();
_father->setDirty();
p->setDirty();
this->setDirty();
return p;
}
//Repair this node by borrowing from, or merging with, a sibling.
//_index is the slot of this node inside _father.
//The case is chosen as follows (right neighbor examined first, then left):
//  1: merge the right neighbor into this   2: move one key/child from the right neighbor
//  3: merge the left neighbor into this    4: move one key/child from the left neighbor
//Returns the emptied sibling (its key count is set to 0, it is not deleted here) for
//cases 1 and 3, and NULL otherwise.
Node*
IntlNode::coalesce(Node* _father, int _index)
{
	//p and k stay NULL/0 when no neighbor is selected, so the invalid-case path
	//below never touches an indeterminate pointer
	int i, j = _father->getNum(), k = 0;	//BETTER: unsigned?
	Node* p = NULL;
	int ccase = 0;
	const Bstr* bstr = NULL;
	if(_index < j)	//the right neighbor
	{
		p = _father->getChild(_index+1);
		k = p->getNum();
		if((unsigned)k > MIN_KEY_NUM)
			ccase = 2;
		else	//==MIN_KEY_NUM
			ccase = 1;
	}
	if(_index > 0)	//the left neighbor
	{
		Node* tp = _father->getChild(_index-1);
		unsigned tk = tp->getNum();
		if(ccase < 2)
		{
			if(ccase == 0)
				ccase = 3;
			if(tk > MIN_KEY_NUM)
				ccase = 4;
		}
		if(ccase > 2)
		{
			p = tp;
			k = tk;
		}
	}
	switch(ccase)
	{
	case 1:	//union right to this
		this->addKey(_father->getKey(_index), this->getNum());
		this->addNum();
		for(i = 0; i < k; ++i)
		{
			this->addKey(p->getKey(i), this->getNum());
			this->addChild(p->getChild(i), this->getNum());
			this->addNum();
		}
		this->setChild(p->getChild(i), this->getNum());
		_father->subKey(_index);
		_father->subChild(_index+1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 2:	//move one from right
		this->addKey(_father->getKey(_index), this->getNum());
		_father->setKey(p->getKey(0), _index);
		p->subKey(0);
		this->addChild(p->getChild(0), this->getNum()+1);
		p->subChild(0);
		this->addNum();
		p->subNum();
		break;
	case 3:	//union left to this
		this->addKey(_father->getKey(_index-1), 0);
		this->addNum();
		for(i = k; i > 0; --i)
		{
			int t = i - 1;
			this->addKey(p->getKey(t), 0);
			this->addChild(p->getChild(i), 0);
			this->addNum();
		}
		this->addChild(p->getChild(0), 0);
		_father->subKey(_index-1);
		_father->subChild(_index-1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 4:	//move one from left
		bstr = p->getKey(k-1);
		p->subKey(k-1);
		this->addKey(_father->getKey(_index-1), 0);
		_father->setKey(bstr, _index-1);
		this->addChild(p->getChild(k), 0);
		p->subChild(k);
		this->addNum();
		p->subNum();
		break;
	default:
		print("error in coalesce: Invalid case!");
		//printf("error in coalesce: Invalid case!");
	}
	_father->setDirty();
	if(p != NULL)	//no neighbor was selected in the invalid case
		p->setDirty();
	this->setDirty();
	if(ccase == 1 || ccase == 3)
		return p;
	else
		return NULL;
}
//Free the key array of an in-memory node; does nothing when the in-memory flag is clear.
//Slots from num up to MAX_KEY_NUM are clear()ed before delete[] runs.
//NOTE(review): presumably clear() detaches stale contents so that destroying the array
//does not free them; confirm against Bstr.
void
IntlNode::release()
{
	if(this->inMem())
	{
		for(unsigned i = this->getNum(); i < MAX_KEY_NUM; ++i)
			keys[i].clear();
		delete[] keys;
	}
}
//Destructor: frees the key array through release(); childs is a plain member array.
IntlNode::~IntlNode()
{
	this->release();
}
//Debug output, compiled only with DEBUG_KVSTORE: writes a time stamp, the class name and
//the message s to Util::debug_kvstore. For s == "node" / "NODE" it also dumps store, key
//count, flag and every key (keys printed with "bstr" / "BSTR" respectively).
void
IntlNode::print(string s)
{
#ifdef DEBUG_KVSTORE
	int num = this->getNum();
	fputs(Util::showtime().c_str(), Util::debug_kvstore);
	fputs("Class IntlNode\n", Util::debug_kvstore);
	fputs("Message: ", Util::debug_kvstore);
	fputs(s.c_str(), Util::debug_kvstore);
	fputs("\n", Util::debug_kvstore);
	const bool brief = (s == "node");
	if(brief || s == "NODE")
	{
		fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
		for(int i = 0; i < num; ++i)
		{
			if(brief)
				this->keys[i].print("bstr");
			else
				this->keys[i].print("BSTR");
		}
	}
	//TODO(check node, if satisfy B+ definition): "check node" is accepted but not implemented
#endif
}

View File

@ -1,49 +0,0 @@
/*=============================================================================
# Filename: IntlNode.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: the internal-node of a B+ tree
=============================================================================*/
#ifndef _KVSTORE_NODE_INTLNODE_H
#define _KVSTORE_NODE_INTLNODE_H
#include "Node.h"
//Internal (non-leaf) node of the B+ tree: the keys inherited from Node plus child pointers.
class IntlNode: public Node
{
protected:
//child pointers; a node with num keys uses slots 0..num, and one spare slot is reserved
Node* childs[MAX_CHILD_NUM+1];
//Node** childs;
//void AllocChilds();
public:
IntlNode();
IntlNode(bool isVirtual);
//IntlNode(Storage* TSM);
//Virtual(): release the key array and clear the in-memory flag
void Virtual();
//Normal(): allocate the key array again and set the in-memory flag
void Normal();
//child accessors: get/set return NULL/false on an invalid index, add/sub shift the slots
Node* getChild(int _index) const;
bool setChild(Node* _child, int _index);
bool addChild(Node* _child, int _index);
bool subChild(int _index);
//INTL_SIZE plus the length of every key
unsigned getSize() const;
//split: create a right sibling and register it in _father (slot _index is this node)
Node* split(Node* _father, int _index);
//coalesce: borrow from or merge with a sibling; returns the emptied sibling or NULL
Node* coalesce(Node* _father, int _index);
//release: free the key array if the node is in memory
void release();
~IntlNode();
void print(std::string s); //DEBUG
/*non-sense functions: polymorphic
Node* getPrev() const;
Node* getNext() const;
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index);
bool addValue(const Bstr* _value, int _index);
bool subValue(int _index);
void setPrev(Node* _prev);
void setNext(Node* _next);
*/
};
#endif

View File

@ -1,377 +0,0 @@
/*=============================================================================
# Filename: LeafNode.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:40
# Description: achieve functions in LeafNode.h
=============================================================================*/
#include "LeafNode.h"
using namespace std;
void
LeafNode::AllocValues()
{
values = new Bstr[MAX_KEY_NUM];
}
/*
void
LeafNode::FreeValues()
{
delete[] values;
}
*/
//Build an in-memory leaf: no neighbors yet, leaf flag set, value array allocated.
LeafNode::LeafNode()
{
	next = NULL;
	prev = NULL;
	flag |= NF_IL;	//leaf flag
	this->AllocValues();
}
//Build a leaf without neighbors and with the leaf flag set.
//The value array is allocated only when isVirtual is false.
LeafNode::LeafNode(bool isVirtual)
{
	next = NULL;
	prev = NULL;
	flag |= NF_IL;
	if(isVirtual)
		return;
	this->AllocValues();
}
/*
LeafNode::LeafNode(Storage* TSM)
{
AllocValues();
TSM->readNode(this, Storage::OVER);
}
*/
void
LeafNode::Virtual()
{
//this->FreeKeys();
//this->FreeValues();
this->release();
this->delMem();
}
void
LeafNode::Normal()
{
this->AllocKeys();
this->AllocValues();
this->setMem();
}
//Previous leaf in the leaf list (NULL if none).
Node*
LeafNode::getPrev() const
{
	return this->prev;
}
//Next leaf in the leaf list (NULL if none).
Node*
LeafNode::getNext() const
{
	return this->next;
}
//Return a pointer to the value in slot _index (valid slots: 0..num-1), or NULL when
//_index is out of range. The pointer refers into this node's own value array.
const Bstr*
LeafNode::getValue(int _index) const
{
	const int num = this->getNum();
	if(_index >= 0 && _index < num)
		return this->values + _index;
	return NULL;
}
bool
LeafNode::setValue(const Bstr* _value, int _index, bool ifcopy)
{
int num = this->getNum();
if(_index < 0 || _index >= num)
{
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
return false;
}
this->values[_index].release(); //NOTICE: only used in modify
if(ifcopy)
this->values[_index].copy(_value);
else
this->values[_index] = *_value;
return true;
}
bool
LeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
{
int num = this->getNum();
if(_index < 0 || _index > num)
{
print(string("error in addValue: Invalid index ") + Util::int2string(_index));
return false;
}
int i;
for(i = num-1; i >= _index; --i)
this->values[i+1] = this->values[i];
if(ifcopy)
this->values[_index].copy(_value);
else
this->values[_index] = *_value;
return true;
}
//Remove the value in slot _index (0..num-1) by shifting the following slots one place left.
//When ifdel is set the removed value is release()d first.
//Prints an error and returns false when _index is out of range.
//The key count is not changed here.
bool
LeafNode::subValue(int _index, bool ifdel)
{
	const int num = this->getNum();
	if(_index < 0 || _index >= num)
	{
		print(string("error in subValue: Invalid index ") + Util::int2string(_index));
		return false;
	}
	if(ifdel)
		this->values[_index].release();
	int pos = _index;
	while(pos < num-1)
	{
		this->values[pos] = this->values[pos+1];
		++pos;
	}
	return true;
}
//Set the previous leaf in the leaf list.
void
LeafNode::setPrev(Node* _prev)
{
	prev = _prev;
}
//Set the next leaf in the leaf list.
void
LeafNode::setNext(Node* _next)
{
	next = _next;
}
unsigned
LeafNode::getSize() const
{
unsigned sum = LEAF_SIZE, num = this->getNum(), i;
for(i = 0; i < num; ++i)
{
sum += keys[i].getLen();
sum += values[i].getLen();
}
return sum;
}
//Split this leaf in two and link the new right sibling into _father and the leaf list.
//_index is the slot of this node inside _father.
//Keys/values from position MIN_KEY_NUM on move to the new leaf p; this leaf keeps the
//first MIN_KEY_NUM entries; a copy of the first key of p is inserted into _father.
//All three nodes are marked dirty. Returns the new right sibling.
Node*
LeafNode::split(Node* _father, int _index)
{
	int num = this->getNum();
	Node* p = new LeafNode;	//right child
	p->setHeight(this->getHeight());	//NOTICE: assign height for new node
	//splice p into the doubly-linked leaf list between this and the old successor;
	//the successor's prev link must follow, or it would still point at this leaf
	p->setNext(this->next);
	if(this->next != NULL)
		this->next->setPrev(p);
	this->setNext(p);
	p->setPrev(this);
	int i, k;
	//p's key count grows by one per round, so each addKey/addValue appends at p's end
	for(i = MIN_KEY_NUM, k = 0; i < num; ++i, ++k)
	{
		p->addKey(this->keys+i, k);
		p->addValue(this->values+i, k);
		p->addNum();
	}
	//tp points into this leaf's key array; it is taken before the count is cut down
	const Bstr* tp = this->keys + MIN_KEY_NUM;
	this->setNum(MIN_KEY_NUM);
	_father->addKey(tp, _index, true);	//ifcopy: stored through Bstr::copy()
	_father->addChild(p, _index+1);	//DEBUG(check the index)
	_father->addNum();
	_father->setDirty();
	p->setDirty();
	this->setDirty();
	return p;
}
//Repair this leaf by borrowing from, or merging with, a sibling (add a key or coalesce
//a neighbor to this). _index is the slot of this node inside _father.
//The case is chosen as follows (right neighbor examined first, then left):
//  1: merge the right neighbor into this   2: move one entry from the right neighbor
//  3: merge the left neighbor into this    4: move one entry from the left neighbor
//Merging also re-links the leaf list. Returns the emptied sibling (its key count is set
//to 0, it is not deleted here) for cases 1 and 3, and NULL otherwise.
Node*
LeafNode::coalesce(Node* _father, int _index)
{
	//p and k stay NULL/0 when no neighbor is selected
	int i, j = _father->getNum(), k = 0;	//BETTER: unsigned?
	Node* p = NULL;
	int ccase = 0;
	const Bstr* bstr = NULL;
	if(_index < j)	//the right neighbor
	{
		p = _father->getChild(_index+1);
		k = p->getNum();
		if((unsigned)k > MIN_KEY_NUM)
			ccase = 2;
		else	//==MIN_KEY_NUM
			ccase = 1;
	}
	if(_index > 0)	//the left neighbor
	{
		Node* tp = _father->getChild(_index-1);
		unsigned tk = tp->getNum();
		if(ccase < 2)
		{
			if(ccase == 0)
				ccase = 3;
			if(tk > MIN_KEY_NUM)
				ccase = 4;
		}
		if(ccase > 2)
		{
			p = tp;
			k = tk;
		}
	}
	switch(ccase)
	{
	case 1:	//union right to this
		for(i = 0; i < k; ++i)
		{
			this->addKey(p->getKey(i), this->getNum());
			this->addValue(p->getValue(i), this->getNum());
			this->addNum();
		}
		_father->subKey(_index, true);
		_father->subChild(_index+1);
		_father->subNum();
		this->next = p->getNext();
		if(this->next != NULL)
			this->next->setPrev(this);
		p->setNum(0);	//NOTICE: adjust num before delete!
		//delete p;
		break;
	case 2:	//move one from right
		this->addKey(p->getKey(0), this->getNum());
		_father->setKey(p->getKey(1), _index, true);
		p->subKey(0);
		this->addValue(p->getValue(0), this->getNum());
		p->subValue(0);
		this->addNum();
		p->subNum();
		break;
	case 3:	//union left to this
		//BETTER: move all keys/etc one time
		for(i = k; i > 0; --i)
		{
			int t = i - 1;
			this->addKey(p->getKey(t), 0);
			this->addValue(p->getValue(t), 0);
			this->addNum();
		}
		_father->subKey(_index-1, true);
		_father->subChild(_index-1);
		_father->subNum();
		this->prev = p->getPrev();
		if(this->prev != NULL)	//else: leaves-list
			this->prev->setNext(this);
		p->setNum(0);
		//delete p;
		break;
	case 4:	//move one from left
		bstr = p->getKey(k-1);
		p->subKey(k-1);
		this->addKey(bstr, 0);
		_father->setKey(bstr, _index-1, true);
		this->addValue(p->getValue(k-1), 0);
		p->subValue(k-1);
		this->addNum();
		p->subNum();
		break;
	default:
		print("error in coalesce: Invalid case!");
		//printf("error in coalesce: Invalid case!");
	}
	_father->setDirty();
	if(p != NULL)	//no neighbor was selected in the invalid case
		p->setDirty();
	this->setDirty();
	if(ccase == 1 || ccase == 3)
		return p;
	else
		return NULL;
}
//Free the key and value arrays of an in-memory leaf; does nothing when the in-memory
//flag is clear. Slots from num up to MAX_KEY_NUM are clear()ed before delete[] runs.
//NOTE(review): presumably clear() detaches stale contents so that destroying the arrays
//does not free them; confirm against Bstr.
void
LeafNode::release()
{
	if(this->inMem())
	{
		for(unsigned i = this->getNum(); i < MAX_KEY_NUM; ++i)
		{
			keys[i].clear();
			values[i].clear();
		}
		delete[] keys;
		delete[] values;
	}
}
//Destructor: frees the key and value arrays through release().
LeafNode::~LeafNode()
{
	this->release();
}
//Debug output, compiled only with DEBUG_KVSTORE: writes a time stamp, the class name and
//the message s to Util::debug_kvstore. Recognised messages:
//  "NODE"       header lines plus every key and value
//  "node"       header lines only (store/num/flag and prev/next)
//  "check node" verifies the key count range and strictly increasing keys, prints the
//               "node" dump and a good/bad verdict
void
LeafNode::print(string s)
{
#ifdef DEBUG_KVSTORE
	unsigned num = this->getNum();
	fputs(Util::showtime().c_str(), Util::debug_kvstore);
	fputs("Class LeafNode\n", Util::debug_kvstore);
	fputs("Message: ", Util::debug_kvstore);
	fputs(s.c_str(), Util::debug_kvstore);
	fputs("\n", Util::debug_kvstore);
	if(s == "NODE" || s == "node")
	{
		fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
		fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next);
		if(s == "NODE")
		{
			for(unsigned i = 0; i < num; ++i)
			{
				this->keys[i].print("BSTR");
				this->values[i].print("BSTR");
			}
		}
		return;
	}
	if(s != "check node")
		return;
	//check the node, if satisfy B+ definition
	bool good = (num >= MIN_KEY_NUM && num <= MAX_KEY_NUM);
	if(good)
	{
		unsigned i = 1;
		while(i < num && keys[i] > keys[i-1])
			++i;
		if(i < num)	//stopped early: keys are not strictly increasing
			good = false;
	}
	this->print("node");
	if(good)
		fprintf(Util::debug_kvstore, "This node is good\n");
	else
		fprintf(Util::debug_kvstore, "This node is bad\n");
#endif
}

View File

@ -1,51 +0,0 @@
/*=============================================================================
# Filename: LeafNode.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:39
# Description: the leaf-node of a B+ tree
=============================================================================*/
#ifndef _KVSTORE_NODE_LEAFNODE_H
#define _KVSTORE_NODE_LEAFNODE_H
#include "Node.h"
//Leaf node of the B+ tree: the keys inherited from Node, one value per key, and links to
//the neighboring leaves.
class LeafNode: public Node
{
protected:
Node* prev; //LeafNode
Node* next;
//value array, MAX_KEY_NUM slots; values[i] belongs to keys[i]
Bstr* values;
void AllocValues();
//void FreeValues();
public:
LeafNode();
//the value array is allocated only when isVirtual is false
LeafNode(bool isVirtual);
//LeafNode(Storage* TSM);
//Virtual(): release keys/values and clear the in-memory flag
void Virtual();
//Normal(): allocate keys/values again and set the in-memory flag
void Normal();
Node* getPrev() const;
Node* getNext() const;
//value accessors: valid slots are 0..num-1 (addValue also accepts num);
//ifcopy stores through Bstr::copy(), ifdel release()s the removed value
const Bstr* getValue(int _index) const;
bool setValue(const Bstr* _value, int _index, bool ifcopy = false);
bool addValue(const Bstr* _value, int _index, bool ifcopy = false);
bool subValue(int _index, bool ifdel = false);
void setPrev(Node* _prev);
void setNext(Node* _next);
//LEAF_SIZE plus the length of every key and value
unsigned getSize() const;
//split: create a right sibling and register it in _father (slot _index is this node)
Node* split(Node* _father, int _index);
//coalesce: borrow from or merge with a sibling; returns the emptied sibling or NULL
Node* coalesce(Node* _father, int _index);
//release: free the key and value arrays if the node is in memory
void release();
~LeafNode();
void print(std::string s); //DEBUG
/*non-sense virtual function
Node* getChild(int _index) const;
bool addChild(Node* _child, int _index);
bool subChild(int _index);
*/
};
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
#endif

View File

@ -1,329 +0,0 @@
/*=============================================================================
# Filename: Node.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:39
# Description: achieve functions in Node.h
=============================================================================*/
#include "Node.h"
using namespace std;
void
Node::AllocKeys()
{
keys = new Bstr[MAX_KEY_NUM];
}
/*
void
Node::FreeKeys()
{
delete[] keys;
}
*/
//Build an in-memory node: store address 0, only the in-memory flag set, key array allocated.
Node::Node()
{
	store = 0;
	flag = NF_IM;
	this->AllocKeys();
}
//Build a node with store address 0 and all flags cleared.
//When isVirtual is false the in-memory flag is set and the key array is allocated;
//otherwise keys is set to NULL so that it never holds an indeterminate pointer.
Node::Node(bool isVirtual)
{
	store = flag = 0;
	keys = NULL;
	if(!isVirtual)
	{
		flag |= NF_IM;
		AllocKeys();
	}
}
/*
Node::Node(Storage* TSM)
{
AllocKeys();
TSM->readNode(this, Storage::OVER);
}
*/
bool
Node::isLeaf() const
{
return this->flag & NF_IL;
}
bool
Node::isDirty() const
{
return this->flag & NF_ID;
}
void
Node::setDirty()
{
this->flag |= NF_ID;
}
void
Node::delDirty()
{
this->flag &= ~NF_ID;
}
bool
Node::inMem() const
{
return this->flag & NF_IM;
}
void
Node::setMem()
{
this->flag |= NF_IM;
}
void
Node::delMem()
{
this->flag &= ~NF_IM;
}
/*
bool
Node::isVirtual() const
{
return this->flag & NF_IV;
}
void
Node::setVirtual()
{
this->flag |= NF_IV;
}
void
Node::delVirtual()
{
this->flag &= ~NF_IV;
}
*/
unsigned
Node::getRank() const
{
return this->flag & NF_RK;
}
void
Node::setRank(unsigned _rank)
{
this->flag &= ~NF_RK;
this->flag |= _rank;
}
unsigned
Node::getHeight() const
{
return (this->flag & NF_HT)>>20;
}
void
Node::setHeight(unsigned _h)
{
this->flag &= ~NF_HT;
this->flag |= (_h<<20);
}
unsigned
Node::getNum() const
{
return (this->flag & NF_KN)>>12;
}
bool
Node::setNum(int _num)
{
if(_num < 0 || (unsigned)_num > MAX_KEY_NUM)
{
print(string("error in setNum: Invalid num ") + Util::int2string(_num));
return false;
}
this->flag &= ~NF_KN;
this->flag |= (_num<<12);
return true;
}
bool
Node::addNum()
{
if(this->getNum() + 1 > MAX_KEY_NUM)
{
print("error in addNum: Invalid!");
return false;
}
this->flag += (1<<12);
return true;
}
bool
Node::subNum()
{
if(this->getNum() < 1)
{
print("error in subNum: Invalid!");
return false;
}
this->flag -= (1<<12);
return true;
}
unsigned
Node::getStore() const
{
return this->store;
}
void
Node::setStore(unsigned _store)
{
this->store = _store;
}
unsigned
Node::getFlag() const
{
return flag;
}
void
Node::setFlag(unsigned _flag)
{
this->flag = _flag;
}
//Return a pointer to the key in slot _index (valid slots: 0..num-1).
//On an invalid index a message is printed to stdout and NULL is returned.
//The pointer refers into this node's own key array.
const Bstr*
Node::getKey(int _index) const
{
	const int num = this->getNum();
	if(_index >= 0 && _index < num)
		return this->keys + _index;
	printf("error in getKey: Invalid index\n");
	return NULL;
}
bool
Node::setKey(const Bstr* _key, int _index, bool ifcopy)
{
int num = this->getNum();
if(_index < 0 || _index >= num)
{
print(string("error in setKey: Invalid index ") + Util::int2string(_index));
return false;
}
if(ifcopy)
keys[_index].copy(_key);
else
keys[_index] = *_key;
return true;
}
bool
Node::addKey(const Bstr* _key, int _index, bool ifcopy)
{
int num = this->getNum();
if(_index < 0 || _index > num)
{
print(string("error in addKey: Invalid index ") + Util::int2string(_index));
return false;
}
int i;
//NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!!
//however. tree operations ensure that: when node is full, not add but split first!
for(i = num - 1; i >= _index; --i)
keys[i+1] = keys[i];
if(ifcopy)
keys[_index].copy(_key);
else
keys[_index] = *_key;
return true;
}
//Remove the key in slot _index (0..num-1) by shifting the following slots one place left.
//When ifdel is set the removed key is release()d first.
//Prints an error and returns false when _index is out of range.
//The key count is not changed here.
bool
Node::subKey(int _index, bool ifdel)
{
	const int num = this->getNum();
	if(_index < 0 || _index >= num)
	{
		print(string("error in subKey: Invalid index ") + Util::int2string(_index));
		return false;
	}
	if(ifdel)
		keys[_index].release();
	int pos = _index;
	while(pos < num - 1)
	{
		keys[pos] = keys[pos+1];
		++pos;
	}
	return true;
}
//Binary search over the num stored keys: return the first index whose key is greater
//than _bstr, or num when no such key exists (0 for an empty node).
//In other words: the smallest i with _bstr < keys[i].
int
Node::searchKey_less(const Bstr& _bstr) const
{
	int first = 0;
	int last = this->getNum();	//half-open range [first, last)
	while(first < last)
	{
		const int mid = (first + last) / 2;
		if(this->keys[mid] > _bstr)
			last = mid;			//answer is mid or to its left
		else
			first = mid + 1;	//keys[mid] <= _bstr: answer is to the right
	}
	return first;
}
//Return the index of the key equal to _bstr, or num (the key count) when there is none.
//Uses searchKey_less(): an equal key, if present, sits right before the position it returns.
int
Node::searchKey_equal(const Bstr& _bstr) const
{
	const int num = this->getNum();
	const int candidate = this->searchKey_less(_bstr) - 1;
	if(candidate >= 0 && this->keys[candidate] == _bstr)
		return candidate;
	return num;
}
//Return the index of the key equal to _bstr if there is one; otherwise the first index
//whose key is greater than _bstr (num when none).
//In other words: the smallest i with _bstr <= keys[i], given keys sorted ascending.
int
Node::searchKey_lessEqual(const Bstr& _bstr) const
{
	const int upper = this->searchKey_less(_bstr);
	if(upper > 0 && this->keys[upper-1] == _bstr)
		return upper - 1;
	return upper;
}

View File

@ -1,114 +0,0 @@
/*=============================================================================
# Filename: Node.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:38
# Description: basic Node class, father of IntlNode and LeafNode
=============================================================================*/
#ifndef _KVSTORE_NODE_H
#define _KVSTORE_NODE_H
#include "../../Util/Util.h"
#include "../../Util/Bstr.h"
//Common base of IntlNode and LeafNode: holds the store address, the packed flag word and
//the key array, and declares the operations both node kinds share.
class Node //abstract basic class
{
public:
static const unsigned DEGREE = 2 * 63; //the degree of B+ tree
static const unsigned MAX_CHILD_NUM = DEGREE;
static const unsigned MIN_CHILD_NUM = DEGREE >> 1;
static const unsigned MAX_KEY_NUM = MAX_CHILD_NUM - 1; //max key-num
static const unsigned MIN_KEY_NUM = MIN_CHILD_NUM - 1; //min key-num
/* different flags for tree-nodes, 32-bit; putting rank in the low bits means no need to shift it */
static const unsigned NF_IL = 0x80000000; //is leaf
static const unsigned NF_ID = 0x00080000; //is dirty, in rank-area
static const unsigned NF_IM = 0x20000000; //in memory, not virtual
//static const unsigned NF_IV = 0x10000000; //is virtual
static const unsigned NF_RK = 0x00ffffff; //select-rank, in Storage
static const unsigned NF_HT = 0xf00000; //height area in rank (read/written with a shift of 20)
static const unsigned NF_KN = 0x07f000; //key-num area (shift of 12); NOTICE: decided by DEGREE
//base size figures used by getSize() of the derived classes
static const unsigned INTL_SIZE = sizeof(Bstr) * MAX_KEY_NUM;
static const unsigned LEAF_SIZE = 2 * INTL_SIZE;
protected:
unsigned store; //store address, the Block index
unsigned flag; //NF_RK, NF_IL, NF_ID, NF_IV, property
//int num; //total keys num
//Node* father; //point to father-node, which must be IntlNode
Bstr* keys; //key array, MAX_KEY_NUM slots (see AllocKeys)
void AllocKeys();
//void FreeKeys();
public:
Node();
//when isVirtual is true neither the in-memory flag is set nor the key array allocated
Node(bool isVirtual);
//single-bit flag accessors
bool isLeaf() const;
bool isDirty() const;
void setDirty();
void delDirty();
bool inMem() const;
void setMem();
void delMem();
//bool isVirtual() const;
//void setVirtual();
//void delVirtual();
//bit-field accessors on flag
unsigned getRank() const;
void setRank(unsigned _rank);
unsigned getHeight() const;
void setHeight(unsigned _h);
unsigned getNum() const;
bool setNum(int _num);
bool addNum();
bool subNum();
unsigned getStore() const;
void setStore(unsigned _store);
unsigned getFlag() const;
void setFlag(unsigned _flag);
//key accessors: valid slots are 0..num-1 (addKey also accepts num);
//ifcopy stores through Bstr::copy(), ifdel release()s the removed key
const Bstr* getKey(int _index) const; //need to check the index
bool setKey(const Bstr* _key, int _index, bool ifcopy = false);
bool addKey(const Bstr* _key, int _index, bool ifcopy = false);
bool subKey(int _index, bool ifdel = false);
//several binary key search utilities
int searchKey_less(const Bstr& _bstr) const;
int searchKey_equal(const Bstr& _bstr) const;
int searchKey_lessEqual(const Bstr& _bstr) const;
//virtual functions: polymorphic
//the defaults below do nothing and return NULL/true; each node kind overrides its own set
virtual Node* getChild(int _index) const { return NULL; };
virtual bool setChild(Node* _child, int _index) { return true; };
virtual bool addChild(Node* _child, int _index) { return true; };
virtual bool subChild(int _index) { return true; };
virtual Node* getPrev() const { return NULL; };
virtual Node* getNext() const { return NULL; };
virtual const Bstr* getValue(int _index) const { return NULL; };
virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
virtual bool subValue(int _index, bool ifdel = false) { return true;};
virtual void setPrev(Node* _prev) {};
virtual void setNext(Node* _next) {};
//pure virtual: must be provided by IntlNode and LeafNode
virtual void Virtual() = 0;
virtual void Normal() = 0;
virtual unsigned getSize() const = 0; //return all memory owned
virtual Node* split(Node* _father, int _index) = 0;
virtual Node* coalesce(Node* _father, int _index) = 0;
virtual void release() = 0; //release the node, only remain necessary information
virtual ~Node() {};
virtual void print(std::string s) = 0; //DEBUG(print the Node)
};
/*NOTICE(operations in release())
*To save memory, we could keep only store and flag (childs added for Leaf).
*However, in this way the childs' pointers are free to change, and using Node** or Node*& is
*also nonsense because the pointer variable may die.
*Another way is to release only the dynamic memory and keep the node object as a whole, reading
*the Bstrs only to rebuild. In this way the nodes' pointers don't change and operations are
*simplified, while a bit more memory is consumed. Because Bstrs consume the most memory, and
*memory-disk swapping is the most time-consuming thing, this seems to be the better way.
*WARN: while a node is in memory and not deleted, its basic content always exists! If there are
*really too many nodes, this will cause disaster because we can't swap them out until the tree
*is closed!
*To solve this problem, there should be two types of release-function: one to release the Bstrs,
*one to release the whole node (the pointer becomes invalid, which raises the rebuild problem)
*/
#endif

View File

@ -1,637 +0,0 @@
/*=============================================================================
# Filename: Storage.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:43
# Description: achieve functions in Storage.h
=============================================================================*/
#include "Storage.h"
using namespace std;
//Build an unattached storage object: no file, no free list, no heap, full buffer budget.
//treeheight is set to NULL as well, so no pointer member is left indeterminate.
Storage::Storage()
{	//not use ../logs/, notice the location of program
	cur_block_num = SET_BLOCK_NUM;
	filepath = "";
	freelist = NULL;
	treefp = NULL;
	minheap = NULL;
	treeheight = NULL;
	freemem = MAX_BUFFER_SIZE;
}
//Attach the storage to the tree file _filepath.
//_mode "build": create/truncate the file, write the header (height 0, root 0, block
//               count), write an all-zero block bitmap and put every block on the free list.
//_mode "open":  open the existing file, read height (into *_height), root block number
//               and block count, rebuild the free list from the zero bits of the bitmap
//               and leave the file position at the root block.
//Any other mode, or a failing fopen, prints an error and returns early; every member has
//already been given a defined value by then (treefp/freelist/minheap are NULL).
Storage::Storage(string& _filepath, string& _mode, unsigned* _height)
{
	//defined values first, so the early returns below never leave indeterminate members
	cur_block_num = SET_BLOCK_NUM;	//initialize
	this->filepath = _filepath;
	this->treeheight = _height;	//originally set to 0
	this->freemem = MAX_BUFFER_SIZE;
	this->freelist = NULL;
	this->minheap = NULL;
	this->treefp = NULL;
	if(_mode == string("build"))
		treefp = fopen(_filepath.c_str(), "w+b");
	else if(_mode == string("open"))
		treefp = fopen(_filepath.c_str(), "r+b");
	else
	{
		print(string("error in Storage: Invalid mode ") + _mode);
		return;
	}
	if(treefp == NULL)
	{
		print(string("error in Storage: Open error ") + _filepath);
		return;
	}
	this->freelist = new BlockInfo;	//null-head
	unsigned i, j, k;	//j = (SuperNum-1)*BLOCK_SIZE
	BlockInfo* bp;
	if(_mode == "build")
	{	//write basic information
		i = 0;
		fwrite(&i, sizeof(unsigned), 1, this->treefp);	//height
		fwrite(&i, sizeof(unsigned), 1, this->treefp);	//rootnum
		fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp);	//current block num
		fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
		bp = this->freelist;
		j = cur_block_num / 8;	//one bitmap byte covers 8 blocks
		for(i = 0; i < j; ++i)
		{
			fputc(0, this->treefp);
			for(k = 0; k < 8; ++k)
			{
				bp->next = new BlockInfo(i*8+k+1, NULL);
				bp = bp->next;
			}
		}
	}
	else	//_mode == "open"
	{
		//read basic information
		int rootnum;
		char c;
		fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
		fread(&rootnum, sizeof(unsigned), 1, this->treefp);
		fread(&cur_block_num, sizeof(unsigned), 1, this->treefp);
		fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
		bp = this->freelist;
		j = cur_block_num / 8;
		for(i = 0; i < j; ++i)
		{
			c = fgetc(treefp);
			for(k = 0; k < 8; ++k)
			{
				//a zero bit marks a free block; bit k of byte i maps to block i*8+7-k+1
				if((c & (1 << k)) == 0)
				{
					bp->next = new BlockInfo(i*8+7-k+1, NULL);
					bp = bp->next;
				}
			}
		}
		fseek(treefp, Address(rootnum), SEEK_SET);
		//treefp is now ahead of root-block
	}
	this->minheap = new Heap(HEAP_SIZE);
}
//Build the skeleton of the whole tree from the file: every node is created via
//createNode() (flagged not-in-memory there), children are attached to their
//parents, leaves are chained in the order they are visited, and finally only
//the root's content is loaded with readNode().
//  _root:        receives the root node, or NULL when the file has no root
//  _leaves_head: receives the leaf reached by always following child 0
//  _leaves_tail: receives the leaf reached by always following the last child
//Returns true in every path visible here (see the TODO below).
//Expects treefp to be positioned at the root block; position 0 means "no root".
bool
Storage::preRead(Node*& _root, Node*& _leaves_head, Node*& _leaves_tail) //pre-read and build whole tree
{ //set root(in memory) and leaves_head
//TODO: false when exceed memory
_leaves_tail = _leaves_head = _root = NULL;
if(ftell(this->treefp) == 0) //root is null
{
return true;
}
unsigned next, store, j, pos = 0;
unsigned h = *this->treeheight;
Node* p;
//read root node
this->createNode(p);
_root = p;
//the word following the node flag is the number of the node's next block
fread(&next, sizeof(unsigned), 1, treefp);
//explicit stack (one slot per tree level) instead of recursion
//NOTE(review): these are variable-length arrays sized by the tree height,
//which is a compiler extension in C++ - confirm the toolchain supports it
long address[h]; //current address
unsigned used[h]; //used child num
unsigned total[h]; //total child num
unsigned block[h]; //next block num
Node* nodes[h];
address[pos] = ftell(treefp);
used[pos] = 0;
total[pos]= p->getNum() + 1;
block[pos] = next;
nodes[pos] = p;
pos++;
Node* prev = NULL;
while(pos > 0)
{
j = pos - 1;
if(nodes[j]->isLeaf() || used[j] == total[j]) //LeafNode or ready IntlNode
{
if(nodes[j]->isLeaf())
{
//link this leaf behind the previously visited leaf
if(prev != NULL)
{
prev->setNext(nodes[j]);
nodes[j]->setPrev(prev);
}
prev = nodes[j];
}
pos--;
continue;
}
//resume reading the child table of nodes[j] where we left off
fseek(this->treefp, address[j], SEEK_SET);
fread(&store, sizeof(unsigned), 1, treefp);
//hop to the node's next block if the read ended on a block boundary
this->ReadAlign(block + j);
address[j] = ftell(treefp);
//jump to the child's first block and create its (virtual) node
fseek(treefp, Address(store), SEEK_SET);
this->createNode(p);
nodes[j]->setChild(p, used[j]);
used[j]++;
fread(&next, sizeof(unsigned), 1, treefp);
//push the child onto the stack
address[pos] = ftell(treefp);
used[pos] = 0;
total[pos] = p->getNum() + 1;
block[pos] = next;
nodes[pos] = p;
pos++;
}
//set leaves and read root, which is always kept in-mem
p = _root;
while(!p->isLeaf())
{
p = p->getChild(0);
}
_leaves_head = p;
p = _root;
while(!p->isLeaf())
{
p = p->getChild(p->getNum());
}
_leaves_tail = p;
//load the root's content and charge its size to the buffer budget
int memory = 0;
this->readNode(_root, &memory);
this->request(memory);
return true;
}
//Translate a block number into a byte offset inside the tree file.
//Returns 0 for block 0, -1 for a block number beyond cur_block_num,
//otherwise (SuperNum + _blocknum - 1) * BLOCK_SIZE.
long //8-byte in 64-bit machine
Storage::Address(unsigned _blocknum) const //BETTER: inline function
{
    if(_blocknum == 0)
        return 0;
    if(_blocknum > this->cur_block_num)
    {
        //print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum));
        return -1; //address should be non-negative
    }
    //NOTICE: both factors are explicitly widened to long before multiplying
    const long slot = (long)(this->SuperNum + _blocknum - 1);
    return slot * (long)BLOCK_SIZE;
}
//Inverse of Address(): map a byte offset in the tree file to the number of
//the block that contains it.
unsigned
Storage::Blocknum(long address) const
{
    return (address / BLOCK_SIZE) - this->SuperNum + 1;
}
unsigned
Storage::AllocBlock()
{
BlockInfo* p = this->freelist->next;
if(p == NULL)
{
for(unsigned i = 0; i < SET_BLOCK_INC; ++i)
{
cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM
this->FreeBlock(cur_block_num);
}
p = this->freelist->next;
}
unsigned t = p->num;
this->freelist->next = p->next;
delete p;
return t;
}
void
Storage::FreeBlock(unsigned _blocknum)
{ //QUERY: head-sub and tail-add will be better?
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
this->freelist->next = bp;
}
//If the read position sits exactly on a block boundary, continue reading in
//block *_next: seek there and replace *_next with the block number stored at
//the start of that block. Otherwise do nothing.
void
Storage::ReadAlign(unsigned* _next)
{
    const bool at_boundary = (ftell(treefp) % BLOCK_SIZE == 0);
    if(!at_boundary)
        return;
    fseek(treefp, Address(*_next), SEEK_SET);
    fread(_next, sizeof(unsigned), 1, treefp);
}
//If the write position sits exactly on a block boundary, chain a freshly
//allocated block behind block *_curnum and continue writing there.
//  _curnum:       in: block being written; out: the newly allocated block
//  _SpecialBlock: true while *_curnum is the node's first block, whose link
//                 word sits 4 bytes in (after the flag word); cleared here
void
Storage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
{
    if(ftell(treefp) % BLOCK_SIZE != 0)
        return; //still inside the current block
    const unsigned fresh = this->AllocBlock();
    //store the link to the new block in the header of the current block
    fseek(treefp, Address(*_curnum), SEEK_SET);
    if(_SpecialBlock)
    {
        fseek(treefp, 4, SEEK_CUR);
        _SpecialBlock = false;
    }
    fwrite(&fresh, sizeof(unsigned), 1, treefp);
    //data of the new block starts behind its own 4-byte link word
    fseek(treefp, Address(fresh)+4, SEEK_SET);
    *_curnum = fresh;
}
//Load the keys (and, for a leaf, the values) of node _np from disk.
//  _np:      node to load; nothing is done when it is NULL or already in memory
//  _request: incremented by the node's base size (LEAF_SIZE / INTL_SIZE) and,
//            for leaves, by the length of every value read
//Returns false if there was nothing to read, true otherwise.
//The node is marked clean and inserted into the heap with its current rank.
bool
Storage::readNode(Node* _np, int* _request)
{
    if(_np == NULL || _np->inMem())
        return false; //can't read or needn't
    fseek(treefp, Address(_np->getStore()), SEEK_SET);
    const bool leaf = _np->isLeaf();
    unsigned next;
    unsigned idx;
    const unsigned count = _np->getNum();
    Bstr item;
    //skip the flag word, then fetch the number of the node's next block
    fseek(treefp, 4, SEEK_CUR);
    fread(&next, sizeof(unsigned), 1, treefp);
    if(leaf)
        *_request += Node::LEAF_SIZE;
    else
        *_request += Node::INTL_SIZE;
    _np->Normal();
    //an internal node stores count+1 child block numbers ahead of its keys
    if(!leaf)
        fseek(treefp, 4 * (count + 1), SEEK_CUR);
    for(idx = 0; idx < count; ++idx)
    {
        this->readBstr(&item, &next);
        _np->setKey(&item, idx);
    }
    if(leaf)
    {
        for(idx = 0; idx < count; ++idx)
        {
            this->readBstr(&item, &next);
            *_request += item.getLen();
            _np->setValue(&item, idx);
        }
    }
    _np->delDirty();
    this->updateHeap(_np, _np->getRank(), false);
    //detach the last buffer from the temporary before it goes out of scope
    item.clear();
    return true;
}
//Create a node object from the flag word at the current file position.
//The NF_IL bit of the flag selects LeafNode or IntlNode. The new node gets
//the flag word, is marked clean and not-in-memory, and remembers the block
//the flag word was read from. Always returns true.
bool
Storage::createNode(Node*& _np) //create virtual nodes, not in-mem
{
    unsigned header; //QUERY: maybe next-flag... will be better-storage?
    fread(&header, sizeof(unsigned), 1, treefp);
    const bool leaf = (header & Node::NF_IL) > 0; //WARN: according to setting
    if(leaf)
    {
        //this->request(sizeof(LeafNode));
        _np = new LeafNode(true);
    }
    else
    {
        //this->request(sizeof(IntlNode));
        _np = new IntlNode(true);
    }
    _np->setFlag(header);
    _np->delDirty();
    _np->delMem();
    //the flag word occupies the first 4 bytes of the node's first block
    const long flag_pos = ftell(treefp) - 4;
    _np->setStore(Blocknum(flag_pos));
    return true;
}
//Write node _np back to disk.
//Returns false (nothing written) when _np is NULL, is not in memory, or has a
//rank above 0 and is not dirty. Otherwise the blocks the node occupied so far
//are returned to the free list; an empty node (num == 0) then ends here with
//true. A non-empty node is written into freshly allocated blocks in the order
//flag, link word, child block numbers (internal nodes only), keys, values
//(leaves only), and is marked clean.
bool
Storage::writeNode(Node* _np)
{
if(_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty()))
return false; //not need to write back
unsigned num = _np->getNum(), i;
//SpecialBlock stays true while we are still writing the node's first block
bool flag = _np->isLeaf(), SpecialBlock = true;
/*
if(!flag)
{
for(i = 0; i <= num; ++i)
if(_np->getChild(i)->isDirty())
return false; //NOTICE: all childs must be clean!
}
*/
//to release original blocks
unsigned store = _np->getStore(), next;
//if first store is 0, meaning a new node
//in the first block the link word follows the 4-byte flag word ...
fseek(this->treefp, Address(store)+4, SEEK_SET);
fread(&next, sizeof(unsigned), 1, treefp);
while(store != 0)
{
this->FreeBlock(store);
store = next;
//... in every following block the link word is at the very start
fseek(treefp, Address(store), SEEK_SET);
fread(&next, sizeof(unsigned), 1, treefp);
}
if(num == 0)
return true; //node is empty!
unsigned t;
//write Node information
unsigned blocknum = this->AllocBlock();
_np->setStore(blocknum);
long address = this->Address(blocknum);
fseek(this->treefp, address, SEEK_SET);
t = _np->getFlag();
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
//leave room for the link word; it is filled in by WriteAlign or at the end
fseek(treefp, 4, SEEK_CUR);
if(!flag)
{
//internal node: num+1 child block numbers
for(i = 0; i <= num; ++i)
{
t = _np->getChild(i)->getStore();
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
this->WriteAlign(&blocknum, SpecialBlock);
}
}
for(i = 0; i < num; ++i)
this->writeBstr(_np->getKey(i), &blocknum, SpecialBlock);
if(flag)
{
for(i = 0; i < num; ++i)
this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock);
}
//terminate the chain: link word of the last block written becomes 0
fseek(treefp, Address(blocknum), SEEK_SET);
if(SpecialBlock)
fseek(treefp, 4, SEEK_CUR);
t = 0;
fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block
//_np->setFlag(_np->getFlag() & ~Node::NF_ID);
_np->delDirty();
return true;
}
//Read one length-prefixed byte string from the current file position into *_bp.
//On disk: a 4-byte length, the bytes themselves, then padding up to a multiple
//of 4. ReadAlign(_next) is invoked after each 4-byte unit so that a string may
//continue in the node's next block. The buffer is malloc'ed here and handed
//to _bp via setStr(). Always returns true.
bool
Storage::readBstr(Bstr* _bp, unsigned* _next)
{
    unsigned len;
    fread(&len, sizeof(unsigned), 1, this->treefp);
    this->ReadAlign(_next);
    //this->request(len);
    char* buf = (char*)malloc(len);
    _bp->setLen(len);
    //all full 4-byte units except the final (possibly partial) one
    unsigned off = 0;
    while(off + 4 < len)
    {
        fread(buf+off, sizeof(char), 4, treefp);
        this->ReadAlign(_next);
        off += 4;
    }
    //remaining 1..4 bytes, one at a time
    for(; off < len; ++off)
        fread(buf+off, sizeof(char), 1, treefp); //BETTER
    //skip the padding that rounds the string up to a multiple of 4 bytes
    unsigned pad = len % 4;
    if(pad > 0)
        pad = 4 - pad;
    fseek(treefp, pad, SEEK_CUR);
    this->ReadAlign(_next);
    _bp->setStr(buf);
    return true;
}
//Write *_bp at the current file position as a length-prefixed byte string:
//a 4-byte length, the bytes, then a seek over the padding up to a multiple of 4.
//WriteAlign() is invoked after each 4-byte unit so that the string can spill
//into a newly allocated block. Always returns true.
bool
Storage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
{
    unsigned len = _bp->getLen();
    fwrite(&len, sizeof(unsigned), 1, treefp);
    this->WriteAlign(_curnum, _SpecialBlock);
    char* data = _bp->getStr();
    //all full 4-byte units except the final (possibly partial) one
    unsigned off = 0;
    while(off + 4 < len)
    {
        fwrite(data+off, sizeof(char), 4, treefp);
        this->WriteAlign(_curnum, _SpecialBlock);
        off += 4;
    }
    //remaining 1..4 bytes, one at a time
    for(; off < len; ++off)
        fwrite(data+off, sizeof(char), 1, treefp);
    //step over the padding that rounds the string up to a multiple of 4 bytes
    unsigned pad = len % 4;
    if(pad > 0)
        pad = 4 - pad;
    fseek(treefp, pad, SEEK_CUR);
    this->WriteAlign(_curnum, _SpecialBlock);
    return true;
}
//Persist the whole tree: header (height, root block number, current block
//count), every node reachable from _root, and the block-usage bitmap derived
//from the free list.
//  _root: root of the tree, or NULL for an empty tree (root number 0 is written)
//Always returns true. The file is left open (the fclose below is commented out).
bool
Storage::writeTree(Node* _root) //write the whole tree back and close treefp
{
fseek(this->treefp, 0, SEEK_SET);
fwrite(this->treeheight, sizeof(unsigned), 1, treefp);
//delete all nonsense-node in heap, otherwise will waste storage permanently
Node* p;
while(1)
{ //all non-sense nodes will be in-head-area, due to minimal rank
p = minheap->getTop();
if(p == NULL) //heap is empty, only when root==NULL
break;
if(p->getRank() == 0) //indicate non-sense node
{
this->minheap->remove();
this->writeNode(p);
delete p;
}
else
break;
}
unsigned i, j, t;
//QUERY: another way to write all nodes back is to print out all nodes in heap
//but this method will cause no node in heap any more, while operations may be
//after tree-saving. Which method is better?
//write nodes recursively using stack, including root-num
if(_root != NULL)
{
Node* p = _root;
unsigned h = *this->treeheight, pos = 0;
//explicit stack, one slot per tree level
//NOTE(review): variable-length arrays are a compiler extension in C++
Node* ns[h];
//ni[k]: index of the next child of ns[k] to descend into (counts down to -1)
int ni[h];
ns[pos] = p;
ni[pos] = p->getNum();
pos++;
while(pos > 0)
{
j = pos - 1;
p = ns[j];
if(p->isLeaf() || ni[j] < 0) //leaf or all childs are ready
{
//children are written before their parent
this->writeNode(p);
pos--;
continue;
}
ns[pos] = p->getChild(ni[j]);
ni[pos] = ns[pos]->getNum();
pos++;
ni[j]--;
}
t = _root->getStore();
}
else
t = 0;
fseek(this->treefp, 4, SEEK_SET);
fwrite(&t, sizeof(unsigned), 1, treefp); //write the root num
fwrite(&cur_block_num, sizeof(unsigned), 1, treefp);//write current blocks num
fseek(treefp, BLOCK_SIZE, SEEK_SET);
j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
//reset to 1 first
for(i = 0; i < j; ++i)
{
fputc(0xff, treefp);
}
char c;
BlockInfo* bp = this->freelist->next;
while(bp != NULL)
{
//if not-use then set 0, aligned to byte!
#ifdef DEBUG_KVSTORE
if(bp->num > cur_block_num)
{
printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
exit(1);
}
#endif
//block n lives in bitmap byte (n-1)/8, bit 7-(n-1)%8
j = bp->num - 1;
i = j / 8;
j = 7 - j % 8;
//read-modify-write of a single bitmap byte
fseek(treefp, BLOCK_SIZE+i, SEEK_SET);
c = fgetc(treefp);
fseek(treefp, -1, SEEK_CUR);
fputc(c & ~(1 << j), treefp);
bp = bp->next;
}
//fclose(this->treefp);
return true;
}
//Give node _np the rank _rank and reflect that in the heap.
//  _inheap == false: the node is inserted into the heap.
//  _inheap == true:  the heap entry is adjusted via modify(); the flag passed
//                    is false when the rank grew and true when it shrank, and
//                    nothing is done when the rank is unchanged.
void
Storage::updateHeap(Node* _np, unsigned _rank, bool _inheap) const
{
    if(!_inheap) //not in heap, to add
    {
        _np->setRank(_rank);
        this->minheap->insert(_np);
        return;
    }
    //already in heap, to modify
    const unsigned old_rank = _np->getRank();
    _np->setRank(_rank);
    if(old_rank < _rank)
        this->minheap->modify(_np, false);
    else if(old_rank > _rank)
        this->minheap->modify(_np, true);
}
//Account for a change of _needmem bytes in buffer usage.
//A positive value claims memory; if not enough is free, handler() is asked to
//swap nodes out, and the process exits when that fails. A negative value
//releases memory.
void
Storage::request(int _needmem) //aligned to byte
{ //NOTICE: <0 means release
    const bool shortage = (_needmem > 0) && (this->freemem < (unsigned)_needmem);
    if(shortage && !this->handler(_needmem - freemem)) //disaster in buffer memory
    {
        print(string("error in request: out of buffer-mem, now to exit"));
        exit(1);
    }
    this->freemem -= _needmem;
}
bool
Storage::handler(unsigned _needmem) //>0
{
Node* p;
unsigned size;
//if(_needmem < SET_BUFFER_SIZE) //to recover to SET_BUFFER_SIZE buffer
// _needmem = SET_BUFFER_SIZE;
while(1)
{
p = this->minheap->getTop();
if(p == NULL)
return false; //can't satisfy or can't recover to SET_BUFFER_SIZE
this->minheap->remove();
size = p->getSize();
this->freemem += size;
this->writeNode(p);
if(p->getNum() > 0)
p->Virtual();
else
delete p; //non-sense node
if(_needmem > size)
_needmem -= size;
else
break;
}
return true;
}
//Release the free list, the buffer heap and the tree file.
//The file handle is only closed when a file was actually opened: calling
//fclose() on a null stream is undefined behaviour, and treefp is NULL for a
//default-constructed Storage.
Storage::~Storage()
{
    //release heap and freelist...
#ifdef DEBUG_KVSTORE
    printf("now to release the kvstore!\n");
#endif
    //walk the list including its dummy head
    BlockInfo* bp = this->freelist;
    BlockInfo* next;
    while(bp != NULL)
    {
        next = bp->next;
        delete bp;
        bp = next;
    }
    this->freelist = NULL;
#ifdef DEBUG_KVSTORE
    printf("already empty the freelist!\n");
#endif
    delete this->minheap; //deleting NULL is fine
    this->minheap = NULL;
#ifdef DEBUG_KVSTORE
    printf("already empty the buffer heap!\n");
#endif
    if(this->treefp != NULL)
    {
        fclose(this->treefp);
        this->treefp = NULL;
    }
    //#ifdef DEBUG_KVSTORE
    // //NOTICE:there is more than one tree
    // fclose(Util::debug_kvstore); //NULL is ok!
    // Util::debug_kvstore = NULL;
    //#endif
}
//Debug helper: when DEBUG_KVSTORE is defined, append a time-stamped message
//tagged "Class Storage" to Util::debug_kvstore; otherwise do nothing.
void
Storage::print(string s)
{
#ifdef DEBUG_KVSTORE
    FILE* const log = Util::debug_kvstore;
    fputs(Util::showtime().c_str(), log);
    fputs("Class Storage\n", log);
    fputs("Message: ", log);
    fputs(s.c_str(), log);
    fputs("\n", log);
#endif
}

View File

@ -1,74 +0,0 @@
/*=============================================================================
# Filename: Storage.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:43
# Description: swap between memory and disk, achieving system-like method
=============================================================================*/
#ifndef _KVSTORE_STORAGE_STORAGE_H
#define _KVSTORE_STORAGE_STORAGE_H
#include "../node/IntlNode.h"
#include "../node/LeafNode.h"
#include "../heap/Heap.h"
#include "file.h"
//It controls read, write, swap
//Storage manages the disk file of one tree: it maps block numbers to file
//offsets, keeps the list of free blocks, reads and writes nodes, and tracks
//the buffer-memory budget, swapping nodes out through a heap when needed.
class Storage
{
public:
static const unsigned BLOCK_SIZE = 1 << 16; //fixed size of disk-block
//there are 18 B+Tree indexes and one vstree index, so set 3G buffer size
//static const unsigned long long MAX_BUFFER_SIZE = 0xC0000000; //max buffer size
//static const unsigned long long MAX_BUFFER_SIZE = 0x1ffffffff; //max buffer size
static const unsigned long long MAX_BUFFER_SIZE = 0xffffffff; //max buffer size
//static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size
//capacity handed to the Heap constructor
static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE/Node::INTL_SIZE;
static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num
//the two constants below must be exactly divisible by 8
static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
//number of blocks at the start of the file that precede normal block 1:
//one header block plus the blocks needed for a MAX_BLOCK_NUM-bit usage bitmap
static const unsigned SuperNum = MAX_BLOCK_NUM/(8*BLOCK_SIZE)+1;
//static const unsigned TRANSFER_CAPACITY = BLOCK_SIZE;
//enum ReadType { OVER = 0, EXPAND, NORMAL };
private:
unsigned cur_block_num; //highest block number currently valid in the file
std::string filepath;
unsigned* treeheight; //points at the height variable passed to the constructor
BlockInfo* freelist; //free block numbers; the first element is a dummy head
FILE* treefp; //file: tree nodes
Heap* minheap; //heap of Nodes's pointer, sorted in NF_RK
//NOTICE: freemem's type is long long here, due to large memory in server.
//However, needmem in handler() and request() is ok to be int/unsigned.
//Because the bstr' size is controlled, so is the node.
unsigned long long freemem; //free memory to use, non-negative
//unsigned long long time; //QUERY(achieving an old-swap strategy?)
long Address(unsigned _blocknum) const; //block number -> byte offset in file
unsigned Blocknum(long address) const; //byte offset in file -> block number
unsigned AllocBlock(); //take a block number from the free list
void FreeBlock(unsigned _blocknum); //put a block number back on the free list
void ReadAlign(unsigned* _next); //follow the chain to the next block at a block boundary
//NOTE: the definition names the first parameter _curnum (the block being written)
void WriteAlign(unsigned* _next, bool& _SpecialBlock);
public:
Storage();
Storage(std::string& _filepath, std::string& _mode, unsigned* _height);//create a fixed-size file or open an existence
bool preRead(Node*& _root, Node*& _leaves_head, Node*& _leaves_tail); //read and build all nodes, only root in memory
bool readNode(Node* _np, int* _request); //read, if virtual
bool createNode(Node*& _np); //use fp to create a new node
//NOTICE(if children and child not exist, build children's Nodes)
bool writeNode(Node* _np);
bool readBstr(Bstr* _bp, unsigned* _next);
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock);
bool writeTree(Node* _np);
void updateHeap(Node* _np, unsigned _rank, bool _inheap) const;
void request(int _needmem); //deal with memory request
bool handler(unsigned _needmem); //swap some nodes out
//bool update(); //update InMem Node's rank, with clock
~Storage();
void print(std::string s); //DEBUG
};
#endif

View File

@ -1,90 +0,0 @@
/*=============================================================================
# Filename: file.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:42
# Description: disk file memlayout
=============================================================================*/
#ifndef _KVSTORE_STORAGE_FILE_H
#define _KVSTORE_STORAGE_FILE_H
#include "../../Util/Util.h"
/******** manage the disk-file as Blocks-list ********/
/* All blocks of a given file are viewed as an array that is extended dynamically.
* The first block (0, the super-block) holds information
* about the whole file (for example, the root Node's block),
* in particular a bitset use[MAXBN] that records which
* blocks are in use.
* In practice, normal blocks are numbered from 1 to MAXBN-1,
* so 0 can be used as a terminator.
* (A normal block is made of a header and data.)
* When a file is opened, the program must read this bitset and build a
* free list (which may take several tens of MB of memory),
* and must remember to write the bitset back when closing.
* Each Tree-Node is stored as a unit, which may span several
* blocks that need not be contiguous.
* While the tree is open, it is better to keep the root Node in
* memory all the time.
*/
/*
struct Header
{ //this is the header information at the
//beginning of each block, then the data
//
//If this the first block of a node, we must also store
* the necessary information about the node. For example,
* a bit indicates whether a leaf-node, deciding how it
* should be read. The first block's prev and the
//final block's next should be 0
//blockaddr_t prev;
blockaddr_t next; //WARN(maybe larger type!)
//unsigned short end; //valid data:0~end
};
struct SuperBlock //SuperNum blocks, numbered 0
{
unsigned height;
unsigned rootnum; //use a whole block, may store other information
//for example, nodes's num
char use[BNWD]; //exactly SuperNum-1 blocks
};
//numbered from 1 to MAX_BLOCK_NUM
struct Node //may use several blocks, not must continuously
{
unsigned flag; //only in first block, special-block
unsigned next; //each real data-block, 0 means the end
information:
unsigned num;
unsigned childs[]; //only in IntlNodes
Bstr keys[];
Bstr values[]; //only in LeafNodes
};
*/
//When stored in disk, every Node* pointer should be changed to block-address
//(a bit indicates whether a leaf!),
//and char* should be changed to the real string.
//One element of the singly linked list of free block numbers.
class BlockInfo
{
public:
    unsigned num;    //block number held by this element
    BlockInfo* next; //following element, NULL at the end of the list

    //element with block number 0 and no successor
    BlockInfo() : num(0), next(NULL)
    {
    }

    //element holding _num that is linked in front of _bp
    BlockInfo(unsigned _num, BlockInfo* _bp) : num(_num), next(_bp)
    {
    }
};
#endif

View File

@ -1,689 +0,0 @@
/*=============================================================================
# Filename: Tree.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:45
# Description: achieve functions in Tree.h
=============================================================================*/
#include "Tree.h"
using namespace std;
//Each tree operation should be atomic with respect to the nodes it reads:
//the memory deltas caused by one operation are summed up in `request` and
//sent to Storage (TSM->request) at the end of the operation.
//Ensure that all nodes operated on are in memory.
//NOTE: this is a single file-level variable, so it is shared by every Tree
//object defined through this file.
int request = 0;
//Default constructor: an empty tree that is not attached to any storage.
Tree::Tree()
    : height(0),
      root(NULL),
      leaves_head(NULL),
      leaves_tail(NULL),
      mode(""),
      TSM(NULL),
      stream(NULL),
      storepath(""),
      filename("")
{
    for(unsigned slot = 0; slot < 3; ++slot)
        transfer_size[slot] = 0;
}
//Create a tree stored in _storepath/_filename.
//  _mode: "open" loads the node skeleton of an existing file through
//         Storage::preRead; any other value starts with an empty tree
//The three transfer buffers are allocated with Util::TRANSFER_SIZE bytes each.
//root, leaves_head and leaves_tail are all reset to NULL up front: previously
//only root was set when the tree was not opened, leaving the two leaf
//pointers uninitialized for an empty tree.
Tree::Tree(const string& _storepath, const string& _filename, const char* _mode)
{
    storepath = _storepath;
    filename = _filename;
    this->height = 0;
    this->mode = string(_mode);
    this->root = NULL;
    this->leaves_head = NULL;
    this->leaves_tail = NULL;
    string filepath = this->getFilePath();
    TSM = new Storage(filepath, this->mode, &this->height);
    if(this->mode == "open")
        this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
    this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
    this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
    this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
    this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE; //initialized to 1M
    this->stream = NULL;
}
//Path of the tree file: storepath and filename joined by '/'.
string
Tree::getFilePath()
{
    string path(storepath);
    path += "/";
    path += filename;
    return path;
}
//Copy _len bytes of _str into transfer buffer _index (0..2) and terminate the
//copy with '\0'. The buffer is re-allocated when it is too small.
//  _str:   source bytes; NULL is treated as an empty string
//  _len:   number of bytes to copy
//  _index: which transfer buffer to fill; values above 2 are ignored
//memcpy() must not be given a null source even for a zero length, and the
//value passed through insert()/modify() is not checked by them, so the copy
//is skipped when there is nothing to copy.
void
Tree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
{
    if(_index > 2)
        return;
    const unsigned length = (_str == NULL) ? 0 : _len;
    if(length + 1 > this->transfer_size[_index])
    {
        transfer[_index].release();
        transfer[_index].setStr((char*)malloc(length+1));
        this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
    }
    if(length > 0)
        memcpy(this->transfer[_index].getStr(), _str, length);
    this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
    this->transfer[_index].setLen(length);
}
unsigned
Tree::getHeight() const
{
return this->height;
}
void
Tree::setHeight(unsigned _h)
{
this->height = _h;
}
//Root node of the tree (NULL when the tree is empty).
Node*
Tree::getRoot() const
{
    return root;
}
//Make sure node _np is loaded: if it is not in memory, have the storage
//manager read it; the memory this costs is added to the global `request`.
void
Tree::prepare(Node* _np) const
{
    if(_np->inMem())
        return;
    this->TSM->readNode(_np, &request); //readNode deal with request
}
bool
Tree::search(const char* _str1, unsigned _len1, char*& _str2, int& _len2)
{
const Bstr* value = NULL;
if(_str1 == NULL || _len1 == 0)
{
printf("error in Tree-search: empty string\n");
return false;
}
this->CopyToTransfer(_str1, _len1, 1);
bool ret = this->search(&transfer[1], value);
if(ret)
{
_str2 = value->getStr();
_len2 = value->getLen();
}
return ret;
}
bool
Tree::search(const Bstr* _key, const Bstr*& _value)
{
request = 0;
Bstr bstr = *_key; //not to modify its memory
int store;
Node* ret = this->find(_key, &store, false);
if(ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
{
bstr.clear();
return false;
}
const Bstr* val = ret->getValue(store);
this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
_value = &transfer[0];
this->TSM->request(request);
bstr.clear();
return true;
}
//Insert a key/value pair given as raw bytes. An empty key is rejected with an
//error message; the value is not checked. Both are staged in the transfer
//buffers and passed on to the Bstr overload, whose result is returned.
bool
Tree::insert(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2)
{
    const bool empty_key = (_str1 == NULL || _len1 == 0);
    if(empty_key)
    {
        printf("error in Tree-insert: empty string\n");
        return false;
    }
    this->CopyToTransfer(_str1, _len1, 1);
    this->CopyToTransfer(_str2, _len2, 2); //not check value
    return this->insert(&transfer[1], &transfer[2]);
}
//Insert the pair (*_key, *_value).
//Full nodes met on the way down are split before descending into them, so
//the leaf finally reached always has room. Returns true when the pair was
//added and false when the key was already present (the value is then left
//untouched, although splits performed on the way down remain).
//Memory deltas are summed in the global `request` and sent to the storage
//manager once at the end.
bool
Tree::insert(const Bstr* _key, const Bstr* _value)
{
request = 0;
Node* ret;
if(this->root == NULL) //tree is empty
{
leaves_tail = leaves_head = root = new LeafNode;
request += Node::LEAF_SIZE;
this->height = 1;
root->setHeight(1); //add to heap later
}
//this->prepare(this->root); //root must be in-mem
//a full root is split under a new internal root; this is the only place
//where the tree grows in height
if(root->getNum() == Node::MAX_KEY_NUM)
{
Node* father = new IntlNode;
request += Node::INTL_SIZE;
father->addChild(root, 0);
ret = root->split(father, 0);
//ret is the node produced by the split; a leaf without successor is the new tail
if(ret->isLeaf() && ret->getNext() == NULL)
this->leaves_tail = ret;
if(ret->isLeaf())
request += Node::LEAF_SIZE;
else
request += Node::INTL_SIZE;
this->height++; //height rises only when root splits
//WARN: height area in Node: 4 bit!
father->setHeight(this->height); //add to heap later
this->TSM->updateHeap(ret, ret->getRank(), false);
this->root = father;
}
Node* p = this->root;
Node* q;
int i, j;
//local copy of the key for the comparisons below; cleared before returning
Bstr bstr = *_key;
while(!p->isLeaf())
{
//j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
//NOTICE: using binary search is better here
i = p->searchKey_less(bstr);
q = p->getChild(i);
this->prepare(q);
if(q->getNum() == Node::MAX_KEY_NUM)
{
//split the full child first, then pick the half the key belongs to
ret = q->split(p, i);
if(ret->isLeaf() && ret->getNext() == NULL)
this->leaves_tail = ret;
if(ret->isLeaf())
request += Node::LEAF_SIZE;
else
request += Node::INTL_SIZE;
//BETTER: in loop may update multiple times
this->TSM->updateHeap(ret, ret->getRank(), false);
this->TSM->updateHeap(q, q->getRank(), true);
this->TSM->updateHeap(p, p->getRank(), true);
if(bstr < *(p->getKey(i)))
p = q;
else
p = ret;
}
else
{
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
p = q;
}
}
//j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
//insert existing key is ok, but not inserted in
//however, the tree-shape may change due to possible split in former code
bool ifexist = false;
//the key already exists if the slot just before position i holds it
if(i > 0 && bstr == *(p->getKey(i-1)))
ifexist = true;
else
{
p->addKey(_key, i, true);
p->addValue(_value, i, true);
p->addNum();
request += (_key->getLen() + _value->getLen());
p->setDirty();
this->TSM->updateHeap(p, p->getRank(), true);
//_key->clear();
//_value->clear();
}
this->TSM->request(request);
bstr.clear(); //NOTICE: must be cleared!
return !ifexist; //QUERY(which case:return false)
}
//Replace the value of an existing key, both given as raw bytes. An empty key
//is rejected with an error message; the value is not checked. Returns the
//result of the Bstr overload.
bool
Tree::modify(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2)
{
    const bool empty_key = (_str1 == NULL || _len1 == 0);
    if(empty_key)
    {
        printf("error in Tree-modify: empty string\n");
        return false;
    }
    this->CopyToTransfer(_str1, _len1, 1);
    this->CopyToTransfer(_str2, _len2, 2); //not check value
    return this->modify(&transfer[1], &transfer[2]);
}
bool
Tree::modify(const Bstr* _key, const Bstr* _value)
{
request = 0;
Bstr bstr = *_key;
int store;
Node* ret = this->find(_key, &store, true);
if(ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
{
bstr.clear();
return false;
}
unsigned len = ret->getValue(store)->getLen();
ret->setValue(_value, store, true);
request += (_value->getLen()-len);
//_value->clear();
ret->setDirty();
this->TSM->request(request);
bstr.clear();
return true;
}
//Shared by search, modify and range-query: descend from the root to the leaf
//responsible for *_key.
//  _store:   receives the index returned by searchKey_lessEqual in that leaf,
//            or -1 when that index equals the leaf's key count (not found)
//  ifmodify: when true, every internal node on the path is marked dirty
//Returns the leaf reached, or NULL when the tree is empty.
Node* //return the first key's position that >= *_key
Tree::find(const Bstr* _key, int* _store, bool ifmodify) const
{ //to assign value for this->bstr, function shouldn't be const!
    if(this->root == NULL)
        return NULL; //Tree Is Empty
    Bstr probe = *_key; //local Bstr: multiple delete
    Node* cur = root;
    while(!cur->isLeaf())
    {
        if(ifmodify)
            cur->setDirty();
        const int child = cur->searchKey_less(probe);
        cur = cur->getChild(child);
        this->prepare(cur);
    }
    const int count = cur->getNum();
    const int slot = cur->searchKey_lessEqual(probe);
    *_store = (slot == count) ? -1 : slot;
    probe.clear();
    return cur;
}
/*
Node*
Tree::find(unsigned _len, const char* _str, int* store) const
{
}
*/
//Remove the key given as raw bytes. An empty key is rejected with an error
//message; otherwise the key is staged in transfer[1] and the Bstr overload
//does the work.
bool
Tree::remove(const char* _str, unsigned _len)
{
    const bool empty_key = (_str == NULL || _len == 0);
    if(empty_key)
    {
        printf("error in Tree-remove: empty string\n");
        return false;
    }
    this->CopyToTransfer(_str, _len, 1);
    return this->remove(&transfer[1]);
}
//Remove *_key from the tree.
//Children that have fewer than MIN_CHILD_NUM keys are coalesced with their
//siblings on the way down, so the leaf finally reached can lose a key.
//Returns true when the key was found and removed, false when the tree is
//empty or the key does not exist.
//The index returned by searchKey_equal is now validated before it is used:
//previously a missing key led to getKey()/getValue()/subKey() being called
//with an out-of-range index and to the function reporting success.
bool //BETTER: if not found, the road are also dirty! find first?
Tree::remove(const Bstr* _key)
{
    request = 0;
    Node* ret;
    if(this->root == NULL) //tree is empty
        return false;
    Node* p = this->root;
    Node* q;
    int i, j;
    Bstr bstr = *_key;
    while(!p->isLeaf())
    {
        j = p->getNum();
        //for(i = 0; i < j; ++i)
        //if(bstr < *(p->getKey(i)))
        //break;
        i = p->searchKey_less(bstr);
        q = p->getChild(i);
        this->prepare(q);
        if(q->getNum() < Node::MIN_CHILD_NUM) //==MIN_KEY_NUM
        {
            //both neighbours may take part in the coalesce, load them first
            if(i > 0)
                this->prepare(p->getChild(i-1));
            if(i < j)
                this->prepare(p->getChild(i+1));
            ret = q->coalesce(p, i);
            if(ret != NULL)
                this->TSM->updateHeap(ret, 0, true);//non-sense node
            this->TSM->updateHeap(q, q->getRank(), true);
            if(q->isLeaf())
            {
                if(q->getPrev() == NULL)
                    this->leaves_head = q;
                if(q->getNext() == NULL)
                    this->leaves_tail = q;
            }
            if(p->getNum() == 0) //root shrinks
            {
                //this->leaves_head = q;
                this->root = q;
                this->TSM->updateHeap(p, 0, true); //instead of delete p
                this->height--;
            }
        }
        else
            p->setDirty();
        this->TSM->updateHeap(p, p->getRank(), true);
        p = q;
    }
    bool flag = false;
    i = p->searchKey_equal(bstr);
    //only a valid slot index means the key exists in this leaf
    if(i >= 0 && (unsigned)i < (unsigned)p->getNum())
    {
        request -= p->getKey(i)->getLen();
        request -= p->getValue(i)->getLen();
        p->subKey(i, true); //to release
        p->subValue(i, true); //to release
        p->subNum();
        if(p->getNum() == 0) //root leaf 0 key
        {
            this->root = NULL;
            this->leaves_head = NULL;
            this->leaves_tail = NULL;
            this->height = 0;
            this->TSM->updateHeap(p, 0, true); //instead of delete p
        }
        p->setDirty();
        flag = true;
    }
    this->TSM->request(request);
    bstr.clear();
    return flag; //false: key not found
}
//Fetch the next record of the last range query.
//Returns NULL, after printing a message to stderr, when there is no result
//stream or the stream has been read to its end.
const Bstr*
Tree::getRangeValue()
{
    if(this->stream != NULL && !this->stream->isEnd())
    {
        //NOTICE:this is one record, and donot free the memory!
        //NOTICE:Bstr[] but only one element, used as Bstr*
        return this->stream->read();
    }
    if(this->stream == NULL)
        fprintf(stderr, "Tree::getRangeValue(): no results now!\n");
    else
        fprintf(stderr, "Tree::getRangeValue(): read till end now!\n");
    return NULL;
}
//Call setEnd() on the result stream of the last range query; prints a message
//to stderr when there is no stream.
void
Tree::resetStream()
{
    if(this->stream != NULL)
    {
        this->stream->setEnd();
        return;
    }
    fprintf(stderr, "no results now!\n");
}
//Collect the values of all keys x with *_key1 <= x < *_key2 into this->stream.
//  _key1: lower bound, NULL means "from the first leaf"
//  _key2: upper bound (exclusive), NULL means "to the end of the last leaf"
//Returns false when there is no element to report, true once the stream has
//been filled; the values are then read with getRangeValue().
//Guards added over the previous version:
//  - an empty tree returns false instead of dereferencing a NULL leaf pointer;
//  - an inverted or empty range whose end leaf lies before its start leaf
//    returns false instead of walking past the end of the leaf list;
//  - start index > end index inside one leaf returns false instead of letting
//    the unsigned answer count underflow.
bool //special case: not exist, one-edge-case
Tree::range_query(const Bstr* _key1, const Bstr* _key2)
{ //the range is: *_key1 <= x < *_key2
    //if(_key1 == NULL && _key2 == NULL)
    //return false;
    //ok to search one-edge, requiring only one be NULL
    if(this->root == NULL)
        return false; //empty tree: no element
    //find and write value
    int store1, store2;
    Node *p1, *p2;
    if(_key1 != NULL)
    {
        request = 0;
        p1 = this->find(_key1, &store1, false);
        if(p1 == NULL || store1 == -1)
            return false; //no element
        this->TSM->request(request);
    }
    else
    {
        p1 = this->leaves_head;
        if(p1 == NULL)
            return false; //no leaf list
        store1 = 0;
    }
    if(_key2 != NULL)
    { //QUERY: another strategy is to getnext and compare every time to tell end
        request = 0;
        p2 = this->find(_key2, &store2, false);
        if(p2 == NULL)
            return false;
        else if(store2 == -1)
            store2 = p2->getNum();
        else if(store2 == 0)
        {
            //the bound is the first key of its leaf: the range ends with the previous leaf
            p2 = p2->getPrev();
            if(p2 == NULL)
                return false; //no element
            store2 = p2->getNum();
        }
        this->TSM->request(request);
    }
    else
    {
        p2 = this->leaves_tail;
        if(p2 == NULL)
            return false; //no leaf list
        store2 = p2->getNum();
    }
    Node* p = p1;
    unsigned i, l, r;
    //get the num of answers first, not need to prepare the node
    unsigned ansNum = 0;
    while(true)
    {
        if(p == p1)
            l = store1;
        else
            l = 0;
        if(p == p2)
            r = store2;
        else
            r = p->getNum();
        if(l > r)
            return false; //inverted bounds inside one leaf: no element
        ansNum += (r - l);
        if(p == p2)
            break;
        p = p->getNext();
        if(p == NULL)
            return false; //end leaf is not behind start leaf: no element
    }
    if(this->stream != NULL)
    {
        delete this->stream;
        this->stream = NULL;
    }
    vector<int> keys;
    vector<bool> desc;
    this->stream = new Stream(keys, desc, ansNum, 1, false);
    //second pass: load each leaf and copy its values into the stream
    p = p1;
    while(1)
    {
        request = 0;
        this->prepare(p);
        if(p == p1)
            l = store1;
        else
            l = 0;
        if(p == p2)
            r = store2;
        else
            r = p->getNum();
        for(i = l; i < r; ++i)
        {
            //NOTICE:Bstr* in an array, used as Bstr[]
            this->stream->write(p->getValue(i));
        }
        this->TSM->request(request);
        if(p != p2)
            p = p->getNext();
        else
            break;
    }
    this->stream->setEnd();
    return true;
}
//Write the whole tree to disk through the storage manager and return its result.
bool
Tree::save() //save the whole tree to disk
{
#ifdef DEBUG_KVSTORE
    printf("now to save tree!\n");
#endif
    const bool written = TSM->writeTree(this->root);
    return written;
}
//Recursively delete the subtree rooted at _np (children first, from the last
//child down to child 0, then the node itself). NULL is ignored.
void
Tree::release(Node* _np) const
{
    if(_np == NULL)
        return;
    if(!_np->isLeaf())
    {
        for(int idx = _np->getNum(); idx >= 0; --idx)
            release(_np->getChild(idx));
    }
    delete _np;
}
//Destroy the tree: the result stream, then the storage manager, then every
//node reachable from the root.
//The tree is NOT saved here; save() has to be called beforehand if the
//contents are to be kept.
Tree::~Tree()
{
delete this->stream; //maybe NULL
delete TSM;
#ifdef DEBUG_KVSTORE
printf("already empty the buffer, now to delete all nodes in tree!\n");
#endif
//recursively delete each Node
release(root);
}
//Debug helper; does nothing unless DEBUG_KVSTORE is defined.
//Writes a time-stamped header, the message s and the tree height to
//Util::debug_kvstore. Some values of s additionally dump tree content:
//  "tree" / "TREE":     every node, children before their parent
//  "leaves" / "LEAVES": every leaf, following the leaf list from leaves_head
//  "check tree":        placeholder, not implemented
//The lower-case variants call Node::print("node"), the upper-case ones
//Node::print("NODE").
void
Tree::print(string s)
{
#ifdef DEBUG_KVSTORE
fputs(Util::showtime().c_str(), Util::debug_kvstore);
fputs("Class Tree\n", Util::debug_kvstore);
fputs("Message: ", Util::debug_kvstore);
fputs(s.c_str(), Util::debug_kvstore);
fputs("\n", Util::debug_kvstore);
fprintf(Util::debug_kvstore, "Height: %d\n", this->height);
if(s == "tree" || s == "TREE")
{
if(this->root == NULL)
{
fputs("Null Tree\n", Util::debug_kvstore);
return;
}
//explicit stack with one slot per tree level:
//ns[k] is the node at depth k, ni[k] the next child index to visit (counts down)
Node** ns = new Node*[this->height];
int* ni = new int[this->height];
Node* np;
int i, pos = 0;
ns[pos] = this->root;
ni[pos] = this->root->getNum();
pos++;
while(pos > 0)
{
np = ns[pos-1];
i = ni[pos-1];
//load the node if it is not in memory
this->prepare(np);
if(np->isLeaf() || i < 0) //LeafNode or ready IntlNode
{ //child-num ranges: 0~num
if(s == "tree")
np->print("node");
else
np->print("NODE"); //print full node-information
pos--;
continue;
}
else
{
ns[pos] = np->getChild(i);
ni[pos-1]--;
ni[pos] = ns[pos]->getNum();
pos++;
}
}
delete[] ns;
delete[] ni;
}
else if(s == "LEAVES" || s == "leaves")
{
Node* np;
for(np = this->leaves_head; np != NULL; np = np->getNext())
{
this->prepare(np);
if(s == "leaves")
np->print("node");
else
np->print("NODE");
}
}
else if(s == "check tree")
{
//check the tree, if satisfy B+ definition
//TODO
}
else;
#endif
}

View File

@ -1,79 +0,0 @@
/*=============================================================================
# Filename: Tree.h
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-04-26 16:44
# Description: struct and interface of the B+ tree
=============================================================================*/
#ifndef _KVSTORE_TREE_TREE_H
#define _KVSTORE_TREE_TREE_H
#include "../../Util/Util.h"
#include "../../Util/Stream.h"
#include "../node/Node.h"
#include "../node/IntlNode.h"
#include "../node/LeafNode.h"
#include "../storage/Storage.h"
//B+ tree: holds the root, the leaf list, the storage manager and a stream, and
//offers search/insert/modify/remove, range query, save and debug printing.
//NOTE(review): ~Tree() deletes stream and TSM and releases all nodes, but no copy
//constructor or assignment operator is declared here - copying a Tree would
//presumably lead to a double delete; confirm that no caller copies a Tree.
class Tree
{
private:
unsigned int height; //0 indicates an empty tree
Node* root;
Node* leaves_head; //the head of LeafNode-list
Node* leaves_tail; //the tail of LeafNode-list
std::string mode; //BETTER(to use enum)
Storage* TSM; //Tree-Storage-Manage, deleted in ~Tree()
//BETTER:multiple streams may be needed:)
Stream* stream; //may be NULL, deleted in ~Tree()
//always allocate one more byte than length, so that the user can add a '\0'
//to get a real string, instead of doing a new and a copy.
//other operations would be harmful to search, so the value is stored in
//transfer temporarily, with the length adjusted.
//TODO: in the multi-user case, multiple searches will cause problems,
//so a lock is a must. Adding a lock to transfer is better than adding
//a lock to every key/value. However, modify requires a lock for a
//key/value, and multiple searches for different keys are ok!!!
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
unsigned transfer_size[3];
std::string storepath;
std::string filename; //ok for user to change
/* some private functions */
std::string getFilePath(); //in UNIX system
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
//recursively delete the subtree rooted at _np (NULL is ignored)
void release(Node* _np) const;
void prepare(Node* _np) const;
public:
Tree(); //always need to initialize transfer
Tree(const std::string& _storepath, const std::string& _filename, const char* _mode);
unsigned int getHeight() const;
void setHeight(unsigned _h);
Node* getRoot() const;
//void setRoot(Node* _root);
//insert, search, remove, set
//NOTE(review): in the char* overloads _str1/_len1 presumably carry the key and
//_str2/_len2 the value - TODO confirm against the implementation
bool search(const char* _str1, unsigned _len1, char*& _str2, int& _len2);
bool search(const Bstr* _key1, const Bstr*& _value);
bool insert(const Bstr* _key, const Bstr* _value);
bool insert(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2);
bool modify(const Bstr* _key, const Bstr* _value);
bool modify(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2);
Node* find(const Bstr* _key, int* store, bool ifmodify) const;
//Node* find(unsigned _len, const char* _str, int* store) const;
bool remove(const Bstr* _key);
bool remove(const char* _str, unsigned _len);
const Bstr* getRangeValue();
void resetStream();
bool range_query(const Bstr* _key1, const Bstr* _key2);
//pass the root to TSM->writeTree(); true when that call reports success
bool save();
//deletes stream and TSM, then releases every node starting from root
~Tree();
void print(std::string s); //DEBUG(print the tree)
};
//NOTICE: the tree needs to be saved manually before it is deleted, otherwise problems will occur.
//(the problem ranges between two extremes: not-modified and totally-modified)
//After the tree has been saved, it is ok to continue operating on it!
#endif

675
LICENSE
View File

@ -1,675 +0,0 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
{one line to give the program's name and a brief idea of what it does.}
Copyright (C) {year} {name of author}
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
{project} Copyright (C) {year} {fullname}
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.

File diff suppressed because it is too large Load Diff

View File

@ -1,53 +0,0 @@
/*=============================================================================
# Filename: gload.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-24 19:27
# Description: firstly written by liyouhuan, modified by zengli
TODO: add -h/--help for help message
=============================================================================*/
#include "../Util/Util.h"
#include "../Database/Database.h"
using namespace std;
//[0]./gload [1]data_folder_path [2]rdf_file_path
int
main(int argc, char * argv[])
{
//chdir(dirname(argv[0]));
Util util;
//system("clock");
cout << "gload..." << endl;
{
cout << "argc: " << argc << "\t";
cout << "DB_store:" << argv[1] << "\t";
cout << "RDF_data: " << argv[2] << "\t";
cout << endl;
}
string _db_path = string(argv[1]);
//if(_db_path[0] != '/' && _db_path[0] != '~') //using relative path
//{
//_db_path = string("../") + _db_path;
//}
string _rdf = string(argv[2]);
//if(_rdf[0] != '/' && _rdf[0] != '~') //using relative path
//{
//_rdf = string("../") + _rdf;
//}
Database _db(_db_path);
bool flag = _db.build(_rdf);
if (flag)
{
cout << "import RDF file to database done." << endl;
}
else
{
cout << "import RDF file to database failed." << endl;
}
//system("clock");
return 0;
}

View File

@ -1,256 +0,0 @@
/*=============================================================================
# Filename: gquery.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-20 12:23
# Description: query a database, there are several ways to use this program:
1. ./gquery print the help message
2. ./gquery --help simplified as -h, equal to 1
3. ./gquery db_folder query_path load query from given path for given database
4. ./gquery db_folder load the given database and open console
=============================================================================*/
#include "../Database/Database.h"
#include "../Util/Util.h"
using namespace std;
//WARN:cannot support soft links!
//Print the usage banner of gquery on stdout.
//The text is a fixed message, so it is written with fputs instead of being
//passed to printf as a format string.
void
help()
{
	fputs(
		"/*=============================================================================\n"
		"# Filename: gquery.cpp\n"
		"# Author: Bookug Lobert\n"
		"# Mail: 1181955272@qq.com\n"
		"# Last Modified: 2015-10-20 12:23\n"
		"# Description: query a database, there are several ways to use this program:\n"
		"1. ./gquery print the help message\n"
		"2. ./gquery --help simplified as -h, equal to 1\n"
		"3. ./gquery db_folder query_path load query from given path for given database\n"
		"4. ./gquery db_folder load the given database and open console\n"
		"=============================================================================*/\n",
		stdout);
}
//Entry point of gquery.
//
//  ./gquery                                print the help message
//  ./gquery -h | --help                    same as above
//  ./gquery db_folder query_path [out]     run the query stored in query_path;
//                                          when a 4th argument is given the result
//                                          text is also saved to that file
//  ./gquery db_folder                      load the database and open the console
//                                          (only compiled in with READLINE_ON)
//
//Console commands: help, quit, "sparql <query_file> [> <output_file>]".
//Always returns 0.
int
main(int argc, char * argv[])
{
	//chdir(dirname(argv[0]));
	//NOTE(review): presumably constructed for its side effects - confirm in Util
	Util util;

	//no database given or help requested; argc is tested first so that argv[1]
	//is only read when it exists
	if(argc < 2 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
	{
		help();
		return 0;
	}

	cout << "gquery..." << endl;
	{
		cout << "argc: " << argc << "\t";
		cout << "DB_store:" << argv[1] << "\t";
		cout << endl;
	}

	//the folder is used exactly as given on the command line
	string db_folder = string(argv[1]);
	Database _db(db_folder);
	_db.load();
	cout << "finish loading" << endl;

	// read query from file.
	if (argc >= 3)
	{
		string query = Util::getQueryFromFile(argv[2]);
		if (query.empty())
		{
			return 0;
		}
		printf("query is:\n%s\n\n", query.c_str());
		ResultSet _rs;
		_db.query(query, _rs, stdout);

		if (argc >= 4)
		{
			Util::save_to_file(argv[3], _rs.to_str());
		}
		return 0;
	}

	// read query file path from terminal.
	// BETTER: sighandler ctrl+C/D/Z
#ifdef READLINE_ON
	string query;
	char *buf, prompt[] = "gsql>";
	printf("Type `help` for information of all commands\n");
	printf("Type `help command_t` for detail of command_t\n");
	rl_bind_key('\t', rl_complete);
	while(true)
	{
		buf = readline(prompt);
		//readline yields NULL on end of input (e.g. ctrl+D on an empty line);
		//retrying would spin forever, so leave the console instead
		if(buf == NULL)
			break;
		add_history(buf);

		//every path below owns buf and must free it before the next iteration
		if(strncmp(buf, "help", 4) == 0)
		{
			if(strcmp(buf, "help") == 0)
			{
				//print commands message
				printf("help - print commands message\n");
				printf("quit - quit the console normally\n");
				printf("sparql - load query from the second argument\n");
			}
			else
			{
				//TODO: help for a given command
			}
			free(buf);
			continue;
		}
		else if(strcmp(buf, "quit") == 0)
		{
			free(buf);
			break;
		}
		else if(strncmp(buf, "sparql", 6) != 0)
		{
			printf("unknown commands\n");
			free(buf);
			continue;
		}

		//TODO: sparql + string, not only path
		//BETTER:build a parser for this console
		//the last '>' in the line, if any, introduces the output file
		char* rp = strrchr(buf, '>');
		bool ifredirect = (rp != NULL);
		char* p = buf + strlen(buf) - 1;	//last character of the line
		FILE* fp = stdout;	//default to output on screen
		if(ifredirect)
		{
			//cut trailing blanks off the output file name
			char* tp = p;
			while(*tp == ' ' || *tp == '\t')
				tp--;
			*(tp+1) = '\0';
			//skip the blanks between '>' and the file name
			tp = rp + 1;
			while(*tp == ' ' || *tp == '\t')
				tp++;
			fp = fopen(tp, "w");
			if(fp == NULL)
			{
				//writing to or closing a NULL stream would crash later on
				printf("cannot open output file: %s\n", tp);
				free(buf);
				continue;
			}
			p = rp - 1;	//NOTICE: all separated with ' ' or '\t'
		}
		while(*p == ' ' || *p == '\t')	//set the end of path
			p--;
		*(p+1) = '\0';
		p = buf + 6;
		while(*p == ' ' || *p == '\t')	//acquire the start of path
			p++;

		//TODO: support the soft links(or hard links)
		//there are also readlink and getcwd functions for help
		//http://linux.die.net/man/2/readlink
		//NOTICE: realpath(p, NULL) allocates the result, it must be freed
		char* q = realpath(p, NULL);	//QUERY:still not work for soft links
#ifdef DEBUG_PRECISE
		printf("%s\n", p);
#endif
		if(q == NULL)
		{
			printf("invalid path!\n");
			free(buf);
			if(ifredirect)
				fclose(fp);	//do not leak the redirect target
			continue;
		}
		printf("%s\n", q);

		query = Util::getQueryFromFile(q);
		if(query.empty())
		{
			free(q);
			free(buf);
			if(ifredirect)
				fclose(fp);
			continue;
		}
		printf("query is:\n");
		printf("%s\n\n", query.c_str());
		ResultSet _rs;
		_db.query(query, _rs, fp);

		free(q);
		free(buf);
		if(ifredirect)
			fclose(fp);
#ifdef DEBUG_PRECISE
		printf("after buf freed!\n");
#endif
	}
	//NOTE: there is no console without READLINE_ON; an earlier cin-based loop
	//was disabled because it did not work
#endif // READLINE_ON

	return 0;
}

View File

@ -1,33 +0,0 @@
/*=============================================================================
# Filename: gserver.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2016-02-26 19:15
# Description: first written by hanshuo, modified by zengli
=============================================================================*/
#include "../Server/Server.h"
#include "../Util/Util.h"
//Entry point of gserver.
//
//usage:   ./gserver [port]
//argv[1]  optional listening port; Socket::DEFAULT_CONNECT_PORT is used when absent
//returns  1 when the port argument cannot be read as an unsigned short, 0 otherwise
int main(int argc, char * argv[])
{
	//chdir(dirname(argv[0]));
	//NOTE(review): presumably constructed for its side effects - confirm in Util
	Util util;

	unsigned short port = Socket::DEFAULT_CONNECT_PORT;
	if (argc > 1)
	{
		std::stringstream ss(argv[1]);
		unsigned short requested = 0;
		//a failed extraction used to go unnoticed and left port with whatever
		//the stream stored in it; refuse to start on a bad argument instead
		if (!(ss >> requested))
		{
			std::cerr << "error: invalid port: " << argv[1] << std::endl;
			return 1;
		}
		port = requested;
	}

	Server server(port);
	std::cout << "port=" << port << std::endl; //debug

	server.createConnection();
	server.listen();

	return 0;
}

111
Makefile Normal file
View File

@ -0,0 +1,111 @@
# Directory prefix of every object file; it already ends with '/'.
objdir=objs/
# Module objects; every executable rule in this Makefile links the whole list.
objfile= $(objdir)Bstr.o $(objdir)Database.o $(objdir)KVstore.o $(objdir)Btree.o \
$(objdir)CBtreeFunc.o $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o \
$(objdir)SigEntry.o $(objdir)Signature.o $(objdir)Triple.o $(objdir)util.o $(objdir)VSTree.o \
$(objdir)IDList.o $(objdir)EntryBuffer.o $(objdir)LRUCache.o $(objdir)VNode.o $(objdir)DBparser.o \
$(objdir)SparqlParser.o $(objdir)SparqlLexer.o $(objdir)Operation.o $(objdir)Socket.o \
$(objdir)Server.o $(objdir)Client.o \
$(objdir)TurtleParser.o $(objdir)RDFParser.o
# Header search paths inside the unpacked libantlr3c-3.4 tree under tools/.
inc=-I./tools/libantlr3c-3.4/ -I./tools/libantlr3c-3.4/include
# `all` names no file: declare it phony so that a stray file called "all"
# can never make the default build a no-op.
.PHONY: all
all: gload gquery gserver gclient

# Each executable links its own entry-point object with every module object
# and the static archive lib/libantlr.a.
gload: $(objdir)gload.o $(objfile)
	g++ -o $@ $(objdir)gload.o $(objfile) lib/libantlr.a

gquery: $(objdir)gquery.o $(objfile)
	g++ -o $@ $(objdir)gquery.o $(objfile) lib/libantlr.a

gserver: $(objdir)gserver.o $(objfile)
	g++ -o $@ $(objdir)gserver.o $(objfile) lib/libantlr.a

gclient: $(objdir)gclient.o $(objfile)
	g++ -o $@ $(objdir)gclient.o $(objfile) lib/libantlr.a
# Entry-point objects, one per executable.
# These are compile-only (-c) steps, so no library is passed here: the archive
# lib/libantlr.a is a linker input and belongs to the link rules only.
$(objdir)gload.o: main/gload.cpp
	g++ -c main/gload.cpp $(inc) -o $(objdir)gload.o

$(objdir)gquery.o: main/gquery.cpp
	g++ -c main/gquery.cpp $(inc) -o $(objdir)gquery.o

$(objdir)gserver.o: main/gserver.cpp
	g++ -c main/gserver.cpp $(inc) -o $(objdir)gserver.o

$(objdir)gclient.o: main/gclient.cpp
	g++ -c main/gclient.cpp $(inc) -o $(objdir)gclient.o
# Module objects.
# In every recipe $< is the first prerequisite (the source file) and $@ is the
# target (the object file). Objects listed as prerequisites of other objects
# only force build order and rebuilds; they are not compiler inputs.

$(objdir)Bstr.o: Bstr/Bstr.cpp Bstr/Bstr.h
	g++ -c $< $(inc) -o $@

$(objdir)Database.o: Database/Database.cpp Database/Database.h \
		$(objdir)IDList.o $(objdir)ResultSet.o $(objdir)SPARQLquery.o \
		$(objdir)BasicQuery.o \
		$(objdir)Triple.o $(objdir)SigEntry.o $(objdir)KVstore.o $(objdir)VSTree.o \
		$(objdir)DBparser.o $(objdir)util.o \
		$(objdir)RDFParser.o
	g++ -c $< $(inc) -o $@

$(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h $(objdir)Btree.o
	g++ -c $< $(inc) -o $@

# Btree and CBtreeFunc are compiled without $(inc).
$(objdir)Btree.o: KVstore/Btree.cpp KVstore/Btree.h $(objdir)CBtreeFunc.o
	g++ -c $< -o $@

$(objdir)CBtreeFunc.o: KVstore/CBtreeFunc.cpp KVstore/CBtreeH.h
	g++ -c $< -o $@ -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE

$(objdir)IDList.o: Query/IDList.cpp Query/IDList.h
	g++ -c $< $(inc) -o $@

$(objdir)SPARQLquery.o: Query/SPARQLquery.cpp Query/SPARQLquery.h $(objdir)BasicQuery.o
	g++ -c $< $(inc) -o $@

$(objdir)BasicQuery.o: Query/BasicQuery.cpp Query/BasicQuery.h $(objdir)Signature.o
	g++ -c $< $(inc) -o $@

$(objdir)ResultSet.o: Query/ResultSet.cpp Query/ResultSet.h
	g++ -c $< $(inc) -o $@

$(objdir)SigEntry.o: Signature/SigEntry.cpp Signature/SigEntry.h $(objdir)Signature.o
	g++ -c $< $(inc) -o $@

$(objdir)Signature.o: Signature/Signature.cpp Signature/Signature.h
	g++ -c $< $(inc) -o $@

$(objdir)Triple.o: Triple/Triple.cpp Triple/Triple.h
	g++ -c $< $(inc) -o $@

$(objdir)util.o: util/util.cpp util/util.h
	g++ -c $< $(inc) -o $@

$(objdir)VSTree.o: VSTree/VSTree.cpp VSTree/VSTree.h \
		$(objdir)EntryBuffer.o $(objdir)LRUCache.o $(objdir)VNode.o
	g++ -c $< $(inc) -o $@

$(objdir)EntryBuffer.o: VSTree/EntryBuffer.cpp VSTree/EntryBuffer.h Signature/SigEntry.h
	g++ -c $< $(inc) -o $@

$(objdir)LRUCache.o: VSTree/LRUCache.cpp VSTree/LRUCache.h VSTree/VNode.h
	g++ -c $< $(inc) -o $@

$(objdir)VNode.o: VSTree/VNode.cpp VSTree/VNode.h
	g++ -c $< $(inc) -o $@

$(objdir)DBparser.o: Parser/DBparser.cpp Parser/DBparser.h \
		$(objdir)SparqlParser.o $(objdir)SparqlLexer.o $(objdir)Triple.o
	g++ -c $< $(inc) -o $@

# The two Sparql sources are C files and go through the C driver.
$(objdir)SparqlParser.o: Parser/SparqlParser.c Parser/SparqlParser.h
	gcc -c $< $(inc) -o $@

$(objdir)SparqlLexer.o: Parser/SparqlLexer.c Parser/SparqlLexer.h
	gcc -c $< $(inc) -o $@
# TurtleParser and RDFParser are C++ sources: build them with the g++ driver,
# like every other .cpp rule in this Makefile, instead of gcc.
$(objdir)TurtleParser.o: Parser/TurtleParser.cpp Parser/TurtleParser.h Parser/Type.h
	g++ -c Parser/TurtleParser.cpp $(inc) -o $(objdir)TurtleParser.o

$(objdir)RDFParser.o: Parser/RDFParser.cpp Parser/RDFParser.h $(objdir)TurtleParser.o $(objdir)Triple.o
	g++ -c Parser/RDFParser.cpp $(inc) -o $(objdir)RDFParser.o
# Objects of the Server module.
# $< is the first prerequisite (the source file), $@ the object being built.
$(objdir)Operation.o: Server/Operation.cpp Server/Operation.h
	g++ -c $< $(inc) -o $@

$(objdir)Socket.o: Server/Socket.cpp Server/Socket.h
	g++ -c $< $(inc) -o $@

$(objdir)Server.o: Server/Server.cpp Server/Server.h \
		$(objdir)Socket.o $(objdir)Database.o $(objdir)Operation.o
	g++ -c $< $(inc) -o $@

$(objdir)Client.o: Server/Client.cpp Server/Client.h $(objdir)Socket.o
	g++ -c $< $(inc) -o $@
# Neither target names a file it creates, so both are phony.
.PHONY: lib_antlr clean

# Unpack and build the ANTLR3 C runtime shipped in tools/, then pack its
# objects into lib/libantlr.a.
# Commands are chained with && so that nothing runs in the wrong directory
# after a failed cd, and $(MAKE) is used for the nested build so that the
# flags of the outer make reach it.
lib_antlr:
	rm -rf tools/libantlr3c-3.4/
	cd tools && tar -zxvf libantlr3c-3.4.tar.gz
	cd tools/libantlr3c-3.4/ && ./configure -enable-64bit && $(MAKE)
	mkdir -p lib
	rm -f lib/libantlr.a
	ar -crv lib/libantlr.a tools/libantlr3c-3.4/*.o

# Remove the executables and all objects; $(objdir) already ends with '/'.
clean:
	rm -f gload gquery gserver gclient $(objdir)*.o

View File

@ -1,8 +1,8 @@
/*
* DBparser.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
* Created on: 2015-4-11
* Author: cjq
*/
#include "DBparser.h"
@ -13,16 +13,13 @@ DBparser::DBparser()
_prefix_map.clear();
}
/* input sparql query string and parse query into SPARQLquery
* the returned string is set for log when error happen
* */
string DBparser::sparqlParser(const string& _sparql, SPARQLquery& _sparql_query)
void DBparser::sparqlParser(const std::string& _sparql, SPARQLquery& _sparql_query)
{
pANTLR3_INPUT_STREAM input;
pSparqlLexer lex;
pANTLR3_COMMON_TOKEN_STREAM tokens;
pSparqlParser parser;
input = antlr3StringStreamNew((ANTLR3_UINT8 *)(_sparql.c_str()),ANTLR3_ENC_UTF8,_sparql.length(),(ANTLR3_UINT8 *)"QueryString");
input = antlr3StringStreamNew((ANTLR3_UINT8 *)(_sparql.c_str()), ANTLR3_ENC_UTF8, _sparql.length(), (ANTLR3_UINT8 *)"QueryString");
//input = antlr3FileStreamNew((pANTLR3_UINT8)filePath,ANTLR3_ENC_8BIT);
lex = SparqlLexerNew(input);
@ -32,285 +29,446 @@ string DBparser::sparqlParser(const string& _sparql, SPARQLquery& _sparql_query)
SparqlParser_workload_return r = parser->workload(parser);
pANTLR3_BASE_TREE root = r.tree;
//pANTLR3_BASE_TREE treeNode;
printNode(root);
parseNode(root,_sparql_query,0);
if (printNode(root) > 0) throw "Some errors are found in the SPARQL query request.";
parseTree(root,_sparql_query);
printquery(_sparql_query);
genQueryVec(_sparql_query.getPatternGroup(), _sparql_query);
parser->free(parser);
tokens->free(tokens);
lex->free(lex);
input->close(input);
return "";
}
/* file pointer _fp points to rdfFile
* that was opened previously in Database::encodeRDF
* rdfParser() will be called many times until all triples in the rdfFile is parsed
* and after each call, a group of triples will be parsed into the vector;
* the returned string is set for log when error happen;
* a single line in file responds to a triple and end up with a '.'
* tuple in a line separated by '\t'
*/
string DBparser::rdfParser(ifstream& _fin, Triple* _triple_array, int& _triple_num)
int DBparser::printNode(pANTLR3_BASE_TREE node, int depth)
{
memset(line_buf, 0, buf_len);
_triple_num = 0;
int _line_len = 0;
while(_triple_num < DBparser::TRIPLE_NUM_PER_GROUP
&& (! _fin.eof()))
const char* s = (const char*) node->getText(node)->chars;
ANTLR3_UINT32 treeType = node->getType(node);
int hasErrorNode = 0;
if (treeType == 0) hasErrorNode = 1;
for (int i=0; i < depth; i++) printf(" ");
printf("%d: %s\n",treeType,s);
for (unsigned int j = 0; j < node->getChildCount(node); j++)
{
_fin.getline(line_buf, buf_len-1);
_line_len = strlen(line_buf);
/* maybe this is an empty line at the ending of file */
if(_line_len < 4)
{
continue;
}
bool _end_with_dot = true;
int _i_dot = 0;
for(int i = _line_len-1; i >= 0; i --)
{
if(line_buf[i] == '.')
{
_i_dot = i;
break;
}
if(line_buf[i] == '\t')
{
_end_with_dot = false;
break;
}
}
/* check the '.' */
if(! _end_with_dot)
{
cerr << "'.' is expected at line:" << line_buf << endl;
cerr << " line_length = " << _line_len << endl;
continue;
}
line_buf[_i_dot+1] = '\0';
_line_len = strlen(line_buf);
/* find the first TAB */
int _first_tab = -1;
for(int i = 0; i < _line_len; i ++)
{
if(line_buf[i] == '\t')
{
_first_tab = i;
break;
}
}
if(_first_tab == -1)
{
cerr << "First TAB is expected at line:" << line_buf << endl;
continue;
}
/* find the second TAB */
int _second_tab = -1;
for(int i = _first_tab+1; i < _line_len; i ++)
{
if(line_buf[i] == '\t')
{
_second_tab = i;
break;
}
}
if(_second_tab == -1)
{
cerr << "Second TAB is expected at line:" << line_buf << endl;
}
/* get sub, pre, obj and add new triple */
{
string _line = string(line_buf);
int _sub_size = _first_tab - 0;
_triple_array[_triple_num].subject = _line.substr(0, _sub_size);
int _pre_size = _second_tab - (_first_tab+1);
_triple_array[_triple_num].predicate = _line.substr(_first_tab+1, _pre_size);
/* (_line_len-1) make sure that '.' is not included */
int _obj_size = (_line_len-1) - (_second_tab+1);
_triple_array[_triple_num].object = _line.substr(_second_tab+1, _obj_size);
}
_triple_num ++;
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, j);
int childNodeType = childNode->getType(childNode);
hasErrorNode += printNode(childNode, depth+1);
}
return "";
return hasErrorNode;
}
/*
* used in readline of FILE, avoiding new memory each time
*/
char* DBparser::line_buf = new char[100*1000];
int DBparser::buf_len = 100*1000;
int DBparser::parseString(pANTLR3_BASE_TREE node,std::string& str,int depth){
const char* s =(const char*) node->getText(node)->chars;
//std::cout<<"parseString: "<<s<<std::endl;
if (depth==0){
str = s;
void DBparser::parseTree(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
printf("parseTree\n");
for (unsigned int j = 0; j < node->getChildCount(node); j++)
{
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);
//prologue 144
if (childNode->getType(childNode) == 144)
{
parsePrologue(childNode);
}
else
//select clause 156
if (childNode->getType(childNode) == 156)
{
parseSelectClause(childNode, query);
}
else
//group graph pattern 77
if (childNode->getType(childNode) == 77)
{
parseGroupPattern(childNode, query.getPatternGroup());
}
else parseTree(childNode, query);
}
else{
parseString((pANTLR3_BASE_TREE) node->getChild(node,0),str,depth-1);
}
return 0;
}
/* Walk the direct children of a prologue node and hand every child whose
 * node type is 143 ("prefix") to parsePrefix(); other children are ignored.
 */
void DBparser::parsePrologue(pANTLR3_BASE_TREE node)
{
	const unsigned int PREFIX_TYPE = 143;

	printf("parsePrologue\n");

	const unsigned int total = node->getChildCount(node);
	unsigned int idx = 0;
	while (idx < total)
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);
		++idx;

		if (child->getType(child) != PREFIX_TYPE)
			continue;
		parsePrefix(child);
	}
}
void DBparser::parsePrefix(pANTLR3_BASE_TREE node)
{
printf("parsePrefix\n");
int DBparser::parsePrefix(pANTLR3_BASE_TREE node,std::pair<std::string,std::string>& prefixPair){
//const char* s =(const char*) node->getText(node)->chars;
std::string key;
std::string value;
for (unsigned int j=0;j<node->getChildCount(node);j++){
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
for (unsigned int j = 0; j < node->getChildCount(node); j++)
{
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, j);
//prefix key string 136
if (childNode->getType(childNode)==136){
parseString(childNode,key);
if (childNode->getType(childNode) == 136)
{
parseString(childNode, key);
}
//prefix value URL 89
if (childNode->getType(childNode)==89){
parseString(childNode,value);
if (childNode->getType(childNode) == 89)
{
parseString(childNode, value);
}
}
prefixPair = make_pair(key,value);
return 0;
_prefix_map.insert(make_pair(key, value));
}
void DBparser::replacePrefix(string& str){
if (str[0]!='<'){
void DBparser::replacePrefix(std::string& str)
{
if (str[0] != '<' && str[0] != '\"' && str[0] != '?')
{
int sep=str.find(":");
std::string prefix=str.substr(0,sep+1);
std::cout<<"prefix: "<<prefix<<std::endl;
if (_prefix_map.find(prefix)!=_prefix_map.end()){
str=_prefix_map[prefix].substr(0,_prefix_map[prefix].length()-1)+str.substr(sep+1,str.length()-sep-1)+">";
std::cout<<"str: "<<str<<std::endl;
if (sep == -1) return;
std::string prefix=str.substr(0, sep+1);
std::cout << "prefix: " << prefix << std::endl;
if (_prefix_map.find(prefix) != _prefix_map.end())
{
str=_prefix_map[prefix].substr(0, _prefix_map[prefix].length() - 1) + str.substr(sep + 1 ,str.length() - sep - 1) + ">";
std::cout << "str: " << str << std::endl;
}
else{
std::cout<<"prefix not found..."<<std::endl;
else
{
std::cout << "prefix not found..." << std::endl;
throw "Some errors are found in the SPARQL query request.";
}
}
}
int DBparser::parseTriple(pANTLR3_BASE_TREE node,Triple& triple){
//const char* s =(const char*) node->getText(node)->chars;
std::string subject="";
std::string predicate="";
std::string object="";
for (unsigned int j=0;j<node->getChildCount(node);j++){
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
/* Walk the direct children of a select-clause node and pass every child whose
 * node type is 199 ("var") to parseSelectVar() together with the query.
 */
void DBparser::parseSelectClause(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
	const unsigned int VAR_TYPE = 199;

	printf("parseSelectClause\n");

	const unsigned int total = node->getChildCount(node);
	unsigned int idx = 0;
	while (idx < total)
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);
		++idx;

		if (child->getType(child) != VAR_TYPE)
			continue;
		parseSelectVar(child, query);
	}
}
/* For every direct child of node whose node type is 200, read its text with
 * parseString() and register that text as a projection of the query.
 */
void DBparser::parseSelectVar(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
	const unsigned int VAR_NAME_TYPE = 200;

	printf("parseSelectVar\n");

	std::string name = "";
	const unsigned int total = node->getChildCount(node);
	unsigned int idx = 0;
	while (idx < total)
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);
		++idx;

		if (child->getType(child) != VAR_NAME_TYPE)
			continue;
		parseString(child, name);
		query.addOneProjection(name);
	}
}
/* Dispatch each direct child of a group-graph-pattern node by node type:
 *   185 (triples same subject) -> parsePattern
 *   124 (optional)             -> parseOptional
 *   195 (union)                -> addOneGroupUnion on the group, then parseUnion
 *    67 (filter)               -> parseFilter
 * Children of any other type are ignored.
 */
void DBparser::parseGroupPattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
	printf("parseGroupPattern\n");

	const unsigned int total = node->getChildCount(node);
	for (unsigned int idx = 0; idx < total; ++idx)
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

		switch (child->getType(child))
		{
			case 185:
				parsePattern(child, patterngroup);
				break;
			case 124:
				parseOptional(child, patterngroup);
				break;
			case 195:
				patterngroup.addOneGroupUnion();
				parseUnion(child, patterngroup);
				break;
			case 67:
				parseFilter(child, patterngroup);
				break;
			default:
				break;
		}
	}
}
void DBparser::parsePattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
printf("parsePattern\n");
std::string subject = "";
std::string predicate = "";
std::string object = "";
for (unsigned int j = 0; j < node->getChildCount(node); j++)
{
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, j);
//subject 177
if (childNode->getType(childNode)==177){
parseString(childNode,subject,1);
if (childNode->getType(childNode) == 177)
{
parseString(childNode, subject, 1);
replacePrefix(subject);
}
//predicate 142
if (childNode->getType(childNode)==142){
parseString(childNode,predicate,4);
if (childNode->getType(childNode) == 142)
{
parseString(childNode, predicate, 4);
replacePrefix(predicate);
}
//object 119
if (childNode->getType(childNode)==119){
parseString(childNode,object,1);
if (childNode->getType(childNode) == 119)
{
parseString(childNode, object, 1);
replacePrefix(object);
}
}
triple=Triple(subject,predicate,object);
std::cout<<"Triple: \n\ts|"<<subject<<"|\n\tp|"<<predicate<<"|\n\to|"<<object<<"|"<<std::endl;
return 0;
patterngroup.addOnePattern(SPARQLquery::Pattern(SPARQLquery::Element(subject), SPARQLquery::Element(predicate), SPARQLquery::Element(object)));
}
int DBparser::parseBasicQuery(pANTLR3_BASE_TREE node,BasicQuery& basicQuery){
//const char* s =(const char*) node->getText(node)->chars;
Triple triple;
for (unsigned int j=0;j<node->getChildCount(node);j++){
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
//basicQuery 185
std::cout<<"Child type: "<<childNode->getType(childNode)<<endl;
if (childNode->getType(childNode)==185){
parseTriple(childNode,triple);
basicQuery.addTriple(triple);
}
if (childNode->getType(childNode)==195){
//Union part here!!
//parseUnion(childNode,U);
//basicQuery.addTriple(triple);
void DBparser::parseOptional(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
printf("parseOptional\n");
for (unsigned int j = 0; j < node->getChildCount(node); j++)
{
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, j);
//group graph pattern 77
if (childNode->getType(childNode) == 77)
{
patterngroup.addOneOptional();
parseGroupPattern(childNode, patterngroup.getLastOptional());
}
}
return 0;
}
int DBparser::parseVar(pANTLR3_BASE_TREE node,SPARQLquery& query){
//const char* s =(const char*) node->getText(node)->chars;
std::string var="";
for (unsigned int j=0;j<node->getChildCount(node);j++){
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
//var 200
if (childNode->getType(childNode)==200){
parseString(childNode,var,0);
query.addQueryVar(var);
void DBparser::parseUnion(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
printf("parseUnion\n");
for (unsigned int j = 0; j < node->getChildCount(node); j++)
{
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, j);
//group graph pattern 77
if (childNode->getType(childNode) == 77)
{
patterngroup.addOneUnion();
parseGroupPattern(childNode, patterngroup.getLastUnion());
}
//union 195
if (childNode->getType(childNode) == 195)
{
parseUnion(childNode, patterngroup);
}
}
return 0;
}
int DBparser::parseNode(pANTLR3_BASE_TREE node, SPARQLquery& query,int depth){
const char* s =(const char*) node->getText(node)->chars;
ANTLR3_UINT32 treeType = node->getType(node);
void DBparser::parseFilter(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
printf("parseFilter\n");
for (int i=0;i<depth;i++){
printf(" ");
for (unsigned int j = 0; j < node->getChildCount(node); j++)
{
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, j);
patterngroup.addOneFilterTree();
parseFilterTree(childNode, patterngroup.getLastFilterTree());
}
printf("%d: %s\n",treeType,s);
}
for (unsigned int j=0;j<node->getChildCount(node);j++){
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
int childNodeType = childNode->getType(childNode);
switch (childNodeType){
//prefix
case 199:{
parseVar(childNode,query);
break;
void DBparser::parseFilterTree(pANTLR3_BASE_TREE node, SPARQLquery::FilterTree& filter)
{
printf("parseFilterTree\n");
//not 192
if (node->getType(node) == 192) filter.type = SPARQLquery::FilterTree::Not;
//or 125
if (node->getType(node) == 125) filter.type = SPARQLquery::FilterTree::Or;
//and 8
if (node->getType(node) == 8) filter.type = SPARQLquery::FilterTree::And;
//equal 62
if (node->getType(node) == 62) filter.type = SPARQLquery::FilterTree::Equal;
//not equal 116
if (node->getType(node) == 116) filter.type = SPARQLquery::FilterTree::NotEqual;
//less 100
if (node->getType(node) == 100) filter.type = SPARQLquery::FilterTree::Less;
//less equal 101
if (node->getType(node) == 101) filter.type = SPARQLquery::FilterTree::LessOrEqual;
//greater 72
if (node->getType(node) == 72) filter.type = SPARQLquery::FilterTree::Greater;
//greater equal 73
if (node->getType(node) == 73) filter.type = SPARQLquery::FilterTree::GreaterOrEqual;
for (unsigned int j = 0; j < node->getChildCount(node); j++)
{
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, j);
//unary 190
if (childNode->getType(childNode) == 190)
if (j == 0)
{
parseString(childNode, filter.arg1, 1);
replacePrefix(filter.arg1);
}
//var
case 143:{
std::pair<std::string,std::string> prefixPair;
parsePrefix(childNode,prefixPair);
_prefix_map.insert(prefixPair);
break;
else
{
parseString(childNode, filter.arg2, 1);
replacePrefix(filter.arg2);
}
//BasicQuery
case 77:{
BasicQuery* basicQuery=new BasicQuery();
parseBasicQuery(childNode,*basicQuery);
query.addBasicQuery(basicQuery);
break;
else
if (j == 0)
{
filter.parg1 = new SPARQLquery::FilterTree();
parseFilterTree(childNode, *filter.parg1);
}
default:
parseNode(childNode,query,depth+1);
else
{
filter.parg2 = new SPARQLquery::FilterTree();
parseFilterTree(childNode, *filter.parg2);
}
}
}
/*
 * Descend `depth` levels along the first-child chain starting at `node`
 * and copy the text of the node reached into `str`.
 * Throws a const char* message if the chain ends (NULL) before or at the
 * requested depth.
 */
void DBparser::parseString(pANTLR3_BASE_TREE node, std::string& str, int depth)
{
    // walk down through first children, one level per remaining depth unit
    for (; depth > 0 && node != NULL; depth--)
        node = (pANTLR3_BASE_TREE) node->getChild(node, 0);

    if (node == NULL)
        throw "Some errors are found in the SPARQL query request.";

    str = (const char*) node->getText(node)->chars;
}
/*
 * Debug dump of a parsed query: the projection variables on one line,
 * followed by the (nested) group pattern, framed by two separator lines.
 */
void DBparser::printquery(SPARQLquery& query)
{
    std::vector <std::string> &projections = query.getProjections();

    printf("===========================================================================\n");

    printf("var is :");
    for (std::vector<std::string>::iterator it = projections.begin(); it != projections.end(); ++it)
        printf("%s\t", it->c_str());
    printf("\n");

    printgrouppattern(query.getPatternGroup(), 0);

    printf("===========================================================================\n");
}
void DBparser::printgrouppattern(SPARQLquery::PatternGroup &pg, int dep)
{
for (int j = 0; j < dep; j++) printf("\t"); printf("{\n");
for (int j = 0; j < dep; j++) printf("\t"); printf("pattern:\n");
for(int i = 0; i < pg.patterns.size(); i++)
{
for (int j = 0; j < dep; j++) printf("\t");
printf("\t%s\t%s\t%s\n", pg.patterns[i].subject.value.c_str(), pg.patterns[i].predicate.value.c_str(), pg.patterns[i].object.value.c_str());
}
if (pg.optionals.size() > 0)
{
for (int j = 0; j < dep; j++) printf("\t"); printf("optional:\n");
for (int i = 0; i < pg.optionals.size(); i++)
printgrouppattern(pg.optionals[i], dep + 1);
}
for (int i = 0; i < pg.unions.size(); i++)
{
for (int j = 0; j < dep; j++) printf("\t"); printf("union %d:\n", i + 1);
for (int k = 0; k < pg.unions[i].size(); k++)
printgrouppattern(pg.unions[i][k], dep + 1);
}
if (pg.filters.size() > 0)
{
for (int j = 0; j < dep; j++) printf("\t"); printf("filter:\n");
for (int i = 0; i < pg.filters.size(); i++)
{
for (int j = 0; j <= dep; j++) printf("\t");
printfilter(pg.filters[i]);
printf("\n");
}
}
return 0;
for (int j = 0; j < dep; j++) printf("\t"); printf("}\n");
}
void DBparser::printNode(pANTLR3_BASE_TREE node, int depth){
const char* s =(const char*) node->getText(node)->chars;
ANTLR3_UINT32 treeType = node->getType(node);
void DBparser::printfilter(SPARQLquery::FilterTree &ft)
{
printf("(");
for (int i=0;i<depth;i++){
printf(" ");
}
printf("%d: %s\n",treeType,s);
for (unsigned int j=0;j<node->getChildCount(node);j++){
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
//int childNodeType = childNode->getType(childNode);
printNode(childNode,depth+1);
}
if (ft.type == SPARQLquery::FilterTree::Not) printf("!");
if (ft.parg1 == NULL) printf("%s", ft.arg1.c_str());
else printfilter(*ft.parg1);
if (ft.type == SPARQLquery::FilterTree::Or) printf("||");
if (ft.type == SPARQLquery::FilterTree::And) printf("&&");
if (ft.type == SPARQLquery::FilterTree::Equal) printf("=");
if (ft.type == SPARQLquery::FilterTree::NotEqual) printf("!=");
if (ft.type == SPARQLquery::FilterTree::Less) printf("<");
if (ft.type == SPARQLquery::FilterTree::LessOrEqual) printf("<=");
if (ft.type == SPARQLquery::FilterTree::Greater) printf(">");
if (ft.type == SPARQLquery::FilterTree::GreaterOrEqual) printf(">=");
if (ft.type != SPARQLquery::FilterTree::Not)
if (ft.parg2 == NULL) printf("%s", ft.arg2.c_str());
else printfilter(*ft.parg2);
printf(")");
}
/*
 * Flatten a pattern group into `query`.
 * If the group itself has variables (pg.hasVar), one basic query and one
 * query-variable vector are opened and every triple pattern of the group is
 * added, registering subject/object names that start with '?'.
 * The same is then done recursively for every union branch and afterwards
 * for every optional group.
 */
void DBparser::genQueryVec(SPARQLquery::PatternGroup &pg, SPARQLquery& query)
{
    if (pg.hasVar)
    {
        query.addBasicQuery();
        query.addQueryVarVec();

        const int patternCount = pg.patterns.size();
        for (int p = 0; p < patternCount; p++)
        {
            string &subject = pg.patterns[p].subject.value;
            string &predicate = pg.patterns[p].predicate.value;
            string &object = pg.patterns[p].object.value;

            query.addTriple(Triple(subject, predicate, object));

            // only subject and object are registered as query variables
            if (subject[0] == '?')
                query.addQueryVar(subject);
            if (object[0] == '?')
                query.addQueryVar(object);
        }
    }

    // union branches first ...
    for (int u = 0; u < pg.unions.size(); u++)
    {
        for (int b = 0; b < pg.unions[u].size(); b++)
        {
            genQueryVec(pg.unions[u][b], query);
        }
    }

    // ... then the optional groups
    for (int o = 0; o < pg.optionals.size(); o++)
    {
        genQueryVec(pg.optionals[o], query);
    }
}

View File

@ -1,62 +1,52 @@
/*
* DBparser.h
*
* Created on: 2014-6-20
* Author: liyouhuan
* Created on: 2015-4-11
* Author: cjq
*/
#ifndef DBPARSER_H_
#define DBPARSER_H_
#include "../Util/Util.h"
#include "../Util/Triple.h"
#include<string>
#include<vector>
#include<map>
#include<fstream>
#include<stdio.h>
#include "../Query/SPARQLquery.h"
#include "SparqlParser.h"
#include "SparqlLexer.h"
using namespace std;
class DBparser{
private:
void printNode(pANTLR3_BASE_TREE node,int depth=0);
int parseNode(pANTLR3_BASE_TREE node, SPARQLquery& query,int depth=0);
int parsePrefix(pANTLR3_BASE_TREE node,std::pair<std::string,std::string>& prefixPair);
int parseString(pANTLR3_BASE_TREE node,std::string& str,int depth=0);
int parseTriple(pANTLR3_BASE_TREE node,Triple& triple);
int parseBasicQuery(pANTLR3_BASE_TREE node,BasicQuery& basicQuery);
int parseVar(pANTLR3_BASE_TREE node,SPARQLquery& query);
void replacePrefix(string& str);
int printNode(pANTLR3_BASE_TREE node,int depth=0);
void parseTree(pANTLR3_BASE_TREE node, SPARQLquery& query);
void parsePrologue(pANTLR3_BASE_TREE node);
void parsePrefix(pANTLR3_BASE_TREE node);
void replacePrefix(std::string& str);
void parseSelectClause(pANTLR3_BASE_TREE node, SPARQLquery& query);
void parseSelectVar(pANTLR3_BASE_TREE node, SPARQLquery& query);
void parseGroupPattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
void parsePattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
void parseOptional(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
void parseUnion(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
void parseFilter(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
void parseFilterTree(pANTLR3_BASE_TREE node, SPARQLquery::FilterTree& filter);
void parseString(pANTLR3_BASE_TREE node, std::string& str, int depth=0);
void printquery(SPARQLquery& query);
void printgrouppattern(SPARQLquery::PatternGroup &pg, int dep = 0);
void printfilter(SPARQLquery::FilterTree &ft);
void genQueryVec(SPARQLquery::PatternGroup &pg, SPARQLquery& query);
std::map<std::string,std::string> _prefix_map;
public:
/* how many triples at most will be parsed out when call rdfParser() once
* when -1, it means parse all triples in the file into the triples set(array)
* */
static const int TRIPLE_NUM_PER_GROUP = 10*1000*1000;
DBparser();
/* input sparql query string and parse query into SPARQLquery
* the returned string is set for log when error happen */
string sparqlParser(const string& _sparql, SPARQLquery& _sparql_query);
/* file stream _fin points to rdfFile
* that was opened previously in Database::encodeRDF
* rdfParser() will be called many times until all triples in the rdfFile is parsed
* and after each call, a group of triples will be parsed into the vector;
* the returned string is set for log when error happen;
*/
string rdfParser(ifstream& _fin, Triple* _triple_array, int& _triple_num);
private:
/*
* used in readline of FILE, avoiding new memory each time
*/
static char* line_buf;
static int buf_len;
void sparqlParser(const std::string& _sparql, SPARQLquery& _sparql_query);
};
#endif /* DBPARSER_H_ */

View File

@ -1,661 +0,0 @@
/*
* QueryParser.cpp
*
* Created on: 2015-4-11
* Author: cjq
*/
#include "QueryParser.h"
using namespace std;
/* Construct a parser whose PREFIX table starts out empty. */
QueryParser::QueryParser() : _prefix_map()
{
}
/*
 * Parse the SPARQL text in `query` and fill `querytree` with the result.
 *
 * The query is lexed and parsed with the generated ANTLR3 lexer/parser, the
 * resulting syntax tree is dumped by printNode() and then converted by
 * parseTree(); finally the query tree is printed.
 *
 * Throws a const char* message when printNode() reports an error node, and
 * lets the const char* exceptions of the parse helpers propagate.
 * The ANTLR objects created here are released on the normal path and also
 * before any exception leaves this function (the original code leaked them
 * whenever something was thrown).
 */
void QueryParser::sparqlParser(const string& query, QueryTree& querytree)
{
    //uncompress before use
    dfa34_Table_uncompress();

    pANTLR3_INPUT_STREAM input = antlr3StringStreamNew((ANTLR3_UINT8 *)(query.c_str()), ANTLR3_ENC_UTF8, query.length(), (ANTLR3_UINT8 *)"QueryString");
    //input = antlr3FileStreamNew((pANTLR3_UINT8)filePath,ANTLR3_ENC_8BIT);
    pSparqlLexer lex = SparqlLexerNew(input);
    pANTLR3_COMMON_TOKEN_STREAM tokens = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT,TOKENSOURCE(lex));
    pSparqlParser parser = SparqlParserNew(tokens);

    try
    {
        SparqlParser_workload_return r = parser->workload(parser);
        pANTLR3_BASE_TREE root = r.tree;

        if (printNode(root) > 0) throw "Some errors are found in the SPARQL query request.";

        parseTree(root, querytree);
        querytree.print();
    }
    catch (...)
    {
        // release in the same order as the normal path, then pass the
        // original exception on unchanged
        parser->free(parser);
        tokens->free(tokens);
        lex->free(lex);
        input->close(input);
        throw;
    }

    parser->free(parser);
    tokens->free(tokens);
    lex->free(lex);
    input->close(input);
}
/*
 * Print the subtree rooted at `node`, one "<type>: <text>" line per node,
 * prefixing each line with one " " per level of depth `dep`.
 * Returns the number of nodes in the subtree whose token type is 0;
 * sparqlParser() treats a positive result as a malformed query.
 */
int QueryParser::printNode(pANTLR3_BASE_TREE node, int dep)
{
    const char* text = (const char*) node->getText(node)->chars;
    ANTLR3_UINT32 nodeType = node->getType(node);

    // a node of type 0 counts as one error
    int errorCount = (nodeType == 0) ? 1 : 0;

    int level = 0;
    while (level < dep)
    {
        printf(" ");
        level++;
    }
    printf("%d: %s\n",nodeType,text);

    // add up the error nodes found below each child
    for (unsigned int c = 0; c < node->getChildCount(node); c++)
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, c);
        errorCount += printNode(child, dep + 1);
    }

    return errorCount;
}
/*
 * Walk the children of `node` and dispatch on their token type:
 *   144 prologue, 155 select, 13 ask, 156 select clause,
 *   77 group graph pattern, 127 order by, 120 offset, 102 limit.
 * Select/ask set the query form and recurse; any other type is recursed
 * into unchanged.
 *
 * For offset/limit the first child must be an integer token (83); its text
 * is converted to int and applied only if the conversion succeeded and the
 * value is non-negative. (The original read an uninitialized `num` when the
 * conversion failed and dereferenced a missing first child.)
 */
void QueryParser::parseTree(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
    printf("parseTree\n");

    for (unsigned int i = 0; i < node->getChildCount(node); i++)
    {
        pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
        const ANTLR3_UINT32 childType = childNode->getType(childNode);

        switch (childType)
        {
            //prologue 144
            case 144:
                parsePrologue(childNode);
                break;

            //select 155
            case 155:
                querytree.setQueryForm(QueryTree::Select_Query);
                parseTree(childNode, querytree);
                break;

            //ask 13
            case 13:
                querytree.setQueryForm(QueryTree::Ask_Query);
                parseTree(childNode, querytree);
                break;

            //select clause 156
            case 156:
                parseSelectClause(childNode, querytree);
                break;

            //group graph pattern 77
            case 77:
                parseGroupPattern(childNode, querytree.getGroupPattern());
                break;

            //order by 127
            case 127:
                parseOrderBy(childNode, querytree);
                break;

            //offset 120 limit 102
            case 120:
            case 102:
            {
                pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);

                //integer 83
                if (gchildNode != NULL && gchildNode->getType(gchildNode) == 83)
                {
                    string str;
                    parseString(gchildNode, str, 0);

                    stringstream str2int;
                    int num = -1;
                    str2int << str;
                    str2int >> num;

                    // ignore values that could not be converted or are negative
                    if (!str2int.fail() && num >= 0)
                    {
                        if (childType == 120)
                            querytree.setOffset(num);
                        else
                            querytree.setLimit(num);
                    }
                }
                break;
            }

            default:
                parseTree(childNode, querytree);
                break;
        }
    }
}
/*
 * Handle a prologue node: every child of token type 143 (prefix) is passed
 * to parsePrefix(); children of any other type are ignored.
 */
void QueryParser::parsePrologue(pANTLR3_BASE_TREE node)
{
    printf("parsePrologue\n");

    unsigned int idx = 0;
    while (idx < node->getChildCount(node))
    {
        pANTLR3_BASE_TREE declaration = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        //prefix 143
        const bool isPrefix = (declaration->getType(declaration) == 143);
        if (isPrefix)
        {
            parsePrefix(declaration);
        }

        idx++;
    }
}
/*
 * Handle one prefix declaration node: read the namespace (child type 136)
 * and the IRI (child type 89) and insert the pair into _prefix_map.
 * map::insert is used, so an already registered namespace keeps its first
 * IRI.
 */
void QueryParser::parsePrefix(pANTLR3_BASE_TREE node)
{
    printf("parsePrefix\n");

    string prefixName;
    string prefixIri;

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE part = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (part->getType(part))
        {
            //prefix namespace 136
            case 136:
                parseString(part, prefixName, 0);
                break;

            //prefix IRI 89
            case 89:
                parseString(part, prefixIri, 0);
                break;

            default:
                break;
        }
    }

    _prefix_map.insert(make_pair(prefixName, prefixIri));
}
/*
 * Expand a prefixed name in place.
 * Strings starting with '<', '"' or '?' and strings without ':' are left
 * untouched, as are blank nodes ("_:" prefix). Otherwise the text up to and
 * including the first ':' is looked up in _prefix_map and replaced by the
 * registered IRI (minus its last character), followed by the local part and
 * a closing ">".
 * Throws a const char* message when the prefix was never declared.
 */
void QueryParser::replacePrefix(string& str)
{
    const char lead = str[0];
    if (lead == '<' || lead == '\"' || lead == '?')
        return;

    const string::size_type colon = str.find(":");
    if (colon == string::npos)
        return;

    const string prefix = str.substr(0, colon + 1);

    //blank node
    if (prefix == "_:")
        return;

    cout << "prefix: " << prefix << endl;

    std::map<std::string, std::string>::iterator entry = _prefix_map.find(prefix);
    if (entry == _prefix_map.end())
    {
        cout << "prefix not found..." << endl;
        throw "Some errors are found in the SPARQL query request.";
    }

    // drop the last character of the stored IRI, append the local name and re-close with '>'
    const string &iri = entry->second;
    const string localName = str.substr(colon + 1);
    str = iri.substr(0, iri.length() - 1) + localName + ">";
    cout << "str: " << str << endl;
}
/*
 * Handle a select clause node. Per child:
 *   52  distinct -> mark the projection as DISTINCT
 *   199 var      -> parseSelectVar()
 *   14  asterisk -> mark the projection as "*"
 * Other child types are ignored.
 */
void QueryParser::parseSelectClause(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
    printf("parseSelectClause\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE item = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (item->getType(item))
        {
            //distinct 52
            case 52:
                querytree.setProjectionModifier(QueryTree::Modifier_Distinct);
                break;

            //var 199
            case 199:
                parseSelectVar(item, querytree);
                break;

            //asterisk 14
            case 14:
                querytree.setProjectionAsterisk();
                break;

            default:
                break;
        }
    }
}
/*
 * Handle a projection "var" node: the text of every child of token type 200
 * is added to the query tree as a projection variable.
 */
void QueryParser::parseSelectVar(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
    printf("parseSelectVar\n");

    string name = "";

    unsigned int idx = 0;
    while (idx < node->getChildCount(node))
    {
        pANTLR3_BASE_TREE varNode = (pANTLR3_BASE_TREE) node->getChild(node, idx);
        idx++;

        if (varNode->getType(varNode) != 200)
            continue;

        parseString(varNode, name, 0);
        querytree.addProjectionVar(name);
    }
}
/*
 * Fill `grouppattern` from a group graph pattern node. Per child:
 *   185      triples same subject -> parsePattern()
 *   124/108  optional / minus     -> parseOptionalOrMinus()
 *   195      union                -> open a new union group, parseUnion()
 *   67       filter               -> parseFilter()
 *   77       nested group pattern (redundant braces) -> parsed into the same group
 * Other child types are ignored.
 */
void QueryParser::parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
    printf("parseGroupPattern\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE element = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (element->getType(element))
        {
            //triples same subject 185
            case 185:
                parsePattern(element, grouppattern);
                break;

            //optional 124 minus 108
            case 124:
            case 108:
                parseOptionalOrMinus(element, grouppattern);
                break;

            //union 195
            case 195:
                grouppattern.addOneGroupUnion();
                parseUnion(element, grouppattern);
                break;

            //filter 67
            case 67:
                parseFilter(element, grouppattern);
                break;

            //group graph pattern 77
            //redundant {}
            case 77:
                parseGroupPattern(element, grouppattern);
                break;

            default:
                break;
        }
    }
}
/*
 * Handle a "triples same subject" node.
 * Children are scanned in order; a subject (177), predicate (142) or object
 * (119) child updates the corresponding string (with prefix expansion).
 * After every child at a non-zero even index a triple pattern built from the
 * current subject/predicate/object is appended, so several
 * predicate/object pairs can share one subject.
 */
void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
    printf("parsePattern\n");

    string subject = "";
    string predicate = "";
    string object = "";

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE part = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (part->getType(part))
        {
            //subject 177
            case 177:
                parseString(part, subject, 1);
                replacePrefix(subject);
                break;

            //predicate 142
            case 142:
            {
                pANTLR3_BASE_TREE firstChild = (pANTLR3_BASE_TREE) part->getChild(part, 0);

                //var 200: text sits one level down, otherwise four levels down
                const int textDepth = (firstChild->getType(firstChild) == 200) ? 1 : 4;
                parseString(part, predicate, textDepth);
                replacePrefix(predicate);
                break;
            }

            //object 119
            case 119:
                parseString(part, object, 1);
                replacePrefix(object);
                break;

            default:
                break;
        }

        //triples same subject
        const bool tripleComplete = (idx != 0 && idx % 2 == 0);
        if (tripleComplete)
        {
            QueryTree::GroupPattern::Pattern::Element subjectElement(subject);
            QueryTree::GroupPattern::Pattern::Element predicateElement(predicate);
            QueryTree::GroupPattern::Pattern::Element objectElement(object);

            grouppattern.addOnePattern(QueryTree::GroupPattern::Pattern(subjectElement, predicateElement, objectElement));
        }
    }
}
/*
 * Handle an OPTIONAL (124) or MINUS (108) node: for each child that is a
 * group graph pattern (77) a new optional ('o') or minus ('m') group is
 * opened in `grouppattern` and the child is parsed into it.
 */
void QueryParser::parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
    //optional 124 minus 108
    const bool isOptional = (node->getType(node) == 124);
    const bool isMinus = !isOptional && (node->getType(node) == 108);

    if (isOptional)
        printf("parseOptional\n");
    else if (isMinus)
        printf("parseMinus\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE inner = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        //group graph pattern 77
        if (inner->getType(inner) != 77)
            continue;

        if (isOptional)
            grouppattern.addOneOptionalOrMinus('o');
        else if (isMinus)
            grouppattern.addOneOptionalOrMinus('m');

        parseGroupPattern(inner, grouppattern.getLastOptionalOrMinus());
    }
}
/*
 * Handle a union node: each group graph pattern child (77) becomes a new
 * branch of the current union group; a nested union child (195) is
 * flattened by recursing with the same `grouppattern`.
 */
void QueryParser::parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
    printf("parseUnion\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE branch = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (branch->getType(branch))
        {
            //group graph pattern 77
            case 77:
                grouppattern.addOneUnion();
                parseGroupPattern(branch, grouppattern.getLastUnion());
                break;

            //union 195
            case 195:
                parseUnion(branch, grouppattern);
                break;

            default:
                break;
        }
    }
}
/*
 * Handle a filter node: every child starts a new filter tree in
 * `grouppattern`. A child of type 190 (unary) is skipped over so that the
 * filter tree is built from its first child instead.
 */
void QueryParser::parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
    printf("parseFilter\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE expression = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        //unary 190
        const bool wrappedInUnary = (expression->getType(expression) == 190);
        if (wrappedInUnary)
        {
            expression = (pANTLR3_BASE_TREE) expression->getChild(expression, 0);
        }

        grouppattern.addOneFilterTree();
        parseFilterTree(expression, grouppattern, grouppattern.getLastFilterTree());
    }
}
/*
 * Convert the filter expression rooted at `node` into the filter tree node
 * `filter`.
 *
 * Step 1: map the token type of `node` to filter.type; a token type that is
 *         not listed in the switch makes the function return immediately.
 * Step 2: special forms that do not use the generic child loop:
 *         NOT IN, IN, NOT EXISTS and EXISTS (each returns when handled).
 * Step 3: generic case - every child of `node` becomes one entry of
 *         filter.child, either a string leaf (type 's') or a nested filter
 *         tree (type 't') parsed recursively.
 *
 * `grouppattern` is the group the filter belongs to; it is passed through to
 * parseExistsGroupPattern() and to the recursive calls.
 */
void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter)
{
printf("parseFilterTree\n");
// step 1: operator / builtin token type -> filter node type
switch (node->getType(node))
{
//! 192
case 192: filter.type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
//not 115
case 115: filter.type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
//or 125
case 125: filter.type = QueryTree::GroupPattern::FilterTreeNode::Or_type; break;
//and 8
case 8: filter.type = QueryTree::GroupPattern::FilterTreeNode::And_type; break;
//equal 62
case 62: filter.type = QueryTree::GroupPattern::FilterTreeNode::Equal_type; break;
//not equal 116
case 116: filter.type = QueryTree::GroupPattern::FilterTreeNode::NotEqual_type; break;
//less 100
case 100: filter.type = QueryTree::GroupPattern::FilterTreeNode::Less_type; break;
//less equal 101
case 101: filter.type = QueryTree::GroupPattern::FilterTreeNode::LessOrEqual_type; break;
//greater 72
case 72: filter.type = QueryTree::GroupPattern::FilterTreeNode::Greater_type; break;
//greater equal 73
case 73: filter.type = QueryTree::GroupPattern::FilterTreeNode::GreaterOrEqual_type; break;
//regex 150
case 150: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type; break;
//lang 96
case 96: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type; break;
//langmatches 97
case 97: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type; break;
//bound 23
case 23: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type; break;
//in 81
case 81: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type; break;
//exists 63
case 63: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_exists_type; break;
//not exists 117 (represented as a Not node wrapping an exists node, see below)
case 117: filter.type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
// any other token type: nothing to build
default:
return;
}
//in the "NOT IN" case, the in token, the var and the expression list are on the same layer.
//not 115
if (node->getType(node) == 115)
{
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, 0);
//in 81
if (childNode->getType(childNode) == 81)
{
// build Not -> In; the operands start at child index 1 because index 0 is the "in" token itself
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child[0].type = 't';
filter.child[0].node.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type;
parseVarInExpressionList(node, filter.child[0].node, 1);
return;
}
}
//in 81
if (node->getType(node) == 81)
{
parseVarInExpressionList(node, filter, 0);
return;
}
//not exists 117
if (node->getType(node) == 117)
{
// build Not -> Exists; the group pattern is attached to the inner exists node
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child[0].type = 't';
filter.child[0].node.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_exists_type;
parseExistsGroupPattern(node, grouppattern, filter.child[0].node);
return;
}
//exists 63
if (node->getType(node) == 63)
{
parseExistsGroupPattern(node, grouppattern, filter);
return;
}
// step 3: generic operators - one filter child per tree child
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, i);
//unary 190
if (childNode->getType(childNode) == 190)
{
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
//unsigned int type = gchildNode->getType(gchildNode);
//regex 150 lang 96 langmatches 97 bound 23 exists 63
//if (type == 150 || type == 96 || type == 97 || type == 23 || type == 63)
// a unary whose first child has children of its own wraps a sub-expression: step into it
if (gchildNode->getChildCount(gchildNode) != 0)
childNode = gchildNode;
}
// exactly one entry is appended per iteration, so filter.child[i] below is the
// entry just added (this relies on filter.child being empty on entry - NOTE(review): confirm)
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
//unary 190 (still a unary: plain operand, its text is one level down)
if (childNode->getType(childNode) == 190)
{
filter.child[i].type = 's';
parseString(childNode, filter.child[i].arg, 1);
replacePrefix(filter.child[i].arg);
}
// leaf node: its own text is the operand
else if (childNode->getChildCount(childNode) == 0)
{
filter.child[i].type = 's';
parseString(childNode, filter.child[i].arg, 0);
replacePrefix(filter.child[i].arg);
}
// anything else is a nested expression
else
{
filter.child[i].type = 't';
parseFilterTree(childNode, grouppattern, filter.child[i].node);
}
}
}
/*
 * Collect the operands of an IN expression into `filter.child` as string
 * leaves (type 's'), scanning the children of `node` from index `begin`:
 *   190 unary           -> one operand (text one level down)
 *   65  expression list -> one operand per list element
 * Children of other types are ignored.
 *
 * Each operand is written through the element that was just appended
 * (back()). The original addressed it with an index derived from the tree
 * position (i - begin, i + j - begin), which is only correct while
 * filter.child stays in lock-step with the tree and otherwise indexes past
 * the end of the vector.
 *
 * parseString()/replacePrefix() may throw a const char* message.
 */
void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTreeNode& filter, unsigned int begin)
{
    printf("parseVarInExpressionList\n");

    for (unsigned int i = begin; i < node->getChildCount(node); i++)
    {
        pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);

        //unary 190
        if (childNode->getType(childNode) == 190)
        {
            filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
            filter.child.back().type = 's';
            parseString(childNode, filter.child.back().arg, 1);
            replacePrefix(filter.child.back().arg);
        }

        //expression list 65
        if (childNode->getType(childNode) == 65)
        {
            for (unsigned int j = 0; j < childNode->getChildCount(childNode); j++)
            {
                pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, j);

                filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
                filter.child.back().type = 's';
                parseString(gchildNode, filter.child.back().arg, 1);
                replacePrefix(filter.child.back().arg);
            }
        }
    }
}
/*
 * Handle the operand of an EXISTS filter: if the first child of `node` is a
 * group graph pattern (77), a new exists-group is opened in `grouppattern`,
 * its index within the last exists-group list is stored in
 * filter.exists_grouppattern_id, and the child is parsed into it.
 * Nothing happens for any other first child.
 */
void QueryParser::parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter)
{
    printf("parseExistsGroupPattern\n");

    pANTLR3_BASE_TREE patternNode = (pANTLR3_BASE_TREE) node->getChild(node, 0);

    //group graph pattern 77
    if (patternNode->getType(patternNode) != 77)
        return;

    grouppattern.addOneExistsGroupPattern();

    // id = position of the group just added inside the last exists-group list
    const int lastList = (int)grouppattern.filter_exists_grouppatterns.size() - 1;
    filter.exists_grouppattern_id = (int)grouppattern.filter_exists_grouppatterns[lastList].size() - 1;

    parseGroupPattern(patternNode, grouppattern.getLastExistsGroupPattern());
}
/*
 * Handle an ORDER BY node: each child of type 128 (order by condition)
 * contributes one ordering key. Within a condition:
 *   200 var   -> key is the node text
 *   190 unary -> key is the text one level down
 *   12  ascending  -> descending flag off
 *   49  descending -> descending flag on
 * The flag defaults to ascending.
 */
void QueryParser::parseOrderBy(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
    printf("parseOrderBy\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE condition = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        //order by condition
        if (condition->getType(condition) != 128)
            continue;

        string key;
        bool descending = false;

        for (unsigned int k = 0; k < condition->getChildCount(condition); k++)
        {
            pANTLR3_BASE_TREE part = (pANTLR3_BASE_TREE) condition->getChild(condition, k);

            switch (part->getType(part))
            {
                //var 200
                case 200:
                    parseString(part, key, 0);
                    break;

                //unary 190
                case 190:
                    parseString(part, key, 1);
                    break;

                //ascending 12
                case 12:
                    descending = false;
                    break;

                //descending 49
                case 49:
                    descending = true;
                    break;

                default:
                    break;
            }
        }

        querytree.addOrder(key, descending);
    }
}
/*
 * Extract text from the syntax tree into `str`.
 *   dep == 0: `str` becomes the text of `node` itself.
 *   otherwise: descend (dep - 1) levels along first children, then scan the
 *   children of the node reached:
 *     - a child of type 98 (custom language) has its text appended to `str`
 *     - any other child replaces `str` with its text; for type 170
 *       (single-quoted string literal) the surrounding quote characters are
 *       replaced by double quotes
 * Throws a const char* message if the descent runs off the tree or the node
 * reached has no children.
 */
void QueryParser::parseString(pANTLR3_BASE_TREE node, string& str, int dep)
{
    if (dep == 0)
    {
        str = (const char*) node->getText(node)->chars;
        return;
    }

    // stop one level above the requested depth; the text lives in the children
    for (; dep > 1 && node != NULL; dep--)
        node = (pANTLR3_BASE_TREE) node->getChild(node, 0);

    if (node == NULL || node->getChildCount(node) == 0)
        throw "Some errors are found in the SPARQL query request.";

    for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
    {
        pANTLR3_BASE_TREE piece = (pANTLR3_BASE_TREE) node->getChild(node, idx);
        const unsigned int pieceType = piece->getType(piece);

        //custom language 98
        if (pieceType == 98)
        {
            string languageTag;
            languageTag = (const char*) piece->getText(piece)->chars;
            str += languageTag;
        }
        //var 200 string literal 170(single quotation marks) 171(double quotation marks)
        //IRI 89 PNAME_LN 135
        else
        {
            str = (const char*) piece->getText(piece)->chars;
            if (pieceType == 170)
                str = "\"" + str.substr(1, str.length() - 2) + "\"";
        }
    }
}

View File

@ -1,46 +0,0 @@
/*
* QueryParser.h
*
* Created on: 2015-4-11
* Author: cjq
*/
#ifndef QUERYPARSER_H_
#define QUERYPARSER_H_
#include "../Util/Util.h"
#include "../Query/QueryTree.h"
#include "SparqlParser.h"
#include "SparqlLexer.h"
/*
 * QueryParser turns a SPARQL query string into a QueryTree.
 * sparqlParser() is the only entry point; the private helpers each handle
 * one kind of node of the ANTLR3 syntax tree.
 */
class QueryParser
{
public:
    QueryParser();

    /* parse `query` and fill `querytree` */
    void sparqlParser(const std::string& query, QueryTree& querytree);

private:
    /* prefix table filled by parsePrefix() and read by replacePrefix() */
    std::map<std::string,std::string> _prefix_map;

    /* dump the syntax tree, returns the number of nodes with token type 0 */
    int printNode(pANTLR3_BASE_TREE node, int dep = 0);

    /* query level */
    void parseTree(pANTLR3_BASE_TREE node, QueryTree& querytree);
    void parsePrologue(pANTLR3_BASE_TREE node);
    void parsePrefix(pANTLR3_BASE_TREE node);
    void replacePrefix(std::string& str);
    void parseSelectClause(pANTLR3_BASE_TREE node, QueryTree& querytree);
    void parseSelectVar(pANTLR3_BASE_TREE node, QueryTree& querytree);
    void parseOrderBy(pANTLR3_BASE_TREE node, QueryTree& querytree);

    /* group graph pattern level */
    void parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
    void parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
    void parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
    void parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);

    /* filter level */
    void parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
    void parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter);
    void parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTreeNode& filter, unsigned int begin);
    void parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter);

    /* text extraction */
    void parseString(pANTLR3_BASE_TREE node, std::string& str, int dep);
};
#endif /* QUERYPARSER_H_ */

View File

@ -1,28 +1,30 @@
#ifndef gstore_parser_RDFParser
#define gstore_parser_RDFParser
#include "TurtleParser.h"
#include "../Triple/Triple.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
using namespace std;
class RDFParser
{
private:
stringstream _sin;
TurtleParser _TurtleParser;
#include "../Util/Util.h"
#include "../Util/Triple.h"
#include "TurtleParser.h"
public:
static const int TRIPLE_NUM_PER_GROUP = 10 * 1000 * 1000;
using namespace std;
class RDFParser
{
private:
stringstream _sin;
TurtleParser _TurtleParser;
public:
static const int TRIPLE_NUM_PER_GROUP = 10 * 1000 * 1000;
//for parseString
RDFParser():_TurtleParser(_sin) {}
//for parseFile
RDFParser(ifstream& _fin):_TurtleParser(_fin) {}
string parseFile(TripleWithObjType* _triple_array, int& _triple_num);
string parseString(string _str, TripleWithObjType* _triple_array, int& _triple_num);
};
//for parseString
RDFParser():_TurtleParser(_sin){}
//for parseFile
RDFParser(ifstream& _fin):_TurtleParser(_fin){}
string parseFile(TripleWithObjType* _triple_array, int& _triple_num);
string parseString(string _str, TripleWithObjType* _triple_array, int& _triple_num);
};
#endif

View File

@ -1,4 +1,5 @@
#include "TurtleParser.h"
#include <sstream>
//---------------------------------------------------------------------------
// RDF-3X
// (c) 2008 Thomas Neumann. Web site: http://www.mpi-inf.mpg.de/~neumann/rdf3x

View File

@ -11,7 +11,10 @@
// San Francisco, California, 94105, USA.
//---------------------------------------------------------------------------
#include "Type.h"
#include "../Util/Util.h"
#include <istream>
#include <string>
#include <map>
#include <vector>
//---------------------------------------------------------------------------
/// Parse a turtle file
class TurtleParser

File diff suppressed because it is too large Load Diff

View File

@ -1,154 +1,71 @@
/*=============================================================================
# Filename: BasicQuery.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-31 19:18
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/
/*
* basicQuery.h
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#ifndef _QUERY_BASICQUERY_H
#define _QUERY_BASICQUERY_H
#ifndef BASICQUERY_H_
#define BASICQUERY_H_
#include "../Util/Util.h"
#include "../Util/Triple.h"
#include<iostream>
#include<string.h>
#include<stdio.h>
#include<stdlib.h>
#include<map>
#include<set>
#include<vector>
#include<algorithm>
#include "../Triple/Triple.h"
#include "../Signature/Signature.h"
#include "../KVstore/KVstore.h"
#include "IDList.h"
#include<sstream>
//NOTICE: the query graph must be linked
//var_id == -1: constant(string), entity or literal
//All constants should be dealed before joining tables!
//A var in query can be subject or object, and both. Once in subject,
//it cannot contain literal, while in object it may contain entity,
//literal, or both
//The vars not in join process are also encoded, so not -1
//
//a subject cannot be literal, but an object can be entity or literal
//not supported: ?v1 and this is a predicate
//pre_id == -1: the query graph is not valid and the result should be empty
//
//DEFINE:
//literal variable - no edge out(only occur in objects)
//(after we retrive all candidates from vstree, only entities are considered, the vars
//which only present in objects are possible to contain literals, so we must mark this!)
//free literal variable - a literal variable and all its neighbor id != -1
//(i.e. no constant neighbor which will restrict this variable, otherwise, we can acquire
//this var's can_list by querying in kvstore according to the constant and pre)
using namespace std;
//It is ok for var in select to be free var, but this can not be used as start point.
//(we assume candidates of the start point is all ok and then search deeply)
//However, we can always find a start point because not all vars are all in objects!
//(otherwise, no edge in query graph)
//
//What is more, some graphs will be regarded as not-connected, such as:
//A-c0-B, c0 is a constant, we should do a A x B here!
//two-part-matching, ABC and c1c2, each node connects with this two constants.
//(edge maybe different)
//+---------------------------------------------------------------------------+
//|elements in BasicQuery(all are strings) |
//|| |
//|+---constants(all need to be dealed before join) |
//|| | |
//|| +---literals(quoted in "") |
//|| | graph_var_num, the num of vars to join |
//|| +---entities(included in <>, prefix is allowed) | |
//|| | |
//|+---variables(all begin with '?') | |
//| | | |
//| +---selected vars(the former select_var_num ones) <<<<<<<<<<<<<<<<<<<+ |
//| | | |
//| +---not selected vars | |
//| | | |
//| +---degree > 1 <<<<<<<<<<<as bridge<<<<<<<<<<<<<<<<<<<<<<<<<<+ |
//| | | |
//| | +---exist in subjects(cannot be literal) |
//| | | |
//| | +---just in objects(all edges in, may include literals) |
//| | | |
//| | +---free(all neighbors are vars) |
//| | | |
//| | +---not-free(exist constant neighbors) |
//| | |
//| +---degree == 1(dealed after join) |
//| | |
//| +---subject |
//| | |
//| +---object |
//+---------------------------------------------------------------------------+
class PreVar
{
public:
string name;
vector<int> triples;
bool selected;
PreVar()
{
this->selected = false;
}
PreVar(string _name)
{
this->name = _name;
this->selected = false;
}
PreVar(string _name, bool _flag)
{
this->name = _name;
this->selected = _flag;
}
};
class BasicQuery
{
class BasicQuery{
private:
vector<string> option_vs;
vector<Triple> triple_vt;
// mapping from variables' name to their assigned id
/* mapping from variables' name to their assigned id. */
map<std::string, int> var_str2id;
// record each tuple's(subject, predicate, object) number of occurrences in this BasicQuery
/* record each tuple's(subject, predicate, object) number of occurrences in this BasicQuery. */
map<std::string, int> tuple2freq;
map<std::string, int> var_not_in_select;
// id < select_var_num means in select
int select_var_num;
// var_num is different from that in SPARQLquery
// because there are some variable not in select
int graph_var_num;
int total_var_num;
int retrieve_var_num;
/* var_num is different from that in SPARQLquery
* because there are some variable not in select */
int graph_var_num;
string* var_name;
IDList* candidate_list;
vector<int*> result_list;
int* var_degree;
int* var_degree;
//whether has added the variable's literal candidate
//bool* is_literal_candidate_added;
bool* ready;
//if need to be retrieved by vstree or generate when join(first is graph var)
bool* need_retrieve;
/* whether has added the variable's literal candidate */
bool* is_literal_candidate_added;
char encode_method;
// edge_id[var_id][i] : the line id of the i-th edge of the var
/* edge_id[var_id][i] : the line id of the i-th edge of the var */
int** edge_id;
// edge_id[var_id][i] : the neighbor id of the i-th edge of the var
/* edge_id[var_id][i] : the neighbor id of the i-th edge of the var */
int** edge_nei_id;
// edge_pre_id[var_id][i] : the preID of the i-th edge of the var
/* edge_pre_id[var_id][i] : the preID of the i-th edge of the var */
int** edge_pre_id;
// denote the type of edge, assigned with
// BasicQuery::IN or BasicQuery::OUT
// edge_type[var_id][i]
/* denote the type of edge, assigned with
* BasicQuery::IN or BasicQuery::OUT
* edge_type[var_id][i] */
char** edge_type;
EntityBitSet* var_sig;
// BETTER:edge sig is of little importance
// edge_sig[sub_id][obj_id]
/* edge_sig[sub_id][obj_id] */
EdgeBitSet** edge_sig;
void addInVarNotInSelect();
@ -157,127 +74,112 @@ private:
void initial();
void null_initial();
void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
//infos for predicate variables
vector<PreVar> pre_var;
public:
static const char EDGE_IN = 'i';
static const char EDGE_OUT= 'o';
static const int MAX_VAR_NUM = 10;
static const int MAX_PRE_VAR_NUM = 10;
static const char NOT_JUST_SELECT = 'a';
static const char SELECT_VAR = 's';
// _query is a SPARQL query string
/* _query is a SPARQL query string */
BasicQuery(const string _query="");
~BasicQuery();
void clear();
//get the number of variables which are in join
/* get the number of variables */
int getVarNum();
//get the number of selected variables
int getSelectVarNum();
//get the total number of variables
int getTotalVarNum();
//get the retrieved number of variables
int getRetrievedVarNum();
// get the name of _var in the query graph
/* get the name of _var in the query graph */
std::string getVarName(int _var);
// get triples number, also sentences number
/* get triples number, also sentences number */
int getTripleNum();
int getIDByVarName(const string& _name);
std::string to_str();
// get the ID of the i-th triple
/* get the ID of the i-th triple */
const Triple& getTriple(int _i_th_triple);
// get the ID of the i-th edge of _var
/* get the ID of the i-th edge of _var */
int getEdgeID(int _var, int _i_th_edge);
// get the ID of var, where the i-th edge of _var points to
/* get the ID of the neighbor var that the i-th edge of _var points to */
int getEdgeNeighborID(int _var, int _i_th_edge);
// get the preID of the i-th edge of _var
/* get the preID of the i-th edge of _var */
int getEdgePreID(int _var, int _i_th_edge);
// get the type of the i-th edge of _var
/* get the type of the i-th edge of _var */
char getEdgeType(int _var, int _i_th_edge);
//get the degree of _var in the query graph
/* get the degree of _var in the query graph */
int getVarDegree(int _var);
//get the index of edge between two var ids
int getEdgeIndex(int _id0, int _id);
/* */
const EntityBitSet& getVarBitSet(int _i)const;
// get the candidate list of _var in the query graph
/* get the candidate list of _var in the query graph */
IDList& getCandidateList(int _var);
int getCandidateSize(int _var);
// get the result list of _var in the query graph
/* get the result list of _var in the query graph */
vector<int*>& getResultList();
vector<int*>* getResultListPointer();
// get the entity signature of _var in the query graph
/* get the entity signature of _var in the query graph */
const EntityBitSet& getEntitySignature(int _var);
// check whether the i-th edge of _var is IN edge
/* check whether the i-th edge of _var is IN edge */
bool isInEdge(int _var, int _i_th_edge)const;
// check whether the i-th edge of _var is OUT edge
/* check whether the i-th edge of _var is OUT edge */
bool isOutEdge(int _var, int _i_th_edge)const;
bool isOneDegreeNotJoinVar(std::string& _not_select_var);
bool isOneDegreeNotSelectVar(std::string& _not_select_var);
// check whether _var may include some literal results
/* check whether _var may include some literal results */
bool isLiteralVariable(int _var);
// check whether _var is literal variable and do not have any entity neighbors
/* check whether _var is literal variable and do not have any entity neighbors */
bool isFreeLiteralVariable(int _var);
// check whether has added _var's literal candidates
//bool isAddedLiteralCandidate(int _var);
/* check whether has added _var's literal candidates */
bool isAddedLiteralCandidate(int _var);
// set _var's literal candidates has been added
//void setAddedLiteralCandidate(int _var);
/* set _var's literal candidates has been added */
void setAddedLiteralCandidate(int _var);
//check if need to be retrieved
bool if_need_retrieve(int _var);
bool isSatelliteInJoin(int _var);
//if the candidates of var(in join) is all ready
bool isReady(int _var) const;
void setReady(int _var);
private:
void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
// encode relative signature data of the query graph
bool encodeBasicQuery(KVstore* _p_kvstore, const std::vector<std::string>& _query_var);
struct ResultCmp;
struct ResultEqual;
unsigned getPreVarNum() const;
const PreVar& getPreVarByID(unsigned) const;
//int getIDByPreVarName(const std::string& _name) const;
int getPreVarID(const string& _name) const;
public:
/* encode relative signature data of the query graph */
void encodeBasicQuery(KVstore* _p_kvstore, const std::vector<std::string>& _query_var);
/* add triple */
void addTriple(const Triple& _triple);
/* print whole Basic query */
void print(ostream& _out_stream);
int getVarID_MinCandidateList();
int getVarID_MaxCandidateList();
int getVarID_FirstProcessWhenJoin();
// deprecated.
// static int cmp_result(const void* _a, const void* _b);
bool dupRemoval_invalidRemoval();
std::string candidate_str();
std::string result_str();
std::string triple_str();
};
#endif //_QUERY_BASICQUERY_H
#endif /* BASICQUERY_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -1,146 +0,0 @@
/*=============================================================================
# Filename: GeneralEvaluation.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:33
# Description:
=============================================================================*/
#ifndef _QUERY_GENERALEVALUATION_H
#define _QUERY_GENERALEVALUATION_H
//#include "../Database/Database.h"
#include "SPARQLquery.h"
#include "../VSTree/VSTree.h"
#include "../Database/Join.h"
#include "../Database/Strategy.h"
#include "../KVstore/KVstore.h"
#include "../Query/ResultSet.h"
#include "../Util/Util.h"
#include "../Parser/QueryParser.h"
#include "QueryTree.h"
#include "Varset.h"
#include "RegexExpression.h"
#include "ResultFilter.h"
// Evaluates a SPARQL query string against a VSTree / KVstore pair and writes
// the outcome into a caller-owned ResultSet.
// NOTE(review): the exact pipeline (parse -> plan -> evaluate) is inferred from
// the member names declared here; confirm against GeneralEvaluation.cpp.
class GeneralEvaluation
{
private:
QueryParser query_parser;
QueryTree query_tree;
SPARQLquery sparql_query;
std::vector <Varset> sparql_query_varset;
// non-owning as far as this header shows: both pointers are simply stored by the constructor
VSTree *vstree;
KVstore *kvstore;
// reference to the caller's result object; it must outlive this evaluator
ResultSet &result_set;
ResultFilter result_filter;
bool handle(SPARQLquery&);
public:
// Stores the two store pointers and binds result_set; all other members are default-constructed.
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, ResultSet &_result_set):
vstree(_vstree), kvstore(_kvstore), result_set(_result_set){}
std::vector<std::vector<std::string> > getSPARQLQueryVarset();
void doQuery(const std::string &_query);
bool parseQuery(const std::string &_query);
void getBasicQuery(QueryTree::GroupPattern &grouppattern);
// forward declaration; the full definition appears further down in this class
class FilterExistsGroupPatternResultSetRecord;
// One intermediate result table: a variable set plus rows stored as raw int arrays.
// NOTE(review): presumably each int* row holds one id per variable in `var` - confirm.
class TempResult
{
public:
Varset var;
std::vector<int*> res;
// NOTE(review): presumably frees the rows held in `res` - confirm in the .cpp
void release();
static int compareFunc(int *a, std::vector<int> &p, int *b, std::vector<int> &q);
void sort(int l, int r, std::vector<int> &p);
int findLeftBounder(std::vector<int> &p, int *b, std::vector<int> &q);
int findRightBounder(std::vector<int> &p, int *b, std::vector<int> &q);
void doJoin(TempResult &x, TempResult &r);
void doOptional(std::vector<bool> &binding, TempResult &x, TempResult &rn, TempResult &ra, bool add_no_binding);
void doUnion(TempResult &x, TempResult &rt, TempResult &rx);
void doMinus(TempResult &x, TempResult &r);
void doDistinct(TempResult &r);
void mapFilterTree2Varset(QueryTree::GroupPattern::FilterTreeNode& filter, Varset &v);
void doFilter(QueryTree::GroupPattern::FilterTreeNode &filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, TempResult &r, KVstore *kvstore);
void getFilterString(int* x, QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild &child, string &str, KVstore *kvstore);
bool matchFilterTree(int* x, QueryTree::GroupPattern::FilterTreeNode& filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, KVstore *kvstore);
void print();
};
// A collection of TempResult tables with set-level versions of the same operations.
class TempResultSet
{
public:
std::vector<TempResult> results;
void release();
int findCompatibleResult(Varset &_varset);
void doJoin(TempResultSet &x, TempResultSet &r);
void doOptional(TempResultSet &x, TempResultSet &r);
void doUnion(TempResultSet &x, TempResultSet &r);
void doMinus(TempResultSet &x, TempResultSet &r);
void doDistinct(Varset &projection, TempResultSet &r);
void doFilter(QueryTree::GroupPattern::FilterTreeNode& filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, TempResultSet &r, KVstore *kvstore);
void print();
};
// One step of the evaluation plan: a type tag plus an untyped payload pointer.
// NOTE(review): the meaning of each tag and the pointee type are not visible in this header.
class EvaluationUnit
{
private:
char type;
void * p;
public:
EvaluationUnit(char _type, void *_p = NULL):type(_type), p(_p){}
char getType()
{ return type; }
void * getPointer()
{ return p; }
};
std::vector<EvaluationUnit> semantic_evaluation_plan;
void generateEvaluationPlan(QueryTree::GroupPattern &grouppattern);
void dfsJoinableResultGraph(int x, vector < pair<char, int> > &node_info, vector < vector<int> > &edge, QueryTree::GroupPattern &grouppattern);
std::stack<TempResultSet*> semantic_evaluation_result_stack;
// Bookkeeping passed to the doFilter / matchFilterTree functions above.
// NOTE(review): presumably caches results of FILTER EXISTS group patterns - confirm.
class FilterExistsGroupPatternResultSetRecord
{
public:
std::vector<TempResultSet*> resultset;
std::vector< std::vector<Varset> > common;
std::vector< std::vector< std::pair< std::vector<int>, std::vector<int> > > > common2resultset;
} filter_exists_grouppattern_resultset_record;
int countFilterExistsGroupPattern(QueryTree::GroupPattern::FilterTreeNode& filter);
void doEvaluationPlan();
// One stack frame used by the expansion-based evaluation: a group pattern,
// its SPARQLquery and a pointer to its result set.
class ExpansionEvaluationStackUnit
{
public:
QueryTree::GroupPattern grouppattern;
SPARQLquery sparql_query;
TempResultSet* result;
};
std::vector <ExpansionEvaluationStackUnit> expansion_evaluation_stack;
bool expanseFirstOuterUnionGroupPattern(QueryTree::GroupPattern &grouppattern, std::deque<QueryTree::GroupPattern> &queue);
void queryRewriteEncodeRetrieveJoin(int dep, ResultFilter &result_filter);
void getFinalResult(ResultSet& result_str);
};
#endif // _QUERY_GENERALEVALUATION_H

View File

@ -1,13 +1,14 @@
/*=============================================================================
# Filename: IDList.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-05-08 12:44
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/
/*
* IDList.cpp
*
* Created on: 2014-7-2
* Author: liyouhuan
*/
#include "IDList.h"
#include<sstream>
#include "../util/util.h"
#include <algorithm>
using namespace std;
IDList::IDList()
@ -15,9 +16,10 @@ IDList::IDList()
this->id_list.clear();
}
//return the _i-th id of the list if _i exceeds, return -1
int
IDList::getID(int _i)const
/* return the _i-th id of the list
* if _i exceeds, return -1;
* */
int IDList::getID(int _i)const
{
if(this->size() > _i)
{
@ -26,32 +28,23 @@ IDList::getID(int _i)const
return -1;
}
bool
IDList::addID(int _id)
bool IDList::addID(int _id)
{
//a check for duplicate case will be more reliable
/* a check for duplicate case will be more reliable */
this->id_list.push_back(_id);
return true;
}
int
IDList::size()const
int IDList::size()const
{
return this->id_list.size();
}
bool
IDList::empty()const
{
return this->id_list.size() == 0;
}
bool
IDList::isExistID(int _id)const
bool IDList::isExistID(int _id)const
{
// naive implementation of searching(linear search).
// you can use binary search when the id list is sorted, if necessary.
for(unsigned i = 0; i < this->id_list.size(); i++)
for (int i=0;i<this->id_list.size();i++)
{
if (this->id_list[i] == _id)
{
@ -62,15 +55,13 @@ IDList::isExistID(int _id)const
return false;
}
const vector<int>*
IDList::getList()const
const std::vector<int>* IDList::getList()const
{
return &(this->id_list);
}
int&
IDList::operator[](const int& _i)
int& IDList::operator[](const int& _i)
{
if(this->size() > _i)
{
@ -79,157 +70,69 @@ IDList::operator[](const int& _i)
return id_list[0];
}
string
IDList::to_str()
std::string IDList::to_str()
{
std::stringstream _ss;
_ss << "size=" << this->id_list.size() << "";
for(unsigned i = 0; i < this->id_list.size(); i ++)
for(int i = 0; i < this->id_list.size(); i ++)
{
_ss << "\t[" << this->id_list[i] << "]";
}
return _ss.str();
}
int
IDList::sort()
int IDList::sort()
{
std::sort(id_list.begin(),id_list.end());
return 0;
}
void
IDList::clear()
void IDList::clear()
{
this->id_list.clear();
}
void
IDList::copy(const vector<int>& _new_idlist)
int IDList::intersectList(const int* _id_list, int _list_len)
{
this->id_list = _new_idlist;
}
void
IDList::copy(const IDList* _new_idlist)
{
this->id_list = *(_new_idlist->getList());
}
int
IDList::intersectList(const int* _id_list, int _list_len)
{
if(_id_list == NULL || _list_len == 0)
int id_i = 0;
int index_move_forward = 0;
std::vector<int>::iterator it = this->id_list.begin();
while(it != (this->id_list).end())
{
int remove_number = this->id_list.size();
this->id_list.clear();
return remove_number;
}
//when size is almost the same, intersect O(n)
//when one size is small ratio, search in the larger one O(mlogn)
//
//n>0 m=nk(0<k<1)
//compare n(k+1) and nklogn: k0 = log(n/2)2 requiring that n>2
//k<=k0 binary search; k>k0 intersect
int method = -1; //0: intersect 1: search in vector 2: search in int*
int n = this->id_list.size();
double k = 0;
if(n < _list_len)
{
k = (double)n / (double)_list_len;
n = _list_len;
method = 2;
}
else
{
k = (double)_list_len / (double)n;
method = 1;
}
if(n <= 2)
method = 0;
else
{
double limit = Util::logarithm(n/2, 2);
if(k > limit)
method = 0;
}
int remove_number = 0;
switch(method)
{
case 0:
{ //this bracket is needed if vars are defined in case
int id_i = 0;
int index_move_forward = 0;
vector<int>::iterator it = this->id_list.begin();
while(it != (this->id_list).end())
int can_id = *it;
while( (id_i < _list_len) && (_id_list[id_i] < can_id) )
{
int can_id = *it;
while((id_i < _list_len) && (_id_list[id_i] < can_id))
{
id_i ++;
}
if(id_i == _list_len)
{
break;
}
if(can_id == _id_list[id_i])
{
(this->id_list)[index_move_forward] = can_id;
index_move_forward ++;
id_i ++;
}
it ++;
id_i ++;
}
remove_number = this->id_list.size() - index_move_forward;
vector<int>::iterator new_end = this->id_list.begin() + index_move_forward;
(this->id_list).erase(new_end, this->id_list.end());
break;
}
case 1:
{
vector<int> new_id_list;
for(int i = 0; i < _list_len; ++i)
if(id_i == _list_len){
break;
}
if(can_id == _id_list[id_i])
{
if(Util::bsearch_vec_uporder(_id_list[i], this->getList()) != -1)
new_id_list.push_back(_id_list[i]);
(this->id_list)[index_move_forward] = can_id;
index_move_forward ++;
id_i ++;
}
this->id_list = new_id_list;
remove_number = n - this->id_list.size();
break;
}
case 2:
{
vector<int> new_id_list;
int m = this->id_list.size(), i;
for(i = 0; i < m; ++i)
{
if(Util::bsearch_int_uporder(this->id_list[i], _id_list, _list_len) != -1)
new_id_list.push_back(this->id_list[i]);
}
this->id_list = new_id_list;
remove_number = m - this->id_list.size();
break;
}
default:
cerr << "no such method in IDList::intersectList()" << endl;
break;
it ++;
}
int remove_number = this->id_list.size() - index_move_forward;
std::vector<int>::iterator new_end =
this->id_list.begin() + index_move_forward;
(this->id_list).erase(new_end, this->id_list.end());
return remove_number;
}
int
IDList::intersectList(const IDList& _id_list)
int IDList::intersectList(const IDList& _id_list)
{
// copy _id_list to the temp array first.
int temp_list_len = _id_list.size();
int* temp_list = new int[temp_list_len];
//BETTER:not to copy, just achieve here
for (int i = 0; i < temp_list_len; i ++)
{
temp_list[i] = _id_list.getID(i);
@ -237,27 +140,12 @@ IDList::intersectList(const IDList& _id_list)
int remove_number = this->intersectList(temp_list, temp_list_len);
delete []temp_list;
return remove_number;
}
int
IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
int IDList::unionList(const int* _id_list, int _list_len)
{
if(_id_list == NULL || _list_len == 0)
return 0;
if(only_literal)
{
//NOTICE:this means that the original is no literals and we need to add from a list(containing entities/literals)
int k = 0;
//NOTICE:literal id > entity id; the list is ordered
for(; k < _list_len; ++k)
if(Util::is_literal_ele(_id_list[k]))
break;
for(; k < _list_len; ++k)
this->addID(_id_list[k]);
return _list_len - k;
}
// O(n)
int origin_size = (this->id_list).size();
int* temp_list = new int[origin_size + _list_len];
@ -344,129 +232,27 @@ IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
*/
}
int
IDList::unionList(const IDList& _id_list, bool only_literal)
int IDList::unionList(const IDList& _id_list)
{
// copy _id_list to the temp array first.
int temp_list_len = _id_list.size();
int* temp_list = new int[temp_list_len];
//BETTER:not to copy, just achieve here
for(int i = 0; i < temp_list_len; i ++)
for (int i = 0; i < temp_list_len; i ++)
{
temp_list[i] = _id_list.getID(i);
}
int ret = this->unionList(temp_list, temp_list_len, only_literal);
delete[] temp_list;
return ret;
return this->unionList(temp_list, temp_list_len);
}
IDList*
IDList::intersect(const IDList& _id_list, const int* _list, int _len)
int IDList::erase(int i)
{
IDList* p = new IDList;
if(_list == NULL || _len == 0) //just copy _id_list
{
int size = _id_list.size();
for(int i = 0; i < size; ++i)
p->addID(_id_list.getID(i));
return p;
}
//when size is almost the same, intersect O(n)
//when one size is small ratio, search in the larger one O(mlogn)
//
//n>0 m=nk(0<k<1)
//compare n(k+1) and nklogn: k0 = log(n/2)2 requiring that n>2
//k<=k0 binary search; k>k0 intersect
int method = -1; //0: intersect 1: search in vector 2: search in int*
int n = _id_list.size();
double k = 0;
if(n < _len)
{
k = (double)n / (double)_len;
n = _len;
method = 2;
}
else
{
k = (double)_len / (double)n;
method = 1;
}
if(n <= 2)
method = 0;
else
{
double limit = Util::logarithm(n/2, 2);
if(k > limit)
method = 0;
}
int remove_number = 0;
switch(method)
{
case 0:
{ //this bracket is needed if vars are defined in case
int id_i = 0;
int num = _id_list.size();
for(int i = 0; i < num; ++i)
{
int can_id = _id_list.getID(i);
while((id_i < _len) && (_list[id_i] < can_id))
{
id_i ++;
}
if(id_i == _len)
{
break;
}
if(can_id == _list[id_i])
{
p->addID(can_id);
id_i ++;
}
}
break;
}
case 1:
{
for(int i = 0; i < _len; ++i)
{
if(Util::bsearch_vec_uporder(_list[i], _id_list.getList()) != -1)
p->addID(_list[i]);
}
break;
}
case 2:
{
int m = _id_list.size(), i;
for(i = 0; i < m; ++i)
{
int t = _id_list.getID(i);
if(Util::bsearch_int_uporder(t, _list, _len) != -1)
p->addID(t);
}
break;
}
default:
cerr << "no such method in IDList::intersectList()" << endl;
break;
}
return p;
}
int
IDList::erase(int i)
{
id_list.erase(id_list.begin()+i, id_list.end());
id_list.erase(id_list.begin()+i,id_list.end());
return 0;
}
int
IDList::bsearch_uporder(int _key)
{
return Util::bsearch_vec_uporder(_key, this->getList());
}
int IDList::bsearch_uporder(int _key)
{
return util::bsearch_vec_uporder(_key, this->id_list);
}

View File

@ -1,46 +1,40 @@
/*=============================================================================
# Filename: IDList.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-23 15:03
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/
/*
* CandidateList.h
*
* Created on: 2014-7-2
* Author: liyouhuan
*/
#include<iostream>
#include<vector>
#include "../Util/Util.h"
#ifndef IDLIST_H_
#define IDLIST_H_
#ifndef _QUERY_IDLIST_H
#define _QUERY_IDLIST_H
class IDList
{
class IDList{
public:
IDList();
int getID(int _i)const;
bool addID(int _id);
//check whether _id exists in this IDList.
bool isExistID(int _id) const;
int size() const;
bool empty() const;
/* check whether _id exists in this IDList. */
bool isExistID(int _id)const;
int size()const;
const std::vector<int>* getList()const;
int& operator[] (const int & _i);
std::string to_str();
int sort();
void clear();
void copy(const std::vector<int>& _new_idlist);
void copy(const IDList* _new_idlist);
// intersect/union _id_list to this IDList, note that the two list must be ordered before using these two functions.
/* intersect/union _id_list to this IDList, note that the two list must be ordered before using these two functions. */
int intersectList(const int* _id_list, int _list_len);
int intersectList(const IDList&);
int unionList(const int* _id_list, int _list_len, bool only_literal=false);
int unionList(const IDList&, bool only_literal=false);
int unionList(const int* _id_list, int _list_len);
int unionList(const IDList&);
int bsearch_uporder(int _key);
static IDList* intersect(const IDList&, const int*, int);
private:
std::vector<int> id_list;
int erase(int i);
};
#endif //_QUERY_IDLIST_H
#endif /* IDLIST_H_ */

View File

@ -1,489 +0,0 @@
/*=============================================================================
# Filename: QueryTree.cpp
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Description: implement functions in QueryTree.h
=============================================================================*/
#include "QueryTree.h"
using namespace std;
// Gather into varset every variable referenced in this filter subtree.
// A child of type 's' carries a string argument (added when it starts with '?');
// a child of type 't' carries a nested filter node, which is visited recursively.
void QueryTree::GroupPattern::FilterTreeNode::getVarset(Varset &varset)
{
	const int child_num = (int)this->child.size();
	for (int idx = 0; idx < child_num; ++idx)
	{
		if (this->child[idx].type == 't')
		{
			this->child[idx].node.getVarset(varset);
		}
		else if (this->child[idx].type == 's' && this->child[idx].arg[0] == '?')
		{
			varset.addVar(this->child[idx].arg);
		}
	}
}
// Print this filter expression to stdout.
//   exist_grouppatterns : group patterns referenced by EXISTS nodes, indexed by exists_grouppattern_id
//   dep                 : depth value forwarded to GroupPattern::print
// IN and EXISTS have their own layout; every other node is printed as
// <prefix>(<child0><infix><child1>[, <child2>]).
void QueryTree::GroupPattern::FilterTreeNode::print(vector<GroupPattern> &exist_grouppatterns, int dep)
{
	typedef QueryTree::GroupPattern::FilterTreeNode Self;
	const int child_num = (int)this->child.size();

	// "x in (a , b , ...)"
	if (this->type == Self::Builtin_in_type)
	{
		if (this->child[0].type == 's')
			printf("%s", this->child[0].arg.c_str());
		printf(" in (");
		for (int idx = 1; idx < child_num; ++idx)
		{
			if (idx > 1)
				printf(" , ");
			if (this->child[idx].type == 's')
				printf("%s", this->child[idx].arg.c_str());
		}
		printf(")");
		return;
	}

	// "exists { ... }"
	if (this->type == Self::Builtin_exists_type)
	{
		printf("exists");
		exist_grouppatterns[this->exists_grouppattern_id].print(dep);
		return;
	}

	// operator / builtin name written before the opening parenthesis
	const char *prefix = "";
	if (this->type == Self::Not_type)
		prefix = "!";
	else if (this->type == Self::Builtin_regex_type)
		prefix = "regex";
	else if (this->type == Self::Builtin_lang_type)
		prefix = "lang";
	else if (this->type == Self::Builtin_langmatches_type)
		prefix = "langmatches";
	else if (this->type == Self::Builtin_bound_type)
		prefix = "bound";
	printf("%s", prefix);

	printf("(");

	if (child_num >= 1)
	{
		if (this->child[0].type == 's')
			printf("%s", this->child[0].arg.c_str());
		if (this->child[0].type == 't')
			this->child[0].node.print(exist_grouppatterns, dep);
	}

	// separator written between the first and the second operand
	const char *infix = "";
	if (this->type == Self::Or_type)
		infix = " || ";
	else if (this->type == Self::And_type)
		infix = " && ";
	else if (this->type == Self::Equal_type)
		infix = " = ";
	else if (this->type == Self::NotEqual_type)
		infix = " != ";
	else if (this->type == Self::Less_type)
		infix = " < ";
	else if (this->type == Self::LessOrEqual_type)
		infix = " <= ";
	else if (this->type == Self::Greater_type)
		infix = " > ";
	else if (this->type == Self::GreaterOrEqual_type)
		infix = " >= ";
	else if (this->type == Self::Builtin_regex_type || this->type == Self::Builtin_langmatches_type)
		infix = ", ";
	printf("%s", infix);

	if (child_num >= 2)
	{
		if (this->child[1].type == 's')
			printf("%s", this->child[1].arg.c_str());
		if (this->child[1].type == 't')
			this->child[1].node.print(exist_grouppatterns, dep);
	}

	// regex may carry a third string argument
	if (child_num >= 3 && this->type == Self::Builtin_regex_type && this->child[2].type == 's')
	{
		printf(", %s", this->child[2].arg.c_str());
	}

	printf(")");
}
//----------------------------------------------------------------------------------------------------------------------------------------------------
// Append a copy of _pattern to the end of this group pattern's pattern list.
void QueryTree::GroupPattern::addOnePattern(Pattern _pattern)
{
	patterns.push_back(_pattern);
}
// Open a new union group, tagged with the index of the last pattern added so far
// (-1 when there is no pattern yet).
void QueryTree::GroupPattern::addOneGroupUnion()
{
	const int last_pattern_idx = (int)this->patterns.size() - 1;
	this->unions.push_back(GroupPatternUnions(last_pattern_idx));
}
// Append an empty branch to the most recently added union group.
// At least one union group must already exist.
void QueryTree::GroupPattern::addOneUnion()
{
	this->unions.back().grouppattern_vec.push_back(GroupPattern());
}
// Return the last branch of the most recently added union group.
// Both the union list and that group's branch list must be non-empty.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastUnion()
{
	return this->unions.back().grouppattern_vec.back();
}
// Append a new entry of kind _type to the optionals list, remembering how many
// patterns and union groups precede it (as the index of the last one, -1 if none).
void QueryTree::GroupPattern::addOneOptionalOrMinus(char _type)
{
	const int last_pattern_idx = (int)this->patterns.size() - 1;
	const int last_unions_idx = (int)this->unions.size() - 1;
	this->optionals.push_back(OptionalOrMinusGroupPattern(last_pattern_idx, last_unions_idx, _type));
}
// Return the group pattern of the most recently added entry in the optionals list.
// The list must be non-empty.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastOptionalOrMinus()
{
	return this->optionals.back().grouppattern;
}
void QueryTree::GroupPattern::addOneFilterTree()
{
this->filters.push_back(FilterTreeRoot());
this->filter_exists_grouppatterns.push_back(vector<GroupPattern>());
}
// Return the root node of the most recently added filter tree.
// The filter list must be non-empty.
QueryTree::GroupPattern::FilterTreeNode& QueryTree::GroupPattern::getLastFilterTree()
{
	return this->filters.back().root;
}
// Append an empty group pattern to the EXISTS list of the most recently added filter.
// At least one filter must already exist.
void QueryTree::GroupPattern::addOneExistsGroupPattern()
{
	this->filter_exists_grouppatterns.back().push_back(GroupPattern());
}
// Return the last EXISTS group pattern of the most recently added filter.
// Both the outer list and that filter's list must be non-empty.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastExistsGroupPattern()
{
	return this->filter_exists_grouppatterns.back().back();
}
void QueryTree::GroupPattern::getVarset()
{
for (int i = 0; i < (int)this->patterns.size(); i++)
{
if (this->patterns[i].subject.value[0] == '?')
this->patterns[i].varset.addVar(this->patterns[i].subject.value);
if (this->patterns[i].object.value[0] == '?')
this->patterns[i].varset.addVar(this->patterns[i].object.value);
this->grouppattern_resultset_minimal_varset = this->grouppattern_resultset_minimal_varset + this->patterns[i].varset;
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->patterns[i].varset;
}
for (int i = 0; i < (int)this->unions.size(); i++)
{
Varset minimal_varset;
for (int j = 0; j < (int)this->unions[i].grouppattern_vec.size(); j++)
{
this->unions[i].grouppattern_vec[j].getVarset();
if (j == 0) minimal_varset = minimal_varset + this->unions[i].grouppattern_vec[j].grouppattern_resultset_minimal_varset;
else minimal_varset = minimal_varset * this->unions[i].grouppattern_vec[j].grouppattern_resultset_minimal_varset;
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->unions[i].grouppattern_vec[j].grouppattern_resultset_maximal_varset;
}
this->grouppattern_resultset_minimal_varset = this->grouppattern_resultset_minimal_varset + minimal_varset;
}
for (int i = 0; i < (int)this->optionals.size(); i++)
{
this->optionals[i].grouppattern.getVarset();
if (this->optionals[i].type == 'o')
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->optionals[i].grouppattern.grouppattern_resultset_maximal_varset;
}
for (int i = 0; i < (int)this->filters.size(); i++)
{
this->filters[i].root.getVarset(this->filters[i].varset);
}
for(int i = 0; i < (int)this->filter_exists_grouppatterns.size(); i++)
for (int j = 0; j < (int)this->filter_exists_grouppatterns[i].size(); j++)
{
this->filter_exists_grouppatterns[i][j].getVarset();
}
}
bool QueryTree::GroupPattern::checkOnlyUnionOptionalFilterNoExists()
{
for (int i = 0; i < (int)this->unions.size(); i++)
{
for (int j = 0; j < (int)this->unions[i].grouppattern_vec.size(); j++)
if (!this->unions[i].grouppattern_vec[j].checkOnlyUnionOptionalFilterNoExists())
return false;
}
for (int i = 0; i < (int)this->optionals.size(); i++)
{
if (this->optionals[i].type != 'o')
return false;
if (!this->optionals[i].grouppattern.checkOnlyUnionOptionalFilterNoExists())
return false;
}
for (int i = 0; i < (int)this->filter_exists_grouppatterns.size(); i++)
if ((int)this->filter_exists_grouppatterns[i].size() != 0)
return false;
return true;
}
// Walk the triple patterns, union groups and optionals-list entries of this group
// pattern in their recorded order, accumulating two variable sets:
//   occur - variables seen so far (passed in by value, extended locally)
//   ban   - variables that later triple patterns must not share (passed in by value)
// check_condition is cleared (set to false) when
//   - a triple pattern shares a variable with ban,
//   - occur shares a variable with the ban set returned by a nested optionals entry, or
//   - a filter's varset does not "belongTo" occur.
//     NOTE(review): belongTo is presumably a subset test - confirm in Varset.
// If check_condition is already false on entry, a pair of empty sets is returned at once.
pair<Varset, Varset> QueryTree::GroupPattern::checkOptionalGroupPatternVarsAndSafeFilter(Varset occur , Varset ban, bool &check_condition)
//returns (occur, this_ban): the extended occur set and the ban set produced by this group pattern
{
if (!check_condition) return make_pair(Varset(), Varset());
Varset this_ban;
// indices of the last element of each kind that has been processed (-1 = none yet)
int lastpattern = -1, lastunions = -1, lastoptional = -1;
while (check_condition && (lastpattern + 1 < (int)this->patterns.size() || lastunions + 1 < (int)this->unions.size() || lastoptional + 1 < (int)this->optionals.size()))
{
if (lastoptional + 1 < (int)this->optionals.size() && this->optionals[lastoptional + 1].lastpattern == lastpattern && this->optionals[lastoptional + 1].lastunions == lastunions)
//next element is an optionals-list entry: its recorded position matches what has been processed so far
{
// the nested pattern starts with an empty occur set but inherits the current ban set
pair<Varset, Varset> sub_grouppattern_return_varset = this->optionals[lastoptional + 1].grouppattern.checkOptionalGroupPatternVarsAndSafeFilter(Varset(), ban, check_condition);
if (occur.hasCommonVar(sub_grouppattern_return_varset.second))
check_condition = false;
// variables of the nested pattern's maximal varset that have not occurred outside it so far
Varset out = this->optionals[lastoptional + 1].grouppattern.grouppattern_resultset_maximal_varset - occur;
occur = occur + sub_grouppattern_return_varset.first;
this_ban = this_ban + sub_grouppattern_return_varset.second;
this_ban = this_ban + out;
ban = ban + this_ban;
lastoptional++;
}
else if (lastunions + 1 < (int)this->unions.size() && this->unions[lastunions + 1].lastpattern == lastpattern)
//next element is a union group
{
Varset sub_grouppattern_occur, sub_grouppattern_ban;
for (int i = 0; i < (int)this->unions[lastunions + 1].grouppattern_vec.size(); i++)
{
pair<Varset, Varset> sub_grouppattern_result = this->unions[lastunions + 1].grouppattern_vec[i].checkOptionalGroupPatternVarsAndSafeFilter(occur, ban, check_condition);
// occur sets of the branches are combined with '*' (first branch seeds the set); ban sets with '+'
if (i == 0)
sub_grouppattern_occur = sub_grouppattern_occur + sub_grouppattern_result.first;
else
sub_grouppattern_occur = sub_grouppattern_occur * sub_grouppattern_result.first;
sub_grouppattern_ban = sub_grouppattern_ban + sub_grouppattern_result.second;
}
occur = occur + sub_grouppattern_occur;
this_ban = this_ban + sub_grouppattern_ban;
ban = ban + this_ban;
lastunions++;
}
else
//next element is a triple pattern
{
// a triple pattern must not use any banned variable
if (this->patterns[lastpattern + 1].varset.hasCommonVar(ban))
check_condition = false;
occur = occur + this->patterns[lastpattern + 1].varset;
lastpattern++;
}
}
//filters: every filter's varset is checked against occur after the whole pattern has been walked
for (int i = 0; i < (int)this->filters.size(); i++)
if (!this->filters[i].varset.belongTo(occur))
{
check_condition = false;
break;
}
return make_pair(occur, this_ban);
}
// Appends one entry per triple pattern to pattern_blockid, each holding its own
// index, so that every pattern starts out as the root of its own block.
void QueryTree::GroupPattern::initPatternBlockid()
{
    const int pattern_count = (int)this->patterns.size();
    int idx = 0;
    while (idx < pattern_count)
    {
        this->pattern_blockid.push_back(idx);
        ++idx;
    }
}
// Returns the root block id of pattern x by following pattern_blockid links
// until an entry that points at itself is reached. Every entry visited on the
// way is re-pointed directly at that root.
int QueryTree::GroupPattern::getRootPatternBlockid(int x)
{
    // Pass 1: locate the root.
    int root = x;
    while (this->pattern_blockid[root] != root)
        root = this->pattern_blockid[root];

    // Pass 2: re-point every entry on the walked path at the root.
    int cur = x;
    while (cur != root)
    {
        const int next = this->pattern_blockid[cur];
        this->pattern_blockid[cur] = root;
        cur = next;
    }
    return root;
}
// Joins the block containing pattern x with the block containing pattern y by
// making x's root point at y's root.
void QueryTree::GroupPattern::mergePatternBlockid(int x, int y)
{
    const int root_of_x = this->getRootPatternBlockid(x);
    const int root_of_y = this->getRootPatternBlockid(y);
    this->pattern_blockid[root_of_x] = root_of_y;
}
void QueryTree::GroupPattern::print(int dep)
{
for (int t = 0; t < dep; t++) printf("\t"); printf("{\n");
int lastpattern = -1, lastunions = -1, lastoptional = -1;
while (lastpattern + 1 < (int)this->patterns.size() || lastunions + 1 < (int)this->unions.size() || lastoptional + 1 < (int)this->optionals.size())
{
if (lastoptional + 1 < (int)this->optionals.size() && this->optionals[lastoptional + 1].lastpattern == lastpattern && this->optionals[lastoptional + 1].lastunions == lastunions)
//optional
{
for (int t = 0; t <= dep; t++) printf("\t");
if (this->optionals[lastoptional + 1].type == 'o') printf("OPTIONAL\n");
if (this->optionals[lastoptional + 1].type == 'm') printf("MINUS\n");
this->optionals[lastoptional + 1].grouppattern.print(dep + 1);
lastoptional++;
}
else if (lastunions + 1 < (int)this->unions.size() && this->unions[lastunions + 1].lastpattern == lastpattern)
//union
{
for (int i = 0; i < (int)this->unions[lastunions + 1].grouppattern_vec.size(); i++)
{
if (i != 0)
{
for (int t = 0; t <= dep; t++) printf("\t"); printf("UNION\n");
}
this->unions[lastunions + 1].grouppattern_vec[i].print(dep + 1);
}
lastunions++;
}
else
//triple pattern
{
for (int t = 0; t <= dep; t++) printf("\t");
printf("%s\t%s\t%s.\n", this->patterns[lastpattern + 1].subject.value.c_str(), this->patterns[lastpattern + 1].predicate.value.c_str(), this->patterns[lastpattern + 1].object.value.c_str());
lastpattern++;
}
}
//filter
for (int i = 0; i < (int)this->filters.size(); i++)
{
for (int t = 0; t <= dep; t++) printf("\t"); printf("FILTER\t");
this->filters[i].root.print(this->filter_exists_grouppatterns[i], dep + 1);
printf("\n");
}
for (int t = 0; t < dep; t++) printf("\t"); printf("}\n");
}
//----------------------------------------------------------------------------------------------------------------------------------------------------
// Stores the query form of this query.
void QueryTree::setQueryForm(QueryForm _queryform)
{
    query_form = _queryform;
}
// Returns the stored query form.
QueryTree::QueryForm QueryTree::getQueryForm()
{
    return query_form;
}
// Stores the projection modifier of this query.
void QueryTree::setProjectionModifier(ProjectionModifier _projection_modifier)
{
    this->projection_modifier = _projection_modifier;
}
// Returns the stored projection modifier.
QueryTree::ProjectionModifier QueryTree::getProjectionModifier()
{
    return projection_modifier;
}
// Adds one variable name to the projection Varset.
void QueryTree::addProjectionVar(string _projection)
{
    Varset &target = this->projection;
    target.addVar(_projection);
}
int QueryTree::getProjectionNum()
{
return (int)this->projection.varset.size();
}
// Returns a mutable reference to the projection Varset.
Varset& QueryTree::getProjection()
{
    return projection;
}
void QueryTree::setProjectionAsterisk()
{
this->projection_asterisk = true;
}
bool QueryTree::checkProjectionAsterisk()
{
return this->projection_asterisk;
}
// Appends one ordering key (variable name plus direction) to the order list.
void QueryTree::addOrder(string &_var, bool _descending)
{
    Order key(_var, _descending);
    order.push_back(key);
}
// Returns a mutable reference to the list of ordering keys.
vector<QueryTree::Order>& QueryTree::getOrder()
{
    return order;
}
void QueryTree::setOffset(int _offset)
{
this->offset = _offset;
}
int QueryTree::getOffset()
{
return this->offset;
}
void QueryTree::setLimit(int _limit)
{
this->limit = _limit;
}
int QueryTree::getLimit()
{
return this->limit;
}
// Returns a mutable reference to the top-level group pattern.
QueryTree::GroupPattern& QueryTree::getGroupPattern()
{
    return grouppattern;
}
bool QueryTree::checkWellDesigned()
{
if (!this->getGroupPattern().checkOnlyUnionOptionalFilterNoExists())
return false;
bool check_condition = true;
this->getGroupPattern().checkOptionalGroupPatternVarsAndSafeFilter(Varset(), Varset(), check_condition);
return check_condition;
}
void QueryTree::print()
{
for (int j = 0; j < 80; j++) printf("="); printf("\n");
if (this->getQueryForm() == QueryTree::Select_Query)
{
printf("select");
if (this->getProjectionModifier() == QueryTree::Modifier_Distinct)
printf(" distinct");
printf("\n");
printf("var is : \t");
vector <string> &varvec = this->getProjection().varset;
for (int i = 0; i < (int)varvec.size(); i++)
printf("%s\t", varvec[i].c_str());
if (this->checkProjectionAsterisk())
printf("*");
printf("\n");
}
else printf("ask\n");
this->getGroupPattern().print(0);
if ((int)this->getOrder().size() > 0)
{
printf("order by : \t");
vector<QueryTree::Order>&order = this->getOrder();
for (int i = 0; i < (int)order.size(); i++)
{
if (!order[i].descending) printf("ASC(");
else printf("DESC(");
printf("%s) ", order[i].var.c_str());
}
printf("\n");
}
if (this->getOffset() != 0)
printf("offset : %d\n", this->getOffset());
if (this->getLimit() != -1)
printf("limit : %d\n", this->getLimit());
for (int j = 0; j < 80; j++) printf("="); printf("\n");
}

View File

@ -1,200 +0,0 @@
/*=============================================================================
# Filename: QueryTree.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Description:
=============================================================================*/
#ifndef _QUERY_QUERYTREE_H
#define _QUERY_QUERYTREE_H
#include "../Util/Util.h"
#include "Varset.h"
// In-memory representation of a parsed query: the query form, the projection
// (variables and/or asterisk), ordering keys, offset/limit and the top-level
// group pattern with its nested unions, optionals and filters.
class QueryTree
{
public:
// Defaults: SELECT query, no projection modifier, no asterisk, offset 0, limit -1.
QueryTree():
query_form(Select_Query), projection_modifier(Modifier_None), projection_asterisk(false), offset(0), limit(-1){}
enum QueryForm {Select_Query, Ask_Query};
enum ProjectionModifier {Modifier_None, Modifier_Distinct, Modifier_Reduced, Modifier_Count, Modifier_Duplicates};
// One group graph pattern: triple patterns plus nested unions, optionals
// and filters. The relative order of these items is not stored in a single
// list; each union/optional records how many earlier items preceded it.
class GroupPattern
{
public:
// A single triple pattern (subject, predicate, object).
class Pattern
{
public:
// One position of a triple pattern, kept as its textual value.
class Element
{
public:
/*
enum Type { Variable, Literal, IRI };
enum SubType { None, CustomLanguage, CustomType };
Type type;
SubType subType;
std::string subTypeValue;
*/
std::string value;
Element(const std::string& _value):
value(_value){}
};
Element subject, predicate, object;
// Variable set of this pattern; not filled by the constructor below.
Varset varset;
Pattern(const Element _subject, const Element _predicate,const Element _object):subject(_subject), predicate(_predicate), object(_object){}
};
// Nested types are only declared here and defined after GroupPattern,
// because they themselves contain GroupPattern objects.
class GroupPatternUnions;
class OptionalOrMinusGroupPattern;
class FilterTreeNode;
class FilterTreeRoot;
std::vector<Pattern> patterns;
std::vector<GroupPatternUnions> unions;
std::vector<OptionalOrMinusGroupPattern> optionals;
std::vector<FilterTreeRoot> filters;
// Indexed by filter position in print(): filter i is printed with entry i.
std::vector<std::vector<GroupPattern> > filter_exists_grouppatterns;
Varset grouppattern_resultset_minimal_varset, grouppattern_resultset_maximal_varset;
// Parent links used by initPatternBlockid / getRootPatternBlockid /
// mergePatternBlockid to group patterns into blocks.
std::vector<int> pattern_blockid;
void addOnePattern(Pattern _pattern);
void addOneGroupUnion();
void addOneUnion();
GroupPattern& getLastUnion();
void addOneOptionalOrMinus(char _type);
GroupPattern& getLastOptionalOrMinus();
void addOneFilterTree();
FilterTreeNode& getLastFilterTree();
void addOneExistsGroupPattern();
GroupPattern& getLastExistsGroupPattern();
void getVarset();
bool checkOnlyUnionOptionalFilterNoExists();
std::pair<Varset, Varset> checkOptionalGroupPatternVarsAndSafeFilter(Varset occur , Varset ban, bool &check_condition);
void initPatternBlockid();
int getRootPatternBlockid(int x);
void mergePatternBlockid(int x, int y);
void print(int dep);
};
// One UNION: the list of alternative group patterns, plus the index of the
// last triple pattern that precedes it in its parent group.
class GroupPattern::GroupPatternUnions
{
public:
std::vector<GroupPattern> grouppattern_vec;
int lastpattern;
GroupPatternUnions(int _lastpattern):
lastpattern(_lastpattern){}
};
// One OPTIONAL or MINUS sub-group. `type` is 'o' (printed as OPTIONAL) or
// 'm' (printed as MINUS); lastpattern/lastunions are the indices of the last
// triple pattern and last union that precede it in its parent group.
class GroupPattern::OptionalOrMinusGroupPattern
{
public:
GroupPattern grouppattern;
int lastpattern, lastunions;
char type;
OptionalOrMinusGroupPattern(int _lastpattern, int _lastunions, char _type):
grouppattern(GroupPattern()), lastpattern(_lastpattern), lastunions(_lastunions), type(_type){}
};
// One node of a FILTER expression tree.
class GroupPattern::FilterTreeNode
{
public:
enum FilterType
{
None_type, Or_type, And_type, Equal_type, NotEqual_type, Less_type, LessOrEqual_type, Greater_type, GreaterOrEqual_type,
Plus_type, Minus_type, Mul_type, Div_type, Not_type, UnaryPlus_type, UnaryMinus_type, Literal_type, Variable_type, IRI_type,
Function_type, ArgumentList_type,Builtin_str_type, Builtin_lang_type, Builtin_langmatches_type, Builtin_datatype_type, Builtin_bound_type,
Builtin_sameterm_type,Builtin_isiri_type, Builtin_isblank_type, Builtin_isliteral_type, Builtin_regex_type, Builtin_in_type, Builtin_exists_type
};
FilterType type;
class FilterTreeChild;
std::vector<FilterTreeChild> child;
// Defaults to -1; presumably an index into the EXISTS group pattern list
// handed to print() - confirm against the implementation.
int exists_grouppattern_id;
FilterTreeNode():
type(None_type), exists_grouppattern_id(-1){}
void getVarset(Varset &varset);
void print(std::vector<GroupPattern> &exist_grouppatterns, int dep);
};
// One operand of a FilterTreeNode. Holds both a sub-node and a string
// argument; `type` (default ' ') presumably selects which one is meaningful
// and `pos` defaults to -1 - TODO confirm against the code that fills them.
class GroupPattern::FilterTreeNode::FilterTreeChild
{
public:
FilterTreeChild():
type(' '), pos(-1){}
char type;
FilterTreeNode node;
std::string arg;
int pos;
};
// Root of one FILTER expression together with the variable set of the filter.
class GroupPattern::FilterTreeRoot
{
public:
FilterTreeNode root;
Varset varset;
};
// One ordering key: variable name and direction (print() shows DESC when
// `descending` is true, ASC otherwise).
class Order
{
public:
std::string var;
bool descending;
Order(std::string &_var, bool _descending):
var(_var), descending(_descending){}
};
private:
QueryForm query_form;
ProjectionModifier projection_modifier;
Varset projection;
bool projection_asterisk;
std::vector<Order> order;
// print() omits offset when it is 0 and limit when it is -1.
int offset, limit;
GroupPattern grouppattern;
public:
void setQueryForm(QueryForm _queryform);
QueryForm getQueryForm();
void setProjectionModifier(ProjectionModifier _projection_modifier);
ProjectionModifier getProjectionModifier();
void addProjectionVar(std::string _projection);
int getProjectionNum();
Varset& getProjection();
void setProjectionAsterisk();
bool checkProjectionAsterisk();
void addOrder(std::string &_var, bool _descending);
std::vector<Order>& getOrder();
void setOffset(int _offset);
int getOffset();
void setLimit(int _limit);
int getLimit();
GroupPattern& getGroupPattern();
// True when the group pattern passes both structural checks
// (checkOnlyUnionOptionalFilterNoExists and checkOptionalGroupPatternVarsAndSafeFilter).
bool checkWellDesigned();
void print();
};
#endif // _QUERY_QUERYTREE_H

View File

@ -1,36 +0,0 @@
/*=============================================================================
# Filename: RegexExpression.cpp
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:40
# Description:
=============================================================================*/
#include "../Util/Util.h"
// Small wrapper around the POSIX regcomp/regexec/regfree API.
//
// The wrapper tracks whether oRegex currently holds a successfully compiled
// pattern. regfree() is only ever called on such a pattern, so destroying an
// object that was never compiled (or whose compile() failed) is safe, and
// calling compile() again releases the previous pattern instead of leaking it.
//
// NOTE: objects are not safe to copy - a copy would share the same compiled
// pattern and both destructors would release it.
class RegexExpression
{
private:
    regex_t oRegex;
    bool compiled;  // true only while oRegex holds a successfully compiled pattern

    // Releases the compiled pattern, if there is one.
    void release()
    {
        if (compiled)
        {
            regfree(&oRegex);
            compiled = false;
        }
    }
public:
    RegexExpression():
        compiled(false){}
    ~RegexExpression()
    {
        release();
    }
    // Compiles pRegexStr. Every 'i' character in `flag` enables
    // case-insensitive matching (REG_ICASE); other characters are ignored.
    // Returns true on success. On failure the object is left without a
    // pattern and match() returns false.
    inline bool compile(std::string &pRegexStr, std::string &flag)
    {
        release();  // drop a pattern left over from an earlier compile()
        int flags = 0;
        for (int i = 0; i < (int)flag.length(); i++)
            if (flag[i] == 'i') flags |= REG_ICASE;
        int nErrCode = regcomp(&oRegex, pRegexStr.c_str(), flags);
        // After a failed regcomp the regex_t must not be passed to regexec/regfree.
        compiled = (nErrCode == 0);
        return compiled;
    }
    // Returns true when the compiled pattern matches anywhere in pText.
    // Returns false when there is no compiled pattern.
    inline bool match(std::string &pText)
    {
        if (!compiled)
            return false;
        int nErrCode = regexec(&oRegex, pText.c_str(), 0, NULL, 0);
        return (nErrCode == 0);
    }
};

View File

@ -1,81 +0,0 @@
/*=============================================================================
# Filename: ResultFilter.cpp
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-05-03 15:36
# Description: implement functions in ResultFilter.h
=============================================================================*/
#include "ResultFilter.h"
using namespace std;
// Registers `var` in the table with MAX_SIZE zeroed counters, unless an entry
// for it already exists (an existing entry is left untouched).
void ResultFilter::addVar(string var)
{
    if (this->hash_table.count(var) != 0)
        return;
    this->hash_table.insert(pair<string, vector<int> >(var, vector<int>(this->MAX_SIZE, 0)));
}
// Returns a pointer to the counter vector registered for `var`, or NULL when
// the variable has no entry. The lookup never creates an entry.
vector<int>* ResultFilter::findVar(string var)
{
    std::map<string, vector<int> >::iterator slot = this->hash_table.find(var);
    if (slot == this->hash_table.end())
        return NULL;
    return &slot->second;
}
// For every basic query in `query`, adds `value` to the counter slot
// hash(id) of each variable, once per result row and column. Variables are
// registered on first use.
void ResultFilter::change(SPARQLquery& query, int value)
{
    for (int q = 0; q < query.getBasicQueryNum(); ++q)
    {
        BasicQuery& bq = query.getBasicQuery(q);
        vector<int*>& rows = bq.getResultList();
        const int row_count = rows.size();
        const int var_count = bq.getVarNum();

        // Resolve one counter vector per column. Entries of a std::map stay
        // at the same address when further keys are inserted, so collecting
        // the pointers while registering is safe.
        vector<vector<int>*> counters;
        for (int v = 0; v < var_count; ++v)
        {
            this->addVar(bq.getVarName(v));
            counters.push_back(this->findVar(bq.getVarName(v)));
        }

        for (int r = 0; r < row_count; ++r)
        {
            int* row = rows[r];
            for (int v = 0; v < var_count; ++v)
                (*counters[v])[this->hash(row[v])] += value;
        }
    }
}
// For every variable of every basic query that has a registered counter
// vector, replaces its candidate list by the ids whose counter slot hash(id)
// is positive. Sizes before and after are printed to stdout.
void ResultFilter::candFilter(SPARQLquery& query)
{
    for (int q = 0; q < query.getBasicQueryNum(); ++q)
    {
        BasicQuery& bq = query.getBasicQuery(q);
        for (int v = 0; v < bq.getVarNum(); ++v)
        {
            vector<int>* counters = this->findVar(bq.getVarName(v));
            if (counters == NULL)
                continue;   // variable never registered: leave its candidates alone

            IDList& candidates = bq.getCandidateList(v);
            IDList kept;
            printf("candFilter on %s\n", bq.getVarName(v).c_str());
            printf("before candFilter, size = %d\n", candidates.size());
            for (int k = 0; k < candidates.size(); ++k)
            {
                const int id = candidates.getID(k);
                if ((*counters)[hash(id)] > 0)
                    kept.addID(id);
            }
            candidates = kept;
            printf("after candFilter, size = %d\n", candidates.size());
        }
    }
}

View File

@ -1,35 +0,0 @@
/*=============================================================================
# Filename: ResultFilter.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-05-03 15:36
# Description:
=============================================================================*/
#ifndef _QUERY_RESULTFILTER_H
#define _QUERY_RESULTFILTER_H
#include "SPARQLquery.h"
#include "../Util/Util.h"
// Keeps, per variable name, a fixed-size table of integer counters indexed by
// a hash of an id. change() adjusts the counters from query results and
// candFilter() uses them to prune candidate lists.
class ResultFilter
{
private:
    // Number of counter slots per variable (2^20).
    static const int MAX_SIZE = 1048576;

    // Maps an id to a slot in [0, MAX_SIZE): keep the low bits, multiply by
    // 17, keep the low bits again.
    inline int hash(int x)
    {
        const int mask = MAX_SIZE - 1;
        const int low_bits = x & mask;
        return (low_bits * 17) & mask;
    }

    // variable name -> counter slots
    std::map<std::string, std::vector<int> > hash_table;

public:
    void addVar(std::string var);
    std::vector<int>* findVar(std::string var);
    void change(SPARQLquery& query, int value);
    void candFilter(SPARQLquery& query);
};
#endif // _QUERY_RESULTFILTER_H

View File

@ -1,41 +1,29 @@
/*=============================================================================
# Filename: ResultSet.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-24 22:01
# Description: implement functions in ResultSet.h
=============================================================================*/
/*
* ResultSet.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#include "ResultSet.h"
using namespace std;
#include <sstream>
// Builds an empty result set: no variables, no answers, and a NULL stream
// (STREAM_ON) or a NULL answer table (otherwise).
ResultSet::ResultSet()
{
    this->ansNum = 0;
    this->select_var_num = 0;
    this->var_name = NULL;
#ifdef STREAM_ON
    this->stream = NULL;
#else
    this->answer = NULL;
#endif
}
// Releases the variable-name array and either the stream (STREAM_ON) or
// every answer row followed by the row table itself.
ResultSet::~ResultSet()
{
    delete[] this->var_name;
#ifdef STREAM_ON
    delete this->stream; //maybe NULL
#else
    for (int row = 0; row < this->ansNum; ++row)
        delete[] this->answer[row];
    delete[] this->answer;
#endif
}
ResultSet::ResultSet(int _v_num, const string* _v_names)
{
this->select_var_num = _v_num;
@ -44,217 +32,57 @@ ResultSet::ResultSet(int _v_num, const string* _v_names)
{
this->var_name[i] = _v_names[i];
}
#ifdef STREAM_ON
this->stream = NULL;
#endif
}
void
ResultSet::setVar(const vector<string> & _var_names)
/* convert to binary string */
// Binary-string conversion is not implemented: always yields NULL.
Bstr* ResultSet::to_bstr()
{
    Bstr* none = NULL;
    return none;
}
/* convert to usual string */
// Renders the answers as text: one line per answer, fields separated by a
// single space, each line ended by '\n'. An empty result set is rendered as
// "[empty result]" (no trailing newline).
string ResultSet::to_str()
{
    if (this->ansNum == 0)
        return "[empty result]";

    std::stringstream out;
    for (int row = 0; row < this->ansNum; ++row)
    {
        string* fields = this->answer[row];
        out << fields[0];
        for (int col = 1; col < this->select_var_num; ++col)
            out << " " << fields[col];
        out << "\n";
    }
    return out.str();
}
void ResultSet::setVar(const std::vector<string> & _var_names)
{
this->select_var_num = _var_names.size();
this->var_name = new string[this->select_var_num];
for(int i = 0; i < this->select_var_num; i++)
for(int i = 0; i < this->select_var_num; i ++)
{
this->var_name[i] = _var_names[i];
}
}
//convert to usual string
// Renders the result set as text. An empty result yields "[empty result]\n".
// Otherwise the output starts with an answer-count line and a tab-separated
// header of variable names, followed by the rows:
//  - without STREAM_ON, rows are taken from `answer`, skipping the first
//    output_offset rows and stopping after output_limit rows (-1 = no limit);
//  - with STREAM_ON, the rows come from readAllFromStream().
string
ResultSet::to_str()
{
if(this->ansNum == 0)
{
return "[empty result]\n";
}
stringstream _buf;
//#ifdef DEBUG_PRECISE
// Header: answer count, then the variable names separated by tabs.
_buf << "There has answer: " << this->ansNum << endl;
_buf << this->var_name[0];
for(int i = 1; i < this->select_var_num; i ++)
{
_buf << "\t" << this->var_name[i];
}
_buf << "\n";
//#endif
#ifndef STREAM_ON
for(int i = 0; i < this->ansNum; i++)
{
// Stop once output_limit rows past the offset have been written.
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;
#ifdef DEBUG_PRECISE
printf("to_str: well!\n"); //just for debug!
#endif //DEBUG_PRECISE
// Rows before output_offset are skipped.
if (i >= this->output_offset)
{
_buf << this->answer[i][0];
for(int j = 1; j < this->select_var_num; j++)
{
//there may be ' ' in spo, but no '\t'
_buf << "\t" << this->answer[i][j];
//_buf << " " << this->answer[i][j];
}
_buf << "\n";
}
}
#ifdef DEBUG_PRECISE
printf("to_str: ends!\n"); //just for debug!
#endif //DEBUG_PRECISE
#else //STREAM_ON
printf("using stream to produce to_str()!\n");
_buf << this->readAllFromStream();
#endif //STREAM_ON
return _buf.str();
}
// Writes the result set to _fp. Only does anything when STREAM_ON is defined:
// a tab-separated header of variable names, then either "[empty result]" or
// the rows read one by one from the stream, honouring output_offset and
// output_limit (-1 = no limit).
void
ResultSet::output(FILE* _fp)
{
#ifdef STREAM_ON
fprintf(_fp, "%s", this->var_name[0].c_str());
for(int i = 1; i < this->select_var_num; i++)
{
fprintf(_fp, "\t%s", this->var_name[i].c_str());
}
fprintf(_fp, "\n");
if(this->ansNum == 0)
{
fprintf(_fp, "[empty result]\n");
return;
}
const Bstr* bp;
for(int i = 0; i < this->ansNum; i++)
{
// Stop once output_limit rows past the offset have been written.
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;
// Every row is read from the stream, including rows skipped by the offset.
bp = this->stream->read();
if (i >= this->output_offset)
{
// bp is indexed per column: bp[j] is the j-th field of the row.
fprintf(_fp, "%s", bp[0].getStr());
//fprintf(_fp, "%s", bp->getStr());
for(int j = 1; j < this->select_var_num; j++)
{
fprintf(_fp, "\t%s", bp[j].getStr());
//bp = this->stream.read();
//fprintf(_fp, "\t%s", bp->getStr());
}
fprintf(_fp, "\n");
}
}
#endif
}
// Records the output offset/limit and, when STREAM_ON is defined, replaces any
// existing stream with a new one (created only if ansNum > 0).
// _keys/_desc are passed to the Stream constructor together with a flag that
// is true when _keys is non-empty. With DEBUG_STREAM defined the caller's
// keys are ignored: every column is used as a key with desc = false and the
// flag is forced to true.
void
ResultSet::openStream(std::vector<int> &_keys, std::vector<bool> &_desc, int _output_offset, int _output_limit)
{
#ifdef STREAM_ON
#ifdef DEBUG_STREAM
vector<int> debug_keys;
vector<bool> debug_desc;
for(int i = 0; i < this->select_var_num; ++i)
{
debug_keys.push_back(i);
debug_desc.push_back(false);
}
#endif
// Drop a stream left over from a previous call.
if(this->stream != NULL)
{
delete this->stream;
this->stream = NULL;
}
#ifdef DEBUG_STREAM
if(this->ansNum > 0)
this->stream = new Stream(debug_keys, debug_desc, this->ansNum, this->select_var_num, true);
#else
if(this->ansNum > 0)
this->stream = new Stream(_keys, _desc, this->ansNum, this->select_var_num, _keys.size() > 0);
#endif //DEBUG_STREAM
#endif //STREAM_ON
// Stored regardless of STREAM_ON.
this->output_offset = _output_offset;
this->output_limit = _output_limit;
}
// With STREAM_ON: calls setEnd() on the stream if one exists. Otherwise a no-op.
void
ResultSet::resetStream()
{
#ifdef STREAM_ON
    if (this->stream == NULL)
        return;
    this->stream->setEnd();
#endif
}
// With STREAM_ON: writes the bytes of _s to the stream if one exists.
// Otherwise a no-op.
void
ResultSet::writeToStream(string& _s)
{
#ifdef STREAM_ON
    if (this->stream == NULL)
        return;
    this->stream->write(_s.c_str(), _s.length());
#endif
}
//QUERY: how to manage when large?
// Reads every row back from the stream and renders it as tab-separated text,
// one row per line, honouring output_offset and output_limit (-1 = no limit).
// Returns an empty string when STREAM_ON is not defined or no stream exists.
string
ResultSet::readAllFromStream()
{
stringstream buf;
#ifdef STREAM_ON
if(this->stream == NULL)
return "";
// resetStream() calls setEnd() on the stream before reading starts.
this->resetStream();
const Bstr* bp;
for(int i = 0; i < this->ansNum; i++)
{
// Stop once output_limit rows past the offset have been written.
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;
// Every row is read from the stream, including rows skipped by the offset.
bp = this->stream->read();
if (i >= this->output_offset)
{
buf << bp[0].getStr();
for(int j = 1; j < this->select_var_num; ++j)
{
buf << "\t" << bp[j].getStr();
}
//buf << bp->getStr();
//for(int j = 1; j < this->select_var_num; j++)
//{
//bp = this->stream.read();
//buf << "\t" << bp->getStr();
//}
buf << "\n";
}
}
#endif
return buf.str();
}
// Returns the next record read from the stream, or NULL when STREAM_ON is not
// defined, when no stream exists, or when the stream reports its end (the
// last two cases also log a message to stderr).
// NOTICE: the returned record must not be freed by the caller.
const Bstr*
ResultSet::getOneRecord()
{
#ifndef STREAM_ON
    return NULL;
#else
    if (this->stream == NULL)
    {
        fprintf(stderr, "ResultSet::getOneRecord(): no results now!\n");
        return NULL;
    }
    if (this->stream->isEnd())
    {
        fprintf(stderr, "ResultSet::getOneRecord(): read till end now!\n");
        return NULL;
    }
    const Bstr* record = this->stream->read();
    return record;
#endif
}

View File

@ -1,53 +1,39 @@
/*=============================================================================
# Filename: ResultSet.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-24 21:57
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/
/*
* ResultSet.h
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#ifndef _QUERY_RESULTSET_H
#define _QUERY_RESULTSET_H
#ifndef RESULTSET_H_
#define RESULTSET_H_
#include "../Util/Util.h"
#include "../Util/Bstr.h"
#include "../Util/Stream.h"
#include<string>
#include<vector>
#include "../Bstr/Bstr.h"
using namespace std;
class ResultSet
{
private:
#ifdef STREAM_ON
Stream* stream;
#endif
class ResultSet{
public:
int select_var_num;
int ansNum;
std::string* var_name;
int output_offset, output_limit;
string* var_name;
string** answer;
#ifndef STREAM_ON
std::string** answer;
#endif
ResultSet();
~ResultSet();
ResultSet(int _v_num, const std::string* _v_names);
ResultSet(int _v_num, const string* _v_names);
//convert to binary string
//Bstr* to_bstr();
/* convert to binary string */
Bstr* to_bstr();
//convert to usual string
std::string to_str();
void output(FILE* _fp); //output all results using Stream
void setVar(const std::vector<std::string> & _var_names);
/* convert to usual string */
string to_str();
//operations on private stream from caller
void openStream(std::vector<int> &_keys, std::vector<bool> &_desc, int _output_offset, int _output_limit);
void resetStream();
void writeToStream(std::string& _s);
std::string readAllFromStream();
const Bstr* getOneRecord();
/* */
void setVar(const std::vector<string> & _var_names);
};
#endif //_QUERY_RESULTSET_H
#endif /* RESULTSET_H_ */

View File

@ -1,21 +1,14 @@
/*=============================================================================
# Filename: SPARQLquery.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-31 19:15
# Description: implement functions in SPARQLquery.h
=============================================================================*/
/*
* SPARQLquery.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#include "SPARQLquery.h"
#include "../Parser/DBparser.h"
#include "../Database/Database.h"
using namespace std;
// Constructs a query object from query text. The body is empty: _query is
// not read or stored here.
SPARQLquery::SPARQLquery(const string& _query)
{
}
SPARQLquery::SPARQLquery()
{
@ -24,167 +17,201 @@ SPARQLquery::SPARQLquery()
SPARQLquery::~SPARQLquery()
{
for(unsigned i = 0; i < this->query_union.size(); i ++)
for(int i = 0; i < this->query_vec.size(); i ++)
{
delete this->query_union[i];
delete this->query_vec[i];
}
}
void
SPARQLquery::addQueryVar(const string& _var)
void SPARQLquery::addQueryVar(const string& _var)
{
query_var.push_back(_var);
int n = (int)this->query_var_vec.size();
std::vector<std::string>::iterator i = find(this->query_var_vec[n - 1].begin(), this->query_var_vec[n - 1].end(), _var);
if (i == this->query_var_vec[n - 1].end()) this->query_var_vec[n - 1].push_back(_var);
}
const int
SPARQLquery::getQueryVarNum()
void SPARQLquery::addQueryVarVec()
{
return query_var.size();
this->query_var_vec.push_back(vector <string> ());
}
const vector<string>&
SPARQLquery::getQueryVar()const
vector<string>& SPARQLquery::getQueryVarVec(int _var_vec_id)
{
return this->query_var;
return this->query_var_vec[_var_vec_id];
}
const string&
SPARQLquery::getQueryVar(int _id)
{
return this->query_var.at(_id);
void SPARQLquery::addTriple(const Triple& _triple){
int last_i = query_vec.size()-1;
query_vec[last_i]->addTriple(_triple);
}
void
SPARQLquery::addTriple(const Triple& _triple){
int last_i = query_union.size()-1;
query_union[last_i]->addTriple(_triple);
const int SPARQLquery::getBasicQueryNum()
{
return this->query_vec.size();
}
const int
SPARQLquery::getBasicQueryNum()
void SPARQLquery::encodeQuery(KVstore* _p_kv_store)
{
return this->query_union.size();
}
void
SPARQLquery::encodeQuery(KVstore* _p_kv_store)
{
for(unsigned i = 0; i < this->query_union.size(); i ++)
for(int i = 0; i < this->query_vec.size(); i ++)
{
(this->query_union[i])->encodeBasicQuery(_p_kv_store, this->query_var);
(this->query_vec[i])->encodeBasicQuery(_p_kv_store, this->query_var_vec[i]);
}
}
void
SPARQLquery::encodeQuery(KVstore* _p_kv_store, vector< vector<string> > sparql_query_varset)
BasicQuery& SPARQLquery::getBasicQuery(int basic_query_id)
{
for(unsigned i = 0; i < this->query_union.size(); i ++)
return *(query_vec[basic_query_id]);
}
void SPARQLquery::addBasicQuery()
{
query_vec.push_back(new BasicQuery(""));
}
void SPARQLquery::addBasicQuery(BasicQuery* _basic_q)
{
this->query_vec.push_back(_basic_q);
}
vector<BasicQuery*>& SPARQLquery::getBasicQueryVec()
{
return this->query_vec;
}
void SPARQLquery::print(ostream& _out_stream)
{
int n = getBasicQueryNum();
std::cout << "Block " << n << " in total." << std::endl;
for (int i = 0; i < n; i++)
{
(this->query_union[i])->encodeBasicQuery(_p_kv_store, sparql_query_varset[i]);
}
}
BasicQuery&
SPARQLquery::getBasicQuery(int basic_query_id)
{
return *(query_union[basic_query_id]);
}
void
SPARQLquery::addBasicQuery(){
query_union.push_back(new BasicQuery(""));
}
void
SPARQLquery::addBasicQuery(BasicQuery* _basic_q)
{
this->query_union.push_back(_basic_q);
}
vector<BasicQuery*>&
SPARQLquery::getBasicQueryVec()
{
return this->query_union;
}
void
SPARQLquery::print(ostream& _out_stream){
int k=getQueryVarNum();
cout<<"QueryVar "<<k<<":"<<endl;
for (int i=0;i<k;i++){
cout<<getQueryVar(i)<<endl;
}
k=getBasicQueryNum();
cout<<"Block "<<k<<" in total."<<endl;
for (int i=0;i<k;i++){
cout<<"Block "<<i<<endl;
std::cout<<"Block "<< i << std::endl;
int m = this->query_var_vec[i].size();
std::cout << "QueryVar "<< m <<":"<< std::endl;
for (int j = 0; j < m; j++)
std::cout << this->query_var_vec[i][j] << " ";
std::cout << std::endl;
getBasicQuery(i).print(_out_stream);
}
}
string
SPARQLquery::triple_str()
std::string SPARQLquery::triple_str()
{
stringstream _ss;
std::stringstream _ss;
_ss << "varNum:" << this->query_var.size() << endl;
for(unsigned i = 0; i < this->query_var.size(); i ++)
for(int i = 0; i < this->query_vec.size(); i ++)
{
_ss << this->query_var[i] << "\t";
}
_ss << endl;
for(unsigned i = 0; i < this->query_union.size(); i ++)
{
_ss << "bq" << i << " :" << this->query_union[i]->triple_str() << endl;
_ss << "varVec" << i << "varNum:" << this->query_var_vec[i].size() << endl;
for (int j = 0; j < this->query_var_vec[i].size(); j++)
_ss << this->query_var_vec[i][j] << "\t";
_ss << endl;
_ss << "bq" << i << " :" << this->query_vec[i]->triple_str() << endl;
}
return _ss.str();
}
string
SPARQLquery::candidate_str()
std::string SPARQLquery::candidate_str()
{
stringstream _ss;
std::stringstream _ss;
for(unsigned i = 0; i < this->query_union.size(); i ++)
for(int i = 0; i < this->query_vec.size(); i ++)
{
_ss << "bq" << i << " :" << this->query_union[i]->candidate_str() << endl;
_ss << "bq" << i << " :" << this->query_vec[i]->candidate_str() << endl;
}
return _ss.str();
}
string
SPARQLquery::result_str()
std::string SPARQLquery::result_str()
{
stringstream _ss;
std::stringstream _ss;
for(unsigned i = 0; i < this->query_union.size(); i ++)
for(int i = 0; i < this->query_vec.size(); i ++)
{
_ss << "bq" << i << " :" << this->query_union[i]->result_str() << endl;
_ss << "bq" << i << " :" << this->query_vec[i]->result_str() << endl;
}
return _ss.str();
}
string
SPARQLquery::to_str()
std::string SPARQLquery::to_str()
{
stringstream _ss;
std::stringstream _ss;
_ss << "varNum:" << this->query_var.size() << endl;
for(unsigned i = 0; i < this->query_var.size(); i ++)
for(int i = 0; i < this->query_vec.size(); i ++)
{
_ss << this->query_var[i] << "\t";
}
_ss << endl;
for(unsigned i = 0; i < this->query_union.size(); i ++)
{
_ss << "bq" << i << " :\n" << this->query_union[i]->to_str() << endl;
_ss << "varVec" << i << "varNum:" << this->query_var_vec[i].size() << endl;
for (int j = 0; j < this->query_var_vec[i].size(); j++)
_ss << this->query_var_vec[i][j] << "\t";
_ss << endl;
_ss << "bq" << i << " :\n" << this->query_vec[i]->to_str() << endl;
}
return _ss.str();
}
//----------------------------------------------------------------------------------------------------------------------------------------------------
void SPARQLquery::addOneProjection(std::string _projection)
{
this->projections.push_back(_projection);
}
int SPARQLquery::getProjectionsNum()
{
return this->projections.size();
}
std::vector<std::string>& SPARQLquery::getProjections()
{
return this->projections;
}
// Returns a mutable reference to the top-level pattern group.
SPARQLquery::PatternGroup& SPARQLquery::getPatternGroup()
{
    return patterngroup;
}
// Appends a triple pattern. hasVar is set when the subject or the object
// text starts with '?'; the predicate is not inspected.
void SPARQLquery::PatternGroup::addOnePattern(Pattern _pattern)
{
    if (_pattern.subject.value[0] == '?')
        this->hasVar = true;
    else if (_pattern.object.value[0] == '?')
        this->hasVar = true;
    this->patterns.push_back(_pattern);
}
void SPARQLquery::PatternGroup::addOneFilterTree()
{
this->filters.push_back(FilterTree());
}
// Returns the most recently added filter tree. `filters` must not be empty.
SPARQLquery::FilterTree& SPARQLquery::PatternGroup::getLastFilterTree()
{
    return this->filters.back();
}
void SPARQLquery::PatternGroup::addOneOptional()
{
this->optionals.push_back(PatternGroup());
}
// Returns the most recently added optional group. `optionals` must not be empty.
SPARQLquery::PatternGroup& SPARQLquery::PatternGroup::getLastOptional()
{
    return this->optionals.back();
}
// Starts a new UNION: appends an empty list of branches.
void SPARQLquery::PatternGroup::addOneGroupUnion()
{
    std::vector<PatternGroup> no_branches;
    this->unions.push_back(no_branches);
}
void SPARQLquery::PatternGroup::addOneUnion()
{
int n = this->unions.size();
this->unions[n - 1].push_back(PatternGroup());
}
// Returns the last branch of the most recently started UNION.
// Both `unions` and its last branch list must be non-empty.
SPARQLquery::PatternGroup& SPARQLquery::PatternGroup::getLastUnion()
{
    std::vector<PatternGroup> &current_union = this->unions.back();
    return current_union.back();
}

View File

@ -1,29 +1,31 @@
/*=============================================================================
# Filename: SPARQLquery.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-31 19:13
# Description: originally written by liyouhuan, modified by chenjiaqi and zengli
=============================================================================*/
/*
* SPARQLquery.h
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#ifndef _QUERY_SPARQLQUERY_H
#define _QUERY_SPARQLQUERY_H
#ifndef SPARQLQUERY_H_
#define SPARQLQUERY_H_
#include "BasicQuery.h"
class SPARQLquery
{
class SPARQLquery{
private:
vector<BasicQuery*> query_union;
vector<string> query_var;
vector<BasicQuery*> query_vec;
vector < vector<string> >query_var_vec;
public:
SPARQLquery(const string& _query);
SPARQLquery();
~SPARQLquery();
void addQueryVar(const string& _var);
void addQueryVarVec();
vector<string>& getQueryVarVec(int _var_vec_id);
void addTriple(const Triple& _triple);
void addBasicQuery(BasicQuery* _basic_q);
@ -34,23 +36,133 @@ public:
BasicQuery& getBasicQuery(int _basic_query_id);
const int getQueryVarNum();
const vector<string>& getQueryVar()const;
const string& getQueryVar(int _id);
void encodeQuery(KVstore* _p_kv_store);
void encodeQuery(KVstore* _p_kv_store, vector< vector<string> > sparql_query_varset);
vector<BasicQuery*>& getBasicQueryVec();
void print(ostream& _out_stream);
std::string triple_str();
std::string candidate_str();
std::string result_str();
std::string to_str();
public:
// One position of a triple pattern, stored as its textual value.
// Finer-grained classification (type / subType / subTypeValue) is sketched in
// the disabled block below and is not part of the struct.
struct Element
{
    /*
    enum Type { Variable, Literal, IRI };
    enum SubType { None, CustomLanguage, CustomType };
    Type type;
    SubType subType;
    std::string subTypeValue;
    */
    std::string value;

    Element(const std::string& _value)
        : value(_value)
    {
    }
};
// A triple pattern: subject, predicate and object elements.
struct Pattern
{
    Element subject, predicate, object;

    Pattern(const Element &_subject, const Element &_predicate,const Element &_object)
        : subject(_subject),
          predicate(_predicate),
          object(_object)
    {
    }
};
// Node of a filter expression tree.
//
// A node owns the two child nodes reachable through parg1 / parg2 (either may
// be NULL) and releases them in its destructor. Because nodes are stored by
// value in std::vector<FilterTree>, copying must duplicate the children:
// a member-wise copy would leave two nodes owning the same child and lead to
// a double delete (or to dangling children after a vector reallocation).
struct FilterTree
{
    enum FilterTree_Type
    {
        Or, And, Equal, NotEqual, Less, LessOrEqual, Greater, GreaterOrEqual, Plus, Minus, Mul, Div,
        Not, UnaryPlus, UnaryMinus, Literal, Variable, IRI, Function, ArgumentList,
        Builtin_str, Builtin_lang, Builtin_langmatches, Builtin_datatype, Builtin_bound, Builtin_sameterm,
        Builtin_isiri, Builtin_isblank, Builtin_isliteral, Builtin_regex, Builtin_in
    };

    // Kind of this node. NOTE: left unset by the default constructor, exactly as before.
    FilterTree_Type type;
    // Owned child nodes; NULL when absent.
    FilterTree* parg1,*parg2;
    // Textual arguments of the node (presumably used when the matching child is absent -- verify against the parser).
    std::string arg1, arg2;
    //std::string arg1Type, arg2Type;

    // Creates a node without children.
    explicit FilterTree():parg1(NULL), parg2(NULL){}

    // Deep copy: the new node receives its own duplicates of both subtrees.
    FilterTree(const FilterTree& _other)
        : type(_other.type), parg1(NULL), parg2(NULL), arg1(_other.arg1), arg2(_other.arg2)
    {
        if (_other.parg1 != NULL) parg1 = new FilterTree(*_other.parg1);
        if (_other.parg2 != NULL) parg2 = new FilterTree(*_other.parg2);
    }

    // Deep-copy assignment. The copy is built before anything of *this is
    // released, so self-assignment and assigning from one of our own subtrees
    // are both safe; the previous children are freed when the temporary dies.
    FilterTree& operator=(const FilterTree& _other)
    {
        if (this != &_other)
        {
            FilterTree tmp(_other);

            type = tmp.type;
            arg1.swap(tmp.arg1);
            arg2.swap(tmp.arg2);

            FilterTree* old1 = parg1;
            parg1 = tmp.parg1;
            tmp.parg1 = old1;

            FilterTree* old2 = parg2;
            parg2 = tmp.parg2;
            tmp.parg2 = old2;
        }
        return *this;
    }

    // Releases both subtrees (delete on NULL is a no-op).
    ~FilterTree()
    {
        delete parg1;
        delete parg2;
    }
};
// One group of a query body: its triple patterns, filters, and nested
// optional / union groups. The member functions are only declared here;
// their definitions live outside this declaration.
class PatternGroup
{
public:
    std::vector<Pattern> patterns;
    std::vector<FilterTree> filters;
    std::vector<PatternGroup> optionals;
    // Two-level container of nested groups (presumably one inner vector per UNION construct -- confirm in the .cpp).
    std::vector<std::vector<PatternGroup> > unions;
    // Starts out false; this declaration does not show where it is set.
    bool hasVar;

    PatternGroup()
        : hasVar(false)
    {
    }

    void addOnePattern(Pattern _pattern);

    void addOneFilterTree();
    FilterTree& getLastFilterTree();

    void addOneOptional();
    PatternGroup& getLastOptional();

    void addOneGroupUnion();
    void addOneUnion();
    PatternGroup& getLastUnion();
};
// Intermediate result table: a list of variable names plus rows of integers.
class TempResult
{
public:
    std::vector<std::string> var;
    std::vector< std::vector<int> > res;

    // Starts with no variables and no rows.
    TempResult()
    {
    }

    // Copies both the variable list and every row of the source.
    TempResult(const TempResult &t)
        : var(t.var),
          res(t.res)
    {
    }
};
// One step of an evaluation plan: a one-character tag plus an untyped payload
// pointer. The pointer is only stored and handed back; this class never
// dereferences or frees it.
class EvaPlanEle
{
private:
    char type;
    void * p;

public:
    // _p defaults to NULL for steps that carry no payload.
    EvaPlanEle(char _type, void *_p = NULL)
        : type(_type),
          p(_p)
    {
    }

    // Tag given at construction.
    char getType()
    {
        return this->type;
    }

    // Payload pointer given at construction (may be NULL).
    void * getPointer()
    {
        return this->p;
    }
};
std::vector<EvaPlanEle> evaPlan;
std::stack<TempResult *> evaStack;
/*
enum ProjectionModifier { Modifier_None, Modifier_Distinct, Modifier_Reduced, Modifier_Count, Modifier_Duplicates };
struct Order
{
unsigned id;
bool descending;
};
*/
private:
std::vector<std::string> projections;
PatternGroup patterngroup;
/*
ProjectionModifier projectionModifier;
std::vector<Order> order;
unsigned limit;
*/
public:
void addOneProjection(std::string _projection);
int getProjectionsNum();
std::vector<std::string>& getProjections();
PatternGroup& getPatternGroup();
};
#endif //_QUERY_SPARQLQUERY_H
#endif /* SPARQLQUERY_H_ */

View File

@ -1,109 +0,0 @@
/*=============================================================================
# Filename: Varset.cpp
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Description: implement functions in Varset.h
=============================================================================*/
#include "Varset.h"
using namespace std;
// Builds a set that holds the single name _var.
Varset::Varset(string & _var)
{
    this->addVar(_var);
}
// Builds a set from a list of names; addVar() drops repeated entries, so the
// first occurrence of each name determines its position.
Varset::Varset(vector<string> & _varset)
{
    for (vector<string>::iterator it = _varset.begin(); it != _varset.end(); ++it)
    {
        this->addVar(*it);
    }
}
// Returns true when _var is already stored in this set (linear scan).
bool Varset::findVar(string& _var)
{
    for (vector<string>::iterator it = this->varset.begin(); it != this->varset.end(); ++it)
    {
        if (*it == _var)
        {
            return true;
        }
    }
    // Also reached for an empty set.
    return false;
}
// Appends _var to the set unless an equal name is already present.
void Varset::addVar(string& _var)
{
    if (this->findVar(_var))
    {
        return;
    }
    this->varset.push_back(_var);
}
// Union: the names of this set in their order, followed by the names of x
// that are not already included.
Varset Varset::operator + (Varset& x)
{
    // The vector constructor feeds every name of *this through addVar().
    Varset merged(this->varset);
    for (vector<string>::iterator it = x.varset.begin(); it != x.varset.end(); ++it)
    {
        merged.addVar(*it);
    }
    return merged;
}
// Intersection: the names of this set that x also contains, in this set's order.
Varset Varset::operator * (Varset& x)
{
    Varset common;
    for (vector<string>::iterator it = this->varset.begin(); it != this->varset.end(); ++it)
    {
        if (!x.findVar(*it))
        {
            continue;
        }
        common.addVar(*it);
    }
    return common;
}
// Difference: the names of this set that x does not contain, in this set's order.
Varset Varset::operator - (Varset& x)
{
    Varset remaining;
    for (vector<string>::iterator it = this->varset.begin(); it != this->varset.end(); ++it)
    {
        if (x.findVar(*it))
        {
            continue;
        }
        remaining.addVar(*it);
    }
    return remaining;
}
// Order-insensitive comparison: both sets hold the same number of names and
// every name of this set is found in x.
bool Varset::operator ==(Varset &x)
{
    if (this->varset.size() != x.varset.size())
    {
        return false;
    }
    // belongTo() performs the "every name of *this is in x" scan.
    return this->belongTo(x);
}
// Returns true as soon as one name of this set is also found in x.
bool Varset::hasCommonVar(Varset &x)
{
    vector<string>::iterator it = this->varset.begin();
    while (it != this->varset.end())
    {
        if (x.findVar(*it))
        {
            return true;
        }
        ++it;
    }
    return false;
}
// Subset test: true when every name of this set is found in x
// (so an empty set belongs to any x).
bool Varset::belongTo(Varset &x)
{
    vector<string>::iterator it = this->varset.begin();
    while (it != this->varset.end())
    {
        if (!x.findVar(*it))
        {
            return false;
        }
        ++it;
    }
    return true;
}
// For each name of this set, gives its index inside x.varset, or -1 when x
// does not contain it. The result has one entry per name of this set.
vector <int> Varset::mapTo(Varset& x)
{
    vector<int> positions(this->varset.size(), -1);
    for (size_t own = 0; own < this->varset.size(); ++own)
    {
        // The scan never stops early, so the last equal entry of x wins.
        for (size_t other = 0; other < x.varset.size(); ++other)
        {
            if (this->varset[own] == x.varset[other])
            {
                positions[own] = (int)other;
            }
        }
    }
    return positions;
}
void Varset::print()
{
printf("Varset: ");
for (int i = 0; i < (int)this->varset.size(); i++)
{
printf("%s ", this->varset[i].c_str());
}
printf("\n");
}

View File

@ -1,39 +0,0 @@
/*=============================================================================
# Filename: Varset.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Description:
=============================================================================*/
#ifndef _QUERY_VARSET_H
#define _QUERY_VARSET_H
#include "../Util/Util.h"
// Ordered list of variable names with set-style helpers.
// Names are kept in insertion order; addVar() skips names already present.
class Varset
{
public:
    // The stored names. Public, so callers may also read or edit it directly.
    std::vector<std::string> varset;

    // Empty set.
    Varset() {}
    // Set holding the single name _var.
    Varset(std::string & _var);
    // Set built from _varset via addVar(), so repeated names are dropped.
    Varset(std::vector<std::string> & _varset);

    // True when _var is stored in this set.
    bool findVar(std::string& _var);
    // Appends _var unless it is already stored.
    void addVar(std::string& _var);

    // Union, intersection and difference; each returns a new set.
    Varset operator + (Varset& x);
    Varset operator * (Varset& x);
    Varset operator - (Varset& x);

    // Same number of names and every name of this set found in x.
    bool operator ==(Varset &x);
    // At least one name of this set is found in x.
    bool hasCommonVar(Varset &x);
    // Every name of this set is found in x.
    bool belongTo(Varset &x);

    // Index in x.varset of each name of this set, -1 where x lacks it.
    std::vector <int> mapTo(Varset& x);

    // Prints the names to standard output.
    void print();
};
#endif // _QUERY_VARSET_H

View File

@ -1,68 +0,0 @@
# Gstore System
Gstore System (also called gStore) is a graph database engine for managing large graph-structured data, which is open-source and targets Linux operating systems. The whole project is written in C++, with the help of some libraries such as readline, antlr, and so on. Only source tarballs are provided currently, which means you have to compile the source code if you want to use our system.
**The formal help document is in [Handbook](docs/latex/gStore_help.pdf).**
**You can write your information in [survey](http://59.108.48.38/survey) if you like.**
## Getting Started
This system is really user-friendly and you can pick it up in several minutes. Remember to check your platform where you want to run this system by viewing [System Requirements](docs/DEMAND.md). After all are verified, please get this project's source code. There are several ways to do this:
- download the zip from this repository and extract it
- fork this repository in your github account
- type `git clone git@github.com:Caesar11/gStore.git` in your terminal or use git GUI to acquire it
Then you need to compile the project: just type `make` in the gStore root directory, and all executables will be built. To run gStore, please type `bin/gload database_name dataset_path` to build a database with a name of your choice. You can then use the `bin/gquery database_name` command to query an existing database. What is more, `bin/gconsole` is a wonderful tool designed for you, providing all operations you need to use gStore. Notice that all commands should be typed in the root directory of gStore, and your database name should not end with ".db".
- - -
## Advanced Help
If you want to understand the details of the gStore system, or you want to try some advanced operations(for example, using the API, server/client), please see the chapters below.
- [Basic Introduction](docs/INTRO.md): introduce the theory and features of gStore
- [Install Guide](docs/INSTALL.md): instructions on how to install this system
- [How To Use](docs/USAGE.md): detailed information about using the gStore system
- [API Explanation](docs/API.md): guide you to develop applications based on our API
- [Project Structure](docs/STRUCT.md): show the whole structure and sequence of this project
- [Related Essays](docs/ESSAY.md): contain essays and publications related with gStore
- [Update Logs](docs/CHANGELOG.md): keep the logs of the system updates
- [Test Results](docs/TEST.md): present the test results of a series of experiments
- - -
## Other Business
We have written a series of short essays addressing recurring challenges in using gStore to realize applications, which are placed in [Recipe Book](docs/TIPS.md).
You are welcome to report any advice or errors in the GitHub Issues section of this repository if you do not need a timely reply. However, if you need us to deal with your reports urgently, please email your suggestions to <chenjiaqi93@163.com> and your bug reports to <zengli-bookug@pku.edu.cn>. A full list of our whole team is in [Mailing List](docs/MAIL.md).
There are some restrictions when you use the current gStore project, you can see them on [Limit Description](docs/LIMIT.md).
Sometimes you may run into strange behavior (which is not necessarily an error), or something that is hard to understand or solve and leaves you unsure what to do next. In that case, do not hesitate to visit the [Frequently Asked Questions](docs/FAQ.md) page.
Graph database engines are a new area and we are still trying to go further. The things we plan to do next are in the [Future Plan](docs/PLAN.md) chapter, and we hope more and more people will support or even join us. You can support us in many ways:
- watch/star our project
- fork this repository and submit pull requests to us
- download and use this system, report bugs or suggestions
- ...
People who inspire us or contribute to this project will be listed in the [Thanks List](docs/THANK.md) chapter.
This whole document is divided into different pieces, and each of them is stored in a markdown file. You can see/download the combined markdown file in [help_markdown](docs/gStore_help.md), and for the html file, please go to [help_html](docs/gStore_help.html). What is more, we also provide the help file in pdf format, and you can visit it in [help_pdf](docs/latex/gStore_help.pdf).

View File

@ -1,31 +1,29 @@
/*=============================================================================
# Filename: Client.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2016-02-23 13:39
# Description: implement functions in Client.h
=============================================================================*/
/*
* Client.cpp
*
* Created on: 2014-10-19
* Author: hanshuo
*/
#include"Client.h"
using namespace std;
#include<iostream>
Client::Client()
{
this->ip = Socket::DEFAULT_SERVER_IP;
this->port = Socket::DEFAULT_CONNECT_PORT;
this->ip = Socket::DEFAULT_SERVER_IP;
this->port = Socket::DEFAULT_CONNECT_PORT;
}
Client::Client(string _ip, unsigned short _port)
Client::Client(std::string _ip, unsigned short _port)
{
this->ip = _ip;
this->port = _port;
this->ip = _ip;
this->port = _port;
}
Client::Client(unsigned short _port)
{
this->ip = Socket::DEFAULT_SERVER_IP;
this->port = _port;
this->ip = Socket::DEFAULT_SERVER_IP;
this->port = _port;
}
Client::~Client()
@ -34,295 +32,96 @@ Client::~Client()
bool Client::connectServer()
{
bool flag = this->socket.create();
if (!flag)
{
cerr << "cannot create socket. @Client::connectServer" << endl;
return false;
}
bool flag = this->socket.create();
if (!flag)
{
std::cerr << "cannot create socket. @Client::connectServer" << std::endl;
return false;
}
flag = this->socket.connect(this->ip, this->port);
flag = this->socket.connect(this->ip, this->port);
if (!flag)
{
cerr << "cannot connect to server. @Client::connectServer" << endl;
return false;
}
if (!flag)
{
std::cerr << "cannot connect to server. @Client::connectServer" << std::endl;
return false;
}
return true;
return true;
}
bool Client::disconnectServer()
{
bool flag = this->socket.close();
bool flag = this->socket.close();
return flag;
return flag;
}
bool Client::send(const string& _msg)
bool Client::send(const std::string& _msg)
{
bool flag = this->socket.send(_msg);
bool flag = this->socket.send(_msg);
return flag;
return flag;
}
bool Client::receiveResponse(string& _recv_msg)
bool Client::receiveResponse(std::string& _recv_msg)
{
bool flag = this->socket.recv(_recv_msg);
bool flag = this->socket.recv(_recv_msg);
return flag;
return flag;
}
void Client::run()
{
string cmd;
while (true)
{
std::string cmd;
#ifdef READLINE_ON
char *buf, prompt[] = "gsql>";
//printf("Type `help` for information of all commands\n");
//printf("Type `help command_t` for detail of command_t\n");
rl_bind_key('\t', rl_complete);
//QUERY: should add ';'?
while(true)
{
//BETTER:write in multi lines as in below comments
buf = readline(prompt);
if(buf == NULL)
continue;
else
add_history(buf);
if(strncmp(buf, "help", 4) == 0)
{
if(strcmp(buf, "help") == 0)
{
//print commands message
printf("help - print commands message\n");
printf("quit - quit the console normally\n");
printf("import - build a database for a given dataset\n");
printf("load - load an existen database\n");
printf("unload - unload an existen database\n");
printf("sparql - load query from the second argument\n");
printf("show - show the current database's name\n");
}
else
{
//TODO: help for a given command
}
continue;
}
else if(strcmp(buf, "quit") == 0)
break;
else if(strncmp(buf, "import", 6) != 0 && strncmp(buf, "load", 4) != 0 && strncmp(buf, "unload", 6) != 0 && strncmp(buf, "sparql", 6) != 0 && strncmp(buf, "show", 4) != 0)
{
printf("unknown commands\n");
continue;
}
while (true)
{
std::cout << "->";
std::string line;
std::getline(std::cin, line);
int line_len = line.size();
if (line_len >0 && line[line_len-1] == ';')
{
line.resize(line_len - 1);
cmd += line;
break;
}
cmd += line + "\n";
}
string query_file;
string query;
FILE* fp = stdout; ///default to output on screen
bool ifredirect = false;
//BETTER:build a parser for this console
//spaces/tabs can be before commands
// std::cout << "input end" << std::endl;
if(strncmp(buf, "sparql", 6) == 0)
{
//NOTICE: if using query string, '>' is ok to exist!
char* rp = buf;
int pos = strlen(buf) - 1;
while(pos > -1)
{
if(*(rp+pos) == '"')
{
break;
}
else if(*(rp+pos) == '>')
{
ifredirect = true;
break;
}
pos--;
}
rp += pos;
//DEBUG:redirect sometimes not work for path query
if (cmd == "exit")
{
break;
}
char* p = buf + strlen(buf) - 1;
if(ifredirect)
{
printf("redirected!\n");
char* tp = p;
while(*tp == ' ' || *tp == '\t')
tp--;
*(tp+1) = '\0';
tp = rp + 1;
while(*tp == ' ' || *tp == '\t')
tp++;
printf("redirect: %s\n", tp);
fp = fopen(tp, "w"); //NOTICE:not judge here!
p = rp - 1; //NOTICE: all separated with ' ' or '\t'
}
while(*p == ' ' || *p == '\t' || *p == '"') //set the end of path
p--;
*(p+1) = '\0';
p = buf + 6;
while(*p == ' ' || *p == '\t') //acquire the start of path
p++;
bool flag = this->connectServer();
if (!flag)
{
std::cerr << "connect server error. @Client::run" << std::endl;
continue;
}
bool isPath = true;
if(*p == '"')
{
isPath = false;
p++;
}
flag = this->send(cmd);
if (!flag)
{
std::cerr << "sent message error. @Client::run" << std::endl;
continue;
}
char* q;
if(isPath)
{
//TODO: support the soft links(or hard links)
//there are also readlink and getcwd functions for help
//http://linux.die.net/man/2/readlink
//NOTICE:getcwd and realpath cannot acquire the real path of file
//in the same directory and the program is executing when the
//system starts running
//NOTICE: use realpath(p, NULL) is ok, but need to free the memory
q = realpath(p, NULL); //QUERY:still not work for soft links
#ifdef DEBUG_PRECISE
printf("%s\n", p);
#endif
if(q == NULL)
{
printf("invalid path!\n");
free(q);
free(buf);
continue;
}
else
printf("%s\n", q);
//query = getQueryFromFile(p);
query = Util::getQueryFromFile(q);
}
else
{
//BETTER:check query in ""
query = string(p);
}
std::string recv_msg;
flag = this->receiveResponse(recv_msg);
std::cout << recv_msg << std::endl;
if(query.empty())
{
if(isPath)
free(q);
//free(resolved_path);
free(buf);
if(ifredirect)
fclose(fp);
continue;
}
printf("query is:\n%s\n\n", query.c_str());
if(isPath)
free(q);
cmd = string("query ") + query;
}
else if(strncmp(buf, "show", 4) == 0)
{
cmd = string("show databases");
}
else
{
cmd = string(buf);
}
//DEBUG!
printf("%s\n", cmd.c_str());
free(buf);
//free(resolved_path);
#ifdef DEBUG_PRECISE
printf("after buf freed!\n");
#endif
//interacte with server
bool flag = this->connectServer();
if(!flag)
{
cerr << "connect server error. @Client::run" << endl;
if(ifredirect)
fclose(fp);
continue;
}
flag = this->send(cmd);
if(!flag)
{
cerr << "sent message error. @Client::run" << endl;
if(ifredirect)
fclose(fp);
continue;
}
string recv_msg;
flag = this->receiveResponse(recv_msg);
fprintf(fp, "%s\n", recv_msg.c_str());
this->disconnectServer();
if(!flag)
{
cerr << "disconnect server error. @Client::run" << endl;
if(ifredirect)
fclose(fp);
continue;
}
}
//#else
//while (true)
//{
// while(true)
// {
// //BETTER:readline and parser
// cout << "->";
// string line;
// getline(cin, line);
// int line_len = line.size();
// if (line_len >0 && line[line_len-1] == ';')
// {
// line.resize(line_len - 1);
// cmd += line;
// break;
// }
// cmd += line + "\n";
// }
//
// //cout << "input end" << endl;
//
// if(cmd == "quit")
// {
// break;
// }
//
// bool flag = this->connectServer();
// if (!flag)
// {
// cerr << "connect server error. @Client::run" << endl;
// continue;
// }
//
// flag = this->send(cmd);
// if (!flag)
// {
// cerr << "sent message error. @Client::run" << endl;
// continue;
// }
//
// string recv_msg;
// flag = this->receiveResponse(recv_msg);
// cout << recv_msg << endl;
//
// this->disconnectServer();
// if (!flag)
// {
// cerr << "disconnect server error. @Client::run" << endl;
// continue;
// }
//}
#endif
this->disconnectServer();
if (!flag)
{
std::cerr << "disconnect server error. @Client::run" << std::endl;
continue;
}
}
}

View File

@ -1,16 +1,15 @@
/*=============================================================================
# Filename: Client.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2016-02-23 13:40
# Description: originally written by hanshuo, modified by zengli
=============================================================================*/
/*
* Client.h
*
* Created on: 2014-10-18
* Author: hanshuo
*/
#ifndef _SERVER_CLIENT_H
#define _SERVER_CLIENT_H
#ifndef CLIENT_H_
#define CLIENT_H_
#include "../Util/Util.h"
#include "Socket.h"
#include"Socket.h"
#include<string>
class Client
{
@ -34,5 +33,6 @@ private:
int random_key;
};
#endif // _SERVER_CLIENT_H
#endif /* CLIENT_H_ */

View File

@ -5,7 +5,8 @@
* Author: hanshuo
*/
#include "Operation.h"
#include"Operation.h"
#include<iostream>
Operation::Operation()
{
@ -33,13 +34,13 @@ Operation::~Operation()
Bstr Operation::encrypt()
{
//TODO
// to be implemented...
return Bstr(NULL, 0);
}
Bstr Operation::deencrypt()
{
//TODO
// to be implemented...
return Bstr(NULL, 0);
}
@ -50,7 +51,7 @@ CommandType Operation::getCommand()
std::string Operation::getParameter(int _idx)
{
if ((unsigned)_idx < this->parameters.size())
if (_idx < this->parameters.size())
{
return this->parameters[_idx];
}

View File

@ -8,12 +8,11 @@
#ifndef OPERATION_H_
#define OPERATION_H_
#include "../Util/Util.h"
#include"../Util/Bstr.h"
#include<string>
#include<vector>
#include"../Bstr/Bstr.h"
//NOTICE:CMD_DROP is used to remove the database, and CMD_CREATE is not useful because
//we always need to import a dataset to create a gstore db
enum CommandType {CMD_CONNECT, CMD_EXIT, CMD_LOAD, CMD_UNLOAD, CMD_CREATE, CMD_DROP,
enum CommandType {CMD_CONNECT, CMD_EXIT, CMD_LOAD, CMD_UNLOAD, CMD_CREATE_DB, CMD_DELETE_DB,
CMD_IMPORT, CMD_QUERY, CMD_SHOW, CMD_INSERT, CMD_OTHER}; // extend the operation command type here.
class Operation

View File

@ -1,14 +1,14 @@
/*=============================================================================
# Filename: Server.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-25 13:47
# Description:
=============================================================================*/
/*
* Server.cpp
*
* Created on: 2014-10-14
* Author: hanshuo
*/
#include "Server.h"
using namespace std;
#include"Server.h"
#include"../Database/Database.h"
#include<iostream>
#include<sstream>
Server::Server()
{
@ -31,8 +31,7 @@ Server::~Server()
delete this->database;
}
bool
Server::createConnection()
bool Server::createConnection()
{
bool flag;
@ -60,24 +59,21 @@ Server::createConnection()
return true;
}
bool
Server::deleteConnection()
bool Server::deleteConnection()
{
bool flag = this->socket.close();
return flag;
}
bool
Server::response(Socket _socket, std::string& _msg)
bool Server::response(Socket _socket, std::string& _msg)
{
bool flag = _socket.send(_msg);
return flag;
}
void
Server::listen()
void Server::listen()
{
while (true)
{
@ -134,12 +130,6 @@ Server::listen()
this->importRDF(db_name, "", rdf_path, ret_msg);
break;
}
case CMD_DROP:
{
string db_name = operation.getParameter(0);
this->dropDatabase(db_name, "", ret_msg);
break;
}
case CMD_QUERY:
{
string query = operation.getParameter(0);
@ -149,9 +139,9 @@ Server::listen()
case CMD_SHOW:
{
string para = operation.getParameter(0);
if (para == "databases" || para == "all")
if (para == "databases")
{
this->showDatabases(para, "", ret_msg);
this->showDatabases("", ret_msg);
}
else
{
@ -175,11 +165,10 @@ Server::listen()
}
}
bool
Server::parser(std::string _raw_cmd, Operation& _ret_oprt)
bool Server::parser(std::string _raw_cmd, Operation& _ret_oprt)
{
int cmd_start_pos = 0;
int raw_len = (int)_raw_cmd.size();
int raw_len = _raw_cmd.size();
for (int i=0;i<raw_len;i++)
if (_raw_cmd[i] == '\n')
@ -293,33 +282,20 @@ Server::parser(std::string _raw_cmd, Operation& _ret_oprt)
return true;
}
bool
Server::createDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
bool Server::createDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
{
// to be implemented...
return false;
}
bool
Server::dropDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
bool Server::deleteDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
{
//TODO
if (this->database == NULL || this->database->getName() != _db_name)
{
_ret_msg = "database:" + _db_name + " is not loaded.";
return false;
}
delete this->database;
this->database = NULL;
_ret_msg = "unload database done.";
return true;
// to be implemented...
return false;
}
bool
Server::loadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
bool Server::loadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
{
this->database = new Database(_db_name);
@ -332,15 +308,12 @@ Server::loadDatabase(std::string _db_name, std::string _ac_name, std::string& _r
else
{
_ret_msg = "load database failed.";
delete this->database;
this->database = NULL;
}
return flag;
}
bool
Server::unloadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
bool Server::unloadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
{
if (this->database == NULL || this->database->getName() != _db_name)
{
@ -355,12 +328,11 @@ Server::unloadDatabase(std::string _db_name, std::string _ac_name, std::string&
return true;
}
bool
Server::importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
bool Server::importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
{
//if (this->database != NULL && this->database->getName() != _db_name)
if (this->database != NULL)
if (this->database != NULL && this->database->getName() != _db_name)
{
this->database->unload();
delete this->database;
}
@ -379,8 +351,7 @@ Server::importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_p
return flag;
}
bool
Server::insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
bool Server::insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
{
if (this->database != NULL)
{
@ -403,10 +374,9 @@ Server::insertTriple(std::string _db_name, std::string _ac_name, std::string _rd
return flag;
}
bool
Server::query(const std::string _query, std::string& _ret_msg)
bool Server::query(const std::string _query, std::string& _ret_msg)
{
if(this->database == NULL)
if (this->database == NULL)
{
_ret_msg = "database has not been loaded.";
return false;
@ -414,10 +384,8 @@ Server::query(const std::string _query, std::string& _ret_msg)
ResultSet res_set;
bool flag = this->database->query(_query, res_set);
if(flag)
if (flag)
{
//_ret_msg = "results are too large!";
//BETTER: divide and transfer if too large to be placed in memory, using Stream
_ret_msg = res_set.to_str();
}
else
@ -428,15 +396,9 @@ Server::query(const std::string _query, std::string& _ret_msg)
return flag;
}
bool
Server::showDatabases(string _para, string _ac_name, string& _ret_msg)
bool Server::showDatabases(std::string _ac_name, std::string& _ret_msg)
{
if(_para == "all")
{
_ret_msg = Util::getItemsFromDir(Util::db_home);
return true;
}
if(this->database != NULL)
if (this->database != NULL)
{
_ret_msg = "\n" + this->database->getName() + "\n";
}

View File

@ -1,18 +1,16 @@
/*=============================================================================
# Filename: Server.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-25 13:49
# Description: originally written by hanshuo, modified by zengli
=============================================================================*/
/*
* Server.h
*
* Created on: 2014-10-14
* Author: hanshuo
*/
#ifndef _SERVER_SERVER_H
#define _SERVER_SERVER_H
#ifndef SERVER_H_
#define SERVER_H_
#include "../Util/Util.h"
#include "../Database/Database.h"
#include "Socket.h"
#include "Operation.h"
#include"Socket.h"
#include"Operation.h"
#include"../Database/Database.h"
/*
* the Server is only at a original and simple version.
@ -43,10 +41,10 @@ public:
bool response(Socket _socket, std::string& _msg);
bool parser(std::string _raw_cmd, Operation& _ret_oprt);
bool createDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool dropDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool deleteDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool loadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool unloadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool showDatabases(std::string _para, std::string _ac_name, std::string& _ret_msg);
bool showDatabases(std::string _ac_name, std::string& _ret_msg);
bool importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg);
bool insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg);
bool query(const std::string _query, std::string& _ret_msg);
@ -60,5 +58,6 @@ private:
Database* database;
};
#endif // _SERVER_SERVER_H
#endif /* SERVER_H_ */

View File

@ -4,8 +4,15 @@
* Created on: 2014-10-14
* Author: hanshuo
*/
#include "Socket.h"
#include"Socket.h"
#include <errno.h>
#include <fcntl.h>
#include <sys/time.h>
#include <sstream>
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
const std::string Socket::DEFAULT_SERVER_IP = "127.0.0.1";
@ -236,4 +243,3 @@ bool Socket::isValid()const
{
return (this->sock != -1);
}

View File

@ -5,10 +5,16 @@
* Author: hanshuo
*/
#ifndef _SERVER_SOCKET_H
#define _SERVER_SOCKET_H
#ifndef SOCKET_H_
#define SOCKET_H_
#include "../Util/Util.h"
#include<sys/types.h>
#include<sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <unistd.h>
#include <string>
#include <arpa/inet.h>
class Socket
{
@ -38,5 +44,4 @@ private:
sockaddr_in addr;
};
#endif // _SERVER_SOCKET_H
#endif /* SOCKET_H_ */

View File

@ -1,15 +1,14 @@
/*=============================================================================
# Filename: SigEntry.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 13:49
# Description:
=============================================================================*/
/*
* SIGEntry.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
* Modified on: 2014-6-29
* Author: hanshuo
*/
#include "SigEntry.h"
using namespace std;
SigEntry::SigEntry()
{
(this->sig).entityBitSet.reset();
@ -34,26 +33,22 @@ SigEntry::SigEntry(const EntitySig& _sig, int _entity_id)
this->entity_id = _entity_id;
}
const EntitySig&
SigEntry::getEntitySig() const
const EntitySig& SigEntry::getEntitySig()const
{
return this->sig;
}
int
SigEntry::getEntityId() const
int SigEntry::getEntityId()const
{
return this->entity_id;
}
int
SigEntry::getSigCount() const
int SigEntry::getSigCount()const
{
return (int)this->sig.entityBitSet.count();
}
SigEntry&
SigEntry::operator=(const SigEntry _sig_entry)
SigEntry& SigEntry::operator=(const SigEntry _sig_entry)
{
this->entity_id = _sig_entry.getEntityId();
this->sig.entityBitSet.reset();
@ -61,56 +56,34 @@ SigEntry::operator=(const SigEntry _sig_entry)
return *this;
}
SigEntry&
SigEntry::operator|=(const SigEntry _sig_entry)
SigEntry& SigEntry::operator|=(const SigEntry _sig_entry)
{
const EntitySig& sig = (_sig_entry.getEntitySig());
(this->sig).entityBitSet |= sig.entityBitSet;
return *this;
}
bool
SigEntry::cover(const SigEntry& _sig_entry) const
bool SigEntry::cover(const SigEntry& _sig_entry)const
{
//EQUAL:this & that == that
return (this->sig.entityBitSet | _sig_entry.getEntitySig().entityBitSet)
== (this->sig.entityBitSet);
}
bool
SigEntry::cover(const EntitySig& _sig) const
bool SigEntry::cover(const EntitySig& _sig)const
{
return (this->sig.entityBitSet | _sig.entityBitSet) == (this->sig.entityBitSet);
}
int
SigEntry::xOR(const SigEntry& _sig_entry) const
int SigEntry::xEpsilen(const SigEntry& _sig_entry)const
{
EntityBitSet entityBitSet;
entityBitSet.reset();
entityBitSet |= this->sig.entityBitSet;
//NOTICE: compute the xor distince now
//a^b = (a & ~b) | (~a & b)
EntityBitSet another;
another.reset();
another |= _sig_entry.getEntitySig().entityBitSet;
return ((entityBitSet & another.flip()) | (entityBitSet.flip() & another)).count();
entityBitSet.flip();
return (entityBitSet & _sig_entry.getEntitySig().entityBitSet).count();
}
//how many 1s in _sig_entry are contained ->flip-> not contained these 1s, as distince
//0s in _sig_entry is nonsense
int
SigEntry::xEpsilen(const SigEntry& _sig_entry) const
{
EntityBitSet entityBitSet;
entityBitSet.reset();
entityBitSet |= this->sig.entityBitSet;
entityBitSet.flip();
return (entityBitSet & _sig_entry.getEntitySig().entityBitSet).count();
}
string
SigEntry::to_str() const
std::string SigEntry::to_str()const
{
std::stringstream _ss;
@ -120,3 +93,4 @@ SigEntry::to_str() const
return _ss.str();
}

View File

@ -1,39 +1,38 @@
/*=============================================================================
# Filename: SigEntry.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 13:48
# Description: written by liyouhuan and hanshuo
=============================================================================*/
/*
* SigEntry.h
*
* Created on: 2014-6-20
* Author: liyouhuan
* Modified on: 2014-6-29
* Author: hanshuo
*/
#ifndef _SIGNATURE_SIGENTRY_H
#define _SIGNATURE_SIGENTRY_H
#ifndef SIGENTRY_H_
#define SIGENTRY_H_
#include "../Util/Util.h"
#include<iostream>
#include "Signature.h"
#include<sstream>
using namespace std;
class SigEntry
{
class SigEntry{
private:
EntitySig sig;
//-1 if not in leaf node
int entity_id;
public:
SigEntry();
SigEntry(int _entity_id, EntityBitSet& _bitset);
SigEntry(const SigEntry& _sig_entry);
SigEntry(const EntitySig& sig, int _entity_id);
const EntitySig& getEntitySig() const;
int getEntityId() const;
int getSigCount() const;
const EntitySig& getEntitySig()const;
int getEntityId()const;
int getSigCount()const;
SigEntry& operator=(const SigEntry _sig_entry);
SigEntry& operator|=(const SigEntry _sig_entry);
bool cover(const SigEntry& _sig_entry) const;
bool cover(const EntitySig& _sig) const;
int xEpsilen(const SigEntry& _sig_entry) const;
int xOR(const SigEntry& _sig_entry) const;
std::string to_str() const;
bool cover(const SigEntry& _sig_entry)const;
bool cover(const EntitySig& _sig)const;
int xEpsilen(const SigEntry& _sig_entry)const;
std::string to_str()const;
};
#endif // _SIGNATURE_SIGENTRY_H
#endif /* SIGENTRY_H_ */

View File

@ -1,21 +1,21 @@
/*=============================================================================
# Filename: Signature.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 13:18
# Description:
=============================================================================*/
/*
* Signature.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
* Implemented on: 2014-6-29
* Author: hanshuo
*/
#include "Signature.h"
#include "../Query/BasicQuery.h"
#include "../Database/Database.h"
using namespace std;
std::string
Signature::BitSet2str(const EntityBitSet& _bitset)
std::string Signature::BitSet2str(const EntityBitSet& _bitset)
{
std::stringstream _ss;
bool any = false;
for(unsigned i = 0; i < _bitset.size(); i ++)
for(int i = 0; i < _bitset.size(); i ++)
{
if(_bitset.test(i))
{
@ -31,52 +31,50 @@ Signature::BitSet2str(const EntityBitSet& _bitset)
return _ss.str();
}
void
Signature::encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type)
/* for Signature */
void Signature::encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type)
{
if (Signature::PREDICATE_ENCODE_METHOD == 0)
{
//WARN:change if need to use again, because the encoding method has changed now!
int pos = ( (_pre_id+10) % Signature::EDGE_SIG_LENGTH ) + Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
}
else
{
int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
int seed_preid = _pre_id;
if(_type == Util::EDGE_OUT)
if(_type == BasicQuery::EDGE_OUT)
{
seed_num += Signature::EDGE_SIG_INTERVAL_NUM_HALF;
seed_preid += 101;
}
//int primeSize = 5;
//int prime1[]={5003,5009,5011,5021,5023};
//int prime2[]={49943,49957,49991,49993,49999};
/*
int primeSize = 5;
int prime1[]={5003,5009,5011,5021,5023};
int prime2[]={49943,49957,49991,49993,49999};
*/
//NOTICE: more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
// how to hash the predicate id to signature(bitset) better?
// more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
// when the data set is big enough, cutting down the size of candidate list should come up to our primary consideration.
// in this case we should not encode too many ones in entities' signature.
// also, when the data set is small, hash conflicts can hardly happen.
// therefore, I think using 2 primes(set up two ones in bitset) is enough.
// --by hanshuo.
int primeSize = 2;
int prime1[] = {5003, 5011};
int prime2[] = {49957, 49993};
int prime1[]={5003,5011};
int prime2[]={49957,49993};
//for(int i = 0; i < primeSize; i++)
//{
//int seed = _pre_id * prime1[i] % prime2[i];
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//_entity_bs.set(pos);
//}
int seed = _pre_id * 5003 % 49957;
int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
_entity_bs.set(pos);
for (int i=0;i<primeSize;i++)
{
int seed = seed_preid * prime1[i] % prime2[i];
int pos = (seed % Signature::EDGE_SIG_LENGTH ) + Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
}
}
}
void
Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
void Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
{
if (Signature::PREDICATE_ENCODE_METHOD == 0)
{
@ -85,81 +83,233 @@ Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
}
else
{
int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
//int primeSize = 5;
//int prime1[]={5003,5009,5011,5021,5023};
//int prime2[]={49943,49957,49991,49993,49999};
/*
int primeSize = 5;
int prime1[]={5003,5009,5011,5021,5023};
int prime2[]={49943,49957,49991,49993,49999};
*/
int primeSize = 2;
int prime1[] = {5003,5011};
int prime2[] = {49957,49993};
int prime1[]={5003,5011};
int prime2[]={49957,49993};
//for (int i = 0; i < primeSize; i++)
//{
//int seed = _pre_id * prime1[i] % prime2[i];
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//_edge_bs.set(pos);
//}
int seed = _pre_id * 5003 % 49957;
int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
_edge_bs.set(pos);
for (int i=0;i<primeSize;i++)
{
int seed = _pre_id * prime1[i] % prime2[i];
int pos = seed % Signature::EDGE_SIG_LENGTH;
_edge_bs.set(pos);
}
}
}
//NOTICE: no need to encode itself because only variable in query need to be filtered!
//So only consider all neighbors!
void
Signature::encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs)
void Signature::encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs) //_str is subject or object or literal
{
//_str is subject or object or literal
if(strlen(_str) >0 && _str[0] == '?')
return;
int length = (int)strlen(_str);
unsigned int hashKey = 0;
unsigned int pos = 0;
char *str2 = (char*)calloc(length + 1, sizeof(char));
char *str2 = new char[length+1];
strcpy(str2, _str);
char *str = str2;
unsigned base = Signature::STR_SIG_BASE * (Signature::HASH_NUM - 1);
for(int i = Signature::HASH_NUM - 1; i >= 0; --i)
{
HashFunction hf = Util::hash[i];
if(hf == NULL)
break;
hashKey = hf(str);
str=str2;
pos = base + hashKey % Signature::STR_SIG_BASE;
base -= Signature::STR_SIG_BASE;
if(_str[0] == '"')
{
pos += Signature::STR_SIG_LENGTH2;
}
else if(_str[0] != '<')
{
#ifdef DEBUG_VSTREE
cerr << "error in encodeStr2Entity(): neighbor is neither a literal or entity!" << endl;
#endif
}
_entity_bs.set(pos);
}
//BETTER: use multiple threads for different hash functions
// the same consideration as encodePredicate2Entity.
// I think we should not set too many ones in entities' signature.
hashKey = Signature::simpleHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::RSHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::JSHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::PJWHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
/*
str=str2;
hashKey = Signature::ELFHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::SDBMHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
hashKey = Signature::DJBHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::APHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::BKDRHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
*/
#ifdef DEBUG_VSTREE
//std::stringstream _ss;
//_ss << "encodeStr2Entity:" << str2 << endl;
//Util::logging(_ss.str());
#endif
free(str2);
/*
//debug
{
std::stringstream _ss;
_ss << "encodeStr2Entity:" << str2 << endl;
Database::log(_ss.str());
}
*/
delete []str2;
}
void
Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs)
void Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs)
{
//TODO
//to be implement
}
// Placeholder hash entry point: nothing is computed yet, every input yields 0.
unsigned int Signature::hash(const char* _str)
{
//to be implemented
return 0;
}
/* some string hash functions */
/**
 * BKDR string hash: for every character of the NUL-terminated input the
 * running key is multiplied by a fixed seed and the character is added.
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::BKDRHash(const char *_str)
{
    const unsigned int multiplier = 131; // 31 131 1313 13131 131313 etc..
    unsigned int acc = 0;
    for (const char* cur = _str; *cur; ++cur)
    {
        acc = acc * multiplier + (*cur);
    }
    return acc & 0x7FFFFFFF;
}
/**
 * Multiply-by-31 string hash over the bytes of the input, read as unsigned char.
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::simpleHash(const char *_str)
{
    unsigned char* cur = (unsigned char *)_str;
    unsigned int acc = 0;
    while (*cur)
    {
        acc = 31 * acc + *cur;
        ++cur;
    }
    return acc & 0x7FFFFFFF;
}
/**
 * RS string hash: the per-character multiplier itself is rescaled by a fixed
 * factor after every character.
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::RSHash(const char *_str)
{
    const unsigned int step = 378551;
    unsigned int multiplier = 63689;
    unsigned int acc = 0;
    for (const char* cur = _str; *cur; ++cur)
    {
        acc = acc * multiplier + (*cur);
        multiplier *= step;
    }
    return acc & 0x7FFFFFFF;
}
/**
 * JS string hash: XORs the key with a shift/add mix of itself and each character.
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::JSHash(const char *_str)
{
    unsigned int acc = 1315423911;
    for (const char* cur = _str; *cur; ++cur)
    {
        const unsigned int mix = (acc << 5) + (*cur) + (acc >> 2);
        acc ^= mix;
    }
    return acc & 0x7FFFFFFF;
}
/**
 * PJW string hash: shifts each character in by one eighth of the word width
 * and folds any bits that reach the top eighth back into the key.
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::PJWHash(const char *_str)
{
    const unsigned int word_bits = (unsigned int)(sizeof(unsigned int) * 8);
    const unsigned int fold_shift = (unsigned int)((word_bits * 3) / 4);
    const unsigned int step_shift = (unsigned int)(word_bits / 8);
    const unsigned int top_mask = (unsigned int)(0xFFFFFFFF) << (word_bits - step_shift);

    unsigned int acc = 0;
    for (const char* cur = _str; *cur; ++cur)
    {
        acc = (acc << step_shift) + (*cur);
        const unsigned int spilled = acc & top_mask;
        if (spilled != 0)
        {
            // fold the spilled top bits back in, then clear them
            acc = (acc ^ (spilled >> fold_shift)) & (~top_mask);
        }
    }
    return acc & 0x7FFFFFFF;
}
/**
 * ELF string hash: shifts each character in by four bits and folds the top
 * nibble back into the key whenever it becomes non-zero.
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::ELFHash(const char *_str)
{
    unsigned int acc = 0;
    for (const char* cur = _str; *cur; ++cur)
    {
        acc = (acc << 4) + (*cur);
        const unsigned int top = acc & 0xF0000000L;
        if (top == 0)
        {
            continue;
        }
        acc ^= (top >> 24);
        acc &= ~top;
    }
    return acc & 0x7FFFFFFF;
}
/**
 * SDBM string hash: key = c + (key << 6) + (key << 16) - key for each character c.
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::SDBMHash(const char *_str)
{
    unsigned int acc = 0;
    for (const char* cur = _str; *cur; ++cur)
    {
        acc = (*cur) + (acc << 6) + (acc << 16) - acc;
    }
    return acc & 0x7FFFFFFF;
}
/**
 * DJB string hash: starts from 5381 and adds (key << 5) + c for each character c.
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::DJBHash(const char *_str)
{
    unsigned int acc = 5381;
    for (const char* cur = _str; *cur; ++cur)
    {
        acc += (acc << 5) + (*cur);
    }
    return acc & 0x7FFFFFFF;
}
/**
 * AP string hash: alternates between two mixing formulas on even and odd
 * character positions (the first character counts as even).
 * @param _str NUL-terminated string to hash
 * @return the key masked with 0x7FFFFFFF (top bit cleared)
 */
unsigned int Signature::APHash(const char *_str)
{
    unsigned int acc = 0;
    bool even_position = true;
    for (const char* cur = _str; *cur; ++cur)
    {
        if (even_position)
        {
            acc ^= ((acc << 7) ^ (*cur) ^ (acc >> 3));
        }
        else
        {
            acc ^= (~((acc << 11) ^ (*cur) ^ (acc >> 5)));
        }
        even_position = !even_position;
    }
    return acc & 0x7FFFFFFF;
}
/* for ENTITYsig */
EntitySig::EntitySig()
{
this->entityBitSet.reset();
@ -183,50 +333,44 @@ EntitySig::EntitySig(const EntityBitSet& _bitset)
this->entityBitSet |= _bitset;
}
EntitySig&
EntitySig::operator|=(const EntitySig& _sig)
EntitySig& EntitySig::operator|=(const EntitySig& _sig)
{
this->entityBitSet |= _sig.entityBitSet;
return *this;
}
bool
EntitySig::operator==(const EntitySig& _sig)const
bool EntitySig::operator==(const EntitySig& _sig)const
{
return (this->entityBitSet == _sig.entityBitSet);
}
bool
EntitySig::operator!=(const EntitySig& _sig)const
bool EntitySig::operator!=(const EntitySig& _sig)const
{
return (this->entityBitSet != _sig.entityBitSet);
}
EntitySig&
EntitySig::operator=(const EntitySig& _sig)
EntitySig& EntitySig::operator=(const EntitySig& _sig)
{
this->entityBitSet.reset();
this->entityBitSet |= _sig.getBitset();
return *this;
}
const EntityBitSet&
EntitySig::getBitset()const
const EntityBitSet & EntitySig::getBitset()const
{
return this->entityBitSet;
}
/* for EDGEsig */
// Default constructor: starts with every bit of the edge signature cleared.
EdgeSig::EdgeSig()
{
    edgeBitSet.reset();
}
// Builds a signature holding the same bits as the one pointed to by _p_sig.
EdgeSig::EdgeSig(const EdgeSig* _p_sig)
{
    edgeBitSet.reset();
    edgeBitSet |= (*_p_sig).edgeBitSet;
}
EdgeSig::EdgeSig(const EdgeSig& _sig)
{
this->edgeBitSet.reset();
@ -238,21 +382,8 @@ EdgeSig::EdgeSig(const EdgeBitSet& _bitset)
this->edgeBitSet.reset();
this->edgeBitSet |= _bitset;
}
EdgeSig&
EdgeSig::operator|=(const EdgeSig& _sig)
EdgeSig& EdgeSig::operator|=(const EdgeSig& _sig)
{
this->edgeBitSet |= _sig.edgeBitSet;
return *this;
}
string
EntitySig::to_str() const
{
std::stringstream _ss;
_ss << Signature::BitSet2str(this->entityBitSet);
return _ss.str();
}

View File

@ -1,86 +1,57 @@
/*=============================================================================
# Filename: Signature.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 12:50
# Description: written by liyouhuan and hanshuo
=============================================================================*/
/*
* Signature.h
*
* Created on: 2014-6-20
* Author: liyouhuan
* Modified on: 2014-6-29
* add some private hash functions,
* fix some ill-formed function names.
* Author: hanshuo
*/
#ifndef SIGNATURE_H_
#define SIGNATURE_H_
#ifndef _SIGNATURE_SIGNATURE_H
#define _SIGNATURE_SIGNATURE_H
#include<iostream>
#include<string.h>
#include<bitset>
#include<sstream>
using namespace std;
#include "../Util/Util.h"
class Signature
{
class Signature{
public:
//static HashFunction hash[HashNum];
//must make sure: ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH
//const static int ENTITY_SIG_LENGTH = 400;
static const int STR_SIG_BASE = 100;
//NOTICE: we can also use id here, but string is recommended due to special structure
//(maybe needed later, for example, wildcards)
//Th ehash function is costly, so just use two
static const int HASH_NUM = 3; //no more than Util::HashNum
//NOTICE:if using str id, we can also divide like EDGE_SIG
//here we divide as entity neighbors and literal neighbors: ENTITY, LITERAL
static const int STR_SIG_LENGTH = 2 * STR_SIG_BASE * HASH_NUM; //250
static const int STR_SIG_LENGTH2 = STR_SIG_BASE * HASH_NUM;
//QUERY:I think that str filter is more important in VSTree than predicate, because
//a predicate may correspond to a lot of entities and predicate num is usually small
static const int EDGE_SIG_INTERVAL_NUM_HALF = 5; //in edge or out edge
static const int EDGE_SIG_INTERVAL_NUM = 2 * EDGE_SIG_INTERVAL_NUM_HALF;
static const int EDGE_SIG_INTERVAL_BASE = 20;
static const int EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //150
static const int EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE; //150
/* must make sure:
* ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH */
const static int ENTITY_SIG_LENGTH = 400;
const static int EDGE_SIG_LENGTH = 150;
const static int STR_SIG_LENGTH = 250;
static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH;
//static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet;
typedef std::bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
typedef bitset<Signature::EDGE_SIG_LENGTH> EdgeBitSet;
typedef bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
static std::string BitSet2str(const EntityBitSet& _bitset);
//NOTICE: there are two predicate encoding method now, see the encoding functions @Signature.cpp for details
/* there are two predicate encoding method now, see the encoding functions @Signature.cpp for details. */
const static int PREDICATE_ENCODE_METHOD = 1;
static void encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type);
static void encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs);
static void encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs); //_str is subject or object(literal)
static void encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs);
//Signature()
//{
//NOTICE:not exceed the HashNum
//this->hash = new HashFunction[HashNum];
//this->hash[0] = Util::simpleHash;
//this->hash[1] = Util::APHash;
//this->hash[2] = Util::BKDRHash;
//this->hash[3] = Util::DJBHash;
//this->hash[4] = Util::ELFHash;
//this->hash[5] = Util::DEKHash;
//this->hash[6] = Util::BPHash;
//this->hash[7] = Util::FNVHash;
//this->hash[8] = Util::HFLPHash;
//this->hash[9] = Util::HFHash;
//this->hash[10] = Util::JSHash;
//this->hash[11] = Util::PJWHash;
//this->hash[12] = Util::RSHash;
//this->hash[13] = Util::SDBMHash;
//this->hash[14] = Util::StrHash;
//this->hash[15] = Util::TianlHash;
//}
//~Signature()
//{
//delete[] this->hash;
//}
unsigned int hash(const char* _str);
private:
static unsigned int BKDRHash(const char *_str);
static unsigned int simpleHash(const char *_str);
static unsigned int RSHash(const char *_str);
static unsigned int JSHash(const char *_str);
static unsigned int PJWHash(const char *_str);
static unsigned int ELFHash(const char *_str);
static unsigned int SDBMHash(const char *_str);
static unsigned int DJBHash(const char *_str);
static unsigned int APHash(const char *_str);
};
//WARN:also defined in Signature, must be same!!!
//NOTICE:EdgeBitSet is only used in Query, not for VSTree
typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet;
typedef std::bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
typedef bitset<Signature::EDGE_SIG_LENGTH> EdgeBitSet;
typedef bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
class EntitySig : Signature{
public:
@ -95,7 +66,6 @@ public:
EntitySig& operator=(const EntitySig& _sig);
const EntityBitSet& getBitset()const;
void encode(const char * _str, int _pre_id);
std::string to_str() const;
};
class EdgeSig : Signature{
@ -108,5 +78,4 @@ public:
EdgeSig& operator|=(const EdgeSig& _sig);
};
#endif // _SIGNATURE_SIGNATURE_H
#endif /* SIGNATURE_H_ */

View File

@ -5,11 +5,10 @@
* Author: liyouhuan
*/
#ifndef _UTIL_TRIPLE_H
#define _UTIL_TRIPLE_H
#include "Util.h"
#ifndef TRIPLE_H_
#define TRIPLE_H_
#include<iostream>
#include<string.h>
using namespace std;
class Triple{
@ -62,5 +61,4 @@ public:
const string toString()const;
};
#endif //_UTIL_TRIPLE_H
#endif /* TRIPLE_H_ */

View File

@ -1,70 +0,0 @@
/*=============================================================================
# Filename: BloomFilter.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2016-03-04 17:49
# Description: implement functions in BloomFilter.h
=============================================================================*/
#include "BloomFilter.h"
// Default constructor. The filter is still unimplemented, but every member is
// given a defined value so that nothing later reads indeterminate data.
BloomFilter::BloomFilter()
    : length(0), filter(NULL), hfnum(0), rate(0.0), hfptr(NULL)
{
    //TODO
}
// Constructor taking the number of keys. _num is not used yet (see TODO), but
// every member is given a defined value so that nothing later reads
// indeterminate data.
BloomFilter::BloomFilter(unsigned _num)
    : length(0), filter(NULL), hfnum(0), rate(0.0), hfptr(NULL)
{
    //TODO:fix _num to mod 8 == 0
}
// Allocates the zero-filled bit space: one byte per eight bits of `length`.
void
BloomFilter::init()
{
    const unsigned byte_count = this->length / 8;
    this->filter = (char *)calloc(byte_count, sizeof(char));
    //TODO:assign hash functions for hfptr
    //assign the rate of false positive, and then compute the length and hfnum according to key num
}
// Destructor stub: nothing is released here.
// NOTE(review): init() callocs `filter`; presumably it should be freed here once
// the constructors give it a defined value -- confirm before adding the free().
BloomFilter::~BloomFilter()
{
//TODO
}
//NOTICE:there are two ways to change int to string, one digit to one character or just change int* to char*
//The latter is more efficient because the former consumes space and time:O(32) >= O(lgn)
// Stub: adding an integer record is not implemented yet (currently a no-op).
void
BloomFilter::addRecord(int _record)
{
//TODO
}
// Stub: adding a byte-string record of _len bytes is not implemented yet
// (currently a no-op).
void
BloomFilter::addRecord(const char* _record, unsigned _len)
{
//TODO
}
// Stub: membership test for an integer record; not implemented, always returns false.
bool
BloomFilter::checkRecord(int _record) const
{
//TODO
return false;
}
// Stub: membership test for a byte-string record of _len bytes; not implemented,
// always returns false.
bool
BloomFilter::checkRecord(const char* _record, unsigned _len) const
{
//TODO
return false;
}
//if( GETBIT(vector, Util::HFLPHash(ch,strlen(ch))%MAX) )
//{
//flag++;
//}
//else
//{
//SETBIT(vector,Util::HFLPHash(ch,strlen(ch))%MAX );
//}

View File

@ -1,41 +0,0 @@
/*=============================================================================
# Filename: BloomFilter.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-20 13:18
# Description: http://blog.csdn.net/hguisu/article/details/7866173
TODO:this strategy can be used in Join and KVstore-search/modify/remove, or the uppest level!
really better? appropiate if key num small but search too many!
=============================================================================*/
#ifndef _UTIL_BLOOMFILTER_H
#define _UTIL_BLOOMFILTER_H
#include "Util.h"
// Bit n of the byte array ch, counting from the most significant bit of ch[0].
// Arguments and the whole expansion are parenthesized so that expression
// arguments (e.g. `base + i`) and surrounding operators (e.g. `== 1`) bind as
// expected. Note that n is still evaluated more than once.
#define SETBIT(ch, n) ((ch)[(n)/8] |= (1 << (7-(n)%8)))
#define GETBIT(ch, n) ((((ch)[(n)/8]) & (1<<(7-(n)%8))) >> (7-(n)%8))
// Bloom filter skeleton: a bit array plus a set of hash functions.
// The interface is declared here; BloomFilter.cpp still holds TODO stubs.
class BloomFilter
{
public:
BloomFilter();
BloomFilter(unsigned _num); //num of all keys
// insert a record given as an integer
void addRecord(int _record);
//NOTICE:we hope a Bstr-like struct here, for the length maybe very large
// insert a record given as _len raw bytes
void addRecord(const char* _record, unsigned _len);
// membership tests matching the two addRecord overloads
bool checkRecord(int _record) const;
bool checkRecord(const char* _record, unsigned _len) const;
~BloomFilter();
private:
unsigned length; //length of total bits, mod 8 == 0
char *filter; //the bit space
unsigned hfnum; //num of hash functions
double rate; //false positive
HashFunction* hfptr; //hash functions pointer array
// allocates `filter` from `length` (see BloomFilter.cpp)
void init();
};
#endif //_UTIL_BLOOMFILTER_H

View File

@ -1,200 +0,0 @@
/*=============================================================================
# Filename: Bstr.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-16 13:18
# Description: achieve functions in Bstr.h
=============================================================================*/
#include "Bstr.h"
using namespace std;
//default construct function
// Default constructor: an empty Bstr that owns no buffer.
Bstr::Bstr()
    : str(NULL), length(0)
{
}
// Builds a Bstr holding a private copy of the first _len bytes of _str.
//WARN: no terminating '\0' is appended; add one yourself if a C string is needed.
Bstr::Bstr(const char* _str, unsigned _len)
{
    char* buffer = (char*)malloc(_len);
    memcpy(buffer, _str, sizeof(char) * _len);
    this->str = buffer;
    this->length = _len;
}
//Bstr::Bstr(char* _str, unsigned _len)
//{
// this->length = _len;
// this->str = _str;
//}
//copy construct function
// Copy constructor: takes a private copy of the source bytes.
// The destructor calls release(), which frees str, so sharing the source's
// pointer (as the previous shallow copy did) makes both objects free the same
// buffer. An empty source (NULL str) yields an empty copy.
Bstr::Bstr(const Bstr& _bstr)
{
    this->length = _bstr.length;
    this->str = NULL;
    if(_bstr.str != NULL)
    {
        this->str = (char*)malloc(this->length);
        if(this->str != NULL)
        {
            memcpy(this->str, _bstr.str, this->length);
        }
        else
        {
            //allocation failed (or zero-size request returned NULL): stay empty
            this->length = 0;
        }
    }
}
//assign function for class
//Bstr& Bstr::operate =(const Bstr& _bstr)
//{
// if(*this == _bstr)
// return *this; //a=a
// //WARN:not copy memory. if need to copy, delete original first!
// this->length = _bstr.length;
// this->str = _bstr.str;
// return *this;
//}
// True when Util::compare reports 1 for (this, _bstr).
bool
Bstr::operator > (const Bstr& _bstr)
{
    return Util::compare(this->str, this->length, _bstr.str, _bstr.length) == 1;
}
// True when Util::compare reports -1 for (this, _bstr).
bool
Bstr::operator < (const Bstr& _bstr)
{
    return Util::compare(this->str, this->length, _bstr.str, _bstr.length) == -1;
}
// True when Util::compare reports 0 for (this, _bstr).
bool
Bstr::operator == (const Bstr& _bstr)
{
    return Util::compare(this->str, this->length, _bstr.str, _bstr.length) == 0;
}
// True when Util::compare reports a value <= 0 for (this, _bstr).
bool
Bstr::operator <= (const Bstr& _bstr)
{
    return Util::compare(this->str, this->length, _bstr.str, _bstr.length) <= 0;
}
// True when Util::compare reports a value >= 0 for (this, _bstr).
bool
Bstr::operator >= (const Bstr& _bstr)
{
    return Util::compare(this->str, this->length, _bstr.str, _bstr.length) >= 0;
}
// True when Util::compare reports a non-zero value for (this, _bstr).
bool
Bstr::operator != (const Bstr& _bstr)
{
    return Util::compare(this->str, this->length, _bstr.str, _bstr.length) != 0;
}
unsigned
Bstr::getLen() const
{
return length;
}
void
Bstr::setLen(unsigned _len)
{
this->length = _len;
}
char*
Bstr::getStr() const
{
return str;
}
void
Bstr::setStr(char* _str)
{
this->str = _str;
}
// Deep-copies the bytes of *_bp into a freshly malloc'ed buffer.
// The buffer previously held by this object is not freed here.
void
Bstr::copy(const Bstr* _bp)
{
    const unsigned byte_count = _bp->getLen();
    this->length = byte_count;
    this->str = (char*)malloc(byte_count);
    memcpy(this->str, _bp->getStr(), byte_count);
}
// Deep-copies _len bytes from _str into a freshly malloc'ed buffer.
// The buffer previously held by this object is not freed here.
void
Bstr::copy(const char* _str, unsigned _len)
{
    char* buffer = (char*)malloc(_len);
    this->length = _len;
    this->str = buffer;
    memcpy(buffer, _str, _len);
}
// Forgets the buffer without freeing it: pointer becomes NULL, length becomes 0.
void
Bstr::clear()
{
    this->length = 0;
    this->str = NULL;
}
// Frees the buffer (free(NULL) is a no-op) and resets the object to empty.
void
Bstr::release()
{
    char* owned = this->str;
    this->clear();
    free(owned);
}
// Destructor: frees the buffer through release(), which also resets the
// pointer so it cannot be freed twice through this object.
Bstr::~Bstr()
{
    this->release();
}
// Debug helper. The whole body is currently commented out, so calling this
// does nothing. The disabled code shows the intended modes: "BSTR" would log
// the length and the full contents, "bstr" only the length.
void
Bstr::print(string s) const
{
//TODO: add a new debug file in Util(maybe a total?)
//#ifdef DEBUG
// Util::showtime();
// fputs("Class Bstr\n", Util::logsfp);
// fputs("Message: ", Util::logsfp);
// fputs(s.c_str(), Util::logsfp);
// fputs("\n", Util::logsfp);
// if(s == "BSTR")
// { //total information, providing accurate debugging
// fprintf(Util::logsfp, "length: %u\t the string is:\n", this->length);
// unsigned i;
// for(i = 0; i < this->length; ++i)
// fputc(this->str[i], Util::logsfp);
// fputs("\n", Util::logsfp);
// }
// else if(s == "bstr")
// { //only length information, needed when string is very long
// fprintf(Util::logsfp, "length: %u\n", this->length);
// }
// else;
//#endif
}

View File

@ -1,53 +0,0 @@
/*=============================================================================
# Filename: Bstr.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-16 13:01
# Description:
1. firstly written by liyouhuan, modified by zengli
2. class declaration for Bstr(used to store arbitary string)
=============================================================================*/
#ifndef _UTIL_BSTR_H
#define _UTIL_BSTR_H
#include "Util.h"
// Length-prefixed byte string: a raw buffer plus an explicit length, so the
// content does not have to be NUL-terminated.
class Bstr
{
private:
char* str; //pointers consume 8 byte in 64-bit system
unsigned length;
public:
Bstr();
//if copy memory, then use const char*, but slow
//else, can not use const char* -> char*
Bstr(const char* _str, unsigned _len);
//Bstr(char* _str, unsigned _len);
Bstr(const Bstr& _bstr);
//Bstr& operate = (const Bstr& _bstr);
// comparisons are implemented in Bstr.cpp on top of Util::compare
bool operator > (const Bstr& _bstr);
bool operator < (const Bstr& _bstr);
bool operator == (const Bstr& _bstr);
bool operator <= (const Bstr& _bstr);
bool operator >= (const Bstr& _bstr);
bool operator != (const Bstr& _bstr);
unsigned getLen() const;
void setLen(unsigned _len);
char* getStr() const;
void setStr(char* _str); //reuse a TBstr
void release(); //release memory
void clear(); //set str/length to 0
// copy() allocates a new buffer and copies the bytes into it
void copy(const Bstr* _bp);
void copy(const char* _str, unsigned _len);
//bool read(FILE* _fp);
//int write(FILE* _fp);
~Bstr();
void print(std::string s) const; //DEBUG
};
#endif // _UTIL_BSTR_H

View File

@ -1,532 +0,0 @@
/*=============================================================================
# Filename: Stream.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-20 14:10
# Description: achieve functions in Stream.h
=============================================================================*/
#include "Stream.h"
using namespace std;
ResultCmp mycmp;
//DEBUG: error when using STL::sort() to sort the Bstr[] units with mycmp, null pointer(Bstr*)
//reported sometimes(for example, watdiv_30.db and watdiv_200.db, query/C3.sql).
//Notice that sort() uses quick-sorting method when size is large, which usually
//performs faster than merge-sorting used by STL::stable_sort() which can ensures the order between same
//value(only in the sorted column) units.
//The error is marked by DEBUG1 and DEBUG2, and I just use STL::stable_sort() here, because I cannot find
//the reason of the null pointer error if using STL::sort()
// Puts every member into its "nothing opened yet" state; called first by
// both constructors.
void
Stream::init()
{
    //storage: in memory by default, no buffers or files yet
    this->inMem = true;
    this->ansMem = NULL;
    this->ansDisk = NULL;
    this->tempfp = NULL;
    this->space = 0;

    //shape and cursor
    this->rownum = 0;
    this->colnum = 0;
    this->xpos = 0; //the 0-th pos is not used now
    this->ypos = 0;

    //state
    this->mode = -1;
    this->needSort = false;

    //transfer record
    this->record = NULL;
    this->record_size = NULL;
}
// Default constructor: only resets the members through init().
Stream::Stream()
{
    init();
}
// Opens a stream for a result table of _rownum x _colnum cells.
// _keys/_desc configure the global comparator `mycmp`; _flag says whether the
// result has to be sorted. Storage is chosen from a rough memory estimate:
//   - enough memory: a rownum x colnum Bstr table is allocated;
//   - otherwise, unsorted: a single result file is opened right away;
//   - otherwise, sorted: nothing is opened here (write() opens run files lazily).
Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag)
{
    this->init();
#ifdef DEBUG_STREAM
    printf("Stream:now to open stream\n");
#endif
    this->rownum = _rownum;
    this->colnum = _colnum;
    this->needSort = _flag;
    //this->cmp = ResultCmp(this->rownum, _keys);
    mycmp = ResultCmp(this->rownum, _keys, _desc);

    //one reusable transfer buffer per column
    this->record = new Bstr[this->colnum];
    this->record_size = new unsigned[this->colnum];
    for(unsigned i = 0; i < this->colnum; ++i)
    {
        this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE));
        this->record_size[i] = Util::TRANSFER_SIZE;
    }
    this->mode = 0; //wait for writing records

    //estimate: ~100 bytes per cell, expressed in units of Util::GB.
    //The product is widened first: in plain unsigned arithmetic
    //rownum * colnum * 100 wraps for large results and underestimates the need.
    int size = (int)((unsigned long long)_rownum * _colnum * 100 / Util::GB);
    if(Util::memoryLeft() < size)
    {
        this->inMem = false;
        fprintf(stderr, "Stream: memory is not enough!\n");
    }
    else
    {
        fprintf(stderr, "Stream: memory is enough!\n");
    }
#ifdef DEBUG_STREAM
    fprintf(stderr, "Stream:after memory check!\n");
#endif
#ifdef DEBUG_STREAM
    fprintf(stderr, "Stream::Stream() - basic information\n");
    fprintf(stderr, "rownum: %u\tcolnum: %u\n", this->rownum, this->colnum);
    if(this->needSort)
    {
        fprintf(stderr, "the result needs to be sorted, the keys are listed below:\n");
        for(vector<int>::iterator it = _keys.begin(); it != _keys.end(); ++it)
            fprintf(stderr, "%d\t", *it);
        fprintf(stderr, "\n");
    }
    else
    {
        fprintf(stderr, "the result needs not to be sorted!\n");
    }
    //WARN: this is just for debugging!
    //this->inMem = false;
#endif
    if(this->inMem)
    {
        this->ansMem = new Bstr*[this->rownum];
        for(unsigned i = 0; i < this->rownum; ++i)
        {
            this->ansMem[i] = new Bstr[this->colnum];
        }
        return;
    }

    //below are for disk
    if(!this->needSort) // on disk and NO sort needed: open the result file now
    {
        string file_name = Util::tmp_path + Util::int2string(Util::get_cur_time());
        file_name += ".dat";
#ifdef DEBUG_STREAM
        fprintf(stderr, "%s\n", file_name.c_str());
#endif
        //FILE* fp = NULL;
        if((this->ansDisk = fopen(file_name.c_str(), "w+b")) == NULL)
        {
            fprintf(stderr, "Stream::Stream(): open error!\n");
            return;
        }
        this->result = file_name;
    }
    //return true;
}
// Orders two heap elements by applying the global comparator to their records.
bool operator < (const Element& _a, const Element& _b)
{
    const bool a_before_b = mycmp(_a.val, _b.val);
    return a_before_b;
}
// "a > b" means "b is ordered before a" under the global comparator.
// The previous form, !mycmp(a, b), is really ">=": it reports a > a as true.
// Stream::mergeSort passes greater<Element> to make_heap/pop_heap, which
// require a strict weak ordering, so the comparison must be strict.
bool operator > (const Element& _a, const Element& _b)
{
    return mycmp(_b.val, _a.val);
}
bool
Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx)
{
if(_idx >= this->colnum)
{
fprintf(stderr, "Stream::copyToRecord: index out of range!\n");
return false;
}
unsigned length = _len;
if(length + 1 > this->record_size[_idx])
{
this->record[_idx].release();
this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char)));
this->record_size[_idx] = length + 1; //one more byte: convenient to add \0
}
memcpy(this->record[_idx].getStr(), _str, length);
this->record[_idx].getStr()[length] = '\0'; //set for string() in KVstore
this->record[_idx].setLen(length);
return true;
}
// Flushes the in-memory run (tempst) to the current temp file as one sorted
// run, then reads that run's first record back and pushes it, together with
// the file handle, onto sortHeap for the later merge.
// Each cell is stored as an unsigned length followed by that many bytes.
void
Stream::outputCache()
{
//DEBUG1
//sort and output to file
stable_sort(this->tempst.begin(), this->tempst.end(), mycmp);
unsigned size = this->tempst.size();
for(unsigned i = 0; i < size; ++i)
{
Bstr* p = this->tempst[i];
for(unsigned j = 0; j < this->colnum; ++j)
{
unsigned len = p[j].getLen();
char* str = p[j].getStr();
fwrite(&len, sizeof(unsigned), 1, this->tempfp);
fwrite(str, sizeof(char), len, this->tempfp);
}
// the row has been written out; drop the in-memory copy
delete[] p;
}
this->tempst.clear();
//reset and add to heap, waiting for merge sort
fseek(this->tempfp, 0, SEEK_SET);
Bstr* bp = new Bstr[this->colnum];
for(unsigned i = 0; i < this->colnum; ++i)
{
// NOTE(review): the fread results are not checked; if the run were empty,
// len would be used uninitialized -- confirm callers never flush an empty run.
unsigned len;
fread(&len, sizeof(unsigned), 1, this->tempfp);
char* p = (char*)malloc(len * sizeof(char));
fread(p, sizeof(char), len, this->tempfp);
bp[i].setLen(len);
bp[i].setStr(p);
}
// the heap element now holds the file handle and the first record
this->sortHeap.push_back(Element(this->tempfp, bp));
// forget the handle here so the next write() opens a new run file
this->tempfp = NULL;
this->space = 0;
}
// Appends one cell to the record being assembled. Once the last column has
// been supplied, the complete record is handed to write(const Bstr*) and
// that call's result is returned; otherwise returns true.
bool
Stream::write(const char* _str, unsigned _len)
{
#ifdef DEBUG_PRECISE
    fprintf(stderr, "Stream::write(): the current column is %u\n", this->ypos);
#endif
    this->copyToRecord(_str, _len, this->ypos);
    ++this->ypos;
    if(this->ypos != this->colnum)
    {
        //record not complete yet
        return true;
    }

    this->ypos = 0;
#ifdef DEBUG_PRECISE
    fprintf(stderr, "Stream::write(): now a record is ready, the current row is %u\n", this->xpos);
#endif
    return this->write(this->record);
}
// Stores one complete record (_bp points at colnum cells).
//   - in memory: cells are deep-copied into row xpos of ansMem;
//   - on disk, sorted: the record is cached in tempst and flushed as a sorted
//     run by outputCache() once the cached bytes exceed BASE_MEMORY_LIMIT;
//   - on disk, unsorted: cells are appended to ansDisk as (length, bytes).
// Returns false when all rownum rows have already been written or a run file
// cannot be opened.
bool
Stream::write(const Bstr* _bp)
{
    if(this->xpos >= this->rownum)
    {
        fprintf(stderr, "you should set the end now!\n");
        return false;
    }

    if(this->inMem)
    {
        //Bstr** p = (Bstr**)this->ans;
        for(unsigned i = 0; i < this->colnum; ++i)
        {
            //this->ansMem[this->xpos][i].release();
            this->ansMem[this->xpos][i].copy(_bp + i);
        }
        this->xpos++;
        return true;
    }

    //below are for disk
    if(needSort) //NOTICE:in disk and need sort
    {
        if(this->tempfp == NULL)
        {
            string name = Util::tmp_path + "stream_" + Util::int2string(this->files.size());
            //NOTICE:name derived from time maybe same
            //name = Util::tmp_path + Util::int2string(Util::get_cur_time());
            name += ".dat";
#ifdef DEBUG_STREAM
            fprintf(stderr, "%s\n", name.c_str());
#endif
            if((this->tempfp = fopen(name.c_str(), "w+b")) == NULL)
            {
                fprintf(stderr, "Stream::write(): open error!\n");
                return false;
            }
            this->files.push_back(name);
        }

        Bstr* p = new Bstr[this->colnum];
        for(unsigned i = 0; i < this->colnum; ++i)
        {
            //p[i].release();
            p[i].copy(_bp + i);
            //count the bytes of THIS column (previously column 0 was counted
            //colnum times, which skewed the flush threshold)
            this->space += _bp[i].getLen();
        }
        this->space += sizeof(unsigned) * this->colnum;
        this->space += sizeof(char*) * this->colnum;
        this->tempst.push_back(p);
        this->xpos++;
        if(this->space > Stream::BASE_MEMORY_LIMIT)
        {
            this->outputCache();
        }
    }
    else
    {
        //FILE* fp = (FILE*)(this->ans);
        for(unsigned i = 0; i < this->colnum; ++i)
        {
            unsigned len = _bp[i].getLen();
            const char* str = _bp[i].getStr();
            fwrite(&len, sizeof(unsigned), 1, this->ansDisk);
            fwrite(str, sizeof(char), len, this->ansDisk);
        }
        this->xpos++;
    }
    return true;
}
// Returns the next record, or NULL once the end has been reached.
// The returned pointer refers to the stream's reusable transfer record
// (colnum cells), which is overwritten by the next call.
// After the last row has been delivered, mode is set to 2 (end state).
const Bstr*
Stream::read()
{
    if(this->isEnd())
    {
        fprintf(stderr, "read to end now!\n");
        return NULL;
    }

    if(this->inMem)
    {
        //Bstr** bp = (Bstr**)(this->ans);
        Bstr* ip = this->ansMem[this->xpos];
        for(unsigned i = 0; i < this->colnum; ++i)
        {
            this->copyToRecord(ip[i].getStr(), ip[i].getLen(), i);
            //this->record[i].release();
            //unsigned len = ip[i].getLen();
            //char* s = (char*)calloc(len + 1, sizeof(char));
            //memcpy(s, ip[i].getStr(), len);
            //this->record[i].setLen(len);
            //this->record[i].setStr(s);
        }
    }
    else
    {
        //below are for disk, both needSort and not
        //FILE* fp = (FILE*)(this->ans);
        for(unsigned i = 0; i < this->colnum; ++i)
        {
            //BETTER:alloca and reuse the space in Bstr?
            unsigned len = 0;
            fread(&len, sizeof(unsigned), 1, this->ansDisk);
            char* s = (char*)calloc(len + 1, sizeof(char));
            fread(s, sizeof(char), len, this->ansDisk);
            this->copyToRecord(s, len, i);
            //copyToRecord copied the bytes into the transfer record, so the
            //temporary must be freed here (it used to leak once per cell)
            free(s);
        }
    }
    this->xpos++;
    if(this->xpos == this->rownum)
        this->mode = 2;
    return this->record;

    //if(feof((FILE*)this->fp))
    //return NULL; //indicate the end
    //unsigned len = 0;
    //fread(&len, sizeof(unsigned), 1, (FILE*)this->fp);
    //if(len + 1 > this->transfer_size)
    //{
    //transfer.release();
    //transfer.setStr((char*)malloc(len+1));
    //this->transfer_size = len + 1;
    //}
    //fread(transfer.getStr(), sizeof(char), len, (FILE*)this->fp);
    //transfer.getStr()[len] = '\0'; //set for string() in KVstore
    //transfer.setLen(len);
    //return &transfer;
}
//Report whether the stream is in mode 2, the state read() switches to
//after it has returned the last record.
bool
Stream::isEnd()
{
	const bool reachedEnd = (2 == this->mode);
	return reachedEnd;
}
//do multi-list merge sort using heap
//Merges the records held by the elements of sortHeap (each element owns a FILE* fp and
//a current record val) into one new result file, which is opened as this->ansDisk.
//Every field is written as: unsigned length, then the bytes of the string.
//On success this->result is set to the name of the new file; the file position is NOT
//rewound here (setEnd() seeks to the beginning after calling this function).
//NOTE(review): assumes every element of sortHeap already holds the first record of its
//file in val when this function is entered -- confirm against outputCache().
void
Stream::mergeSort()
{
//the result file is named after the current time and placed in Util::tmp_path
string file_name = Util::tmp_path + Util::int2string(Util::get_cur_time());
file_name += ".dat";
#ifdef DEBUG_STREAM
fprintf(stderr, "%s\n", file_name.c_str());
#endif
//FILE* fp = NULL;
//NOTICE:if the open fails, ansDisk is left as NULL and result is not set
if((this->ansDisk = fopen(file_name.c_str(), "w+b")) == NULL)
{
fprintf(stderr, "Stream::mergeSort: open error!\n");
return;
}
//valid is the number of elements still taking part in the merge, i.e. the heap is
//always the range [begin, begin + valid)
unsigned valid = this->sortHeap.size();
vector<Element>::iterator begin = this->sortHeap.begin();
//with greater<Element> as the comparator, the front of the heap is the element that
//is smallest according to the operator > of Element (defined outside this function)
make_heap(begin, begin + valid, greater<Element>());
while(valid > 0)
{
#ifdef DEBUG_STREAM
fprintf(stderr, "valid: %u\n", valid);
#endif
//write contents of the first element to result file
Bstr* bp = this->sortHeap[0].val;
for(unsigned i = 0; i < this->colnum; ++i)
{
unsigned len = bp[i].getLen();
char* s = bp[i].getStr();
#ifdef DEBUG_STREAM
fprintf(stderr, "top %u: %u\n", i, len);
for(unsigned j = 0; j < len; ++j)
fprintf(stderr, "%c", s[j]);
fprintf(stderr, "\n");
#endif
fwrite(&len, sizeof(unsigned), 1, this->ansDisk);
fwrite(s, sizeof(char), len, this->ansDisk);
//the field has been written out, so its Bstr is released before being refilled below
bp[i].release();
}
#ifdef DEBUG_STREAM
fprintf(stderr, "\n");
#endif
//pop, read and adjust
//pop_heap moves the element just written to position valid-1; it is refilled there
//with the next record of its own file
pop_heap(begin, begin + valid, greater<Element>());
bp = this->sortHeap[valid-1].val;
bool tillEnd = false;
for(unsigned i = 0; i < this->colnum; ++i)
{
unsigned len;
char* s;
FILE* tp = this->sortHeap[valid-1].fp;
fread(&len, sizeof(unsigned), 1, tp);
//end of this file: release the element (record array and FILE*) and shrink the heap
if(feof(tp))
{
this->sortHeap[valid-1].release();
valid--;
tillEnd = true;
#ifdef DEBUG_STREAM
fprintf(stderr, "now a stream file reaches its end!\n");
#endif
break;
}
//NOTE(review): the results of malloc and of the fread below are not checked
s = (char*)malloc(sizeof(char) * len);
fread(s, sizeof(char), len, tp);
bp[i].setLen(len);
//presumably the Bstr takes ownership of s and frees it in release() -- TODO confirm
bp[i].setStr(s);
}
//the refilled element re-enters the heap; an exhausted one was already dropped above
if(!tillEnd)
push_heap(begin, begin + valid, greater<Element>());
}
//fseek(fp, 0, SEEK_SET);
//this->ans = fp;
this->result = file_name;
}
//Finish the write phase and prepare the stream for reading from the first record.
//  - in memory: the table is sorted (if needSort) with mycmp
//  - on disk and needSort: the cached records are flushed, then the temp files are
//    merged into one result file (or the single temp file is used directly)
//  - on disk without needSort: the result file is simply rewound
//If the stream is already in read mode (1) or has been read to the end (2), this only
//rewinds to the first record: the sort/merge must not run twice because the merge
//releases the elements of sortHeap.
void
Stream::setEnd()
{
	if(this->mode == 1 || this->mode == 2)
	{
		if(this->mode == 1)
			fprintf(stderr, "Stream::setEnd(): already in read mode!\n");
		this->mode = 1;
		this->xpos = 0;
		if(!this->inMem && this->ansDisk != NULL)
			fseek(this->ansDisk, 0, SEEK_SET);
		return;
	}

	this->mode = 1;	//wait for reading records
	this->xpos = 0;
#ifdef DEBUG_STREAM
	fprintf(stderr, "Stream::setEnd(): now is in read mode!\n");
#endif

	if(this->inMem)
	{
		if(this->needSort)
		{
			stable_sort(this->ansMem, this->ansMem + this->rownum, mycmp);
		}
		return;
	}

	//below are for disk
	if(this->needSort)
	{
		//flush the records which are still cached for the current temp file
		if(this->tempfp != NULL)
		{
			this->outputCache();
		}

		if(this->files.size() > 1)
		{
#ifdef DEBUG_STREAM
			fprintf(stderr, "Stream::setEnd(): merge sort is needed here!\n");
#endif
			//do multi-list merge sort using heap
			this->mergeSort();
		}
		else if(this->files.size() > 0)	//==1
		{
			//only one sorted file exists, so it is the result itself
			if(!this->sortHeap.empty())
				this->sortHeap[0].release();
			this->ansDisk = fopen(this->files[0].c_str(), "r+b");
			if(this->ansDisk == NULL)
				fprintf(stderr, "Stream::setEnd(): open error!\n");
			this->result = this->files[0];
		}
	}

	//no result file exists if nothing was written or if an open failed:
	//passing NULL to fseek is undefined
	if(this->ansDisk != NULL)
		fseek(this->ansDisk, 0, SEEK_SET);
}
//Release everything owned by the stream: the record buffer used by read(), then either
//the in-memory table or the result file together with all temp files on disk.
Stream::~Stream()
{
	delete[] this->record;
	delete[] this->record_size;
#ifdef DEBUG_STREAM
	fprintf(stderr, "Stream::~Stream(): record deleted!\n");
#endif

	if(this->inMem)
	{
		for(unsigned i = 0; i < this->rownum; ++i)
		{
			delete[] this->ansMem[i];
		}
		delete[] this->ansMem;
#ifdef DEBUG_STREAM
		fprintf(stderr, "Stream::~Stream(): in memory, now table deleted!\n");
#endif
		return;
	}

	//below are for disk, both needSort and not
	//the result file may never have been opened (nothing written, setEnd() not called,
	//or an open error), and passing NULL to fclose is undefined
	if(this->ansDisk != NULL)
	{
		fclose(this->ansDisk);
		this->ansDisk = NULL;
	}

	//remove files and result
	if(!this->result.empty())
		remove(this->result.c_str());
	for(vector<string>::iterator it = this->files.begin(); it != this->files.end(); ++it)
		remove(it->c_str());
#ifdef DEBUG_STREAM
	fprintf(stderr, "Stream::~Stream(): in disk, now all files removed!\n");
#endif
}

View File

@ -1,158 +0,0 @@
/*=============================================================================
# Filename: Stream.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-20 13:18
# Description:
1. stream buffer for intermediate results; stores/writes one record at a time
2. dynamically chooses the storage method (memory/disk) according to the memory usage of the system
3. each Stream instance is associated with one result-like object and one file
4. functions using this class should implement writeToStream... to operate on different records
5. the records should be viewed and handled as tables
=============================================================================*/
#ifndef _UTIL_STREAM_H
#define _UTIL_STREAM_H
#include "Util.h"
#include "Bstr.h"
//BETTER: use vector instead of table with fixed size, no need to indicate the rownum first(colnum required)
//It is really a question to use Bstr[] or string[] to store a record
//struct Stream::ResultEqual
//{
// int result_len;
// ResultEqual(int _l):result_len(_l){}
// bool operator() (Bstr* const& a, Bstr* const& b)
// {
// for (int i = 0; i < result_len; ++i)
// {
// if (a[i] != b[i])
// return false;
// }
// return true;
// }
//};
//Comparator of two records (arrays of Bstr) used to order the results.
//keys[i] is the index of the i-th sort column in a record, and desc[i] tells whether
//that column is ordered descending (true) or ascending (false).
//The comparison is lexicographic over the key columns, in the order given by keys.
struct ResultCmp
{
	int result_len;
	std::vector<int> keys;
	std::vector<bool> desc;

	ResultCmp()
	{
		this->result_len = 0;
	}

	ResultCmp(int _l, std::vector<int>& _keys, std::vector<bool> &_desc)
	{
		this->result_len = _l;
		this->keys = std::vector<int>(_keys);
		this->desc = std::vector<bool>(_desc);
	}

	//Return true only if record a must be placed strictly before record b.
	//NOTICE:records which are equal on all keys must yield false: sort, stable_sort and
	//the heap algorithms require a strict weak ordering, and answering true for
	//equivalent elements is undefined behaviour (and breaks the stability of stable_sort)
	bool operator() (Bstr* const& a, Bstr* const& b)
	{
		unsigned size = this->keys.size();
		for(unsigned i = 0; i < size; ++i)
		{
			int t = this->keys[i];
			if(a[t] != b[t])
			{
				//a key without a direction flag is treated as ascending
				bool descending = (i < this->desc.size()) && this->desc[i];
				if(!descending)
					return (a[t] < b[t]);
				else
					return (a[t] > b[t]);
			}
		}
		return false;
	}
};
//static ResultCmp mycmp;
//One entry of the heap used by the merge sort of Stream: a temp file together with
//the record (array of Bstr) most recently read from it.
typedef struct StreamElement
{
	FILE* fp;
	Bstr* val;

	StreamElement(FILE* _fp, Bstr* _val)
	{
		this->fp = _fp;
		this->val = _val;
	}

	//Free the record array and close the file.
	//Both members are reset to NULL, so calling release() again is harmless
	//(delete[] accepts NULL, but fclose does not, hence the check).
	void release()
	{
		delete[] this->val;
		this->val = NULL;
		if(this->fp != NULL)
		{
			fclose(this->fp);
			this->fp = NULL;
		}
	}
}Element;
//static bool operator < (const Element& _a, const Element& _b);
//BETTER:use mmap part by part to get output
//NOTICE:new and delete a Stream each time you use it to store a series of results
//Duplicates should not be handled here, because sorting based on int-int is faster
//(so duplicates are easy to remove there)
//However, for 'order by', string comparison is a must, and it should be done here!
//(maybe in memory, maybe internal-external)
//Buffer for a table of result records; each record is an array of colnum Bstr fields.
//Depending on inMem the records are kept in memory (ansMem) or in a file (ansDisk).
//If needSort is set, the records are ordered when setEnd() is called.
//Order of use as seen in the implementation: write() the records, call setEnd(),
//then read() until isEnd() is true.
class Stream
{
private:
//multi-way merge sort is used here to do the internal-external sort
//heap used by mergeSort(); each Element owns a temp file and its current record
std::vector<Element> sortHeap;
//names of the temp files created by write(); all of them are removed in the destructor
std::vector<std::string> files;
//the temp file opened by write() when sorting on disk, NULL if none is open
FILE* tempfp;
//copies of the records written so far for the current temp file
std::vector<Bstr*> tempst;
//bytes accounted for the records cached in tempst; once it exceeds BASE_MEMORY_LIMIT
//write() calls outputCache()
unsigned space; //space used in disk for one file
//struct ResultCmp cmp;
//void* ans; //FILE* if in disk, Bstr** if in memory
//the table of records if stored in memory: ansMem[row][column]
Bstr** ansMem;
//the result file if stored on disk
FILE* ansDisk;
std::string result; //needed if stored in disk, to be removed later
//rownum: number of records; read() reaches the end when xpos equals rownum
//colnum: number of fields in each record
unsigned rownum, colnum;
//whether the records must be sorted before they are read
bool needSort;
//std::vector<int> keys;
int mode; //-1:invalid;0:only write;1:only read;2:read end
//true if the records are kept in memory, false if they are kept on disk
bool inMem;
//below are for record position
//xpos is the index of the next record to write/read
//NOTE(review): ypos is not used by the member functions visible in this part of the file
unsigned xpos, ypos;
Bstr* record; //one record for read, array of Bstrs
//one entry per column, freed together with record
//presumably the buffer size of each field in record -- TODO confirm in copyToRecord()
unsigned* record_size;
void init();
//copy _len bytes of _str into field _idx of record
bool copyToRecord(const char* _str, unsigned _len, unsigned _idx);
//called by write() and setEnd() for the records cached in tempst
void outputCache();
//merge the temp files into one result file, see the definition
void mergeSort();
public:
//NOTICE:max num of opened files is 1024 in Linux by default, but this is enough for a result
//as large as 1T
//1 << 30 bytes, i.e. 1GB
static const unsigned BASE_MEMORY_LIMIT = 1 << 30;
Stream();
Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag);
//read/write should be based on the unit of record
//NOTICE:this function copy/save the data, caller free the memory if needed
//_bp points to an array of colnum Bstr, i.e. one record
bool write(const Bstr* _bp);
bool write(const char* _str, unsigned _len);
//NOTICE:the memory should not be freed by user, and the latter will flush the former!
//returns NULL if the stream has already been read to the end
const Bstr* read();
//finish writing and switch to read mode (sorting first if needed)
void setEnd();
//true if mode is 2 (read end)
bool isEnd();
~Stream();
};
#endif //_UTIL_STREAM_H

File diff suppressed because it is too large Load Diff

View File

@ -1,228 +0,0 @@
/*=============================================================================
# Filename: Util.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-16 10:43
# Description:
1. firstly written by liyouhuan, modified by zengli
2. common macros, functions, classes, etc
# Notice: we only talk about sub-graph isomorphism in the essay, however, in
# this system, the homomorphism is supported.(which means that multiple variables
in the sparql query can point to the same node in data graph)
=============================================================================*/
#ifndef _UTIL_UTIL_H
#define _UTIL_UTIL_H
/* basic macros and types are defined here, including common headers */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <dirent.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <regex.h>
#include <locale.h>
#include <assert.h>
#include <libgen.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <arpa/inet.h>
//NOTICE:below are restricted to C++, C files should not include(maybe nested) this header!
#include <bitset>
#include <string>
#include <fstream>
#include <iostream>
#include <sstream>
#include <map>
#include <set>
#include <stack>
#include <queue>
#include <vector>
#include <list>
#include <iterator>
#include <algorithm>
#include <functional>
#include <utility>
//NOTICE:below are libraries need to link
#include <math.h>
#include <readline/readline.h>
#include <readline/history.h>
#define STREAM_ON 1
#define READLINE_ON 1
#define MULTI_INDEX 1
//#define SO2P 1
//indicate that in debug mode
//#define DEBUG_STREAM
//#define DEBUG_PRECISE 1 all information
//#define DEBUG_KVSTORE 1 //in KVstore
//#define DEBUG_VSTREE 1 //in Database
//#define DEBUG_DATABASE 1 //in Database
#define DEBUG_JOIN
#ifdef DEBUG_PRECISE
#ifndef DEBUG
#define DEBUG
#endif
#endif
#ifdef DEBUG_KVSTORE
#ifndef DEBUG
#define DEBUG
#endif
#endif
#ifdef DEBUG_VSTREE
#ifndef DEBUG
#define DEBUG
#endif
#endif
#ifdef DEBUG_DATABASE
#ifndef DEBUG
#define DEBUG
#endif
#endif
#ifdef DEBUG_JOIN
#ifndef DEBUG
#define DEBUG
#endif
#endif
#ifndef DEBUG
//#define DEBUG
#endif
#define xfree(x) free(x); x = NULL;
//NOTICE:include Util.h and below in each main function
//(the beginning position)
//#ifdef DEBUG
// Util util;
//#endif
typedef unsigned(*HashFunction)(const char*);
//NOTICE:there are not many hash functions for int, so we represent an int by a 4-byte string instead
//(without fully converting the int to a string, which is costly)
//http://www.cppblog.com/aurain/archive/2010/07/06/119463.html
//http://blog.csdn.net/mycomputerxiaomei/article/details/7641221
//http://kb.cnblogs.com/page/189480/
/******** all static&universal constants and functions ********/
//Collection of constants, global settings and helper functions shared by the whole
//system; apart from the constructor and the destructor every member is static.
class Util
{
public:
static int triple_num;
static int pre_num;
static int entity_num;
static int literal_num;
//2^20 and 2^30
static const unsigned MB = 1048576;
static const unsigned GB = 1073741824;
static const int TRIPLE_NUM_MAX = 1000*1000*1000;
static const char EDGE_IN = 'i';
static const char EDGE_OUT= 'o';
//In order to differentiate the sub-part and literal-part of object
//let subid begin with 0, while literalid begins with LITERAL_FIRST_ID
//used in Database and Join
static const int LITERAL_FIRST_ID = 1000*1000*1000;
//initial transfer buffer size in Tree/ and Stream/
static const unsigned TRANSFER_SIZE = 1 << 20; //1M
static std::string db_home;
//prefix of the names of temp files (Stream builds its file names from it)
static std::string tmp_path;
//these are for debugging
//to build logs-system, each class: print() in time
static std::string debug_path;
static FILE* debug_kvstore;
static FILE* debug_database;
static FILE* debug_vstree;
static int memUsedPercentage();
static int memoryLeft();
static int compare(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2); //QUERY(how to use default args)
static int string2int(std::string s);
static std::string int2string(long n);
//string2str: s.c_str()
//str2string: string(str)
static std::string showtime();
//NOTE(review): the signature matches a qsort/bsearch comparator -- confirm the usage
static int cmp_int(const void* _i1, const void* _i2);
static void sort(int*& _id_list, int _list_len);
static int bsearch_int_uporder(int _key, const int* _array,int _array_num);
static bool bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len);
static int bsearch_vec_uporder(int _key, const std::vector<int>* _vec);
static std::string result_id_str(std::vector<int*>& _v, int _var_num);
static bool dir_exist(const std::string _dir);
static bool create_dir(const std:: string _dir);
//the returned value is used by Stream::mergeSort() to build a file name
static long get_cur_time();
static bool save_to_file(const char*, const std::string _content);
//presumably compares the id with LITERAL_FIRST_ID -- TODO confirm in Util.cpp
static bool is_literal_ele(int);
static int removeDuplicate(int*, int);
static std::string getQueryFromFile(const char* _file_path);
static std::string getSystemOutput(std::string cmd);
static std::string getExactPath(const char* path);
static std::string getItemsFromDir(std::string path);
static void logging(std::string _str);
// Below are some useful hash functions for string
//all of them have the type HashFunction: unsigned(*)(const char*)
static unsigned simpleHash(const char *_str);
static unsigned APHash(const char *_str);
static unsigned BKDRHash(const char *_str);
static unsigned DJBHash(const char *_str);
static unsigned ELFHash(const char *_str);
static unsigned DEKHash(const char* _str);
static unsigned BPHash(const char* _str);
static unsigned FNVHash(const char* _str);
static unsigned HFLPHash(const char* _str);
static unsigned HFHash(const char* _str);
static unsigned JSHash(const char *_str);
static unsigned PJWHash(const char *_str);
static unsigned RSHash(const char *_str);
static unsigned SDBMHash(const char *_str);
static unsigned StrHash(const char* _str);
static unsigned TianlHash(const char* _str);
//16 hash functions are declared above
//presumably hash[] holds exactly these HashNum functions -- TODO confirm in Util.cpp
static const unsigned HashNum = 16;
static HashFunction hash[];
static double logarithm(double _a, double _b);
static void intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2);
static char* l_trim(char * szOutput, const char *szInput);
static char* r_trim(char *szOutput, const char *szInput);
static char* a_trim(char * szOutput, const char * szInput);
//NOTICE: this function must be called at the beginning of executing!
Util();
~Util();
static std::string profile;
static bool configure(); //read init.conf and set the parameters for this system
static bool config_setting();
static bool config_advanced();
static bool config_debug();
static bool gStore_mode;
};
#endif //_UTIL_UTIL_H

View File

@ -6,8 +6,8 @@
*/
#include"EntryBuffer.h"
using namespace std;
#include"../Signature/SigEntry.h"
#include"../Database/Database.h"
int EntryBuffer::DEFAULT_CAPACITY = 2*1000*1000;

View File

@ -8,10 +8,8 @@
#ifndef ENTRYBUFFER_H_
#define ENTRYBUFFER_H_
#include "../Util/Util.h"
#include "../Signature/SigEntry.h"
//class SigEntry;
class SigEntry;
#include<stdio.h>
/* EntryBuffer is used to loading entries from hard disk when building VSTree. */
class EntryBuffer

View File

@ -5,10 +5,11 @@
* Author: hanshuo
*/
#include "LRUCache.h"
#include "VNode.h"
using namespace std;
#include"LRUCache.h"
#include"VNode.h"
#include"../Database/Database.h"
#include<stdio.h>
#include<algorithm>
int LRUCache::DEFAULT_CAPACITY = 1*1000*1000;
@ -23,12 +24,6 @@ LRUCache::LRUCache(int _capacity)
this->prev = new int[this->capacity + 2];
this->keys = new int[this->capacity + 2];
this->values = new VNode*[this->capacity + 2];
for(int i = 0; i < this->capacity + 2; ++i)
{
this->values[i] = NULL;
}
this->next[LRUCache::START_INDEX] = LRUCache::END_INDEX;
this->next[LRUCache::END_INDEX] = LRUCache::NULL_INDEX;
this->prev[LRUCache::START_INDEX] = LRUCache::NULL_INDEX;
@ -42,10 +37,6 @@ LRUCache::~LRUCache()
delete []this->next;
delete []this->prev;
delete []this->keys;
for(int i = 0; i < this->size; ++i)
{
delete this->values[i];
}
delete []this->values;
}
@ -95,7 +86,7 @@ bool LRUCache::loadCache(string _filePath)
{
stringstream _ss;
_ss << "error file line: " << _tmp_cycle_count << " " << nodePtr->getFileLine() << " " << nodePtr->getChildNum() << endl;
Util::logging(_ss.str());
Database::log(_ss.str());
}
}
@ -273,7 +264,7 @@ void LRUCache:: freeElem(int _pos)
}
/* set the memory of the _pos element in cache */
void LRUCache::setElem(int _pos, int _key, VNode* _value)
void LRUCache:: setElem(int _pos, int _key, VNode* _value)
{
this->key2pos[_key] = _pos;
this->keys[_pos] = _key;
@ -286,8 +277,8 @@ void LRUCache::setElem(int _pos, int _key, VNode* _value)
this->prev[nextPos] = _pos;
this->next[_pos] = LRUCache::END_INDEX;
this->prev[_pos] = prevPos;
//NOTICE: this cannot be placed in loadCache() because this may be called by other functions
this->size++;
this->size ++;
}
/* just write the values[_pos] to the hard disk, the VNode in memory will not be free. */
@ -315,7 +306,7 @@ bool LRUCache::writeOut(int _pos, int _fileLine)
int line = _fileLine == -1 ? nodePtr->getFileLine() : _fileLine;
size_t vNodeSize = sizeof(VNode);
int flag = 0;
long long seekPos = (long long)line * vNodeSize;
int seekPos = (long long)line * vNodeSize;
flag = fseek(filePtr, seekPos, SEEK_SET);
@ -364,7 +355,7 @@ bool LRUCache::readIn(int _pos, int _fileLine)
return false;
}
//bool is_node_read = (fread((char *)nodePtr,vNodeSize,1,filePtr) == 1);
bool is_node_read = (fread((char *)nodePtr,vNodeSize,1,filePtr) == 1);
fclose(filePtr);
if (nodePtr == NULL || nodePtr->getFileLine() != _fileLine)
@ -403,7 +394,7 @@ bool LRUCache::flush()
{
stringstream _ss;
_ss << "line error at !!!" << line << " " << nodePtr->getFileLine() << endl;
Util::logging(_ss.str());
Database::log(_ss.str());
}
}

View File

@ -1,16 +1,15 @@
/*=============================================================================
# Filename: LRUCache.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 14:05
# Description: written by hanshuo
=============================================================================*/
/*
* LRUCache.h
*
* Created on: 2014-6-30
* Author: hanshuo
*/
#ifndef LRUCACHE_H_
#define LRUCACHE_H_
#include "../Util/Util.h"
#include<map>
#include<string>
class VNode;
// before using the cache, you must loadCache or createCache.

View File

@ -1,20 +1,21 @@
/*=============================================================================
# Filename: VNode.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 14:08
# Description: by liyouhuan and hanshuo
=============================================================================*/
/*
* VNode.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
* Implement on: 2014-7-2
* Author: hanshuo
*/
#include "VNode.h"
using namespace std;
#include"VNode.h"
#include<iostream>
VNode::VNode()
{
this->is_leaf = false;
this->is_root = false;
this->child_num = 0;
// the following three lines are unnecessary.
this->self_file_line = -1;
this->father_file_line = -1;
for(int i = 0; i < VNode::MAX_CHILD_NUM; i ++)
@ -115,8 +116,8 @@ void VNode::setChildEntry(int _i, const SigEntry _entry)
// {
// if (this->getFileLine() == 0 && this->getChildFileLine(_i) == 153)
// {
// Util::logging("set node 0's child node 153's entry:");
// Util::logging(Signature::BitSet2str(this->child_entries[_i].getEntitySig().entityBitSet));
// Database::log("set node 0's child node 153's entry:");
// Database::log(Signature::BitSet2str(this->child_entries[_i].getEntitySig().entityBitSet));
// }
// }
}

View File

@ -1,25 +1,22 @@
/*=============================================================================
# Filename: VNode.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 14:05
# Description: written by liyouhuan
=============================================================================*/
/*
* VNode.h
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#ifndef _VSTREE_VNODE_H
#define _VSTREE_VNODE_H
#ifndef VNODE_H_
#define VNODE_H_
#include "../Util/Util.h"
#include <vector>
#include "../Signature/SigEntry.h"
#include "LRUCache.h"
#include"LRUCache.h"
#include<sstream>
class VNode
{
class VNode{
public:
static const int MAX_CHILD_NUM = 200;
//static const int MAX_CHILD_NUM = 151;
static const int MIN_CHILD_NUM = 100;
//static const int MIN_CHILD_NUM = 60;
static const int MAX_CHILD_NUM = 151;
static const int MIN_CHILD_NUM = 60;
//debug
// static const int MAX_CHILD_NUM = 50;
@ -69,10 +66,9 @@ private:
int self_file_line;
int father_file_line;
SigEntry entry;
//BETTER:is this necessary? too much memory?
SigEntry child_entries[VNode::MAX_CHILD_NUM];
int child_file_lines[VNode::MAX_CHILD_NUM];
};
#endif // _VSTREE_VNODE_H
#endif /* VNODE_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -1,57 +1,55 @@
/*=============================================================================
# Filename: VSTree.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-11-01 13:02
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/
/*
* VSTREE.h
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#ifndef _VSTREE_VSTREE_H
#define _VSTREE_VSTREE_H
#ifndef VSTREE_H_
#define VSTREE_H_
#include "../Util/Util.h"
#include "../Query/SPARQLquery.h"
#include "VNode.h"
#include "LRUCache.h"
#include "EntryBuffer.h"
#include<string>
#include<map>
#include"VNode.h"
#include"LRUCache.h"
#include"EntryBuffer.h"
#include"../Query/SPARQLquery.h"
class VSTree
{
class VSTree{
friend class VNode;
public:
VSTree(std::string _store_path);
~VSTree();
int getHeight()const;
//build the VSTree from the _entity_signature_file.
/* build the VSTree from the _entity_signature_file. */
bool buildTree(std::string _entity_signature_file);
bool deleteTree();
//Incrementally update bitset of _entity_id conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
//Entry of _entity_id must exists
/* Incrementally update bitset of _entity_id
* conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
* Entry of _entity_id must exists */
bool updateEntry(int _entity_id, const EntityBitSet& _bitset);
//Replace the Entry(_enitty_id)'s EntityBitSet with _bitset Entry of _entity_id must exists
/* Replace the Entry(_enitty_id)'s EntityBitSet with _bitset
* Entry of _entity_id must exists */
bool replaceEntry(int _entity_id, const EntityBitSet& _bitset);
//insert an new Entry, whose entity doesn't exist before
/* insert an new Entry, whose entity doesn't exist before */
bool insertEntry(const SigEntry& _entry);
//remove an existed Entry(_entity_id) from VSTree
/* remove an existed Entry(_entity_id) from VSTree */
bool removeEntry(int _entity_id);
//save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path.
/* save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path. */
bool saveTree();
//load tree from tree_info_file_path and tree_node_file_path files.
/* load tree from tree_info_file_path and tree_node_file_path files. */
bool loadTree();
//get the tree's root node pointer.
/* get the tree's root node pointer. */
VNode* getRoot();
//get the node pointer by its file line.
/* get the node pointer by its file line. */
VNode* getNode(int _line);
//retrieve candidate result set by the var_sig in the _query.
/* retrieve candidate result set by the var_sig in the _query. */
void retrieve(SPARQLquery& _query);
//retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list.
void retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list);
private:
int root_file_line;
int node_num;
@ -64,29 +62,32 @@ private:
static std::string tree_node_file_path;
static std::string tree_info_file_path;
//choose the best leaf node to insert the _entry, return the choosed leaf node's pointer.
/* choose the best leaf node to insert the _entry, return the choosed leaf node's pointer. */
VNode* chooseNode(VNode* _p_node, const SigEntry& _entry);
//split the _p_full_node to two new node when it is full.
//the parameter _insert_entry and _p_insert_node are the entry/node
//need to be insert to the _p_full_node.
/* split the _p_full_node to two new node when it is full.
* the parameter _insert_entry and _p_insert_node are the entry/node
* need to be insert to the _p_full_node.
*/
void split(VNode* _p_full_node, const SigEntry& _insert_entry, VNode* _p_insert_node);
//create a new node when one node need splitting.
/* create a new node when one node need splitting. */
VNode* createNode();
//swap two nodes' file line, their related nodes(father and children nodes) will also be updated.
/* swap two nodes' file line, their related nodes(father and children nodes) will also be updated. */
void swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b);
//save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc.
/* save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc. */
bool saveTreeInfo();
//load VSTree's information from tree_info_file_path.
/* load VSTree's information from tree_info_file_path. */
bool loadTreeInfo();
//traverse the tree_node_file_path file, load the mapping from entity id to file line.
/* traverse the tree_node_file_path file, load the mapping from entity id to file line. */
bool loadEntityID2FileLineMap();
//update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node.
/* update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node. */
void updateEntityID2FileLineMap(VNode* _p_node);
//get the leaf node pointer by the given _entityID
/* get the leaf node pointer by the given _entityID */
VNode* getLeafNodeByEntityID(int _entityID);
/* retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list. */
void retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list);
std::string to_str();
};
#endif // _VSTREE_VSTREE_H
#endif /* VSTREE_H_ */

5
api/.gitignore vendored
View File

@ -1,5 +0,0 @@
*.a
*.o
*.class
*.jar

View File

@ -1,14 +1,13 @@
/*=============================================================================
# Filename: CppAPIExample.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2016-02-21 21:32
# Description: originally written by hanshuo, modified by zengli
=============================================================================*/
/*
* CppAPIExample.cpp
*
* Created on: 2014-11-4
* Author: hanshuo
*/
#include "GstoreConnector.h"
#include <string>
#include <iostream>
#include"GstoreConnector.h"
#include<string>
#include<iostream>
// before run this example, you must start up the GStore server at first (use command ./gserver).
int main(int argc, char * argv[])
@ -18,30 +17,31 @@ int main(int argc, char * argv[])
// build a new database by a RDF file.
// note that the relative path is related to gserver.
gc.build("LUBM10.db", "data/LUBM_10.n3");
gc.build("db_LUBM10", "example/rdf_triple/LUBM_10_GStore.n3");
// then you can execute SPARQL query on this database.
std::string sparql = "select ?x where \
{ \
?x <rdf:type> <ub:UndergraduateStudent>. \
?y <ub:name> <Course1>. \
?x <ub:takesCourse> ?y. \
?z <ub:teacherOf> ?y. \
?z <ub:name> <FullProfessor1>. \
?z <ub:worksFor> ?w. \
?w <ub:name> <Department0>. \
?x rdf:type <ub:UndergraduateStudent>. \
?y ub:name <Course1>. \
?x ub:takesCourse ?y. \
?z ub:teacherOf ?y. \
?z ub:name <FullProfessor1>. \
?z ub:worksFor ?w. \
?w ub:name <Department0>. \
}";
std::string answer = gc.query(sparql);
std::cout << answer << std::endl;
// unload this database.
gc.unload("LUBM10.db");
gc.unload("db_LUBM10");
// also, you can load some exist database directly and then query.
gc.load("LUBM10.db");
gc.load("db_LUBM10");
answer = gc.query(sparql);
std::cout << answer << std::endl;
return 0;
}

View File

@ -1,3 +0,0 @@
*
!.gitignore

View File

@ -1,27 +1,19 @@
/*=============================================================================
# Filename: GstoreConnector.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2016-02-21 21:24
# Description: achieve functions in GstoreConnector.h
=============================================================================*/
/*
* GstoreConnector.cpp
*
* Created on: 2014-11-1
* Author: hanshuo
*/
#include "GstoreConnector.h"
#include <iostream>
#include"GstoreConnector.h"
#include<iostream>
using namespace std;
string GstoreConnector::defaultServerIP = "127.0.0.1";
std::string GstoreConnector::defaultServerIP = "127.0.0.1";
unsigned short GstoreConnector::defaultServerPort = 3305;
GstoreConnector::GstoreConnector()
{
this->serverIP = GstoreConnector::defaultServerIP;
}
GstoreConnector::GstoreConnector(string _ip)
{
this->serverIP = _ip;
this->serverPort = GstoreConnector::defaultServerPort;
}
@ -31,44 +23,40 @@ GstoreConnector::GstoreConnector(unsigned short _port)
this->serverPort = _port;
}
GstoreConnector::GstoreConnector(string _ip, unsigned short _port)
GstoreConnector::GstoreConnector(std::string _ip, unsigned short _port)
{
if(_ip == "localhost")
this->serverIP = "127.0.0.1";
else
this->serverIP = _ip;
this->serverIP = _ip;
this->serverPort = _port;
}
GstoreConnector::~GstoreConnector()
{
this->disconnect();
}
bool
GstoreConnector::load(string _db_name)
bool GstoreConnector::load(std::string _db_name)
{
bool connect_return = this->connect();
if (!connect_return)
{
cerr << "connect to server error. @GstoreConnector::load" << endl;
std::cerr << "connect to server error. @GstoreConnector::load" << std::endl;
return false;
}
string cmd = "load " + _db_name;
std::string cmd = "load " + _db_name;
bool send_return = this->socket.send(cmd);
if (!send_return)
{
cerr << "send load command error. @GstoreConnector.load" << endl;
std::cerr << "send load command error. @GstoreConnector.load" << std::endl;
return false;
}
string recv_msg;
std::string recv_msg;
this->socket.recv(recv_msg);
this->disconnect();
cout << recv_msg << endl; //debug
std::cout << recv_msg << std::endl; //debug
if (recv_msg == "load database done.")
{
return true;
@ -77,30 +65,29 @@ GstoreConnector::load(string _db_name)
return false;
}
bool
GstoreConnector::unload(string _db_name)
bool GstoreConnector::unload(std::string _db_name)
{
bool connect_return = this->connect();
if (!connect_return)
{
cerr << "connect to server error. @GstoreConnector::unload" << endl;
std::cerr << "connect to server error. @GstoreConnector::unload" << std::endl;
return false;
}
string cmd = "unload " + _db_name;
std::string cmd = "unload " + _db_name;
bool send_return = this->socket.send(cmd);
if (!send_return)
{
cerr << "send unload command error. @GstoreConnector::unload" << endl;
std::cerr << "send unload command error. @GstoreConnector::unload" << std::endl;
return false;
}
string recv_msg;
std::string recv_msg;
this->socket.recv(recv_msg);
this->disconnect();
cout << recv_msg << endl; //debug
std::cout << recv_msg << std::endl; //debug
if (recv_msg == "unload database done.")
{
return true;
@ -109,30 +96,29 @@ GstoreConnector::unload(string _db_name)
return false;
}
bool
GstoreConnector::build(string _db_name, string _rdf_file_path)
bool GstoreConnector::build(std::string _db_name, std::string _rdf_file_path)
{
bool connect_return = this->connect();
if (!connect_return)
{
cerr << "connect to server error. @GstoreConnector::build" << endl;
std::cerr << "connect to server error. @GstoreConnector::build" << std::endl;
return false;
}
string cmd = "import " + _db_name + " " + _rdf_file_path;
std::string cmd = "import " + _db_name + " " + _rdf_file_path;
bool send_return = this->socket.send(cmd);
if (!send_return)
{
cerr << "send import command error. @GstoreConnector::build" << endl;
std::cerr << "send import command error. @GstoreConnector::build" << std::endl;
return false;
}
string recv_msg;
std::string recv_msg;
this->socket.recv(recv_msg);
this->disconnect();
cerr << recv_msg << endl; //debug
std::cerr << recv_msg << std::endl; //debug
if (recv_msg == "import RDF file to database done.")
{
return true;
@ -141,56 +127,24 @@ GstoreConnector::build(string _db_name, string _rdf_file_path)
return false;
}
bool
GstoreConnector::drop(string _db_name)
std::string GstoreConnector::query(std::string _sparql)
{
bool connect_return = this->connect();
if (!connect_return)
{
cerr << "connect to server error. @GstoreConnector::unload" << endl;
return false;
}
string cmd = "drop " + _db_name;
bool send_return = this->socket.send(cmd);
if (!send_return)
{
cerr << "send unload command error. @GstoreConnector::unload" << endl;
return false;
}
string recv_msg;
this->socket.recv(recv_msg);
this->disconnect();
cout << recv_msg << endl; //debug
//if (recv_msg == "unload database done.")
//{
//return true;
//}
return true;
}
string
GstoreConnector::query(string _sparql)
{
bool connect_return = this->connect();
if (!connect_return)
{
cerr << "connect to server error. @GstoreConnector::query" << endl;
std::cerr << "connect to server error. @GstoreConnector::query" << std::endl;
return "connect to server error.";
}
string cmd = "query " + _sparql;
std::string cmd = "query " + _sparql;
bool send_return = this->socket.send(cmd);
if (!send_return)
{
cerr << "send query command error. @GstoreConnector::query";
std::cerr << "send query command error. @GstoreConnector::query";
return "send query command error.";
}
string recv_msg;
std::string recv_msg;
this->socket.recv(recv_msg);
this->disconnect();
@ -198,46 +152,12 @@ GstoreConnector::query(string _sparql)
return recv_msg;
}
string
GstoreConnector::show(bool _type)
{
bool connect_return = this->connect();
if (!connect_return)
{
cerr << "connect to server error. @GstoreConnector::show" << endl;
return "connect to server error.";
}
string cmd;
if(_type)
{
cmd = "show all";
}
else
{
cmd = "show databases";
}
bool send_return = this->socket.send(cmd);
if (!send_return)
{
cerr << "send show command error. @GstoreConnector::show";
return "send query command error.";
}
string recv_msg;
this->socket.recv(recv_msg);
this->disconnect();
return recv_msg;
}
bool
GstoreConnector::connect()
bool GstoreConnector::connect()
{
bool flag = this->socket.create();
if (!flag)
{
cerr << "cannot create socket. @GstoreConnector::connect" << endl;
std::cerr << "cannot create socket. @GstoreConnector::connect" << std::endl;
return false;
}
@ -245,18 +165,16 @@ GstoreConnector::connect()
if (!flag)
{
cerr << "cannot connect to server. @GstoreConnector::connect" << endl;
std::cerr << "cannot connect to server. @GstoreConnector::connect" << std::endl;
return false;
}
return true;
}
bool
GstoreConnector::disconnect()
bool GstoreConnector::disconnect()
{
bool flag = this->socket.close();
return flag;
}

View File

@ -1,32 +1,28 @@
/*=============================================================================
# Filename: GstoreConnector.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2016-02-21 21:22
# Description: originally written by hanshuo, modified by zengli
=============================================================================*/
/*
* GstoreConnector.h
*
* Created on: 2014-11-1
* Author: hanshuo
*/
#ifndef _GSTORECONNECTOR_H
#define _GSTORECONNECTOR_H
#ifndef GSTORECONNECTOR_H_
#define GSTORECONNECTOR_H_
#include "../../../Server/Socket.h"
#include <cstring>
#include"../../../Server/Socket.h"
#include<cstring>
class GstoreConnector
{
public:
GstoreConnector();
GstoreConnector(std::string _ip);
GstoreConnector(unsigned short _port);
GstoreConnector(std::string _ip, unsigned short _port);
GstoreConnector(unsigned short _port);
~GstoreConnector();
bool load(std::string _db_name);
bool unload(std::string _db_name);
bool build(std::string _db_name, std::string _rdf_file_path);
bool drop(std::string _db_name);
std::string query(std::string _sparql);
std::string show(bool _type=false); //show current or all databases
private:
static std::string defaultServerIP;
@ -42,5 +38,4 @@ private:
bool disconnect();
};
#endif // _GSTORECONNECTOR_H
#endif /* GSTORECONNECTOR_H_ */

View File

@ -1,9 +1,7 @@
lib_dir=../lib/
socket_obj_dir=../../../.objs/
socket_obj_dir=../../../objs/
all: $(lib_dir)libgstoreconnector.a
$(lib_dir)libgstoreconnector.a: GstoreConnector.o $(socket_obj_dir)Socket.o
libgstoreconnector.a: GstoreConnector.o $(socket_obj_dir)Socket.o
ar -crv $(lib_dir)libgstoreconnector.a GstoreConnector.o $(socket_obj_dir)Socket.o
GstoreConnector.o: GstoreConnector.cpp GstoreConnector.h $(socket_obj_dir)Socket.o

View File

@ -17,29 +17,28 @@ public class JavaAPIExample
// build a new database by a RDF file.
// note that the relative path is related to gserver.
gc.build("LUBM10.db", "data/LUBM_10.n3");
gc.build("db_LUBM10", "example/rdf_triple/LUBM_10_GStore.n3");
// then you can execute SPARQL query on this database.
String sparql = "select ?x where "
+ "{"
+ "?x <rdf:type> <ub:UndergraduateStudent>. "
+ "?y <ub:name> <Course1>. "
+ "?x <ub:takesCourse> ?y. "
+ "?z <ub:teacherOf> ?y. "
+ "?z <ub:name> <FullProfessor1>. "
+ "?z <ub:worksFor> ?w. "
+ "?w <ub:name> <Department0>. "
+ "?x rdf:type <ub:UndergraduateStudent>. "
+ "?y ub:name <Course1>. "
+ "?x ub:takesCourse ?y. "
+ "?z ub:teacherOf ?y. "
+ "?z ub:name <FullProfessor1>. "
+ "?z ub:worksFor ?w. "
+ "?w ub:name <Department0>. "
+ "}";
String answer = gc.query(sparql);
System.out.println(answer);
// unload this database.
gc.unload("LUBM10.db");
gc.unload("db_LUBM10");
// also, you can load some exist database directly and then query.
gc.load("LUBM10.db");
gc.load("db_LUBM10");
answer = gc.query(sparql);
System.out.println(answer);
}
}

View File

@ -1,11 +0,0 @@
JavaAPIExample.class:
javac -cp ../lib/GstoreJavaAPI.jar JavaAPIExample.java
.PHONY: clean run
run: JavaAPIExample.class
java -cp ../lib/GstoreJavaAPI.jar:. JavaAPIExample
clean:
rm -f JavaAPIExample.class

Some files were not shown because too many files have changed in this diff Show More