merge newest devGstore

This commit is contained in:
bookug 2016-05-16 03:16:22 +08:00
parent 782155629d
commit 02e54e2c17
106 changed files with 15185 additions and 4911 deletions

9
.gitignore vendored
View File

@ -81,4 +81,13 @@ cscope*
.cproject
.project
tags
.settings
# latex files
*.aux
*.toc
*.synctex.gz
*.log
*.out
*.bak~

File diff suppressed because it is too large Load Diff

View File

@ -129,10 +129,18 @@ private:
bool sub2id_pre2id(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max);
bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max);
bool s2o_sp2o_s2po(int** _p_id_tuples, int _id_tuples_max);
bool o2s_op2s_o2ps(int** _p_id_tuples, int _id_tuples_max);
bool s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
bool o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max);
//NOTICE: below is the new one
bool s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
bool o2p_o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max);
bool p2s_p2o_p2so(int** _p_id_tuples, int _id_tuples_max);
bool so2p_s2o(int** _p_id_tuples, int _id_tuples_max);
static int _spo_cmp(const void* _a, const void* _b);
static int _ops_cmp(const void* _a, const void* _b);
static int _pso_cmp(const void* _a, const void* _b);
static int _sop_cmp(const void* _a, const void* _b);
bool objIDIsEntityID(int _id);
//* join on the vector of CandidateList, available after retrieve from the VSTREE

File diff suppressed because it is too large Load Diff

View File

@ -15,8 +15,8 @@
#include "../KVstore/KVstore.h"
#include "../Util/Util.h"
//BETTER?:use vector<int> with predefined size in inner
//needed function is same, and cache hits are high
//BETTER?:place multi_join and index_join in separated files
typedef vector<int> RecordType;
typedef vector<int>::iterator RecordIterator;
typedef list<RecordType> TableType;
@ -28,21 +28,98 @@ typedef list<RecordType>::reverse_iterator TableReverseIterator;
typedef vector< vector<int*> > IdLists;
typedef vector< vector<int> > IdListsLen;
//One candidate value in an IndexList, together with the per-neighbor
//bookkeeping vectors kept for it.
typedef struct IndexItem
{
	int value;		//the candidate value itself (-1 when default-constructed)
	bool isValid;	//needed for final travelling

	//NOTICE: the size of vector is expected to be small
	//the order in vector must be same as in IndexList vector
	vector< list< list<struct IndexItem>::iterator > > travel;
	vector< set<int> > check;
	//earlier map-based alternatives, kept for reference:
	//map< int, list < list<struct IndexItem>::iterator > > links;	//direct next index list id and linking
	//map< int, set <int> > check;	//indirect previous index list id and verifying

	//placeholder item: value -1, marked invalid
	IndexItem() : value(-1), isValid(false)
	{
	}

	//item holding _val, marked valid (intentionally not explicit: int converts implicitly)
	IndexItem(int _val) : value(_val), isValid(true)
	{
	}
}IndexItem;
typedef struct IndexList
{
//int next;
//NOTICE:the list should be ordered at the beginning
list<IndexItem> candidates;
list<IndexItem>::iterator border; //used to divide valid and invalid area
bool prepared; //find and set all invalid eles restricted by subtree in travelling
int position; //current neighbor to travel
vector<int> travel_map; //the mapping between links position and IndexList id
vector<int> check_map; //the mapping between check position and IndexList id
IndexList()
{
//this->next = -1;
this->prepared = false;
this->position = 0;
}
bool end()
{
return this->position == (int)this->travel_map.size();
}
int next()
{
return this->travel_map[this->position++];
}
//NOTICE:we can not use binary-search in list, but this search method maybe slow
//BETTER?:adjust the list to binary-tree or other struture?
list<IndexItem>::iterator search(int _val)
{
for(list<IndexItem>::iterator it = this->candidates.begin(); it != this->border; ++it)
{
if(it->value == _val)
return it;
}
return this->border;
}
}IndexList;
//Plain record tying an id to an id list (pointer plus length).
//The struct only stores the pointer; it never allocates or frees it.
typedef struct Satellite
{
	int id;
	int* idlist;
	int idlist_len;

	Satellite(int _id, int* _idlist, int _idlist_len)
		: id(_id), idlist(_idlist), idlist_len(_idlist_len)
	{
	}
}Satellite;
//shorthand for a list of IndexItem (the element type of IndexList::candidates)
typedef list<IndexItem> ItemList;
//iterator into an ItemList
typedef list<IndexItem>::iterator ItemListIterator;
//list of iterators into an ItemList (the element type of IndexItem::travel)
typedef list< list<struct IndexItem>::iterator > IteratorList;
//Database new Join and pass something like kvstore
class Join
{
private:
int start_id;
int var_num;
//bool* dealed_triple;
BasicQuery* basic_query;
KVstore* kvstore;
//used by score_node for parameters
static const unsigned PARAM_DEGREE = 1;
static const unsigned PARAM_SIZE = 10000;
static const unsigned PARAM_SIZE = 100000;
static const unsigned PARAM_DENSE = 1;
static const double JUDGE_LIMIT = 0.5;
static const int LIMIT_CANDIDATE_LIST_SIZE = 1000;
//BETTER:inner vector?predefine size to avoid copy cost
//BETTER?:predefine size to avoid copy cost
TableType current_table;
TableIterator new_start; //keep to end() as default
//list<bool> table_row_new;
@ -54,9 +131,15 @@ private:
bool* dealed_triple;
stack<int> mystack;
vector<int*>* result_list;
vector<Satellite> satellites;
int* record;
int record_len;
void init(BasicQuery* _basic_query);
void clear();
void add_id_pos_mapping(int _id);
void reset_id_pos_mapping();
//judge which method should be used according to
//the size of candidates and structure of querying graph
@ -72,28 +155,69 @@ private:
//score the node according to degree and size
double score_node(unsigned _degree, unsigned _size);
void filter_before_join();
void literal_edge_filter(int _var_i);
bool filter_before_join();
bool constant_edge_filter(int _var_i);
void preid_filter(int _var_i);
void only_pre_filter_after_join();
bool only_pre_filter_after_join();
void add_literal_candidate();
bool pre_var_handler();
//bool filterBySatellites(int _var, int _ele);
bool allFilterBySatellites(int _var);
void generateAllSatellites();
void cartesian(int pos, int end);
//functions for help
//copy/add to the end of current_table and set true
void add_new_to_results(TableIterator it, int id);
//void set_results_old(list<bool>::iterator it);
int choose_next_node(bool* _dealed_triple, int id);
void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, bool* _dealed_triple, int _id, int _can_list_size);
int choose_next_node(int id);
bool is_literal_var(int id);
bool is_literal_ele(int _id);
void copyToResult();
//BETTER?:change these params to members in class
void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, int _can_list_size);
bool if_prepare_idlist(int _can_list_size, bool _is_literal);
bool new_join_with_multi_vars_prepared(IdLists& _id_lists, IdListsLen& _id_lists_len, vector<int>& _edges, IDList& _can_list, int _can_list_size);
bool new_join_with_multi_vars_not_prepared(vector<int>& _edges, IDList& _can_list, int _can_list_size, int _id, bool _is_literal);
void multi_join();
bool multi_join();
void index_join();
//================================================================================================
//The index join method saves the memory cost because 2m+2mn < 3mn,
//and time may be reduced if the pre-process is not too costly
//because we can reuse the links other than recompute in temporal table
//New struct is needed for node, i.e. list<bool, int, list<iterator> >,
//because we may have to delete, but how can we know if an iterator
//is valid if the one it points to is removed?(remove if the other is removed; using end())
//1. based on edges: process each time only in valid area(already
//matched with others, invalid is removed), and finally it must be
//all ok, just copy to result_list. We should select the edge order
//to better the efficiency, but how can we keep only a neighbor links
//set if we want to save memory?(ensure all can be linked later)
//2. based on points: search deeply like multi-index-join, only a
//neighbor links set is kept for a node(not every edge), so memory
//cost is low. Finally, travel around along valid iterator, copy...
IndexList* index_lists;
void buildIndexLists();
bool travel_init(int _lid);
bool index_link(int _nid, int _idx);
bool index_filter(int _nid, int _idx);
bool table_travel(int _id1, int _id2);
bool table_check(int _id1, int _id2);
bool index_travel_one();
bool index_travel_two();
bool index_travel();
bool index_join();
//NOTICE:this is only used to join a BasicQuery
bool join();
@ -101,8 +225,9 @@ private:
public:
Join();
Join(KVstore* _kvstore);
//this function can be called by Database
//these functions can be called by Database
bool join_sparql(SPARQLquery& _sparql_query);
bool join_basic(BasicQuery* _basic_query);
~Join();
};

357
Database/Strategy.cpp Normal file
View File

@ -0,0 +1,357 @@
/*=============================================================================
# Filename: Strategy.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-05-07 16:31
# Description: implement functions in Strategy.h
=============================================================================*/
#include "Strategy.h"
using namespace std;
//Default state: method 0 and no stores attached.
Strategy::Strategy()
	: method(0), kvstore(NULL), vstree(NULL)
{
	//this->prepare_handler();
}
//Attach the given stores; only the pointers are kept, method starts at 0.
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree)
	: method(0), kvstore(_kvstore), vstree(_vstree)
{
	//this->prepare_handler();
}
//Nothing to release here: kvstore and vstree are left untouched.
Strategy::~Strategy()
{
	//delete[] this->dispatch;
}
//void
//Strategy::prepare_handler()
//{
//this->dispatch = new QueryHandler[Strategy::QUERY_HANDLER_NUM];
//this->dispatch[0] = Strategy::handler0;
//}
//Answer every BasicQuery contained in _query and always return true.
//
//With MULTI_INDEX defined, each BasicQuery is inspected:
//  - one triple with a predicate variable is answered here directly from the
//    kvstore (s2po / o2ps / so2p) and the loop moves on;
//  - one triple without predicate variable selects handler1/2/3;
//  - everything else goes to handler0 (vstree filter + join).
//Without MULTI_INDEX, the whole query is retrieved through the vstree and
//joined by Join::join_sparql.
//
//NOTICE: 2-triple case ?s1 p1 c0 ?s2 p2 c0 is viewed as an unconnected graph
//however, this can be dealt with due to several basicquery and linking
bool
Strategy::handle(SPARQLquery& _query)
{
#ifdef MULTI_INDEX
	Util::logging("IN Strategy::handle");

	vector<BasicQuery*>& queryList = _query.getBasicQueryVec();
	//enumerate each BasicQuery and choose how to answer it
	for(vector<BasicQuery*>::iterator iter = queryList.begin(); iter != queryList.end(); ++iter)
	{
		this->method = 0;

		vector<int*>& result_list = (*iter)->getResultList();
		int varNum = (*iter)->getVarNum();	//the num of vars needing to be joined
		int total_num = (*iter)->getTotalVarNum();
		int pre_varNum = (*iter)->getPreVarNum();

		if((*iter)->getTripleNum() == 1 && pre_varNum == 1)
		{
			Triple triple = (*iter)->getTriple(0);
			int* id_list = NULL;
			int id_list_len = 0;
			result_list.clear();

			if(total_num == 2)
			{
				//TODO:consider special case, select ?s (?p) ?o where { ?s ?p ?o . }
				//filter and join is too costly, should enum all predicates and use p2so
				//maybe the selected vars are ?s (?p) or ?o (?p)
				cerr << "not supported now!" << endl;
			}
			else if(total_num == 1)
			{
				//TODO:if just select s/o, use o2s/s2o
				//if only p is selected, use s2p or o2p
				//only if both s/o and p are selected, use s2po or o2ps
				if(triple.subject[0] != '?')	//constant
				{
					int sid = (this->kvstore)->getIDByEntity(triple.subject);
					this->kvstore->getpreIDobjIDlistBysubID(sid, id_list, id_list_len);
				}
				else if(triple.object[0] != '?')	//constant
				{
					int oid = (this->kvstore)->getIDByEntity(triple.object);
					if(oid == -1)
					{
						oid = (this->kvstore)->getIDByLiteral(triple.object);
					}
					this->kvstore->getpreIDsubIDlistByobjID(oid, id_list, id_list_len);
				}

				//always place s/o before p in result list
				//id_list is consumed as consecutive (pre, s/o) pairs
				for(int i = 0; i < id_list_len; i += 2)
				{
					int* record = new int[2];	//2 vars selected
					record[1] = id_list[i];		//for the pre var
					record[0] = id_list[i+1];	//for the s/o var
					result_list.push_back(record);
				}
			}
			else if(total_num == 0)	//only ?p
			{
				//just use so2p
				int sid = (this->kvstore)->getIDByEntity(triple.subject);
				int oid = (this->kvstore)->getIDByEntity(triple.object);
				if(oid == -1)
				{
					oid = (this->kvstore)->getIDByLiteral(triple.object);
				}
				this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);

				//copy to result list
				for(int i = 0; i < id_list_len; ++i)
				{
					int* record = new int[1];
					record[0] = id_list[i];
					result_list.push_back(record);
				}
			}

			//id_list may still be NULL here; delete[] on NULL is a no-op
			delete[] id_list;
			continue;
		}

		if(pre_varNum == 0 && (*iter)->getTripleNum() == 1)	//only one triple and no predicates
		{
			//only one variable and one triple: ?s pre obj or sub pre ?o
			if(total_num == 1)
			{
				this->method = 1;
			}
			//only two vars: ?s pre ?o
			else if(total_num == 2)
			{
				if(varNum == 1)	//the selected id should be 0
				{
					this->method = 2;
				}
				else	//==2
				{
					this->method = 3;
				}
			}
			//cerr << "this BasicQuery use query strategy 2" << endl;
			//cerr<<"Final result size: "<<(*iter)->getResultList().size()<<endl;
			//continue;
		}

		//QueryHandler dispatch;
		//dispatch[0] = handler0;
		switch(this->method)
		{
		case 0:
			this->handler0(*iter, result_list);
			break;
		case 1:
			this->handler1(*iter, result_list);
			break;
		case 2:
			this->handler2(*iter, result_list);
			break;
		case 3:
			this->handler3(*iter, result_list);
			break;
		default:
			cerr << "not support this method" << endl;
		}
		cerr<<"Final result size: "<<(*iter)->getResultList().size()<<endl;
		//BETTER: use function pointer array in C++ class
	}
#else
	cerr << "this BasicQuery use original query strategy" << endl;
	long tv_handle = Util::get_cur_time();
	(this->vstree)->retrieve(_query);
	long tv_retrieve = Util::get_cur_time();
	cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;

	{
		//Strategy declares no join member, so the Join lives in this scope only;
		//the braces make sure it is destroyed before the join time is taken
		Join join(this->kvstore);
		join.join_sparql(_query);
	}

	long tv_join = Util::get_cur_time();
	cout << "after Join, used " << (tv_join - tv_retrieve) << "ms." << endl;
#endif
	Util::logging("OUT Strategy::handle");
	return true;
}
//General strategy: fill the candidate list of every variable that needs
//retrieving through the vstree, then run Join::join_basic on the query.
//_bq: the basic query to evaluate
//_result_list: not used by this handler
void
Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list)
{
	cerr << "this BasicQuery use query strategy 0" << endl;

	//BETTER:not all vars in join filtered by vstree
	//(A)-B-c: B should by vstree, then by c, but A should be generated in join(first set A as not)
	//if A not in join, just filter B by pre
	//divided into star graphs, join core vertices, generate satellites
	//join should also start from a core vertex(neighbor can be constants or vars) if available
	//
	//QUERY: is there any case that a node should be retrieved by other index?(instead of vstree or generate when join)
	//
	//we had better treat 1-triple case(no ?p) as special, and then in other cases, core vertex exist(if connected)
	//However, if containing ?p and 1-triple, we should treat it also as a special case, or select a variable as core vertex
	//and retrieved (for example, ?s ?p o or s ?p ?o, generally no core vertex in these cases)

	long tv_handle = Util::get_cur_time();
	int varNum = _bq->getVarNum();	//the num of vars needing to be joined
	for(int i = 0; i < varNum; ++i)
	{
		if(_bq->if_need_retrieve(i) == false)
			continue;

		bool flag = _bq->isLiteralVariable(i);
		const EntityBitSet& entityBitSet = _bq->getVarBitSet(i);
		IDList* idListPtr = &( _bq->getCandidateList(i) );
		this->vstree->retrieveEntity(entityBitSet, idListPtr);

		//stop retrieving once one non-literal var has no candidates
		//(the join below still runs)
		if(idListPtr->size() == 0 && !flag)
		{
			break;
		}
	}
	//TODO:end directly if one is empty!

	long tv_retrieve = Util::get_cur_time();
	cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;

	{
		//automatic object: released even if the join throws, and destroyed
		//before the join time is taken
		Join join(this->kvstore);
		join.join_basic(_bq);
	}

	long tv_join = Util::get_cur_time();
	cout << "after Join, used " << (tv_join - tv_retrieve) << "ms." << endl;
}
//One triple, one variable (?s pre obj or sub pre ?o): read the matching id
//list for edge (0, 0) from the kvstore and emit one 1-int record per id.
//_bq: the basic query to evaluate
//_result_list: cleared, then filled with newly allocated int[1] records
void
Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
{
	long start_time = Util::get_cur_time();
	cerr << "this BasicQuery use query strategy 1" << endl;

	//int neighbor_id = (*_bq->getEdgeNeighborID(0, 0);	//constant, -1
	char direction = _bq->getEdgeType(0, 0);
	Triple triple = _bq->getTriple(_bq->getEdgeID(0, 0));
	int predicate = _bq->getEdgePreID(0, 0);

	int* ids = NULL;
	int ids_len = 0;
	if(direction != Util::EDGE_OUT)
	{
		//cerr<<"edge in!!!"<<endl;
		int subject_id = this->kvstore->getIDByEntity(triple.subject);
		this->kvstore->getobjIDlistBysubIDpreID(subject_id, predicate, ids, ids_len);
	}
	else
	{
		//cerr<<"edge out!!!"<<endl;
		//look the object up as an entity first, then as a literal
		int object_id = (this->kvstore)->getIDByEntity(triple.object);
		if(object_id == -1)
			object_id = (this->kvstore)->getIDByLiteral(triple.object);
		this->kvstore->getsubIDlistByobjIDpreID(object_id, predicate, ids, ids_len);
	}

	long filter_done = Util::get_cur_time();
	cerr << "after filter, used " << (filter_done - start_time) << "ms" << endl;

	_result_list.clear();
	//cerr<<"now to copy result to list"<<endl;
	int pos = 0;
	while(pos < ids_len)
	{
		int* row = new int[1];	//only this var is selected
		row[0] = ids[pos];
		//cerr<<this->kvstore->getEntityByID(row[0])<<endl;
		_result_list.push_back(row);
		++pos;
	}

	long copy_done = Util::get_cur_time();
	cerr<<"after copy to result list: used "<<(copy_done-filter_done)<<" ms"<<endl;
	delete[] ids;
	cerr<<"Final result size: "<<_result_list.size()<<endl;
}
//One triple ?s pre ?o where only one variable is selected: read the
//subject list (p2s) or object list (p2o) of the predicate, depending on
//which variable has id 0, and emit one 1-int record per id.
//_bq: the basic query to evaluate
//_result_list: cleared, then filled with newly allocated int[1] records
void
Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
{
	long before_filter = Util::get_cur_time();
	cerr << "this BasicQuery use query strategy 2" << endl;

	int triple_id = _bq->getEdgeID(0, 0);
	Triple triple = _bq->getTriple(triple_id);
	int pre_id = _bq->getEdgePreID(0, 0);
	int var1_id = _bq->getIDByVarName(triple.subject);
	int var2_id = _bq->getIDByVarName(triple.object);

	int* id_list = NULL;
	int id_list_len = 0;
	if(var1_id == 0)	//subject var selected
	{
		//use p2s directly
		this->kvstore->getsubIDlistBypreID(pre_id, id_list, id_list_len);
	}
	else if(var2_id == 0)	//object var selected
	{
		//use p2o directly
		this->kvstore->getobjIDlistBypreID(pre_id, id_list, id_list_len);
	}
	else
	{
		//report the function that actually failed; id_list stays NULL with
		//length 0, so the result list ends up empty
		cerr << "ERROR in Strategy::handler2(): no selected var!"<<endl;
	}

	long after_filter = Util::get_cur_time();
	cerr << "after filter, used " << (after_filter - before_filter) << "ms" << endl;

	_result_list.clear();
	for(int i = 0; i < id_list_len; ++i)
	{
		int* record = new int[1];	//only one var
		record[0] = id_list[i];
		_result_list.push_back(record);
	}

	long after_copy = Util::get_cur_time();
	cerr<<"after copy to result list: used "<<(after_copy-after_filter)<<" ms"<<endl;
	delete[] id_list;
	cerr<<"Final result size: "<<_result_list.size()<<endl;
}
//One triple ?s pre ?o with both variables selected: read the sub/obj id
//list of the predicate (p2so) and emit one 2-int record per pair, each
//value stored at the index given by its variable's id.
//_bq: the basic query to evaluate
//_result_list: cleared, then filled with newly allocated int[2] records
void
Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
{
	long start_time = Util::get_cur_time();
	cerr << "this BasicQuery use query strategy 3" << endl;

	Triple triple = _bq->getTriple(_bq->getEdgeID(0, 0));
	int predicate = _bq->getEdgePreID(0, 0);

	int* pairs = NULL;
	int pairs_len = 0;
	this->kvstore->getsubIDobjIDlistBypreID(predicate, pairs, pairs_len);

	int subject_slot = _bq->getIDByVarName(triple.subject);
	int object_slot = _bq->getIDByVarName(triple.object);

	long filter_done = Util::get_cur_time();
	cerr << "after filter, used " << (filter_done - start_time) << "ms" << endl;

	_result_list.clear();
	//pairs is consumed two ints at a time: first goes to the subject slot,
	//second to the object slot
	int pos = 0;
	while(pos < pairs_len)
	{
		int* row = new int[2];	//2 vars and selected
		row[subject_slot] = pairs[pos];
		row[object_slot] = pairs[pos+1];
		_result_list.push_back(row);
		pos += 2;
	}

	long copy_done = Util::get_cur_time();
	cerr<<"after copy to result list: used "<<(copy_done-filter_done)<<" ms"<<endl;
	delete[] pairs;
	cerr<<"Final result size: "<<_result_list.size()<<endl;
}

47
Database/Strategy.h Normal file
View File

@ -0,0 +1,47 @@
/*=============================================================================
# Filename: Strategy.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-05-07 16:28
# Description:
=============================================================================*/
#ifndef _DATABASE_STRATEGY_H
#define _DATABASE_STRATEGY_H
#include "../Util/Util.h"
#include "../Util/Triple.h"
#include "Join.h"
#include "../Query/IDList.h"
#include "../Query/SPARQLquery.h"
#include "../Query/BasicQuery.h"
#include "../KVstore/KVstore.h"
#include "../VSTree/VSTree.h"
//Selects and runs a query strategy for each BasicQuery of a SPARQL query.
//The KVstore and VSTree pointers are only stored, never deleted here.
class Strategy
{
public:
	//no stores attached, method 0
	Strategy();
	//keep the given store pointers, method 0
	Strategy(KVstore* _kvstore, VSTree* _vstree);
	~Strategy();

	//select efficient strategy to do the sparql query
	bool handle(SPARQLquery& _query);

private:
	int method;			//id of the handler chosen for the current BasicQuery (0..3)
	KVstore* kvstore;
	VSTree* vstree;

	//one handler per method id; each takes the BasicQuery and its result list
	void handler0(BasicQuery* _bq, vector<int*>& _result_list);
	void handler1(BasicQuery* _bq, vector<int*>& _result_list);
	void handler2(BasicQuery* _bq, vector<int*>& _result_list);
	void handler3(BasicQuery* _bq, vector<int*>& _result_list);

	//QueryHandler *dispatch;
	//void prepare_handler();
};
//number of handlerN member functions declared in Strategy (handler0..handler3)
static const unsigned QUERY_HANDLER_NUM = 4;
//array type of QUERY_HANDLER_NUM pointers to Strategy member functions that
//share the handlerN signature; only referenced from commented-out dispatch code
typedef void (Strategy::*QueryHandler[QUERY_HANDLER_NUM]) (BasicQuery*, vector<int*>&);
//QueryHandler dispatch;
#endif //_DATABASE_STRATEGY_H

View File

@ -7,7 +7,6 @@
=============================================================================*/
#include "KVstore.h"
#include "../Database/Database.h"
using namespace std;
@ -39,7 +38,7 @@ KVstore::getEntityOutDegree(int _entity_id)
}
/* there are two situation when we need to update tuples list: s2o o2s sp2o op2s s2po o2ps
/* there are two situation when we need to update tuples list: s2o o2s sp2o op2s s2po o2ps s2p p2s o2p p2o so2p p2so
* 1. insert triple(finished in this function)
* 2. remove triple
* before call this function, we were sure that this triple did not exist
@ -47,26 +46,25 @@ KVstore::getEntityOutDegree(int _entity_id)
int
KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
{
//debug
// {
// stringstream _ss;
// _ss << "updateTupleslist_insert: " << _sub_id << " " << _pre_id << " " << _obj_id << endl;
// Util::logging(_ss.str());
// }
#ifdef DEBUG_PRECISE
stringstream _ss;
_ss << "updateTupleslist_insert: " << _sub_id << " " << _pre_id << " " << _obj_id << endl;
Util::logging(_ss.str());
#endif
//debug
int updateListLen = 0;
/* update sp2o */
//update sp2o
{
int* _sp2olist = NULL;
int _sp2o_len = 0;
this->getobjIDlistBysubIDpreID(_sub_id, _pre_id, _sp2olist, _sp2o_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
//if no duplication, _insert will be true
//this->setXXX function will override the previous value
bool _insert = this->insert_x(_sp2olist, _sp2o_len, _obj_id);
if(_insert){
if(_insert)
{
this->setobjIDlistBysubIDpreID(_sub_id, _pre_id, _sp2olist, _sp2o_len);
}
@ -78,17 +76,16 @@ KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
}
//debug
// Util::logging("update sp2o done.");
/* update op2s */
//update op2s
{
int* _op2slist = NULL;
int _op2s_len = 0;
this->getsubIDlistByobjIDpreID(_obj_id, _pre_id, _op2slist, _op2s_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
//if no duplication, _insert will be true
//this->setXXX function will override the previous value
bool _insert = this->insert_x(_op2slist, _op2s_len, _sub_id);
if(_insert){
this->setsubIDlistByobjIDpreID(_obj_id, _pre_id, _op2slist, _op2s_len);
@ -101,17 +98,16 @@ KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
_op2s_len = 0;
}
//debug
// Util::logging("update op2s done.");
/* update s2po */
//update s2po
{
int* _s2polist = NULL;
int _s2po_len = 0;
this->getpreIDobjIDlistBysubID(_sub_id, _s2polist, _s2po_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
//if no duplication, _insert will be true
//this->setXXX function will override the previous value
bool _insert = this->insert_xy(_s2polist, _s2po_len, _pre_id, _obj_id);
if(_insert){
this->setpreIDobjIDlistBysubID(_sub_id, _s2polist, _s2po_len);
@ -124,19 +120,19 @@ KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
_s2po_len = 0;
}
//debug
// Util::logging("update s2po done.");
/* update o2ps */
//update o2ps
{
int* _o2pslist = NULL;
int _o2ps_len = 0;
this->getpreIDsubIDlistByobjID(_obj_id, _o2pslist, _o2ps_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
//if no duplication, _insert will be true
//this->setXXX function will override the previous value
bool _insert = this->insert_xy(_o2pslist, _o2ps_len, _pre_id, _sub_id);
if(_insert){
if(_insert)
{
this->setpreIDsubIDlistByobjID(_obj_id, _o2pslist, _o2ps_len);
}
@ -147,18 +143,18 @@ KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
_o2ps_len = 0;
}
//debug
// Util::logging("update o2ps done.");
/* update s2o */
//update s2o
{
int* _s2olist = NULL;
int _s2o_len = 0;
this->getobjIDlistBysubID(_sub_id, _s2olist, _s2o_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
//if no duplication, _insert will be true
//this->setXXX function will override the previous value
bool _insert = this->insert_x(_s2olist, _s2o_len, _obj_id);
if(_insert){
if(_insert)
{
this->setobjIDlistBysubID(_sub_id, _s2olist, _s2o_len);
}
@ -169,17 +165,16 @@ KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
_s2o_len = 0;
}
//debug
// Util::logging("update s2o done.");
/* update o2s */
//update o2s
{
int* _o2slist = NULL;
int _o2s_len = 0;
this->getsubIDlistByobjID(_obj_id, _o2slist, _o2s_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
//if no duplication, _insert will be true
//this->setXXX function will override the previous value
bool _insert = this->insert_x(_o2slist, _o2s_len, _sub_id);
if(_insert){
this->setsubIDlistByobjID(_obj_id, _o2slist, _o2s_len);
@ -192,19 +187,112 @@ KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
_o2s_len = 0;
}
//debug
//update s2p
{
int* _s2plist = NULL;
int _s2p_len = 0;
this->getpreIDlistBysubID(_sub_id, _s2plist, _s2p_len);
bool _insert = this->insert_x(_s2plist, _s2p_len, _pre_id);
if(_insert)
{
this->setpreIDlistBysubID(_sub_id, _s2plist, _s2p_len);
}
updateListLen += _s2p_len;
delete[] _s2plist;
_s2plist = NULL;
_s2p_len = 0;
}
//update p2s
{
int* _p2slist = NULL;
int _p2s_len = 0;
this->getsubIDlistBypreID(_pre_id, _p2slist, _p2s_len);
bool _insert = this->insert_x(_p2slist, _p2s_len, _sub_id);
if(_insert)
{
this->setsubIDlistBypreID(_pre_id, _p2slist, _p2s_len);
}
updateListLen += _p2s_len;
delete[] _p2slist;
_p2slist = NULL;
_p2s_len = 0;
}
//update o2p
{
int* _o2plist = NULL;
int _o2p_len = 0;
this->getpreIDlistByobjID(_obj_id, _o2plist, _o2p_len);
bool _insert = this->insert_x(_o2plist, _o2p_len, _pre_id);
if(_insert)
{
this->setpreIDlistByobjID(_obj_id, _o2plist, _o2p_len);
}
updateListLen += _o2p_len;
delete[] _o2plist;
_o2plist = NULL;
_o2p_len = 0;
}
//update p2o
{
int* _p2olist = NULL;
int _p2o_len = 0;
this->getobjIDlistBypreID(_pre_id, _p2olist, _p2o_len);
bool _insert = this->insert_x(_p2olist, _p2o_len, _obj_id);
if(_insert)
{
this->setobjIDlistBypreID(_pre_id, _p2olist, _p2o_len);
}
updateListLen += _p2o_len;
delete[] _p2olist;
_p2olist = NULL;
_p2o_len = 0;
}
//update so2p
{
int* _so2plist = NULL;
int _so2p_len = 0;
this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
bool _insert = this->insert_x(_so2plist, _so2p_len, _pre_id);
if(_insert)
{
this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
}
updateListLen += _so2p_len;
delete[] _so2plist;
_so2plist = NULL;
_so2p_len = 0;
}
//update p2so
{
int* _p2solist = NULL;
int _p2so_len = 0;
this->getsubIDobjIDlistBypreID(_pre_id, _p2solist, _p2so_len);
bool _insert = this->insert_xy(_p2solist, _p2so_len, _sub_id, _obj_id);
if(_insert)
{
this->setsubIDobjIDlistBypreID(_pre_id, _p2solist, _p2so_len);
}
updateListLen += _p2so_len;
delete[] _p2solist;
_p2solist = NULL;
_p2so_len = 0;
}
return updateListLen;
//debug
// Util::logging("update o2s done.");
}
/* insert <_x_id, _y_id> into _xylist(keep _xylist(<x,y>) in ascending order) */
//insert <_x_id, _y_id> into _xylist(keep _xylist(<x,y>) in ascending order)
bool
KVstore::insert_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id)
KVstore::insert_xy(int*& _xylist, int& _list_len, int _x_id, int _y_id)
{
/* check duplication */
//check duplication
for(int i = 0; i < _list_len; i += 2)
{
if(_xylist[i] == _x_id &&
@ -283,12 +371,10 @@ KVstore::insert_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id)
return true;
}
/* insert _x_id into _xlist(keep _xlist in ascending order) */
//insert _x_id into _xlist(keep _xlist in ascending order)
bool
KVstore::insert_x(int*& _xlist, int& _list_len, int _x_id)
{
/* check duplication */
for(int i = 0; i < _list_len; i ++)
{
@ -353,15 +439,15 @@ KVstore::insert_x(int*& _xlist, int& _list_len, int _x_id)
}
return true;
}
/* there are two situation when we need to update tuples list: s2o o2s sp2o op2s s2po o2ps
/* there are two situation when we need to update tuples list: s2o o2s sp2o op2s s2po o2ps s2p p2s o2p p2o so2p p2so
* 1. insert triple
* 2. remove triple(finished in this function)
* before call this function, we were sure that this triple did not exist
* before call this function, we were sure that this triple did exist
*/
void
KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
{
/* update sp2o */
//update sp2o
{
int* _sp2olist = NULL;
int _sp2o_len = 0;
@ -372,7 +458,7 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
int* _sp = new int[2];
_sp[0] = _sub_id;
_sp[1] = _pre_id;
(this->removeKey(this->subID2preIDobjIDlist, (char*)_sp, sizeof(int)*2 ));
this->removeKey(this->subIDpreID2objIDlist, (char*)_sp, sizeof(int)*2 );
delete[] _sp;
}
else
@ -384,7 +470,7 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
delete[] _sp2olist;
}
/* update op2s */
//update op2s
{
int* _op2slist = NULL;
int _op2s_len = 0;
@ -407,7 +493,7 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
delete[] _op2slist;
}
/* update s2po */
//update s2po
{
int* _s2polist = NULL;
int _s2po_len = 0;
@ -426,7 +512,7 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
delete[] _s2polist;
}
/* update o2ps */
//update o2ps
{
int* _o2pslist = NULL;
int _o2ps_len = 0;
@ -445,7 +531,7 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
delete[] _o2pslist;
}
/* update s2o */
//update s2o
{
int* _s2olist = NULL;
int _s2o_len = 0;
@ -464,7 +550,7 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
delete[] _s2olist;
}
/* update o2s */
//update o2s
{
int* _o2slist = NULL;
int _o2s_len = 0;
@ -483,6 +569,112 @@ KVstore::updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id)
delete[] _o2slist;
}
//update s2p
{
int* _s2plist = NULL;
int _s2p_len = 0;
this->getpreIDlistBysubID(_sub_id, _s2plist, _s2p_len);
bool _remove = this->remove_x(_s2plist, _s2p_len, _pre_id);
if(_s2p_len == 0)
{
this->removeKey(this->subID2preIDlist, (char*)&_sub_id, sizeof(int));
}
else if(_remove)
{
this->setpreIDlistBysubID(_sub_id, _s2plist, _s2p_len);
}
delete[] _s2plist;
}
//update p2s
{
int* _p2slist = NULL;
int _p2s_len = 0;
this->getsubIDlistBypreID(_pre_id, _p2slist, _p2s_len);
bool _remove = this->remove_x(_p2slist, _p2s_len, _sub_id);
if(_p2s_len == 0)
{
this->removeKey(this->preID2subIDlist, (char*)&_pre_id, sizeof(int));
}
else if(_remove)
{
this->setsubIDlistBypreID(_pre_id, _p2slist, _p2s_len);
}
delete[] _p2slist;
}
//update o2p
{
int* _o2plist = NULL;
int _o2p_len = 0;
this->getpreIDlistByobjID(_obj_id, _o2plist, _o2p_len);
bool _remove = this->remove_x(_o2plist, _o2p_len, _pre_id);
if(_o2p_len == 0)
{
this->removeKey(this->objID2preIDlist, (char*)&_obj_id, sizeof(int));
}
else if(_remove)
{
this->setpreIDlistByobjID(_obj_id, _o2plist, _o2p_len);
}
delete[] _o2plist;
}
//update p2o
{
int* _p2olist = NULL;
int _p2o_len = 0;
this->getobjIDlistBypreID(_pre_id, _p2olist, _p2o_len);
bool _remove = this->remove_x(_p2olist, _p2o_len, _obj_id);
if(_p2o_len == 0)
{
this->removeKey(this->preID2objIDlist, (char*)&_pre_id, sizeof(int));
}
else if(_remove)
{
this->setobjIDlistBypreID(_pre_id, _p2olist, _p2o_len);
}
delete[] _p2olist;
}
//update so2p
{
int* _so2plist = NULL;
int _so2p_len = 0;
this->getpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
bool _remove = this->remove_x(_so2plist, _so2p_len, _pre_id);
if(_so2p_len == 0)
{
int* _so = new int[2];
_so[0] = _sub_id;
_so[1] = _obj_id;
this->removeKey(this->subIDobjID2preIDlist, (char*)_so, sizeof(int)*2 );
delete[] _so;
}
else if(_remove)
{
this->setpreIDlistBysubIDobjID(_sub_id, _obj_id, _so2plist, _so2p_len);
}
delete[] _so2plist;
}
//update p2so
{
int* _p2solist = NULL;
int _p2so_len = 0;
this->getsubIDobjIDlistBypreID(_pre_id, _p2solist, _p2so_len);
bool _remove = this->remove_xy(_p2solist, _p2so_len, _sub_id, _obj_id);
if(_p2so_len == 0)
{
this->removeKey(this->preID2subIDobjIDlist, (char*)&_pre_id, sizeof(int));
}
else if(_remove)
{
this->setsubIDobjIDlistBypreID(_pre_id, _p2solist, _p2so_len);
}
delete[] _p2solist;
}
}
bool
@ -492,7 +684,7 @@ KVstore::remove_x(int*& _xlist, int& _list_len, int _x_id)
{
if(_xlist[i] == _x_id)
{
/* move the latter ones backward */
//move the latter ones backward
for(int j = i+1; j < _list_len; j ++)
{
_xlist[j-1] = _xlist[j];
@ -513,7 +705,7 @@ KVstore::remove_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id)
bool _remove = (_xylist[i] == _x_id) && (_xylist[i+1] == _y_id);
if(_remove)
{
/* move the latter pairs backward */
//move the latter pairs backward
for(int j = i+2; j < _list_len; j += 2)
{
_xylist[j-2] = _xylist[j];
@ -726,7 +918,7 @@ bool KVstore::setLiteralByID(const int _id, string _literal){
/* for subID2objIDlist
* _mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE
* ***/
bool KVstore::open_subid2objidlist(const int _mode){
bool KVstore::open_subID2objIDlist(const int _mode){
return this->open(this->subID2objIDlist, KVstore::s_sID2oIDlist, _mode);
}
bool KVstore::getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len){
@ -758,9 +950,11 @@ bool KVstore::setobjIDlistBysubID(int _subid, const int* _objidlist, int _list_l
/* for objID2subIDlist
* _mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE
* ***/
bool KVstore::open_objid2subidlist(const int _mode){
bool KVstore::open_objID2subIDlist(const int _mode)
{
return this->open(this->objID2subIDlist, KVstore::s_oID2sIDlist, _mode);
}
bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len){
char* _tmp = NULL;
int _len = 0;
@ -782,7 +976,9 @@ bool KVstore::getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len){
return true;
}
bool KVstore::setsubIDlistByobjID(int _objid, const int* _subidlist, int _list_len){
bool KVstore::setsubIDlistByobjID(int _objid, const int* _subidlist, int _list_len)
{
return this->setValueByKey
(this->objID2subIDlist, (char*)&_objid, sizeof(int),(char*)_subidlist, _list_len * sizeof(int));
}
@ -790,7 +986,8 @@ bool KVstore::setsubIDlistByobjID(int _objid, const int* _subidlist, int _list_l
/* for subID&preID2objIDlist
* _mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE
* ***/
bool KVstore::open_subIDpreID2objIDlist(const int _mode){
bool KVstore::open_subIDpreID2objIDlist(const int _mode)
{
return this->open(this->subIDpreID2objIDlist, KVstore::s_sIDpID2oIDlist, _mode);
}
@ -821,7 +1018,8 @@ bool KVstore::getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist,
return true;
}
bool KVstore::setobjIDlistBysubIDpreID(int _subid, int _preid, const int* _objidlist, int _list_len){
bool KVstore::setobjIDlistBysubIDpreID(int _subid, int _preid, const int* _objidlist, int _list_len)
{
int* _sp = new int[2];
_sp[0] = _subid;
_sp[1] = _preid;
@ -834,15 +1032,16 @@ bool KVstore::setobjIDlistBysubIDpreID(int _subid, int _preid, const int* _objid
return _set;
}
/* for objID&preID2subIDlist
* _mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE
* ***/
bool KVstore::open_objIDpreID2subIDlist(const int _mode){
bool KVstore::open_objIDpreID2subIDlist(const int _mode)
{
return this->open(this->objIDpreID2subIDlist, KVstore::s_oIDpID2sIDlist, _mode);
}
bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len){
bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len)
{
char* _tmp = NULL;
int _len = 0;
int* _sp = new int[2];
@ -870,7 +1069,8 @@ bool KVstore::getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist,
return true;
}
bool KVstore::setsubIDlistByobjIDpreID(int _objid, int _preid, const int* _subidlist, int _list_len){
bool KVstore::setsubIDlistByobjIDpreID(int _objid, int _preid, const int* _subidlist, int _list_len)
{
int* _sp=new int[2];
_sp[0] = _objid;
_sp[1] = _preid;
@ -882,11 +1082,13 @@ bool KVstore::setsubIDlistByobjIDpreID(int _objid, int _preid, const int* _subid
return _set;
}
/* for subID 2 preID&objIDlist */
//Open the subID -> (preID, objID) list index tree.
//_mode is handed unchanged to the generic open(); its result is returned as-is.
bool KVstore::open_subID2preIDobjIDlist(const int _mode)
{
    const bool opened = this->open(this->subID2preIDobjIDlist, KVstore::s_sID2pIDoIDlist, _mode);
    return opened;
}
bool KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len)
{
char* _tmp = NULL;
@ -909,6 +1111,8 @@ bool KVstore::getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int&
return true;
}
//NOTICE: the list is ordered pairwise by (pre, obj), and the whole is viewed as a string
bool KVstore::setpreIDobjIDlistBysubID(int _subid, const int* _preid_objidlist, int _list_len)
{
return this->setValueByKey
@ -920,6 +1124,7 @@ bool KVstore::open_objID2preIDsubIDlist(const int _mode)
{
return this->open(this->objID2preIDsubIDlist, KVstore::s_oID2pIDsIDlist, _mode);
}
bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len)
{
char* _tmp = NULL;
@ -942,19 +1147,240 @@ bool KVstore::getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int&
return true;
}
//Store _preid_subidlist (_list_len ints) under key _objid in objID2preIDsubIDlist.
//Returns the result of setValueByKey().
bool KVstore::setpreIDsubIDlistByobjID(int _objid, const int* _preid_subidlist, int _list_len)
{
    const char* key = (const char*)&_objid;
    const char* value = (const char*)_preid_subidlist;
    //the value length is passed in bytes, not in elements
    const int value_bytes = _list_len * sizeof(int);
    return this->setValueByKey(this->objID2preIDsubIDlist, key, sizeof(int), value, value_bytes);
}
/* set the store_path as the root dir of this KVstore
* initial all Tree pointer as NULL
* */
//for subID 2 preIDlist
//Open the subID -> preID list index tree.
//_mode is handed unchanged to the generic open(); its result is returned as-is.
bool KVstore::open_subID2preIDlist(const int _mode)
{
    const bool opened = this->open(this->subID2preIDlist, KVstore::s_sID2pIDlist, _mode);
    return opened;
}
//Look up the preID list stored under key _subid in subID2preIDlist.
//Success: _preidlist receives a new[]-allocated copy of the list, _list_len its
//         number of ints, and true is returned.
//Failure: _preidlist is NULL, _list_len is 0, and false is returned.
bool KVstore::getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len)
{
    char* raw = NULL;
    int raw_len = 0;
    if(!this->getValueByKey(this->subID2preIDlist, (char*)&_subid, sizeof(int), raw, raw_len))
    {
        _preidlist = NULL;
        _list_len = 0;
        return false;
    }

    //the stored value is a packed int array: turn its byte length into an element count
    _list_len = raw_len / sizeof(int);
    _preidlist = new int[_list_len];
    memcpy((char*)_preidlist, raw, sizeof(int)*_list_len);
    //raw is deliberately not freed here (the delete[] was already disabled);
    //NOTE(review): presumably the buffer stays owned by the tree - confirm
    return true;
}
//Store _preidlist (_list_len ints) under key _subid in subID2preIDlist.
//Returns the result of setValueByKey().
bool KVstore::setpreIDlistBysubID(int _subid, const int* _preidlist, int _list_len)
{
    const char* key = (const char*)&_subid;
    const char* value = (const char*)_preidlist;
    //the value length is passed in bytes, not in elements
    const int value_bytes = _list_len * sizeof(int);
    return this->setValueByKey(this->subID2preIDlist, key, sizeof(int), value, value_bytes);
}
//for preID 2 subIDlist
//Open the preID -> subID list index tree.
//_mode is handed unchanged to the generic open(); its result is returned as-is.
bool KVstore::open_preID2subIDlist(const int _mode)
{
    const bool opened = this->open(this->preID2subIDlist, KVstore::s_pID2sIDlist, _mode);
    return opened;
}
//Look up the subID list stored under key _preid in preID2subIDlist.
//Success: _subidlist receives a new[]-allocated copy of the list, _list_len its
//         number of ints, and true is returned.
//Failure: _subidlist is NULL, _list_len is 0, and false is returned.
bool KVstore::getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len)
{
    char* raw = NULL;
    int raw_len = 0;
    if(!this->getValueByKey(this->preID2subIDlist, (char*)&_preid, sizeof(int), raw, raw_len))
    {
        _subidlist = NULL;
        _list_len = 0;
        return false;
    }

    //the stored value is a packed int array: turn its byte length into an element count
    _list_len = raw_len / sizeof(int);
    _subidlist = new int[_list_len];
    memcpy((char*)_subidlist, raw, sizeof(int)*_list_len);
    //raw is deliberately not freed here (the delete[] was already disabled);
    //NOTE(review): presumably the buffer stays owned by the tree - confirm
    return true;
}
//Store _subidlist (_list_len ints) under key _preid in preID2subIDlist.
//Returns the result of setValueByKey().
bool KVstore::setsubIDlistBypreID(int _preid, const int* _subidlist, int _list_len)
{
    const char* key = (const char*)&_preid;
    const char* value = (const char*)_subidlist;
    //the value length is passed in bytes, not in elements
    const int value_bytes = _list_len * sizeof(int);
    return this->setValueByKey(this->preID2subIDlist, key, sizeof(int), value, value_bytes);
}
//for objID 2 preIDlist
//Open the objID -> preID list index tree.
//_mode is handed unchanged to the generic open(); its result is returned as-is.
bool KVstore::open_objID2preIDlist(const int _mode)
{
    const bool opened = this->open(this->objID2preIDlist, KVstore::s_oID2pIDlist, _mode);
    return opened;
}
//Look up the preID list stored under key _objid in objID2preIDlist.
//Success: _preidlist receives a new[]-allocated copy of the list, _list_len its
//         number of ints, and true is returned.
//Failure: _preidlist is NULL, _list_len is 0, and false is returned.
bool KVstore::getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len)
{
    char* raw = NULL;
    int raw_len = 0;
    if(!this->getValueByKey(this->objID2preIDlist, (char*)&_objid, sizeof(int), raw, raw_len))
    {
        _preidlist = NULL;
        _list_len = 0;
        return false;
    }

    //the stored value is a packed int array: turn its byte length into an element count
    _list_len = raw_len / sizeof(int);
    _preidlist = new int[_list_len];
    memcpy((char*)_preidlist, raw, sizeof(int)*_list_len);
    //raw is deliberately not freed here (the delete[] was already disabled);
    //NOTE(review): presumably the buffer stays owned by the tree - confirm
    return true;
}
//Store _preidlist (_list_len ints) under key _objid in objID2preIDlist.
//Returns the result of setValueByKey().
bool KVstore::setpreIDlistByobjID(int _objid, const int* _preidlist, int _list_len)
{
    const char* key = (const char*)&_objid;
    const char* value = (const char*)_preidlist;
    //the value length is passed in bytes, not in elements
    const int value_bytes = _list_len * sizeof(int);
    return this->setValueByKey(this->objID2preIDlist, key, sizeof(int), value, value_bytes);
}
//for preID 2 objIDlist
//Open the preID -> objID list index tree.
//_mode is handed unchanged to the generic open(); its result is returned as-is.
bool KVstore::open_preID2objIDlist(const int _mode)
{
    const bool opened = this->open(this->preID2objIDlist, KVstore::s_pID2oIDlist, _mode);
    return opened;
}
//Look up the objID list stored under key _preid in preID2objIDlist.
//Success: _objidlist receives a new[]-allocated copy of the list, _list_len its
//         number of ints, and true is returned.
//Failure: _objidlist is NULL, _list_len is 0, and false is returned.
bool KVstore::getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len)
{
    char* raw = NULL;
    int raw_len = 0;
    if(!this->getValueByKey(this->preID2objIDlist, (char*)&_preid, sizeof(int), raw, raw_len))
    {
        _objidlist = NULL;
        _list_len = 0;
        return false;
    }

    //the stored value is a packed int array: turn its byte length into an element count
    _list_len = raw_len / sizeof(int);
    _objidlist = new int[_list_len];
    memcpy((char*)_objidlist, raw, sizeof(int)*_list_len);
    //raw is deliberately not freed here (the delete[] was already disabled);
    //NOTE(review): presumably the buffer stays owned by the tree - confirm
    return true;
}
//Store _objidlist (_list_len ints) under key _preid in preID2objIDlist.
//Returns the result of setValueByKey().
bool KVstore::setobjIDlistBypreID(int _preid, const int* _objidlist, int _list_len)
{
    const char* key = (const char*)&_preid;
    const char* value = (const char*)_objidlist;
    //the value length is passed in bytes, not in elements
    const int value_bytes = _list_len * sizeof(int);
    return this->setValueByKey(this->preID2objIDlist, key, sizeof(int), value, value_bytes);
}
//for subID&objID2preIDlist _mode is either KVstore::CREATE_MODE or KVstore::READ_WRITE_MODE
//Open the (subID, objID) -> preID list index tree.
//_mode is handed unchanged to the generic open(); its result is returned as-is.
bool KVstore::open_subIDobjID2preIDlist(const int _mode)
{
    const bool opened = this->open(this->subIDobjID2preIDlist, KVstore::s_sIDoID2pIDlist, _mode);
    return opened;
}
//Look up the preID list stored under the composite key (_subid, _objid) in
//subIDobjID2preIDlist.
//Success: _preidlist receives a new[]-allocated copy of the list, _list_len its
//         number of ints, and true is returned.
//Failure: _preidlist is NULL, _list_len is 0, and false is returned.
bool KVstore::getpreIDlistBysubIDobjID(int _subid, int _objid, int*& _preidlist, int& _list_len)
{
    char* _tmp = NULL;
    int _len = 0;
    //the key is the two ids laid out back to back; a stack array gives the same
    //bytes as the former new int[2] without a heap round-trip or a leak on throw
    int _so[2] = { _subid, _objid };
    bool _get = this->getValueByKey(this->subIDobjID2preIDlist, (char*)_so, sizeof(int)*2, _tmp, _len);
    if(!_get)
    {
        _preidlist = NULL;
        _list_len = 0;
        return false;
    }

    //the stored value is a packed int array: turn its byte length into an element count
    _list_len = _len / sizeof(int);
    _preidlist = new int[_list_len];
    memcpy((char*)_preidlist, _tmp, sizeof(int)*_list_len);
    //_tmp is deliberately not freed here (the delete[] was already disabled);
    //NOTE(review): presumably the buffer stays owned by the tree - confirm
    return true;
}
//Store _preidlist (_list_len ints) under the composite key (_subid, _objid) in
//subIDobjID2preIDlist. Returns the result of setValueByKey().
bool KVstore::setpreIDlistBysubIDobjID(int _subid, int _objid, const int* _preidlist, int _list_len)
{
    //the key is the two ids laid out back to back; a stack array gives the same
    //bytes as the former new int[2] without a heap round-trip or a leak on throw
    int _so[2] = { _subid, _objid };
    return this->setValueByKey
        (this->subIDobjID2preIDlist, (char*)_so, sizeof(int)*2, (char*)_preidlist, _list_len * sizeof(int));
}
//preID2subID&objIDlist
//Open the preID -> (subID, objID) list index tree.
//_mode is handed unchanged to the generic open(); its result is returned as-is.
bool KVstore::open_preID2subIDobjIDlist(const int _mode)
{
    const bool opened = this->open(this->preID2subIDobjIDlist, KVstore::s_pID2sIDoIDlist, _mode);
    return opened;
}
//Look up the (subID, objID) list stored under key _preid in preID2subIDobjIDlist.
//Success: _subid_objidlist receives a new[]-allocated copy of the list, _list_len
//         its number of ints (not pairs), and true is returned.
//Failure: _subid_objidlist is NULL, _list_len is 0, and false is returned.
bool KVstore::getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len)
{
    char* raw = NULL;
    int raw_len = 0;
    if(!this->getValueByKey(this->preID2subIDobjIDlist, (char*)&_preid, sizeof(int), raw, raw_len))
    {
        _subid_objidlist = NULL;
        _list_len = 0;
        return false;
    }

    //the stored value is a packed int array: turn its byte length into an element count
    _list_len = raw_len / sizeof(int);
    _subid_objidlist = new int[_list_len];
    memcpy((char*)_subid_objidlist, raw, sizeof(int)*_list_len);
    //raw is deliberately not freed here (the delete[] was already disabled);
    //NOTE(review): presumably the buffer stays owned by the tree - confirm
    return true;
}
//Store _subid_objidlist (_list_len ints) under key _preid in preID2subIDobjIDlist.
//Returns the result of setValueByKey().
bool KVstore::setsubIDobjIDlistBypreID(int _preid, const int* _subid_objidlist, int _list_len)
{
    const char* key = (const char*)&_preid;
    const char* value = (const char*)_subid_objidlist;
    //the value length is passed in bytes, not in elements
    const int value_bytes = _list_len * sizeof(int);
    return this->setValueByKey(this->preID2subIDobjIDlist, key, sizeof(int), value, value_bytes);
}
//set the store_path as the root dir of this KVstore
//initial all Tree pointer as NULL
KVstore::KVstore(const string _store_path){
this->store_path = _store_path;
this->entity2id = NULL;
this->id2entity = NULL;
@ -972,12 +1398,21 @@ KVstore::KVstore(const string _store_path){
this->subID2preIDobjIDlist = NULL;
this->objID2preIDsubIDlist = NULL;
this->subID2preIDlist = NULL;
this->preID2subIDlist = NULL;
this->objID2preIDlist = NULL;
this->preID2objIDlist = NULL;
this->subIDobjID2preIDlist = NULL;
this->preID2subIDobjIDlist = NULL;
}
/* release all the memory used in this KVstore
* before destruction
* */
KVstore::~KVstore(){
//release all the memory used in this KVstore
//before destruction
KVstore::~KVstore()
{
//this->release();
this->flush();
@ -998,6 +1433,13 @@ KVstore::~KVstore(){
delete this->subID2preIDobjIDlist;
delete this->objID2preIDsubIDlist;
delete this->subID2preIDlist;
delete this->preID2subIDlist;
delete this->objID2preIDlist;
delete this->preID2objIDlist;
delete this->subIDobjID2preIDlist;
delete this->preID2subIDobjIDlist;
}
/*
@ -1024,7 +1466,15 @@ void KVstore::flush(){
this->flush(this->subID2preIDobjIDlist);
this->flush(this->objID2preIDsubIDlist);
this->flush(this->subID2preIDlist);
this->flush(this->preID2subIDlist);
this->flush(this->objID2preIDlist);
this->flush(this->preID2objIDlist);
this->flush(this->subIDobjID2preIDlist);
this->flush(preID2subIDobjIDlist);
}
/* Release all the memory used in this KVstore,
* following an flush() for each Tree pointer
* any Tree pointer that is null or
@ -1053,6 +1503,7 @@ void KVstore::flush(){
void KVstore::open()
{
cout << "open KVstore" << endl;
this->open(this->entity2id, KVstore::s_entity2id, KVstore::READ_WRITE_MODE);
this->open(this->id2entity, KVstore::s_id2entity, KVstore::READ_WRITE_MODE);
@ -1071,11 +1522,17 @@ void KVstore::open()
this->open(this->subID2preIDobjIDlist, KVstore::s_sID2pIDoIDlist, KVstore::READ_WRITE_MODE);
this->open(this->objID2preIDsubIDlist, KVstore::s_oID2pIDsIDlist, KVstore::READ_WRITE_MODE);
this->open(this->subID2preIDlist, KVstore::s_sID2pIDlist, KVstore::READ_WRITE_MODE);
this->open(this->preID2subIDlist, KVstore::s_pID2sIDlist, KVstore::READ_WRITE_MODE);
this->open(this->objID2preIDlist, KVstore::s_oID2pIDlist, KVstore::READ_WRITE_MODE);
this->open(this->preID2objIDlist, KVstore::s_pID2oIDlist, KVstore::READ_WRITE_MODE);
this->open(this->subIDobjID2preIDlist, KVstore::s_sIDoID2pIDlist, KVstore::READ_WRITE_MODE);
this->open(this->preID2subIDobjIDlist, KVstore::s_pID2sIDoIDlist, KVstore::READ_WRITE_MODE);
}
/*
* private methods:
*/
void KVstore::flush(Tree* _p_btree){
void KVstore::flush(Tree* _p_btree)
{
if(_p_btree != NULL)
{
_p_btree->save();
@ -1096,8 +1553,7 @@ bool KVstore::open(Tree* & _p_btree, const string _tree_name, const int _mode){
_p_btree = new Tree(this->store_path, _tree_name, "build");
return true;
}
else
if(_mode == KVstore::READ_WRITE_MODE)
else if(_mode == KVstore::READ_WRITE_MODE)
{
_p_btree = new Tree(this->store_path, _tree_name, "open");
return true;
@ -1110,13 +1566,13 @@ bool KVstore::open(Tree* & _p_btree, const string _tree_name, const int _mode){
return false;
}
//DEBUG:not achieve multiple-type functions, may have to organize in Bstr, or add functions in btree
bool KVstore::setValueByKey(Tree* _p_btree, const char* _key, int _klen, const char* _val, int _vlen){
bool KVstore::setValueByKey(Tree* _p_btree, const char* _key, int _klen, const char* _val, int _vlen)
{
return _p_btree->insert(_key, _klen, _val, _vlen);
}
bool KVstore::getValueByKey(Tree* _p_btree, const char* _key, int _klen, char*& _val, int& _vlen){
bool KVstore::getValueByKey(Tree* _p_btree, const char* _key, int _klen, char*& _val, int& _vlen)
{
return _p_btree->search(_key, _klen, _val, _vlen);
}
@ -1159,3 +1615,12 @@ string KVstore::s_oIDpID2sIDlist="s_oIDpID2sIDlist";
string KVstore::s_sID2pIDoIDlist="s_sID2pIDoIDlist";
string KVstore::s_oID2pIDsIDlist="s_oID2pIDsIDlist";
string KVstore::s_sID2pIDlist="s_sID2pIDlist";
string KVstore::s_pID2sIDlist="s_pID2sIDlist";
string KVstore::s_oID2pIDlist="s_oID2pIDlist";
string KVstore::s_pID2oIDlist="s_pID2oIDlist";
string KVstore::s_sIDoID2pIDlist="s_sIDoID2pIDlist";
string KVstore::s_pID2sIDoIDlist="s_pID2sIDoIDlist";

View File

@ -12,6 +12,9 @@
#include "../Util/Util.h"
#include "tree/Tree.h"
//TODO:add debug instruction, control if using the so2p index and p2so index
//these are really costly
class KVstore
{
public:
@ -44,72 +47,97 @@ private:
bool remove_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id);
public:
/* for entity2id */
//for entity2id
bool open_entity2id(const int _mode);
int getIDByEntity(const std::string _entity);
bool setIDByEntity(const std::string _entity, int _id);
/* for id2entity */
//for id2entity
bool open_id2entity(const int _mode);
std::string getEntityByID(int _id);
bool setEntityByID(int _id, std::string _entity);
/* for predicate2id */
//for predicate2id
bool open_predicate2id(const int _mode);
int getIDByPredicate(const std::string _predicate);
bool setIDByPredicate(const std::string _predicate, int _id);
/* for id2predicate */
//for id2predicate
bool open_id2predicate(const int _mode);
std::string getPredicateByID(int _id);
bool setPredicateByID(const int _id, std::string _predicate);
/* for id2literal */
//for id2literal
bool open_id2literal(const int _mode);
std::string getLiteralByID(int _id);
bool setLiteralByID(const int _id, std::string _literal);
/* for literal2id */
//for literal2id
bool open_literal2id(const int _mode);
int getIDByLiteral(std::string _literal);
bool setIDByLiteral(const std::string _literal, int _id);
/* for subID 2 objIDlist */
bool open_subid2objidlist(const int _mode);
//for subID 2 objIDlist
bool open_subID2objIDlist(const int _mode);
bool getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len);
bool setobjIDlistBysubID(int _subid, const int* _objidlist, int _list_len);
/* for objID 2 subIDlist */
bool open_objid2subidlist(const int _mode);
//for objID 2 subIDlist
bool open_objID2subIDlist(const int _mode);
bool getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len);
bool setsubIDlistByobjID(int _objid, const int* _subidlist, int _list_len);
/* for subID&preID 2 objIDlist */
//for subID&preID 2 objIDlist
bool open_subIDpreID2objIDlist(const int _mode);
bool getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len);
bool setobjIDlistBysubIDpreID(int _subid, int _preid, const int* _objidlist, int _list_len);
/* for objID&preID 2 subIDlist */
//for objID&preID 2 subIDlist
bool open_objIDpreID2subIDlist(const int _mode);
bool getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len);
bool setsubIDlistByobjIDpreID(int _objid, int _preid, const int* _subidlist, int _list_len);
/* for subID 2 preID&objIDlist */
//for subID 2 preID&objIDlist
bool open_subID2preIDobjIDlist(const int _mode);
bool getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len);
bool setpreIDobjIDlistBysubID(int _subid, const int* _preid_objidlist, int _list_len);
/* for objID 2 preID&subIDlist */
//for objID 2 preID&subIDlist
bool open_objID2preIDsubIDlist(const int _mode);
bool getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len);
bool setpreIDsubIDlistByobjID(int _objid, const int* _preid_subidlist, int _list_len);
/*
* _store_path denotes where to store the data
*/
//for subID 2 preIDlist
bool open_subID2preIDlist(const int _mode);
bool getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len);
bool setpreIDlistBysubID(int _subid, const int* _preidlist, int _list_len);
//for preID 2 subIDlist
bool open_preID2subIDlist(const int _mode);
bool getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len);
bool setsubIDlistBypreID(int _preid, const int* _subidlist, int _list_len);
//for objID 2 preIDlist
bool open_objID2preIDlist(const int _mode);
bool getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len);
bool setpreIDlistByobjID(int _objid, const int* _preidlist, int _list_len);
//for preID 2 objIDlist
bool open_preID2objIDlist(const int _mode);
bool getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len);
bool setobjIDlistBypreID(int _preid, const int* _objidlist, int _list_len);
//for subID&objID 2 preIDlist
bool open_subIDobjID2preIDlist(const int _mode);
bool getpreIDlistBysubIDobjID(int _subID, int _objID, int*& _preidlist, int& _list_len);
bool setpreIDlistBysubIDobjID(int _subID, int _objID, const int* _preidlist, int _list_len);
//for preID 2 subID&objIDlist
bool open_preID2subIDobjIDlist(const int _mode);
bool getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len);
bool setsubIDobjIDlistBypreID(int _preid, const int* _subid_objidlist, int _list_len);
KVstore(std::string _store_path = ".");
~KVstore();
void flush();
@ -119,11 +147,8 @@ public:
private:
std::string store_path;
/*
*
* map entity to its id, and id to the entity
* s_entity2id is relative store file name
*/
//map entity to its id, and id to the entity
//s_entity2id is relative store file name
Tree* entity2id;
Tree* id2entity;
static std::string s_entity2id;
@ -145,7 +170,7 @@ private:
static std::string s_sID2oIDlist;
static std::string s_oID2sIDlist;
/* lack exist in update tuple */
//lack exist in update tuple
Tree* subIDpreID2objIDlist;
Tree* objIDpreID2subIDlist;
static std::string s_sIDpID2oIDlist;
@ -156,6 +181,20 @@ private:
static std::string s_sID2pIDoIDlist;
static std::string s_oID2pIDsIDlist;
Tree* subID2preIDlist;
Tree* preID2subIDlist;
static std::string s_sID2pIDlist;
static std::string s_pID2sIDlist;
Tree* objID2preIDlist;
Tree* preID2objIDlist;
static std::string s_oID2pIDlist;
static std::string s_pID2oIDlist;
Tree* subIDobjID2preIDlist;
Tree* preID2subIDobjIDlist;
static std::string s_sIDoID2pIDlist;
static std::string s_pID2sIDoIDlist;
void flush(Tree* _p_btree);
bool setValueByKey(Tree* _p_btree, const char* _key, int _klen, const char* _val, int _vlen);

View File

@ -271,3 +271,59 @@ Node::subKey(int _index, bool ifdel)
return true;
}
//Binary search for the first key that _bstr is less than.
//Returns the index of the first key greater than _bstr, or getNum() when no key
//is greater. Relies on keys[0..getNum()) being in ascending order, as the linear
//scan it replaces did.
int
Node::searchKey_less(const Bstr& _bstr) const
{
    int first = 0;
    int last = this->getNum() - 1;
    while(first <= last)
    {
        const int probe = (first + last) / 2;
        if(!(this->keys[probe] > _bstr))
        {
            //keys[probe] <= _bstr: the answer lies strictly to the right
            first = probe + 1;
            continue;
        }
        //keys[probe] > _bstr; if nothing is left to its left, probe is the answer
        if(first == probe)
            break;
        last = probe;
    }
    return first;
}
//Binary search for a key equal to _bstr.
//Returns the index of the matching key, or getNum() when there is no match.
int
Node::searchKey_equal(const Bstr& _bstr) const
{
    //index of the first key greater than _bstr; an equal key, if present,
    //sits immediately before that position
    const int upper = this->searchKey_less(_bstr);
    const bool found = (upper > 0) && (this->keys[upper-1] == _bstr);
    return found ? upper - 1 : this->getNum();
}
//Binary search for the first key that _bstr is less than or equal to.
//Returns the index of the key equal to _bstr when one exists, otherwise the index
//of the first key greater than _bstr (getNum() when no such key exists).
int
Node::searchKey_lessEqual(const Bstr& _bstr) const
{
    //searchKey_less yields the first key greater than _bstr; step back by one
    //when the key right before it is an exact match
    int ret = this->searchKey_less(_bstr);
    if(ret > 0 && this->keys[ret-1] == _bstr)
        return ret - 1;
    return ret;
}

View File

@ -67,6 +67,12 @@ public:
bool setKey(const Bstr* _key, int _index, bool ifcopy = false);
bool addKey(const Bstr* _key, int _index, bool ifcopy = false);
bool subKey(int _index, bool ifdel = false);
//several binary key search utilities
int searchKey_less(const Bstr& _bstr) const;
int searchKey_equal(const Bstr& _bstr) const;
int searchKey_lessEqual(const Bstr& _bstr) const;
//virtual functions: polymorphic
virtual Node* getChild(int _index) const { return NULL; };
virtual bool setChild(Node* _child, int _index) { return true; };

View File

@ -19,7 +19,9 @@ class Storage
{
public:
static const unsigned BLOCK_SIZE = 1 << 16; //fixed size of disk-block
static const unsigned long long MAX_BUFFER_SIZE = 0x1ffffffff; //max buffer size
//there are 18 B+Tree indexes and one vstree index, so set 3G buffer size
static const unsigned long long MAX_BUFFER_SIZE = 0xC0000000; //max buffer size
//static const unsigned long long MAX_BUFFER_SIZE = 0x1ffffffff; //max buffer size
//static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size
static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE/Node::INTL_SIZE;
static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num

View File

@ -195,10 +195,13 @@ Tree::insert(const Bstr* _key, const Bstr* _value)
Bstr bstr = *_key;
while(!p->isLeaf())
{
j = p->getNum();
for(i = 0; i < j; ++i)
if(bstr < *(p->getKey(i)))
break;
//j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
//NOTICE: using binary search is better here
i = p->searchKey_less(bstr);
q = p->getChild(i);
this->prepare(q);
if(q->getNum() == Node::MAX_KEY_NUM)
@ -226,10 +229,12 @@ Tree::insert(const Bstr* _key, const Bstr* _value)
p = q;
}
}
j = p->getNum();
for(i = 0; i < j; ++i)
if(bstr < *(p->getKey(i)))
break;
//j = p->getNum();
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
//insert existing key is ok, but not inserted in
//however, the tree-shape may change due to possible split in former code
bool ifexist = false;
@ -287,7 +292,7 @@ Tree::modify(const Bstr* _key, const Bstr* _value)
return true;
}
/* this function is useful for search and modify, and range-query */
//this function is useful for search and modify, and range-query
Node* //return the first key's position that >= *_key
Tree::find(const Bstr* _key, int* _store, bool ifmodify) const
{ //to assign value for this->bstr, function shouldn't be const!
@ -300,17 +305,22 @@ Tree::find(const Bstr* _key, int* _store, bool ifmodify) const
{
if(ifmodify)
p->setDirty();
j = p->getNum();
for(i = 0; i < j; ++i) //BETTER(Binary-Search)
if(bstr < *(p->getKey(i)))
break;
//j = p->getNum();
//for(i = 0; i < j; ++i) //BETTER(Binary-Search)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
p = p->getChild(i);
this->prepare(p);
}
j = p->getNum();
for(i = 0; i < j; ++i)
if(bstr <= *(p->getKey(i)))
break;
//for(i = 0; i < j; ++i)
//if(bstr <= *(p->getKey(i)))
//break;
i = p->searchKey_lessEqual(bstr);
if(i == j)
*_store = -1; //Not Found
else
@ -353,9 +363,11 @@ Tree::remove(const Bstr* _key)
while(!p->isLeaf())
{
j = p->getNum();
for(i = 0; i < j; ++i)
if(bstr < *(p->getKey(i)))
break;
//for(i = 0; i < j; ++i)
//if(bstr < *(p->getKey(i)))
//break;
i = p->searchKey_less(bstr);
q = p->getChild(i);
this->prepare(q);
if(q->getNum() < Node::MIN_CHILD_NUM) //==MIN_KEY_NUM
@ -389,27 +401,44 @@ Tree::remove(const Bstr* _key)
p = q;
}
bool flag = false;
j = p->getNum(); //LeafNode(maybe root)
for(i = 0; i < j; ++i)
if(bstr == *(p->getKey(i)))
{
request -= p->getKey(i)->getLen();
request -= p->getValue(i)->getLen();
p->subKey(i, true); //to release
p->subValue(i, true); //to release
p->subNum();
if(p->getNum() == 0) //root leaf 0 key
{
this->root = NULL;
this->leaves_head = NULL;
this->leaves_tail = NULL;
this->height = 0;
this->TSM->updateHeap(p, 0, true); //instead of delete p
}
p->setDirty();
flag = true;
break;
}
//j = p->getNum(); //LeafNode(maybe root)
//for(i = 0; i < j; ++i)
// if(bstr == *(p->getKey(i)))
// {
// request -= p->getKey(i)->getLen();
// request -= p->getValue(i)->getLen();
// p->subKey(i, true); //to release
// p->subValue(i, true); //to release
// p->subNum();
// if(p->getNum() == 0) //root leaf 0 key
// {
// this->root = NULL;
// this->leaves_head = NULL;
// this->leaves_tail = NULL;
// this->height = 0;
// this->TSM->updateHeap(p, 0, true); //instead of delete p
// }
// p->setDirty();
// flag = true;
// break;
// }
i = p->searchKey_equal(bstr);
request -= p->getKey(i)->getLen();
request -= p->getValue(i)->getLen();
p->subKey(i, true); //to release
p->subValue(i, true); //to release
p->subNum();
if(p->getNum() == 0) //root leaf 0 key
{
this->root = NULL;
this->leaves_head = NULL;
this->leaves_tail = NULL;
this->height = 0;
this->TSM->updateHeap(p, 0, true); //instead of delete p
}
p->setDirty();
flag = true;
this->TSM->request(request);
bstr.clear();
return flag; //i == j, not found

View File

@ -11,9 +11,8 @@
int main(int argc, char * argv[])
{
#ifdef DEBUG
//chdir(dirname(argv[0]));
Util util;
#endif
std::string ip = Socket::DEFAULT_SERVER_IP;
unsigned short port = Socket::DEFAULT_CONNECT_PORT;

View File

@ -6,7 +6,10 @@
# Description:
This is a console integrating all commands in Gstore System and others. It
provides completion of command names, line editing features, and access to the
history list.
history list.
NOTICE: no separators required in the end of your commands, and please just type
one command at a time. If there are many instructions to execute, please write
them in a file like test.sql, and tell the gconsole to use this file
=============================================================================*/
#include "../Database/Database.h"
@ -15,11 +18,15 @@ history list.
using namespace std;
//NOTICE: not imitate the usage of gload/gquery/gclient/gserver in command line
//but need to support the query scripts(so support parameters indirectly)
//extern char *xmalloc PARAMS((size_t));
//The names of functions that actually do the manipulation.
//common commands
int help_handler PARAMS((char *));
int source_handler PARAMS((char *));
int quit_handler PARAMS((char *));
//C/S commands
int connect_handler PARAMS((char *));
@ -43,13 +50,14 @@ int show_handler PARAMS((char *));
//A structure which contains information on the commands this program can understand.
typedef struct {
const char *name; // User printable name of the function
rl_icpfunc_t *func; // Function to call to do the job
rl_icpfunc_t *func; // Function to call to do the job
const char *doc; // Documentation for this function
} COMMAND;
//
COMMAND native_commands[] = {
{ "help", help_handler, "Display this text" },
{ "?", help_handler, "Synonym for `help'" },
{ "?", help_handler, "Synonym for `help`" },
{ "source", source_handler, "use a file containing SPARQL queries" },
{ "quit", quit_handler, "Quit this console" },
{ "connect", connect_handler, "Connect to a server running Gstore" },
{ "show", show_handler, "Show the database name which is used now" },
@ -59,14 +67,14 @@ COMMAND native_commands[] = {
{ "unload", unload_handler, "Unload the current used database" },
{ "query", query_handler, "Answer a SPARQL query" },
//{ "cd", cd_handler, "Change to directory DIR" },
//{ "delete", delete_handler, "Delete FILE" },
//{ "list", list_handler, "List files in DIR" },
//{ "ls", list_handler, "Synonym for `list'" },
//{ "pwd", pwd_handler, "Print the current working directory" },
//{ "rename", rename_handler, "Rename FILE to NEWNAME" },
//{ "stat", stat_handler, "Print out statistics on FILE" },
//{ "view", view_handler, "View the contents of FILE" },
{ "cd", cd_handler, "Change to directory DIR" },
{ "delete", delete_handler, "Delete FILE" },
{ "list", list_handler, "List files in DIR" },
{ "ls", list_handler, "Synonym for `list'" },
{ "pwd", pwd_handler, "Print the current working directory" },
{ "rename", rename_handler, "Rename FILE to NEWNAME" },
{ "stat", stat_handler, "Print out statistics on FILE" },
{ "view", view_handler, "View the contents of FILE" },
{NULL, NULL, NULL }
//char* rl_icpfunc_t*, char*
@ -75,6 +83,7 @@ COMMAND native_commands[] = {
COMMAND remote_commands[] = {
{ "help", help_handler, "Display this text" },
{ "?", help_handler, "Synonym for `help'" },
{ "source", source_handler, "use a file containing SPARQL queries" },
{ "show", show_handler, "Show the database name which is used now" },
{ "build", build_handler, "Build a database from a dataset" },
{ "drop", drop_handler, "Drop a database according to the given path" },
@ -105,6 +114,8 @@ int execute_line(char *);
int valid_argument(char *, char *);
//
int too_dangerous(char *);
//
int deal_with_script(char *);
@ -122,17 +133,64 @@ FILE *output = stdout;
//current using database in local
Database *current_database = NULL;
//TODO:how to support commands scripts out or in console
//TODO:to start/close the server(using this machine)
//TODO:history in file, not only in memory
//TODO:redirect 2>&1 or adjust the fprintf->stderr to file pointer
int
main(int argc, char **argv)
{
#ifdef DEBUG
//NOTICE:this is needed to ensure the file path is the work path
//chdir(dirname(argv[0]));
//NOTICE:this is needed to set several debug files
Util util;
#endif
char *line, *s;
progname = argv[0];
cout<<Util::logarithm(3,2)<<endl;
cout<<Util::logarithm(100,2)<<endl;
int n = 10000;
cout<<Util::logarithm(n,2)<<endl;
//the info to be printed
fprintf(stderr, "\n\n\n");
fprintf(stderr, "Gstore Console(gconsole), an interactive shell based utility to communicate with gStore repositories.\n");
fprintf(stderr, "usage: start-gconsole [OPTION]\n");
fprintf(stderr, " -h,--help print this help\n");
fprintf(stderr, " -s,--source source the SPARQL script\n");
//fprintf(stderr, "-q,--quiet suppresses prompts, useful for scripting\n");
//fprintf(stderr, "-v,--version print version information\n");
fprintf(stderr, "For bug reports and suggestions, see https://github.com/Caesar11/gStore\n");
fprintf(stderr, "\n\n");
if(argc > 1)
{
if(strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
{
fprintf(stderr, "type `?` or `help` in the console to see info of all commands\n");
if(argc > 2)
fprintf(stderr, "nonsense to add more parameters!\n");
exit(0);
}
else if(strcmp(argv[1], "-s") == 0 || strcmp(argv[1], "--source") == 0)
{
if(argc != 3)
{
fprintf(stderr, "you should just add one script file to be sourced!\n");
exit(1);
}
return deal_with_script(argv[2]);
}
else
{
fprintf(stderr, "wrong option used, please see the help info first!\n");
exit(1);
}
}
fprintf(stderr, "notice that commands are a little different between native mode and remote mode!\n");
fprintf(stderr, "now is in native mode, please type your commands.\n");
fprintf(stderr, "please do not use any separators in the end.\n");
fprintf(stderr, "\n");
initialize_readline(); //Bind our completer
@ -145,6 +203,7 @@ main(int argc, char **argv)
line = readline("server>");
//BETTER:multi lines input in alignment?need separators like ';' in gclient.cpp
//For simplicity, we do not use this feature here.
if(line == NULL) //EOF or Ctrl-D
{
@ -266,7 +325,7 @@ execute_line(char *line)
word = line + i;
int ret = cmd->func(word);
#ifdef DEBUG
#ifdef DEBUG_PRECISE
fprintf(stderr, "all done, now to close the file!\n");
#endif
if(output != stdout)
@ -335,6 +394,48 @@ valid_argument(char *caller, char *arg)
return(1);
}
//support commands scripts
//QUERY:source another file again(how about exactly this script twice or more)
//
//Run the script `file` line by line: every line that is still non-empty after
//stripwhite() is passed to execute_line().
//Returns -1 if the script cannot be opened, 0 otherwise; the return values of
//the executed commands are not propagated.
int
deal_with_script(char* file)
{
	FILE* fp = NULL;
	if((fp = fopen(file, "r")) == NULL)
	{
		fprintf(stderr, "open error: %s\n", file);
		return -1;
	}

	//WARN:the length of each line in the script should be <= 500
	//(fgets reads at most 500 chars per call, so a longer line is split
	//and its pieces are executed as separate commands)
	char line[505], *s = NULL;
	while((fgets(line, 501, fp)) != NULL)
	{
		//NOTICE:empty line here also contains '\n'
		if(strlen(line) == 1)
			continue;
		s = stripwhite(line);
		if(*s)
		{
			execute_line(s);
		}
	}

	//end of file: release the script handle, otherwise every sourced
	//script leaks one FILE*
	fclose(fp);

	if(current_database != NULL)
	{
		fprintf(stderr, "\nplease unload your database before quiting!\n\n");
		//TODO
	}
	if(gc != NULL)
	{
		fprintf(stderr, "\nplease return to native mode before quiting!\n\n");
		//TODO
	}

	return 0;
}
/* **************************************************************** */
/* */
@ -464,6 +565,13 @@ help_handler(char *args)
return(0);
}
//NOTICE:the SPARQL file to be used should be placed in the local machine even when in remote mode
//Handler of the `source` command: args is forwarded unchanged to
//deal_with_script() as the script path, and its result is returned as is.
int
source_handler(char *args)
{
	const int status = deal_with_script(args);
	return status;
}
int
quit_handler(char *args)
{
@ -526,6 +634,7 @@ connect_handler(char *args)
//return -1;
//}
current_commands = remote_commands;
fprintf(stderr, "now is in remote mode, please type your commands.\n");
return 0;
}
@ -547,6 +656,7 @@ disconnect_handler(char *args)
delete gc;
gc = NULL;
current_commands = native_commands;
fprintf(stderr, "now is in native mode, please type your commands.\n");
return 0;
}
@ -554,15 +664,24 @@ disconnect_handler(char *args)
int
show_handler(char *args)
{
//BETTER:show all or inuse, ls|grep "\.db" > ans.txt, as well as server
bool flag = false;
if(strcmp(args, "all") == 0)
flag = true;
if(gc != NULL)
{
string database = gc->show();
string database = gc->show(flag);
fprintf(stderr, "%s", database.c_str());
return 0;
}
//native mode
if(flag)
{
string database = Util::getItemsFromDir(Util::db_home);
fprintf(stderr, "%s", database.c_str());
return 0;
}
if(current_database == NULL)
{
fprintf(stderr, "no database used now!\n");
@ -574,6 +693,8 @@ show_handler(char *args)
return 0;
}
//NOTICE: for build() and load(), always keep database in the root of gStore
int
build_handler(char *args)
{
@ -583,9 +704,22 @@ build_handler(char *args)
i++;
}
args[i++] = '\0';
//BETTER:the position is the root of Gstore by default
//(or change to a specified folder later)
string database = string(args) + string(".db");
string database = string(args);
//WARN:user better not end with ".db" by themselves!!!
if(database.substr(database.length()-3, 3) == ".db")
{
fprintf(stderr, "your db name to be built should not ends with '.db')\n");
return -1;
}
database += string(".db");
//if(database[0] != '/' && database[0] != '~') //using relative path
//{
//database = string("../") + database;
//}
string dataset = string(args + i);
while(args[i] && whitespace(args[i]))
{
@ -638,6 +772,8 @@ build_handler(char *args)
else
{
fprintf(stderr, "import RDF file to database fail.\n");
delete current_database;
current_database = NULL;
return -1;
}
}
@ -645,8 +781,7 @@ build_handler(char *args)
int
drop_handler(char *args)
{
//TODO:native and remote
//NOTICE: not using databases, drop a given one at a time
//REQUIRE: not using databases, drop a given one at a time
if(current_database != NULL) //how to judge when remote
{
fprintf(stderr, "please donot use this command when you are using a database!\n");
@ -658,24 +793,52 @@ drop_handler(char *args)
return -1;
}
char info[] = "drop";
too_dangerous(info);
//DEBUG:not works
//char info[] = "drop";
//too_dangerous(info);
//only drop when *.db, avoid other files be removed
string database = string(args) + string(".db");
if(gc != NULL)
{
if(gc->drop(database))
return 0;
else
return -1;
}
string cmd = string("rm -rf ") + database;
fprintf(stderr, "%s\n", cmd.c_str());
fprintf(stderr, "%s dropped!\n", database.c_str());
return system(cmd.c_str());
//return remove(args);
//TODO:only drop when *.db, avoid other files be removed
//string cmd = string("rm -rf ") + string(args);
//fprintf(stderr, "%s\n", cmd.c_str());
//return system(cmd.c_str());
return 0;
//return 0;
}
//NOTICE+WARN:
//generally, datasets are very large while a query file cannot be too large.
//So, when in remote mode, we expect that datasets in the server are used, while
//queries in local machine are used(transformed to string and passed to server).
int
load_handler(char *args)
{
bool flag = true;
//NOTICE: user should use exactly the name they type to build database
string database = string(args);
if(database.substr(database.length()-3, 3) == ".db")
{
fprintf(stderr, "you should use exactly the same db name as building!(which should not ends with '.db')\n");
return -1;
}
database += string(".db");
//if(database[0] != '/' && database[0] != '~') //using relative path
//{
//database = string("../") + database;
//}
if(gc != NULL)
{
gc->load(database);
if(gc->load(database))
flag = false;
}
else
{
@ -685,10 +848,26 @@ load_handler(char *args)
delete current_database;
}
current_database = new Database(database);
current_database->load();
if(!current_database->load())
flag = false;
}
return 0;
if(flag)
{
fprintf(stderr, "database loaded successfully!\n");
return 0;
}
else
{
fprintf(stderr, "fail to load the database!\n");
if(gc == NULL)
{
delete current_database;
current_database = NULL;
}
//QUERY:else?
return -1;
}
}
int
@ -731,6 +910,8 @@ unload_handler(char *args)
int
query_handler(char *args)
{
//DEBUG:when using `query lubm.db ../data/ex0.sql`
//endless, and the db file is damaged!
if(args == NULL || *args == '\0')
{
fprintf(stderr, "invalid arguments!\n");
@ -794,7 +975,7 @@ query_handler(char *args)
bool ret = current_database->query(sparql, rs, output);
if(ret)
{
#ifdef DEBUG
#ifdef DEBUG_PRECISE
fprintf(stderr, "query() returns true!\n");
#endif
return 0;

View File

@ -12,20 +12,31 @@ TODO: add -h/--help for help message
using namespace std;
/*
* [0]./gload [1]data_folder_path [2]rdf_file_path
*/
//[0]./gload [1]data_folder_path [2]rdf_file_path
int
main(int argc, char * argv[])
{
#ifdef DEBUG
//chdir(dirname(argv[0]));
Util util;
#endif
//system("clock");
cout << "gload..." << endl;
{
cout << "argc: " << argc << "\t";
cout << "DB_store:" << argv[1] << "\t";
cout << "RDF_data: " << argv[2] << "\t";
cout << endl;
}
string _db_path = string(argv[1]);
//if(_db_path[0] != '/' && _db_path[0] != '~') //using relative path
//{
//_db_path = string("../") + _db_path;
//}
string _rdf = string(argv[2]);
//if(_rdf[0] != '/' && _rdf[0] != '~') //using relative path
//{
//_rdf = string("../") + _rdf;
//}
Database _db(_db_path);
bool flag = _db.build(_rdf);
if (flag)

View File

@ -37,9 +37,8 @@ help()
int
main(int argc, char * argv[])
{
#ifdef DEBUG
//chdir(dirname(argv[0]));
Util util;
#endif
if(argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
{
help();
@ -51,10 +50,20 @@ main(int argc, char * argv[])
cerr << "error: lack of DB_store to be queried" << endl;
return 0;
}
{
cout << "argc: " << argc << "\t";
cout << "DB_store:" << argv[1] << "\t";
cout << endl;
}
string db_folder = string(argv[1]);
//if(db_folder[0] != '/' && db_folder[0] != '~') //using relative path
//{
//db_folder = string("../") + db_folder;
//}
Database _db(db_folder);
_db.load();
cout << "finish loading" << endl;
// read query from file.
if (argc >= 3)
@ -76,11 +85,17 @@ main(int argc, char * argv[])
//
// string query = _ss.str();
string query = Util::getQueryFromFile(argv[2]);
string query = string(argv[2]);
//if(query[0] != '/' && query[0] != '~') //using relative path
//{
//query = string("../") + query;
//}
query = Util::getQueryFromFile(query.c_str());
if (query.empty())
{
return 0;
}
printf("query is:\n%s\n\n", query.c_str());
ResultSet _rs;
_db.query(query, _rs, stdout);
if (argc >= 4)
@ -186,6 +201,8 @@ main(int argc, char * argv[])
free(buf);
continue;
}
else
printf("%s\n", q);
//query = getQueryFromFile(p);
query = Util::getQueryFromFile(q);
if(query.empty())
@ -197,6 +214,8 @@ main(int argc, char * argv[])
fclose(fp);
continue;
}
printf("query is:\n");
printf("%s\n\n", query.c_str());
ResultSet _rs;
_db.query(query, _rs, fp);
//test...

View File

@ -11,9 +11,8 @@
int main(int argc, char * argv[])
{
#ifdef DEBUG
//chdir(dirname(argv[0]));
Util util;
#endif
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
if (argc > 1)

108
NOTES.md
View File

@ -1,108 +0,0 @@
# TODO
- full_test中捕捉stderr信息重要信息如时间结果应该确保是标准输出结果第一行是变量名有select *时应该和jena分列比
- record the usage of Virtuoso and Sesame(maybe change their source code to be used by full_test)
- 查询控制器+数值型查询局部和整体的区别一个查询中是否允许同时包含数值和非数值应该考虑全局编码还是拆分后均用B+树的范围查询)
- 完成毕业论文(图数据库查询器优化),第一部分应给出介绍和运行框架图,之后对每个部分的功能作简单说明,重点阐述自己设计/改进的模块
- - -
# TEST
using multi-join and stream, compared with jena
gstore performs worse than jena in these cases:
bsbm series: self1.sql, self3.sql, self8.sql (self4,5,6)
dbpedia series: q3.sql, q4.sql, q5.sql (q9.sql)
lubm series: q0.sql, q2.sql, q13.sql, q16.sql
watdiv series: C1.sql, F3.sql
- - -
# DEBUG
- - -
# BETTER
- 在index_join中考虑所有判断情况、一次多边、交集/过滤等等multi_join不动
- 在join时两个表时有太多策略和条件对大的需要频繁使用的数据列可考虑建立BloomFilter进行过滤
- not operate on the same db when connect to local server and native!
- VStree部分的内存和时间开销都很大测试gstore时应打印/分析各部分的时间。打印编码实例用于分析和测试,如何划分或运算空间(异或最大或夹角最大,立方体,只取决于方向而非长度)
- - -
# DOCS:
- how about STL:
http://www.zhihu.com/question/38225973?sort=created
http://www.zhihu.com/question/20201972
http://www.oschina.net/question/188977_58777
- - -
# WARN
重定义问题绝对不能忍受,现已全部解决(否则会影响其他库的使用,vim的quickfix也会一直显示
类型不匹配问题也要注意尽量不要有SparqlLexer.c的多字节常量问题
变量定义但未使用对antlr3生成的解析部分可以不考虑文件太大自动生成影响不大
以后最好使用antlr最新版支持C++的来重新生成基于面向对象防止与Linux库中的定义冲突
(目前是在重定义项前加前缀)
- - -
# KEEP
- always use valgrind to test and improve
- 此版本用于开发最终需要将整个项目转到gStore仓库中用于发布。转移时先删除gStore中除.git和LICENSE外的所有文件然后复制即可不要复制LICENSE因为版本不同也不用复制NOTES.md因为仅用于记录开发事项
- build git-page for gStore
- 测试时应都在热启动的情况下比较才有意义gStore应该开-O3优化且注释掉-g以及代码中所有debug宏
- 新算法在冷启动时时间不理想即便只是0轮join开销也很大对比时采用热启动
- 不能支持非连通图查询
- - -
# ADVICE
- 构造极小型RDF数据用于调试
- 将KVstore模块中在堆中寻找Node*的操作改为用treap实现(或多存指针避开搜索?)
- 尽量合并相似模块比如storage和LRUCache多处对heap的需要等等
- 无法查询谓词因为VSTREE中只能过滤得到点的候选解如果有对边的查询是否可以分离另加考虑。
- ->constant->B被视为不连通图不处理是否可以在中修改使得到结果select ?v0 where { ?v1 <predicate> ?v2 . }
- Can not operate on the same db when connect to local server and native!
- auto关键字和智能指针
- 实现内存池来管理内存?
- 在BasicQuery.cpp中的encodeBasicQuery函数中发现有pre_id==-1时就可以直接中止查询返回空值
- 能不能不用转换到表连接来获得结果,目前只相当于用图算法做了一遍预处理!
- join可以不按照树序考虑评估每两个表的连接代价
1. 用机器学习(对查询分类寻找最优,梯度下降,调参)评估深搜顺序的好坏
2. 压缩字符串:整体控制是否启动(比如安装时),同时/不同时用于内存和硬盘。对单个string根据结构判断是否压缩一个标志位关键词映射string相关操作主要是比较相关压缩算法必须有效且不能太复杂
3. 实现对谓词的查询(再看论文)
4. 将查询里的常量加入变量集否则可能不连通而无法查询也可能影响join效率。如A->c, B->c本来应该是通过c相连的子图才对但目前的gstore无法识别。

View File

@ -37,7 +37,7 @@ void QueryParser::sparqlParser(const string& query, QueryTree& querytree)
parseTree(root, querytree);
printQuery(querytree);
querytree.print();
parser->free(parser);
tokens->free(tokens);
@ -53,6 +53,8 @@ int QueryParser::printNode(pANTLR3_BASE_TREE node, int dep)
int hasErrorNode = 0;
if (treeType == 0) hasErrorNode = 1;
for (int i=0; i < dep; i++) printf(" ");
printf("%d: %s\n",treeType,s);
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
@ -62,93 +64,97 @@ int QueryParser::printNode(pANTLR3_BASE_TREE node, int dep)
return hasErrorNode;
}
void QueryParser::parseTree(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
printf("parseTree\n");
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
//prologue 144
if (childNode->getType(childNode) == 144)
{
parsePrologue(childNode);
}
else
//select 155
if (childNode->getType(childNode) == 155)
{
querytree.setQueryForm(QueryTree::Select_Query);
parseTree(childNode, querytree);
}
else
//ask 13
if (childNode->getType(childNode) == 13)
{
querytree.setQueryForm(QueryTree::Ask_Query);
parseTree(childNode, querytree);
}
else
//select clause 156
if (childNode->getType(childNode) == 156)
{
parseSelectClause(childNode, querytree);
}
else
//group graph pattern 77
if (childNode->getType(childNode) == 77)
{
parseGroupPattern(childNode, querytree.getPatternGroup());
}
else
//order by 127
if (childNode->getType(childNode) == 127)
{
parseOrderBy(childNode, querytree);
}
else
//offset 120 limit 102
if (childNode->getType(childNode) == 120 || childNode->getType(childNode) == 102)
{
pANTLR3_BASE_TREE gchildNode=(pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
//prologue 144
if (childNode->getType(childNode) == 144)
{
parsePrologue(childNode);
}
else
//select 155
if (childNode->getType(childNode) == 155)
{
querytree.setQueryForm(QueryTree::Select_Query);
parseTree(childNode, querytree);
}
else
//ask 13
if (childNode->getType(childNode) == 13)
{
querytree.setQueryForm(QueryTree::Ask_Query);
parseTree(childNode, querytree);
}
else
//select clause 156
if (childNode->getType(childNode) == 156)
{
parseSelectClause(childNode, querytree);
}
else
//group graph pattern 77
if (childNode->getType(childNode) == 77)
{
parseGroupPattern(childNode, querytree.getGroupPattern());
}
else
//order by 127
if (childNode->getType(childNode) == 127)
{
parseOrderBy(childNode, querytree);
}
else
//offset 120 limit 102
if (childNode->getType(childNode) == 120 || childNode->getType(childNode) == 102)
{
pANTLR3_BASE_TREE gchildNode=(pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
//integer 83
if (gchildNode->getType(gchildNode) == 83)
{
string str;
parseString(gchildNode, str, 0);
//integer 83
if (gchildNode->getType(gchildNode) == 83)
{
string str;
parseString(gchildNode, str, 0);
stringstream str2int;
stringstream str2int;
int num;
int num;
str2int << str;
str2int >> num;
str2int << str;
str2int >> num;
if (childNode->getType(childNode) == 120 && num >= 0)
querytree.setOffset(num);
if (childNode->getType(childNode) == 102 && num >= 0)
querytree.setLimit(num);
}
if (childNode->getType(childNode) == 120 && num >= 0)
querytree.setOffset(num);
if (childNode->getType(childNode) == 102 && num >= 0)
querytree.setLimit(num);
}
else parseTree(childNode, querytree);
}
else parseTree(childNode, querytree);
}
}
void QueryParser::parsePrologue(pANTLR3_BASE_TREE node)
{
printf("parsePrologue\n");
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
//prefix 143
if (childNode->getType(childNode) == 143)
parsePrefix(childNode);
//prefix 143
if (childNode->getType(childNode) == 143)
parsePrefix(childNode);
}
}
void QueryParser::parsePrefix(pANTLR3_BASE_TREE node)
{
printf("parsePrefix\n");
string key;
string value;
@ -178,9 +184,11 @@ void QueryParser::replacePrefix(string& str)
//blank node
if (prefix == "_:") return;
cout << "prefix: " << prefix << endl;
if (_prefix_map.find(prefix) != _prefix_map.end())
{
str=_prefix_map[prefix].substr(0, _prefix_map[prefix].length() - 1) + str.substr(sep + 1 ,str.length() - sep - 1) + ">";
cout << "str: " << str << endl;
}
else
{
@ -192,6 +200,7 @@ void QueryParser::replacePrefix(string& str)
void QueryParser::parseSelectClause(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
printf("parseSelectClause\n");
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
@ -213,6 +222,7 @@ void QueryParser::parseSelectClause(pANTLR3_BASE_TREE node, QueryTree& querytree
void QueryParser::parseSelectVar(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
printf("parseSelectVar\n");
string var = "";
for (unsigned int i = 0; i < node->getChildCount(node); i++)
@ -227,8 +237,9 @@ void QueryParser::parseSelectVar(pANTLR3_BASE_TREE node, QueryTree& querytree)
}
}
void QueryParser::parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup)
void QueryParser::parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
printf("parseGroupPattern\n");
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
@ -237,39 +248,40 @@ void QueryParser::parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::PatternGr
//triples same subject 185
if (childNode->getType(childNode) == 185)
{
parsePattern(childNode, patterngroup);
parsePattern(childNode, grouppattern);
}
//optional 124 minus 108
if (childNode->getType(childNode) == 124 || childNode->getType(childNode) == 108)
{
parseOptionalOrMinus(childNode, patterngroup);
parseOptionalOrMinus(childNode, grouppattern);
}
//union 195
if (childNode->getType(childNode) == 195)
{
patterngroup.addOneGroupUnion();
parseUnion(childNode, patterngroup);
grouppattern.addOneGroupUnion();
parseUnion(childNode, grouppattern);
}
//filter 67
if (childNode->getType(childNode) == 67)
{
parseFilter(childNode, patterngroup);
parseFilter(childNode, grouppattern);
}
//group graph pattern 77
//redundant {}
if (childNode->getType(childNode) == 77)
{
parseGroupPattern(childNode, patterngroup);
parseGroupPattern(childNode, grouppattern);
}
}
}
void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup)
void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
printf("parsePattern\n");
string subject = "";
string predicate = "";
@ -288,7 +300,16 @@ void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::PatternGroup&
//predicate 142
if (childNode->getType(childNode) == 142)
{
parseString(childNode, predicate, 4);
pANTLR3_BASE_TREE gchildNode=(pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
//var 200
if (gchildNode->getType(gchildNode) == 200)
{
parseString(childNode, predicate, 1);
}
else
{
parseString(childNode, predicate, 4);
}
replacePrefix(predicate);
}
@ -301,12 +322,14 @@ void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::PatternGroup&
if (i != 0 && i % 2 == 0) //triples same subject
{
patterngroup.addOnePattern(QueryTree::Pattern(QueryTree::Element(subject), QueryTree::Element(predicate), QueryTree::Element(object)));
grouppattern.addOnePattern(QueryTree::GroupPattern::Pattern( QueryTree::GroupPattern::Pattern::Element(subject),
QueryTree::GroupPattern::Pattern::Element(predicate),
QueryTree::GroupPattern::Pattern::Element(object)));
}
}
}
void QueryParser::parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup)
void QueryParser::parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
//optional 124 minus 108
if (node->getType(node) == 124)
@ -322,15 +345,16 @@ void QueryParser::parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::Patter
if (childNode->getType(childNode) == 77)
{
if (node->getType(node) == 124)
patterngroup.addOneOptionalOrMinus('o');
grouppattern.addOneOptionalOrMinus('o');
else if (node->getType(node) == 108)
patterngroup.addOneOptionalOrMinus('m');
parseGroupPattern(childNode, patterngroup.getLastOptionalOrMinus());
grouppattern.addOneOptionalOrMinus('m');
parseGroupPattern(childNode, grouppattern.getLastOptionalOrMinus());
}
}
}
void QueryParser::parseUnion(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup)
void QueryParser::parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
printf("parseUnion\n");
@ -341,19 +365,19 @@ void QueryParser::parseUnion(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& pa
//group graph pattern 77
if (childNode->getType(childNode) == 77)
{
patterngroup.addOneUnion();
parseGroupPattern(childNode, patterngroup.getLastUnion());
grouppattern.addOneUnion();
parseGroupPattern(childNode, grouppattern.getLastUnion());
}
//union 195
if (childNode->getType(childNode) == 195)
{
parseUnion(childNode, patterngroup);
parseUnion(childNode, grouppattern);
}
}
}
void QueryParser::parseFilter(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup)
void QueryParser::parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
printf("parseFilter\n");
@ -365,52 +389,52 @@ void QueryParser::parseFilter(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& p
if (childNode->getType(childNode) == 190)
childNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
patterngroup.addOneFilterTree();
parseFilterTree(childNode, patterngroup, patterngroup.getLastFilterTree());
grouppattern.addOneFilterTree();
parseFilterTree(childNode, grouppattern, grouppattern.getLastFilterTree());
}
}
void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup, QueryTree::FilterTree& filter)
void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter)
{
printf("parseFilterTree\n");
switch (node->getType(node))
{
//! 192
case 192: filter.type = QueryTree::FilterTree::Not_type; break;
case 192: filter.type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
//not 115
case 115: filter.type = QueryTree::FilterTree::Not_type; break;
case 115: filter.type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
//or 125
case 125: filter.type = QueryTree::FilterTree::Or_type; break;
case 125: filter.type = QueryTree::GroupPattern::FilterTreeNode::Or_type; break;
//and 8
case 8: filter.type = QueryTree::FilterTree::And_type; break;
case 8: filter.type = QueryTree::GroupPattern::FilterTreeNode::And_type; break;
//equal 62
case 62: filter.type = QueryTree::FilterTree::Equal_type; break;
case 62: filter.type = QueryTree::GroupPattern::FilterTreeNode::Equal_type; break;
//not equal 116
case 116: filter.type = QueryTree::FilterTree::NotEqual_type; break;
case 116: filter.type = QueryTree::GroupPattern::FilterTreeNode::NotEqual_type; break;
//less 100
case 100: filter.type = QueryTree::FilterTree::Less_type; break;
case 100: filter.type = QueryTree::GroupPattern::FilterTreeNode::Less_type; break;
//less equal 101
case 101: filter.type = QueryTree::FilterTree::LessOrEqual_type; break;
case 101: filter.type = QueryTree::GroupPattern::FilterTreeNode::LessOrEqual_type; break;
//greater 72
case 72: filter.type = QueryTree::FilterTree::Greater_type; break;
case 72: filter.type = QueryTree::GroupPattern::FilterTreeNode::Greater_type; break;
//greater equal 73
case 73: filter.type = QueryTree::FilterTree::GreaterOrEqual_type; break;
case 73: filter.type = QueryTree::GroupPattern::FilterTreeNode::GreaterOrEqual_type; break;
//regex 150
case 150: filter.type = QueryTree::FilterTree::Builtin_regex_type; break;
case 150: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type; break;
//lang 96
case 96: filter.type = QueryTree::FilterTree::Builtin_lang_type; break;
case 96: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type; break;
//langmatches 97
case 97: filter.type = QueryTree::FilterTree::Builtin_langmatches_type; break;
case 97: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type; break;
//bound 23
case 23: filter.type = QueryTree::FilterTree::Builtin_bound_type; break;
case 23: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type; break;
//in 81
case 81: filter.type = QueryTree::FilterTree::Builtin_in_type; break;
case 81: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type; break;
//exists 63
case 63: filter.type = QueryTree::FilterTree::Builtin_exists_type; break;
case 63: filter.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_exists_type; break;
//not exists 117
case 117: filter.type = QueryTree::FilterTree::Not_type; break;
case 117: filter.type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
default:
return;
@ -425,11 +449,10 @@ void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::PatternGrou
//in 81
if (childNode->getType(childNode) == 81)
{
filter.child.push_back(QueryTree::FilterTree::FilterTreeChild());
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child[0].type = 't';
filter.child[0].ptr = new QueryTree::FilterTree();
filter.child[0].ptr->type = QueryTree::FilterTree::Builtin_in_type;
parseVarInExpressionList(node, *filter.child[0].ptr, 1);
filter.child[0].node.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type;
parseVarInExpressionList(node, filter.child[0].node, 1);
return;
}
@ -446,12 +469,11 @@ void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::PatternGrou
//not exists 117
if (node->getType(node) == 117)
{
filter.child.push_back(QueryTree::FilterTree::FilterTreeChild());
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child[0].type = 't';
filter.child[0].ptr = new QueryTree::FilterTree();
filter.child[0].ptr->type = QueryTree::FilterTree::Builtin_exists_type;
filter.child[0].node.type = QueryTree::GroupPattern::FilterTreeNode::Builtin_exists_type;
parseExistsGroupPattern(node, patterngroup, *filter.child[0].ptr);
parseExistsGroupPattern(node, grouppattern, filter.child[0].node);
return;
}
@ -459,7 +481,7 @@ void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::PatternGrou
//exists 63
if (node->getType(node) == 63)
{
parseExistsGroupPattern(node, patterngroup, filter);
parseExistsGroupPattern(node, grouppattern, filter);
return;
}
@ -479,7 +501,7 @@ void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::PatternGrou
childNode = gchildNode;
}
filter.child.push_back(QueryTree::FilterTree::FilterTreeChild());
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
//unary 190
if (childNode->getType(childNode) == 190)
@ -497,14 +519,14 @@ void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::PatternGrou
else
{
filter.child[i].type = 't';
filter.child[i].ptr = new QueryTree::FilterTree();
parseFilterTree(childNode, patterngroup, *filter.child[i].ptr);
parseFilterTree(childNode, grouppattern, filter.child[i].node);
}
}
}
void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::FilterTree& filter, unsigned int begin)
void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTreeNode& filter, unsigned int begin)
{
printf("parseVarInExpressionList\n");
for (unsigned int i = begin; i < node->getChildCount(node); i++)
{
@ -513,7 +535,7 @@ void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::Fi
//unary 190
if (childNode->getType(childNode) == 190)
{
filter.child.push_back(QueryTree::FilterTree::FilterTreeChild());
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child[i - begin].type = 's';
parseString(childNode, filter.child[i - begin].arg, 1);
@ -527,7 +549,7 @@ void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::Fi
{
pANTLR3_BASE_TREE gchildNode=(pANTLR3_BASE_TREE) childNode->getChild(childNode, j);
filter.child.push_back(QueryTree::FilterTree::FilterTreeChild());
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child[i + j - begin].type = 's';
parseString(gchildNode, filter.child[i + j - begin].arg, 1);
@ -537,17 +559,18 @@ void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::Fi
}
}
void QueryParser::parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup, QueryTree::FilterTree& filter)
void QueryParser::parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter)
{
printf("parseExistsGroupPattern\n");
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, 0);
//group graph pattern 77
if (childNode->getType(childNode) == 77)
{
patterngroup.addOneExistsGroupPattern();
filter.exists_patterngroup_id = (int)patterngroup.filter_exists_patterngroups[(int)patterngroup.filter_exists_patterngroups.size() - 1].size() - 1;
parseGroupPattern(childNode, patterngroup.getLastExistsGroupPattern());
grouppattern.addOneExistsGroupPattern();
filter.exists_grouppattern_id = (int)grouppattern.filter_exists_grouppatterns[(int)grouppattern.filter_exists_grouppatterns.size() - 1].size() - 1;
parseGroupPattern(childNode, grouppattern.getLastExistsGroupPattern());
return;
}
@ -636,76 +659,3 @@ void QueryParser::parseString(pANTLR3_BASE_TREE node, string& str, int dep)
}
}
}
void QueryParser::printQuery(QueryTree& querytree)
{
}
void QueryParser::printPatternGroup(QueryTree::PatternGroup &patterngroup, int dep)
{
}
void QueryParser::printFilter(std::vector<QueryTree::PatternGroup> &exist_patterngroups, QueryTree::FilterTree &filter, int dep)
{
if (filter.type == QueryTree::FilterTree::Not_type) printf("!");
if (filter.type == QueryTree::FilterTree::Builtin_regex_type) printf("regex");
if (filter.type == QueryTree::FilterTree::Builtin_lang_type) printf("lang");
if (filter.type == QueryTree::FilterTree::Builtin_langmatches_type) printf("langmatches");
if (filter.type == QueryTree::FilterTree::Builtin_bound_type) printf("bound");
if (filter.type == QueryTree::FilterTree::Builtin_in_type)
{
if (filter.child[0].type == 's') printf("%s", filter.child[0].arg.c_str());
printf(" in (");
for (int i = 1; i < (int)filter.child.size(); i++)
{
if (i != 1) printf(" , ");
if (filter.child[i].type == 's') printf("%s", filter.child[i].arg.c_str());
}
printf(")");
return;
}
if (filter.type == QueryTree::FilterTree::Builtin_exists_type)
{
printf("exists");
printPatternGroup(exist_patterngroups[filter.exists_patterngroup_id], dep);
return;
}
printf("(");
if ((int)filter.child.size() >= 1)
{
if (filter.child[0].type == 's') printf("%s", filter.child[0].arg.c_str());
if (filter.child[0].type == 't') printFilter(exist_patterngroups, *filter.child[0].ptr, dep);
}
if (filter.type == QueryTree::FilterTree::Or_type) printf(" || ");
if (filter.type == QueryTree::FilterTree::And_type) printf(" && ");
if (filter.type == QueryTree::FilterTree::Equal_type) printf(" = ");
if (filter.type == QueryTree::FilterTree::NotEqual_type) printf(" != ");
if (filter.type == QueryTree::FilterTree::Less_type) printf(" < ");
if (filter.type == QueryTree::FilterTree::LessOrEqual_type) printf(" <= ");
if (filter.type == QueryTree::FilterTree::Greater_type) printf(" > ");
if (filter.type == QueryTree::FilterTree::GreaterOrEqual_type) printf(" >= ");
if (filter.type == QueryTree::FilterTree::Builtin_regex_type || filter.type == QueryTree::FilterTree::Builtin_langmatches_type) printf(", ");
if ((int)filter.child.size() >= 2)
{
if (filter.child[1].type == 's') printf("%s", filter.child[1].arg.c_str());
if (filter.child[1].type == 't') printFilter(exist_patterngroups, *filter.child[1].ptr, dep);
}
if ((int)filter.child.size() >= 3)
{
if (filter.type == QueryTree::FilterTree::Builtin_regex_type && filter.child[2].type == 's')
printf(", %s", filter.child[2].arg.c_str());
}
printf(")");
}

View File

@ -25,20 +25,17 @@ private:
void replacePrefix(std::string& str);
void parseSelectClause(pANTLR3_BASE_TREE node, QueryTree& querytree);
void parseSelectVar(pANTLR3_BASE_TREE node, QueryTree& querytree);
void parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup);
void parsePattern(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup);
void parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup);
void parseUnion(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup);
void parseFilter(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup);
void parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup, QueryTree::FilterTree& filter);
void parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::FilterTree& filter, unsigned int begin);
void parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::PatternGroup& patterngroup, QueryTree::FilterTree& filter);
void parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
void parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
void parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
void parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
void parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
void parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter);
void parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTreeNode& filter, unsigned int begin);
void parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter);
void parseOrderBy(pANTLR3_BASE_TREE node, QueryTree& querytree);
void parseString(pANTLR3_BASE_TREE node, std::string& str, int dep);
void printQuery(QueryTree& querytree);
void printPatternGroup(QueryTree::PatternGroup &patterngroup, int dep = 0);
void printFilter(std::vector<QueryTree::PatternGroup> &exist_patterngroups, QueryTree::FilterTree &filter, int dep);
public:
QueryParser();

View File

@ -57,6 +57,8 @@ BasicQuery::clear()
this->candidate_list = NULL;
delete[] this->is_literal_candidate_added;
this->is_literal_candidate_added = NULL;
delete[] this->need_retrieve;
this->need_retrieve = NULL;
for (unsigned i=0;i<this->result_list.size();++i)
{
delete[] this->result_list[i];
@ -64,6 +66,18 @@ BasicQuery::clear()
}
}
//get the number of join variables whose need_retrieve flag was set
//(counted in encodeBasicQuery for variables with degree > 1)
int
BasicQuery::getRetrievedVarNum()
{
	const int retrieved = this->retrieve_var_num;
	return retrieved;
}
//get the value of total_var_num: it starts as select_var_num in encodeBasicQuery
//and is increased in addInVarNotInSelect for variables that are not in var_str2id
int
BasicQuery::getTotalVarNum()
{
	const int total = this->total_var_num;
	return total;
}
// get the num of vars need to be joined(>= select_var_num)
int
BasicQuery::getVarNum()
@ -161,6 +175,12 @@ BasicQuery::getResultList()
return result_list;
}
//get the address of the internal result list, so the caller can work on it without a copy
//the BasicQuery still owns the list
vector<int*>*
BasicQuery::getResultListPointer()
{
	vector<int*>* list_ptr = &(this->result_list);
	return list_ptr;
}
const EntityBitSet&
BasicQuery::getVarBitSet(int _i)const
{
@ -172,18 +192,18 @@ BasicQuery::getVarBitSet(int _i)const
bool
BasicQuery::isInEdge(int _var, int _i_th_edge)const
{
return this->edge_type[_var][_i_th_edge] == BasicQuery::EDGE_IN;
return this->edge_type[_var][_i_th_edge] == Util::EDGE_IN;
}
// check whether the i-th edge of _var is OUT edge
bool
BasicQuery::isOutEdge(int _var, int _i_th_edge)const
{
return this->edge_type[_var][_i_th_edge] == BasicQuery::EDGE_OUT;
return this->edge_type[_var][_i_th_edge] == Util::EDGE_OUT;
}
bool
BasicQuery::isOneDegreeNotSelectVar(string& _no_sense_var)
BasicQuery::isOneDegreeNotJoinVar(string& _no_sense_var)
{
// vars begin with ?
if(_no_sense_var.at(0) != '?')
@ -195,6 +215,12 @@ BasicQuery::isOneDegreeNotSelectVar(string& _no_sense_var)
{
return false;
}
//ERROR:the value returned is 0
//if(this->var_str2id[_no_sense_var] != -1)
if(this->var_str2id.find(_no_sense_var) != this->var_str2id.end())
{
return false;
}
return true;
}
@ -251,6 +277,18 @@ BasicQuery::setAddedLiteralCandidate(int _var)
this->is_literal_candidate_added[_var] = true;
}
//check whether _var has its need_retrieve flag set
//return false for an id outside [0, MAX_VAR_NUM) or when the flag array is not allocated
//(clear() and null_initial() set it to NULL), instead of reading out of bounds
bool
BasicQuery::if_need_retrieve(int _var)
{
	if(this->need_retrieve == NULL)
	{
		return false;
	}
	//need_retrieve is allocated with MAX_VAR_NUM entries in initial()
	if(_var < 0 || _var >= BasicQuery::MAX_VAR_NUM)
	{
		return false;
	}
	return this->need_retrieve[_var];
}
//check whether _var is a graph variable (0 <= _var < graph_var_num)
//whose need_retrieve flag is not set
bool
BasicQuery::isSatelliteInJoin(int _var)
{
	if(_var < 0 || _var >= this->graph_var_num)
	{
		return false;
	}
	return !this->need_retrieve[_var];
}
void
BasicQuery::updateSubSig(int _sub_id, int _pre_id, int _obj_id, string _obj,int _line_id)
{
@ -261,9 +299,9 @@ BasicQuery::updateSubSig(int _sub_id, int _pre_id, int _obj_id, string _obj,int
Signature::encodeStr2Entity(_obj.c_str(), this->var_sig[_sub_id]);
}
if(_pre_id != -1)
if(_pre_id >= 0)
{
Signature::encodePredicate2Entity(_pre_id, this->var_sig[_sub_id], BasicQuery::EDGE_OUT);
Signature::encodePredicate2Entity(_pre_id, this->var_sig[_sub_id], Util::EDGE_OUT);
}
// update var(sub)_degree & edge_id according to this triple
@ -271,7 +309,7 @@ BasicQuery::updateSubSig(int _sub_id, int _pre_id, int _obj_id, string _obj,int
// edge_id[var_id][i] : the ID of the i-th edge of the var
this->edge_id[_sub_id][sub_degree] = _line_id;
this->edge_nei_id[_sub_id][sub_degree] = _obj_id;
this->edge_type[_sub_id][sub_degree] = BasicQuery::EDGE_OUT;
this->edge_type[_sub_id][sub_degree] = Util::EDGE_OUT;
this->edge_pre_id[_sub_id][sub_degree] = _pre_id;
this->var_degree[_sub_id] ++;
}
@ -283,14 +321,14 @@ BasicQuery::updateObjSig(int _obj_id, int _pre_id, int _sub_id, string _sub,int
bool sub_is_str = (_sub_id == -1) && (_sub.at(0) != '?');
if(sub_is_str)
{
//////cout << "str2entity" << endl;
cout << "str2entity" << endl;
Signature::encodeStr2Entity(_sub.c_str(), this->var_sig[_obj_id]);
}
if(_pre_id != -1)
if(_pre_id >= 0)
{
//////cout << "pre2entity" << endl;
Signature::encodePredicate2Entity(_pre_id, this->var_sig[_obj_id], BasicQuery::EDGE_IN);
cout << "pre2entity" << endl;
Signature::encodePredicate2Entity(_pre_id, this->var_sig[_obj_id], Util::EDGE_IN);
}
// update var(sub)_degree & edge_id according to this triple
@ -298,7 +336,7 @@ BasicQuery::updateObjSig(int _obj_id, int _pre_id, int _sub_id, string _sub,int
// edge_id[var_id][i] : the ID of the i-th edge of the var
this->edge_id[_obj_id][obj_degree] = _line_id;
this->edge_nei_id[_obj_id][obj_degree] = _sub_id;
this->edge_type[_obj_id][obj_degree] = BasicQuery::EDGE_IN;
this->edge_type[_obj_id][obj_degree] = Util::EDGE_IN;
this->edge_pre_id[_obj_id][obj_degree] = _pre_id;
this->var_degree[_obj_id] ++;
}
@ -307,9 +345,13 @@ BasicQuery::updateObjSig(int _obj_id, int _pre_id, int _sub_id, string _sub,int
void
BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_var)
{
//////cout << "IN buildBasicSignature" << endl;
this->initial();
//////cout << "after init" << endl;
//TODO:the third parameter should be selected predicate variables
//(ordered, and merged with selected s/o in upper level)
//we append the candidates for selected pre_var to original select_var_num columns
//TODO:add pre var, assign name and select=true (not disturb the order between pres)
cout << "IN buildBasicSignature" << endl;
//this->initial();
//cout << "after init" << endl;
this->buildTuple2Freq();
@ -325,18 +367,20 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
#endif
for(int i = 0; i < (this->select_var_num); ++i)
{
//NOTICE:not place pre var in join
string _var = _query_var[i];
this->var_str2id[_var] = i;
this->var_name[i] = _var;
}
//////cout << "select variables: ";
cout << "select variables: ";
for(unsigned i = 0; i < this->var_str2id.size(); ++i)
{
//////cout << "[" << this->var_name[i] << ", " << i << " " << this->var_str2id[this->var_name[i]] << "]\t";
cout << "[" << this->var_name[i] << ", " << i << " " << this->var_str2id[this->var_name[i]] << "]\t";
}
//////cout << endl;
cout << endl;
this->total_var_num = this->select_var_num;
if(this->encode_method == BasicQuery::SELECT_VAR)
{
this->findVarNotInSelect();
@ -348,13 +392,13 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
// assign the this->var_num, all need to join
this->graph_var_num = this->var_str2id.size();
////cout<< "graph variables: ";
cout<< "graph variables: ";
for(unsigned i = 0; i < this->var_str2id.size(); i ++)
{
////cout << "[" << this->var_name[i] << ", " << i << " " << this->var_str2id[this->var_name[i]] << "]\t";
cout << "[" << this->var_name[i] << ", " << i << " " << this->var_str2id[this->var_name[i]] << "]\t";
}
////cout << endl;
////cout << "before new IDList!" << endl; //just for debug
cout << endl;
cout << "before new IDList!" << endl; //just for debug
this->candidate_list = new IDList[this->graph_var_num];
@ -363,13 +407,38 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
string& sub = this->triple_vt[i].subject;
string& pre = this->triple_vt[i].predicate;
string& obj = this->triple_vt[i].object;
// -1 if not found, this means this query is invalid
int pre_id = _p_kvstore->getIDByPredicate(pre);
{
stringstream _ss;
_ss << "pre2id: " << pre << "=>" << pre_id << endl;
Util::logging(_ss.str());
}
int pre_id = -1; //not found
if(pre[0] == '?') //pre var
{
int pid = this->getPreVarID(pre);
if(pid == -1)
{
//pid = this->select_var_num + this->pre_var.size();
//this->pre_var[pid] = PreVar(pre);
this->pre_var.push_back(PreVar(pre));
pid = this->pre_var.size() - 1;
}
//map<int, PreVar>::iterator it = this->pre_var.find(pid);
//it->second.triples.push_back(i);
this->pre_var[pid].triples.push_back(i);
pre_id = -2; //mark that this is a pre var
}
else
{
// -1 if not found, this means this query is invalid
pre_id = _p_kvstore->getIDByPredicate(pre);
{
stringstream _ss;
_ss << "pre2id: " << pre << "=>" << pre_id << endl;
Util::logging(_ss.str());
}
if(pre_id == -1)
{
//BETTER:this is too robust, not only one query, try return false
cerr << "invalid query because the pre is not found: " << pre << endl;
exit(1);
}
}
int sub_id = -1;
int obj_id = -1;
@ -421,18 +490,70 @@ BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const vector<string>& _query_v
bool two_var_edge = (sub_is_var && obj_is_var);
if(two_var_edge)
{
if(pre_id != -1)
if(pre_id >= 0)
{
////cout << "pre2edge" << endl;
cout << "pre2edge" << endl;
Signature::encodePredicate2Edge(pre_id, this->edge_sig[sub_id][obj_id]);
// this->edge_pre_id[sub_id][obj_id] = pre_id;
}
}
}
////cout << "OUT encodeBasicQuery" << endl;
//set need_retrieve for vars in join whose total degree > 1
this->retrieve_var_num = 0;
for(int i = 0; i < this->graph_var_num; ++i)
{
if(this->var_degree[i] > 1)
{
this->need_retrieve[i] = true;
this->retrieve_var_num++;
}
}
cout << "OUT encodeBasicQuery" << endl;
}
//find the position in pre_var of the predicate variable called _name
//return -1 if no predicate variable has this name
//BETTER:use string2int map to speed up if using in too-many-loops
int
BasicQuery::getPreVarID(const string& _name) const
{
	unsigned pos = 0;
	//linear scan, stops at the first entry with the same name
	while(pos < this->pre_var.size() && !(this->pre_var[pos].name == _name))
	{
		++pos;
	}
	if(pos < this->pre_var.size())
	{
		return pos;
	}
	return -1;
}
//get the number of predicate variables recorded in pre_var
unsigned
BasicQuery::getPreVarNum() const
{
	const unsigned count = this->pre_var.size();
	return count;
}
//get the predicate variable stored at position _id of pre_var
//an out-of-range _id still falls back to the first predicate variable;
//if there is no predicate variable at all, a shared empty PreVar is returned
//instead of indexing an empty vector (undefined behaviour)
const PreVar&
BasicQuery::getPreVarByID(unsigned _id) const
{
	//_id is unsigned, so only the upper bound has to be checked
	if(_id < this->pre_var.size())
	{
		return this->pre_var[_id];
	}
	if(!this->pre_var.empty())
	{
		return this->pre_var[0];
	}
	static const PreVar empty_pre_var;
	return empty_pre_var;
}
//int
//BasicQuery::getIDByPreVarName(const string& _name) const
//{
//return 0;
//}
//get the id assigned to the variable called _name
//return -1 if the name is not in var_str2id
//NOTICE:operator[] alone is not used here, because it inserts a missing name with id 0:
//that looks like a valid id and also enlarges var_str2id, whose size is used as graph_var_num
int
BasicQuery::getIDByVarName(const string& _name)
{
	if(this->var_str2id.find(_name) == this->var_str2id.end())
	{
		return -1;
	}
	//the key exists here, so operator[] only reads
	return this->var_str2id[_name];
}
void
BasicQuery::addTriple(const Triple& _triple)
@ -455,6 +576,7 @@ BasicQuery::null_initial()
this->var_str2id.clear();
this->var_degree = NULL;
this->is_literal_candidate_added = NULL;
this->need_retrieve = NULL;
this->edge_id = NULL;
this->edge_nei_id = NULL;
this->edge_pre_id = NULL;
@ -486,13 +608,15 @@ BasicQuery::initial()
this->edge_type = new char*[BasicQuery::MAX_VAR_NUM];
this->is_literal_candidate_added = new bool[BasicQuery::MAX_VAR_NUM];
this->need_retrieve = new bool[BasicQuery::MAX_VAR_NUM];
for(int i = 0; i < BasicQuery::MAX_VAR_NUM; i ++)
for(int i = 0; i < BasicQuery::MAX_VAR_NUM; ++i)
{
this->var_degree[i] = 0;
this->var_sig[i].reset();
this->var_name[i] = "";
this->is_literal_candidate_added[i] = false;
this->need_retrieve[i] = false;
this->edge_sig[i] = new EdgeBitSet[BasicQuery::MAX_VAR_NUM];
this->edge_id[i] = new int[BasicQuery::MAX_VAR_NUM];
@ -500,7 +624,7 @@ BasicQuery::initial()
this->edge_pre_id[i] = new int[BasicQuery::MAX_VAR_NUM];
this->edge_type[i] = new char[BasicQuery::MAX_VAR_NUM];
for(int j = 0; j < BasicQuery::MAX_VAR_NUM; j ++)
for(int j = 0; j < BasicQuery::MAX_VAR_NUM; ++j)
{
this->edge_sig[i][j].reset();
this->edge_id[i][j] = -1;
@ -527,6 +651,7 @@ void BasicQuery::addInVarNotInSelect()
bool not_var_yet = (find_sub_itr == this->var_str2id.end());
if(not_var_yet)
{
this->total_var_num++;
int _freq = this->tuple2freq[sub];
// so the var str must occur more than once
if(_freq > 1)
@ -545,6 +670,7 @@ void BasicQuery::addInVarNotInSelect()
bool not_var_yet = (find_obj_itr == this->var_str2id.end());
if(not_var_yet)
{
this->total_var_num++;
int _freq = this->tuple2freq[obj];
// so the var str must occur more than once
if(_freq > 1)
@ -695,7 +821,7 @@ BasicQuery::getVarID_FirstProcessWhenJoin()
{
// when join variables' mapping candidate list, we should start with entity variable.
// since literal variables' candidate list may not include all literals.
if (this->isLiteralVariable(i))
if(this->isLiteralVariable(i) || this->isSatelliteInJoin(i))
{
continue;
}

View File

@ -16,11 +16,12 @@
#include "IDList.h"
//NOTICE: the query graph must be linked
//var_id == -1: constant(string), entity or literal, or vars not in join process
//var_id == -1: constant(string), entity or literal
//All constants should be dealt with before joining tables!
//A var in query can be subject or object, and both. Once in subject,
//it cannot contain literal, while in object it may contain entity,
//literal, or both
//The vars not in join process are also encoded, so not -1
//
//a subject cannot be literal, but an object can be entity or literal
//not supported: ?v1 and this is a predicate
@ -34,13 +35,11 @@
//(i.e. no constant neighbor which will restrict this variable, otherwise, we can acquire
//this var's can_list by querying in kvstore according to the constant and pre)
//TODO:free var's neighbor id != -1, how about vars not in join?(degree == 1 ), donot
//need to add? or already added in literal_edge_filter, just as constants?
//it is ok for var in select to be free var, but this can not be used as start point.
//(we assume start point is all ok and then search deeply)
//TODO:fix the graph below!!!
//It is ok for var in select to be free var, but this can not be used as start point.
//(we assume candidates of the start point is all ok and then search deeply)
//However, we can always find a start point because not all vars are all in objects!
//(otherwise, no edge in query graph)
//
//What is more, some graphs will be regarded as not-connected, such as:
//A-c0-B, c0 is a constant, we should do a A x B here!
//two-part-matching, ABC and c1c2, each node connects with this two constants.
@ -78,6 +77,28 @@
//| +---object |
//+---------------------------------------------------------------------------+
//a predicate variable of the query (a predicate that starts with '?')
class PreVar
{
public:
	//name of the variable
	string name;
	//ids of the triples this variable appears in; encodeBasicQuery pushes the index of each such triple
	vector<int> triples;
	//flag kept with the variable, false unless given to the constructor
	//NOTE(review): presumably marks whether the variable is in the select clause - confirm
	bool selected;

	//unnamed, not selected
	PreVar() : selected(false)
	{
	}

	//named, not selected
	PreVar(string _name) : name(_name), selected(false)
	{
	}

	//named, with an explicit selected flag
	PreVar(string _name, bool _flag) : name(_name), selected(_flag)
	{
	}
};
class BasicQuery
{
private:
@ -92,16 +113,20 @@ private:
// id < select_var_num means in select
int select_var_num;
// var_num is different from that in SPARQLquery
// var_num is different from that in SPARQLquery
// because there are some variable not in select
int graph_var_num;
int total_var_num;
int retrieve_var_num;
string* var_name;
IDList* candidate_list;
vector<int*> result_list;
int* var_degree;
// whether has added the variable's literal candidate
//whether has added the variable's literal candidate
bool* is_literal_candidate_added;
//if need to be retrieved by vstree or generate when join(first is graph var)
bool* need_retrieve;
char encode_method;
@ -121,6 +146,7 @@ private:
EntityBitSet* var_sig;
// BETTER:edge sig is of little importance
// edge_sig[sub_id][obj_id]
EdgeBitSet** edge_sig;
@ -133,10 +159,12 @@ private:
void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
//infos for predicate variables
vector<PreVar> pre_var;
public:
static const char EDGE_IN = 'i';
static const char EDGE_OUT= 'o';
static const int MAX_VAR_NUM = 10;
static const int MAX_PRE_VAR_NUM = 10;
static const char NOT_JUST_SELECT = 'a';
static const char SELECT_VAR = 's';
@ -151,12 +179,20 @@ public:
//get the number of selected variables
int getSelectVarNum();
//get the total number of variables
int getTotalVarNum();
//get the retrieved number of variables
int getRetrievedVarNum();
// get the name of _var in the query graph
std::string getVarName(int _var);
// get triples number, also sentences number
int getTripleNum();
int getIDByVarName(const string& _name);
std::string to_str();
@ -166,7 +202,7 @@ public:
// get the ID of the i-th edge of _var
int getEdgeID(int _var, int _i_th_edge);
// get the ID of the i-th edge of _var
// get the ID of var, where the i-th edge of _var points to
int getEdgeNeighborID(int _var, int _i_th_edge);
// get the preID of the i-th edge of _var
@ -190,6 +226,7 @@ public:
// get the result list of _var in the query graph
vector<int*>& getResultList();
vector<int*>* getResultListPointer();
// get the entity signature of _var in the query graph
const EntityBitSet& getEntitySignature(int _var);
@ -200,7 +237,7 @@ public:
// check whether the i-th edge of _var is OUT edge
bool isOutEdge(int _var, int _i_th_edge)const;
bool isOneDegreeNotSelectVar(std::string& _not_select_var);
bool isOneDegreeNotJoinVar(std::string& _not_select_var);
// check whether _var may include some literal results
bool isLiteralVariable(int _var);
@ -213,9 +250,18 @@ public:
// set _var's literal candidates has been added
void setAddedLiteralCandidate(int _var);
//check if need to be retrieved
bool if_need_retrieve(int _var);
bool isSatelliteInJoin(int _var);
// encode relative signature data of the query graph
void encodeBasicQuery(KVstore* _p_kvstore, const std::vector<std::string>& _query_var);
unsigned getPreVarNum() const;
const PreVar& getPreVarByID(unsigned) const;
//int getIDByPreVarName(const std::string& _name) const;
int getPreVarID(const string& _name) const;
void addTriple(const Triple& _triple);
void print(ostream& _out_stream);

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
/*=============================================================================
# Filename: GeneralEvaluation.h
# Author: Jiaqi, Chen
# Mail: 1181955272@qq.com
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:33
# Description:
=============================================================================*/
@ -9,37 +9,45 @@
#ifndef _QUERY_GENERALEVALUATION_H
#define _QUERY_GENERALEVALUATION_H
//#include "../Database/Database.h"
#include "SPARQLquery.h"
#include "../VSTree/VSTree.h"
#include "../Database/Join.h"
#include "../Database/Strategy.h"
#include "../KVstore/KVstore.h"
#include "../Query/ResultSet.h"
#include "../Util/Util.h"
#include "../Parser/QueryParser.h"
#include "QueryTree.h"
#include "SPARQLquery.h"
#include "Varset.h"
#include "../Database/Database.h"
#include "../KVstore/KVstore.h"
#include "RegexExpression.h"
#include "ResultFilter.h"
class GeneralEvaluation
{
private:
QueryParser _query_parser;
QueryTree _query_tree;
SPARQLquery _sparql_query;
std::vector <Varset> _sparql_query_varset;
QueryParser query_parser;
QueryTree query_tree;
SPARQLquery sparql_query;
std::vector <Varset> sparql_query_varset;
VSTree *vstree;
KVstore *kvstore;
ResultSet &result_set;
ResultFilter result_filter;
bool handle(SPARQLquery&);
public:
explicit GeneralEvaluation(KVstore *_kvstore):kvstore(_kvstore){}
QueryParser& getQueryParser();
QueryTree& getQueryTree();
SPARQLquery& getSPARQLQuery();
std::vector< std::vector< std::string > > getSPARQLQueryVarset();
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, ResultSet &_result_set):
vstree(_vstree), kvstore(_kvstore), result_set(_result_set){}
bool parseQuery(const std::string &query);
std::vector<std::vector<std::string> > getSPARQLQueryVarset();
void getVarset(QueryTree::PatternGroup &patterngroup);
void getBasicQuery(QueryTree::PatternGroup &patterngroup);
void doQuery(const std::string &_query);
bool parseQuery(const std::string &_query);
class FilterExistsPatternGroupResultSetRecord;
void getBasicQuery(QueryTree::GroupPattern &grouppattern);
class FilterExistsGroupPatternResultSetRecord;
class TempResult
{
@ -60,10 +68,10 @@ public:
void doMinus(TempResult &x, TempResult &r);
void doDistinct(TempResult &r);
void mapFilterTree2Varset(QueryTree::FilterTree& filter, Varset &v);
void doFilter(QueryTree::FilterTree &filter, FilterExistsPatternGroupResultSetRecord &filter_exists_patterngroup_resultset_record, TempResult &r, KVstore *kvstore);
void getFilterString(int* x, QueryTree::FilterTree::FilterTreeChild &child, string &str, KVstore *kvstore);
bool matchFilterTree(int* x, QueryTree::FilterTree& filter, FilterExistsPatternGroupResultSetRecord &filter_exists_patterngroup_resultset_record, KVstore *kvstore);
void mapFilterTree2Varset(QueryTree::GroupPattern::FilterTreeNode& filter, Varset &v);
void doFilter(QueryTree::GroupPattern::FilterTreeNode &filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, TempResult &r, KVstore *kvstore);
void getFilterString(int* x, QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild &child, string &str, KVstore *kvstore);
bool matchFilterTree(int* x, QueryTree::GroupPattern::FilterTreeNode& filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, KVstore *kvstore);
void print();
};
@ -83,7 +91,7 @@ public:
void doMinus(TempResultSet &x, TempResultSet &r);
void doDistinct(Varset &projection, TempResultSet &r);
void doFilter(QueryTree::FilterTree& filter, FilterExistsPatternGroupResultSetRecord &filter_exists_patterngroup_resultset_record, TempResultSet &r, KVstore *kvstore);
void doFilter(QueryTree::GroupPattern::FilterTreeNode& filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, TempResultSet &r, KVstore *kvstore);
void print();
};
@ -101,24 +109,36 @@ public:
{ return p; }
};
std::vector<EvaluationUnit> evaluationPlan;
std::vector<EvaluationUnit> semantic_evaluation_plan;
void generateEvaluationPlan(QueryTree::PatternGroup &patterngroup);
void dfsJoinableResultGraph(int x, vector < pair<char, int> > &node_info, vector < vector<int> > &edge, QueryTree::PatternGroup &patterngroup);
void generateEvaluationPlan(QueryTree::GroupPattern &grouppattern);
void dfsJoinableResultGraph(int x, vector < pair<char, int> > &node_info, vector < vector<int> > &edge, QueryTree::GroupPattern &grouppattern);
std::stack<TempResultSet*> evaluationStack;
std::stack<TempResultSet*> semantic_evaluation_result_stack;
class FilterExistsPatternGroupResultSetRecord
class FilterExistsGroupPatternResultSetRecord
{
public:
std::vector<TempResultSet*> resultset;
std::vector< std::vector<Varset> > common;
std::vector< std::vector< std::pair< std::vector<int>, std::vector<int> > > > common2resultset;
} filter_exists_patterngroup_resultset_record;
} filter_exists_grouppattern_resultset_record;
int countFilterExistsPatternGroup(QueryTree::FilterTree& filter);
int countFilterExistsGroupPattern(QueryTree::GroupPattern::FilterTreeNode& filter);
void doEvaluationPlan();
class ExpansionEvaluationStackUnit
{
public:
QueryTree::GroupPattern grouppattern;
SPARQLquery sparql_query;
TempResultSet* result;
};
std::vector <ExpansionEvaluationStackUnit> expansion_evaluation_stack;
bool expanseFirstOuterUnionGroupPattern(QueryTree::GroupPattern &grouppattern, std::deque<QueryTree::GroupPattern> &queue);
void queryRewriteEncodeRetrieveJoin(int dep, ResultFilter &result_filter);
void getFinalResult(ResultSet& result_str);
};

View File

@ -1,9 +1,10 @@
/*
* IDList.cpp
*
* Created on: 2014-7-2
* Author: liyouhuan
*/
/*=============================================================================
# Filename: IDList.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-05-08 12:44
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/
#include "IDList.h"
@ -14,10 +15,9 @@ IDList::IDList()
this->id_list.clear();
}
/* return the _i-th id of the list
* if _i exceeds, return -1;
* */
int IDList::getID(int _i)const
//return the _i-th id of the list; if _i is out of range, return -1
int
IDList::getID(int _i)const
{
if(this->size() > _i)
{
@ -26,19 +26,22 @@ int IDList::getID(int _i)const
return -1;
}
bool IDList::addID(int _id)
bool
IDList::addID(int _id)
{
/* a check for duplicate case will be more reliable */
//a check for duplicate case will be more reliable
this->id_list.push_back(_id);
return true;
}
int IDList::size()const
//get the number of ids stored in the list, as an int
int
IDList::size()const
{
	const int count = this->id_list.size();
	return count;
}
bool IDList::isExistID(int _id)const
bool
IDList::isExistID(int _id)const
{
// naive implementation of searching(linear search).
// you can use binary search when the id list is sorted, if necessary.
@ -53,13 +56,15 @@ bool IDList::isExistID(int _id)const
return false;
}
const std::vector<int>* IDList::getList()const
//get a read-only pointer to the internal id vector (no copy is made)
const vector<int>*
IDList::getList()const
{
	const vector<int>* inner = &(this->id_list);
	return inner;
}
int& IDList::operator[](const int& _i)
int&
IDList::operator[](const int& _i)
{
if(this->size() > _i)
{
@ -68,7 +73,8 @@ int& IDList::operator[](const int& _i)
return id_list[0];
}
std::string IDList::to_str()
string
IDList::to_str()
{
std::stringstream _ss;
_ss << "size=" << this->id_list.size() << "";
@ -79,58 +85,139 @@ std::string IDList::to_str()
return _ss.str();
}
int IDList::sort()
int
IDList::sort()
{
std::sort(id_list.begin(),id_list.end());
return 0;
}
void IDList::clear()
void
IDList::clear()
{
this->id_list.clear();
}
int IDList::intersectList(const int* _id_list, int _list_len)
void
IDList::copy(vector<int>& _new_idlist)
{
int id_i = 0;
int index_move_forward = 0;
std::vector<int>::iterator it = this->id_list.begin();
while(it != (this->id_list).end())
this->id_list = _new_idlist;
}
int
IDList::intersectList(const int* _id_list, int _list_len)
{
if(_id_list == NULL || _list_len == 0)
{
int can_id = *it;
while( (id_i < _list_len) && (_id_list[id_i] < can_id) )
{
id_i ++;
}
if(id_i == _list_len){
break;
}
if(can_id == _id_list[id_i])
{
(this->id_list)[index_move_forward] = can_id;
index_move_forward ++;
id_i ++;
}
it ++;
int remove_number = this->id_list.size();
this->id_list.clear();
return remove_number;
}
int remove_number = this->id_list.size() - index_move_forward;
std::vector<int>::iterator new_end =
this->id_list.begin() + index_move_forward;
//when size is almost the same, intersect O(n)
//when one size is small ratio, search in the larger one O(mlogn)
//
//n>0 m=nk(0<k<1)
//compare n(k+1) and nklogn: k0 = log(n/2)2 requiring that n>2
//k<=k0 binary search; k>k0 intersect
int method = -1; //0: intersect 1: search in vector 2: search in int*
int n = this->id_list.size();
double k = 0;
if(n < _list_len)
{
k = (double)n / (double)_list_len;
n = _list_len;
method = 2;
}
else
{
k = (double)_list_len / (double)n;
method = 1;
}
if(n <= 2)
method = 0;
else
{
double limit = Util::logarithm(n/2, 2);
if(k > limit)
method = 0;
}
(this->id_list).erase(new_end, this->id_list.end());
int remove_number = 0;
switch(method)
{
case 0:
{ //this bracket is needed if vars are defined in case
int id_i = 0;
int index_move_forward = 0;
vector<int>::iterator it = this->id_list.begin();
while(it != (this->id_list).end())
{
int can_id = *it;
while((id_i < _list_len) && (_id_list[id_i] < can_id))
{
id_i ++;
}
if(id_i == _list_len)
{
break;
}
if(can_id == _id_list[id_i])
{
(this->id_list)[index_move_forward] = can_id;
index_move_forward ++;
id_i ++;
}
it ++;
}
remove_number = this->id_list.size() - index_move_forward;
vector<int>::iterator new_end = this->id_list.begin() + index_move_forward;
(this->id_list).erase(new_end, this->id_list.end());
break;
}
case 1:
{
vector<int> new_id_list;
for(int i = 0; i < _list_len; ++i)
{
if(Util::bsearch_vec_uporder(_id_list[i], this->getList()) != -1)
new_id_list.push_back(_id_list[i]);
}
this->id_list = new_id_list;
remove_number = n - this->id_list.size();
break;
}
case 2:
{
vector<int> new_id_list;
int m = this->id_list.size(), i;
for(i = 0; i < m; ++i)
{
if(Util::bsearch_int_uporder(this->id_list[i], _id_list, _list_len) != -1)
new_id_list.push_back(this->id_list[i]);
}
this->id_list = new_id_list;
remove_number = m - this->id_list.size();
break;
}
default:
cerr << "no such method in IDList::intersectList()" << endl;
break;
}
return remove_number;
}
int IDList::intersectList(const IDList& _id_list)
int
IDList::intersectList(const IDList& _id_list)
{
// copy _id_list to the temp array first.
int temp_list_len = _id_list.size();
int* temp_list = new int[temp_list_len];
//BETTER:not to copy, just achieve here
for (int i = 0; i < temp_list_len; i ++)
{
temp_list[i] = _id_list.getID(i);
@ -138,12 +225,27 @@ int IDList::intersectList(const IDList& _id_list)
int remove_number = this->intersectList(temp_list, temp_list_len);
delete []temp_list;
return remove_number;
}
int IDList::unionList(const int* _id_list, int _list_len)
int
IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
{
if(_id_list == NULL || _list_len == 0)
return 0;
if(only_literal)
{
//NOTICE:this means that the original is no literals and we need to add from a list(containing entities/literals)
int k = 0;
//NOTICE:literal id > entity id; the list is ordered
for(; k < _list_len; ++k)
if(Util::is_literal_ele(_id_list[k]))
break;
for(; k < _list_len; ++k)
this->addID(_id_list[k]);
return _list_len - k;
}
// O(n)
int origin_size = (this->id_list).size();
int* temp_list = new int[origin_size + _list_len];
@ -230,27 +332,129 @@ int IDList::unionList(const int* _id_list, int _list_len)
*/
}
int IDList::unionList(const IDList& _id_list)
int
IDList::unionList(const IDList& _id_list, bool only_literal)
{
// copy _id_list to the temp array first.
int temp_list_len = _id_list.size();
int* temp_list = new int[temp_list_len];
for (int i = 0; i < temp_list_len; i ++)
//BETTER:not to copy, just achieve here
for(int i = 0; i < temp_list_len; i ++)
{
temp_list[i] = _id_list.getID(i);
}
return this->unionList(temp_list, temp_list_len);
int ret = this->unionList(temp_list, temp_list_len, only_literal);
delete[] temp_list;
return ret;
}
int IDList::erase(int i)
IDList*
IDList::intersect(const IDList& _id_list, const int* _list, int _len)
{
id_list.erase(id_list.begin()+i,id_list.end());
IDList* p = new IDList;
if(_list == NULL || _len == 0) //just copy _id_list
{
int size = _id_list.size();
for(int i = 0; i < size; ++i)
p->addID(_id_list.getID(i));
return p;
}
//when size is almost the same, intersect O(n)
//when one size is small ratio, search in the larger one O(mlogn)
//
//n>0 m=nk(0<k<1)
//compare n(k+1) and nklogn: k0 = log(n/2)2 requiring that n>2
//k<=k0 binary search; k>k0 intersect
int method = -1; //0: intersect 1: search in vector 2: search in int*
int n = _id_list.size();
double k = 0;
if(n < _len)
{
k = (double)n / (double)_len;
n = _len;
method = 2;
}
else
{
k = (double)_len / (double)n;
method = 1;
}
if(n <= 2)
method = 0;
else
{
double limit = Util::logarithm(n/2, 2);
if(k > limit)
method = 0;
}
int remove_number = 0;
switch(method)
{
case 0:
{ //this bracket is needed if vars are defined in case
int id_i = 0;
int num = _id_list.size();
for(int i = 0; i < num; ++i)
{
int can_id = _id_list.getID(i);
while((id_i < _len) && (_list[id_i] < can_id))
{
id_i ++;
}
if(id_i == _len)
{
break;
}
if(can_id == _list[id_i])
{
p->addID(can_id);
id_i ++;
}
}
break;
}
case 1:
{
for(int i = 0; i < _len; ++i)
{
if(Util::bsearch_vec_uporder(_list[i], _id_list.getList()) != -1)
p->addID(_list[i]);
}
break;
}
case 2:
{
int m = _id_list.size(), i;
for(i = 0; i < m; ++i)
{
int t = _id_list.getID(i);
if(Util::bsearch_int_uporder(t, _list, _len) != -1)
p->addID(t);
}
break;
}
default:
cerr << "no such method in IDList::intersectList()" << endl;
break;
}
return p;
}
int
IDList::erase(int i)
{
id_list.erase(id_list.begin()+i, id_list.end());
return 0;
}
int IDList::bsearch_uporder(int _key)
int
IDList::bsearch_uporder(int _key)
{
return Util::bsearch_vec_uporder(_key, this->id_list);
return Util::bsearch_vec_uporder(_key, this->getList());
}

View File

@ -26,13 +26,15 @@ public:
std::string to_str();
int sort();
void clear();
void copy(std::vector<int>& _new_idlist);
// intersect/union _id_list to this IDList, note that the two list must be ordered before using these two functions.
int intersectList(const int* _id_list, int _list_len);
int intersectList(const IDList&);
int unionList(const int* _id_list, int _list_len);
int unionList(const IDList&);
int unionList(const int* _id_list, int _list_len, bool only_literal=false);
int unionList(const IDList&, bool only_literal=false);
int bsearch_uporder(int _key);
static IDList* intersect(const IDList&, const int*, int);
private:
std::vector<int> id_list;
int erase(int i);

View File

@ -1,7 +1,7 @@
/*=============================================================================
# Filename: GeneralEvaluation.cpp
# Filename: QueryTree.cpp
# Author: Jiaqi, Chen
# Mail: 1181955272@qq.com
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Description: implement functions in QueryTree.h
=============================================================================*/
@ -10,6 +10,350 @@
using namespace std;
// Collect into `varset` every variable mentioned in this filter (sub)tree.
// A child of type 's' carries a string argument; it is added when it starts
// with '?'. A child of type 't' carries a nested node, which is visited
// recursively. Children of any other type are ignored.
void QueryTree::GroupPattern::FilterTreeNode::getVarset(Varset &varset)
{
	const int child_num = (int)this->child.size();
	for (int idx = 0; idx < child_num; idx++)
	{
		FilterTreeChild &cur = this->child[idx];
		switch (cur.type)
		{
			case 's':
				if (cur.arg[0] == '?')
					varset.addVar(cur.arg);
				break;
			case 't':
				cur.node.getVarset(varset);
				break;
			default:
				break;
		}
	}
}
// Print this filter expression to stdout.
//   exist_grouppatterns : group patterns referenced by EXISTS nodes, indexed
//                         by exists_grouppattern_id
//   dep                 : depth forwarded to GroupPattern::print for EXISTS
// Layout: optional prefix ("!", "regex", "lang", "langmatches", "bound"),
// then "(" first-child infix-operator second-child [", third"] ")".
// "in" and "exists" have their own layout and return early.
void QueryTree::GroupPattern::FilterTreeNode::print(vector<GroupPattern> &exist_grouppatterns, int dep)
{
	const int child_num = (int)this->child.size();

	switch (this->type)
	{
		// prefix operators / builtin names
		case Not_type:
			printf("!");
			break;
		case Builtin_regex_type:
			printf("regex");
			break;
		case Builtin_lang_type:
			printf("lang");
			break;
		case Builtin_langmatches_type:
			printf("langmatches");
			break;
		case Builtin_bound_type:
			printf("bound");
			break;

		// "x in (a , b , ...)": first child is the tested term, the rest is the list
		case Builtin_in_type:
		{
			if (this->child[0].type == 's')
				printf("%s", this->child[0].arg.c_str());
			printf(" in (");
			for (int idx = 1; idx < child_num; idx++)
			{
				if (idx != 1)
					printf(" , ");
				if (this->child[idx].type == 's')
					printf("%s", this->child[idx].arg.c_str());
			}
			printf(")");
			return;
		}

		// "exists { ... }": the group pattern is stored outside the tree
		case Builtin_exists_type:
			printf("exists");
			exist_grouppatterns[this->exists_grouppattern_id].print(dep);
			return;

		default:
			break;
	}

	printf("(");

	// left operand
	if (child_num >= 1)
	{
		FilterTreeChild &left = this->child[0];
		if (left.type == 's')
			printf("%s", left.arg.c_str());
		if (left.type == 't')
			left.node.print(exist_grouppatterns, dep);
	}

	// infix operator, or the argument separator of two-argument builtins
	switch (this->type)
	{
		case Or_type:
			printf(" || ");
			break;
		case And_type:
			printf(" && ");
			break;
		case Equal_type:
			printf(" = ");
			break;
		case NotEqual_type:
			printf(" != ");
			break;
		case Less_type:
			printf(" < ");
			break;
		case LessOrEqual_type:
			printf(" <= ");
			break;
		case Greater_type:
			printf(" > ");
			break;
		case GreaterOrEqual_type:
			printf(" >= ");
			break;
		case Builtin_regex_type:
		case Builtin_langmatches_type:
			printf(", ");
			break;
		default:
			break;
	}

	// right operand
	if (child_num >= 2)
	{
		FilterTreeChild &right = this->child[1];
		if (right.type == 's')
			printf("%s", right.arg.c_str());
		if (right.type == 't')
			right.node.print(exist_grouppatterns, dep);
	}

	// optional third argument, printed only for regex
	if (child_num >= 3 && this->type == Builtin_regex_type && this->child[2].type == 's')
		printf(", %s", this->child[2].arg.c_str());

	printf(")");
}
//----------------------------------------------------------------------------------------------------------------------------------------------------
// Append one triple pattern to the end of this group pattern's pattern list.
void QueryTree::GroupPattern::addOnePattern(Pattern _pattern)
{
	vector<Pattern> &pattern_list = this->patterns;
	pattern_list.push_back(_pattern);
}
// Open a new (still empty) UNION group. The group remembers the index of the
// last triple pattern added so far (-1 when there is none), which records
// where the UNION sits relative to the triple patterns.
void QueryTree::GroupPattern::addOneGroupUnion()
{
	const int last_pattern_idx = (int)this->patterns.size() - 1;
	this->unions.push_back(GroupPatternUnions(last_pattern_idx));
}
void QueryTree::GroupPattern::addOneUnion()
{
int n = (int)this->unions.size();
this->unions[n - 1].grouppattern_vec.push_back(GroupPattern());
}
// Return the last branch of the most recently opened UNION group.
// Precondition: both the unions list and that group's branch list are non-empty.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastUnion()
{
	return this->unions.back().grouppattern_vec.back();
}
// Append an empty OPTIONAL/MINUS sub-pattern tagged with _type (the printing
// code treats 'o' as OPTIONAL and 'm' as MINUS). The entry remembers the index
// of the last triple pattern and of the last UNION group added so far (-1 when
// none), which records its position inside this group pattern.
void QueryTree::GroupPattern::addOneOptionalOrMinus(char _type)
{
	const int last_pattern_idx = (int)this->patterns.size() - 1;
	const int last_unions_idx = (int)this->unions.size() - 1;
	this->optionals.push_back(OptionalOrMinusGroupPattern(last_pattern_idx, last_unions_idx, _type));
}
// Return the group pattern of the most recently added OPTIONAL/MINUS entry.
// Precondition: the optionals list is non-empty.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastOptionalOrMinus()
{
	return this->optionals.back().grouppattern;
}
void QueryTree::GroupPattern::addOneFilterTree()
{
this->filters.push_back(FilterTreeRoot());
this->filter_exists_grouppatterns.push_back(vector<GroupPattern>());
}
// Return the root node of the most recently added filter tree.
// Precondition: the filters list is non-empty.
QueryTree::GroupPattern::FilterTreeNode& QueryTree::GroupPattern::getLastFilterTree()
{
	FilterTreeRoot &newest = this->filters.back();
	return newest.root;
}
void QueryTree::GroupPattern::addOneExistsGroupPattern()
{
int n = (int)this->filter_exists_grouppatterns.size();
this->filter_exists_grouppatterns[n - 1].push_back(GroupPattern());
}
// Return the newest EXISTS group pattern of the most recently added filter.
// Precondition: there is at least one filter and it has at least one
// EXISTS group pattern.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastExistsGroupPattern()
{
	return this->filter_exists_grouppatterns.back().back();
}
void QueryTree::GroupPattern::getVarset()
{
for (int i = 0; i < (int)this->patterns.size(); i++)
{
if (this->patterns[i].subject.value[0] == '?')
this->patterns[i].varset.addVar(this->patterns[i].subject.value);
if (this->patterns[i].object.value[0] == '?')
this->patterns[i].varset.addVar(this->patterns[i].object.value);
this->grouppattern_resultset_minimal_varset = this->grouppattern_resultset_minimal_varset + this->patterns[i].varset;
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->patterns[i].varset;
}
for (int i = 0; i < (int)this->unions.size(); i++)
{
Varset minimal_varset;
for (int j = 0; j < (int)this->unions[i].grouppattern_vec.size(); j++)
{
this->unions[i].grouppattern_vec[j].getVarset();
if (j == 0) minimal_varset = minimal_varset + this->unions[i].grouppattern_vec[j].grouppattern_resultset_minimal_varset;
else minimal_varset = minimal_varset * this->unions[i].grouppattern_vec[j].grouppattern_resultset_minimal_varset;
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->unions[i].grouppattern_vec[j].grouppattern_resultset_maximal_varset;
}
this->grouppattern_resultset_minimal_varset = this->grouppattern_resultset_minimal_varset + minimal_varset;
}
for (int i = 0; i < (int)this->optionals.size(); i++)
{
this->optionals[i].grouppattern.getVarset();
if (this->optionals[i].type == 'o')
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->optionals[i].grouppattern.grouppattern_resultset_maximal_varset;
}
for (int i = 0; i < (int)this->filters.size(); i++)
{
this->filters[i].root.getVarset(this->filters[i].varset);
}
for(int i = 0; i < (int)this->filter_exists_grouppatterns.size(); i++)
for (int j = 0; j < (int)this->filter_exists_grouppatterns[i].size(); j++)
{
this->filter_exists_grouppatterns[i][j].getVarset();
}
}
bool QueryTree::GroupPattern::checkOnlyUnionOptionalFilterNoExists()
{
for (int i = 0; i < (int)this->unions.size(); i++)
{
for (int j = 0; j < (int)this->unions[i].grouppattern_vec.size(); j++)
if (!this->unions[i].grouppattern_vec[j].checkOnlyUnionOptionalFilterNoExists())
return false;
}
for (int i = 0; i < (int)this->optionals.size(); i++)
{
if (this->optionals[i].type != 'o')
return false;
if (!this->optionals[i].grouppattern.checkOnlyUnionOptionalFilterNoExists())
return false;
}
for (int i = 0; i < (int)this->filter_exists_grouppatterns.size(); i++)
if ((int)this->filter_exists_grouppatterns[i].size() != 0)
return false;
return true;
}
// Variable-usage check over this group pattern (called from QueryTree::checkWellDesigned).
//
// The triple patterns, UNION groups and OPTIONAL/MINUS entries are visited in the
// order encoded by the lastpattern/lastunions positions stored on each entry, while
// two variable sets are maintained:
//   occur : variables seen so far (passed by value, so the caller's set is untouched)
//   ban   : variables that a later triple pattern must not use (also a local copy)
// check_condition is an in/out flag: it is set to false as soon as a violation is
// found, and the function does nothing if it is already false on entry.
//
// Returns (occur, this_ban): the variables seen after visiting this group pattern
// and the ban set produced by this group pattern itself.
// NOTE(review): Varset operator+ / operator* / operator- are presumably set union /
// intersection / difference - confirm in Varset.cpp.
pair<Varset, Varset> QueryTree::GroupPattern::checkOptionalGroupPatternVarsAndSafeFilter(Varset occur , Varset ban, bool &check_condition)
//return occur varset and ban varset
{
// a failure was already detected elsewhere: nothing left to check
if (!check_condition) return make_pair(Varset(), Varset());
// bans generated inside this group pattern (the second element of the result)
Varset this_ban;
// index of the last triple pattern / UNION group / OPTIONAL entry already visited
int lastpattern = -1, lastunions = -1, lastoptional = -1;
while (check_condition && (lastpattern + 1 < (int)this->patterns.size() || lastunions + 1 < (int)this->unions.size() || lastoptional + 1 < (int)this->optionals.size()))
{
// the next OPTIONAL entry comes first when its recorded position matches the current one
if (lastoptional + 1 < (int)this->optionals.size() && this->optionals[lastoptional + 1].lastpattern == lastpattern && this->optionals[lastoptional + 1].lastunions == lastunions)
//optional
{
// the sub-pattern is checked starting from an empty occur set but with the current ban set
pair<Varset, Varset> sub_grouppattern_return_varset = this->optionals[lastoptional + 1].grouppattern.checkOptionalGroupPatternVarsAndSafeFilter(Varset(), ban, check_condition);
// fail when a variable already seen here is banned by the sub-pattern
if (occur.hasCommonVar(sub_grouppattern_return_varset.second))
check_condition = false;
// variables of the optional part that were not seen before it (computed before occur is extended)
Varset out = this->optionals[lastoptional + 1].grouppattern.grouppattern_resultset_maximal_varset - occur;
occur = occur + sub_grouppattern_return_varset.first;
this_ban = this_ban + sub_grouppattern_return_varset.second;
// those new variables become banned for whatever follows
this_ban = this_ban + out;
ban = ban + this_ban;
lastoptional++;
}
// otherwise the next UNION group, when it was added right after the current triple pattern
else if (lastunions + 1 < (int)this->unions.size() && this->unions[lastunions + 1].lastpattern == lastpattern)
//union
{
Varset sub_grouppattern_occur, sub_grouppattern_ban;
for (int i = 0; i < (int)this->unions[lastunions + 1].grouppattern_vec.size(); i++)
{
// every branch starts from the same occur/ban sets
pair<Varset, Varset> sub_grouppattern_result = this->unions[lastunions + 1].grouppattern_vec[i].checkOptionalGroupPatternVarsAndSafeFilter(occur, ban, check_condition);
// occur sets of the branches are combined with operator* (the first branch seeds the set)
if (i == 0)
sub_grouppattern_occur = sub_grouppattern_occur + sub_grouppattern_result.first;
else
sub_grouppattern_occur = sub_grouppattern_occur * sub_grouppattern_result.first;
// ban sets of the branches are accumulated with operator+
sub_grouppattern_ban = sub_grouppattern_ban + sub_grouppattern_result.second;
}
occur = occur + sub_grouppattern_occur;
this_ban = this_ban + sub_grouppattern_ban;
ban = ban + this_ban;
lastunions++;
}
else
//triple pattern
{
// fail when the triple pattern uses a banned variable
if (this->patterns[lastpattern + 1].varset.hasCommonVar(ban))
check_condition = false;
occur = occur + this->patterns[lastpattern + 1].varset;
lastpattern++;
}
}
//filter
// every variable used by a filter must be among the variables seen in this group pattern
for (int i = 0; i < (int)this->filters.size(); i++)
if (!this->filters[i].varset.belongTo(occur))
{
check_condition = false;
break;
}
return make_pair(occur, this_ban);
}
// (Re)initialise the block-id table used by getRootPatternBlockid() and
// mergePatternBlockid(): afterwards pattern_blockid has exactly one entry per
// triple pattern and entry i holds i, i.e. every pattern is its own root.
//
// The table is cleared first so that calling this function again resets it,
// instead of appending a second run of ids behind stale entries left by
// earlier merges.
void QueryTree::GroupPattern::initPatternBlockid()
{
	const int pattern_num = (int)this->patterns.size();

	this->pattern_blockid.clear();
	this->pattern_blockid.reserve(pattern_num);
	for (int i = 0; i < pattern_num; i++)
		this->pattern_blockid.push_back(i);
}
// Return the root block id of pattern x: follow pattern_blockid links until an
// entry that points to itself is reached. Every entry visited on the way is
// then rewritten to point directly at that root (path compression).
int QueryTree::GroupPattern::getRootPatternBlockid(int x)
{
	// first pass: locate the root
	int root = x;
	while (this->pattern_blockid[root] != root)
		root = this->pattern_blockid[root];

	// second pass: re-point every entry on the path to the root
	int cur = x;
	while (this->pattern_blockid[cur] != root)
	{
		const int next = this->pattern_blockid[cur];
		this->pattern_blockid[cur] = root;
		cur = next;
	}

	return root;
}
// Merge the blocks containing patterns x and y: the root of x's block is made
// to point at the root of y's block.
void QueryTree::GroupPattern::mergePatternBlockid(int x, int y)
{
	const int root_of_x = this->getRootPatternBlockid(x);
	const int root_of_y = this->getRootPatternBlockid(y);

	this->pattern_blockid[root_of_x] = root_of_y;
}
void QueryTree::GroupPattern::print(int dep)
{
for (int t = 0; t < dep; t++) printf("\t"); printf("{\n");
int lastpattern = -1, lastunions = -1, lastoptional = -1;
while (lastpattern + 1 < (int)this->patterns.size() || lastunions + 1 < (int)this->unions.size() || lastoptional + 1 < (int)this->optionals.size())
{
if (lastoptional + 1 < (int)this->optionals.size() && this->optionals[lastoptional + 1].lastpattern == lastpattern && this->optionals[lastoptional + 1].lastunions == lastunions)
//optional
{
for (int t = 0; t <= dep; t++) printf("\t");
if (this->optionals[lastoptional + 1].type == 'o') printf("OPTIONAL\n");
if (this->optionals[lastoptional + 1].type == 'm') printf("MINUS\n");
this->optionals[lastoptional + 1].grouppattern.print(dep + 1);
lastoptional++;
}
else if (lastunions + 1 < (int)this->unions.size() && this->unions[lastunions + 1].lastpattern == lastpattern)
//union
{
for (int i = 0; i < (int)this->unions[lastunions + 1].grouppattern_vec.size(); i++)
{
if (i != 0)
{
for (int t = 0; t <= dep; t++) printf("\t"); printf("UNION\n");
}
this->unions[lastunions + 1].grouppattern_vec[i].print(dep + 1);
}
lastunions++;
}
else
//triple pattern
{
for (int t = 0; t <= dep; t++) printf("\t");
printf("%s\t%s\t%s.\n", this->patterns[lastpattern + 1].subject.value.c_str(), this->patterns[lastpattern + 1].predicate.value.c_str(), this->patterns[lastpattern + 1].object.value.c_str());
lastpattern++;
}
}
//filter
for (int i = 0; i < (int)this->filters.size(); i++)
{
for (int t = 0; t <= dep; t++) printf("\t"); printf("FILTER\t");
this->filters[i].root.print(this->filter_exists_grouppatterns[i], dep + 1);
printf("\n");
}
for (int t = 0; t < dep; t++) printf("\t"); printf("}\n");
}
//----------------------------------------------------------------------------------------------------------------------------------------------------
void QueryTree::setQueryForm(QueryForm _queryform)
{
this->query_form = _queryform;
@ -30,7 +374,7 @@ QueryTree::ProjectionModifier QueryTree::getProjectionModifier()
return this->projection_modifier;
}
void QueryTree::addProjectionVar(std::string _projection)
void QueryTree::addProjectionVar(string _projection)
{
this->projection.addVar(_projection);
}
@ -55,12 +399,12 @@ bool QueryTree::checkProjectionAsterisk()
return this->projection_asterisk;
}
void QueryTree::addOrder(std::string &_var, bool _descending)
void QueryTree::addOrder(string &_var, bool _descending)
{
this->order.push_back(Order(_var, _descending));
}
std::vector<QueryTree::Order>& QueryTree::getOrder()
vector<QueryTree::Order>& QueryTree::getOrder()
{
return this->order;
}
@ -85,86 +429,61 @@ int QueryTree::getLimit()
return this->limit;
}
QueryTree::PatternGroup& QueryTree::getPatternGroup()
QueryTree::GroupPattern& QueryTree::getGroupPattern()
{
return this->patterngroup;
return this->grouppattern;
}
void QueryTree::PatternGroup::addOnePattern(Pattern _pattern)
bool QueryTree::checkWellDesigned()
{
this->patterns.push_back(_pattern);
if (!this->getGroupPattern().checkOnlyUnionOptionalFilterNoExists())
return false;
bool check_condition = true;
this->getGroupPattern().checkOptionalGroupPatternVarsAndSafeFilter(Varset(), Varset(), check_condition);
return check_condition;
}
void QueryTree::PatternGroup::addOneGroupUnion()
void QueryTree::print()
{
this->unions.push_back(vector<PatternGroup>());
}
for (int j = 0; j < 80; j++) printf("="); printf("\n");
void QueryTree::PatternGroup::addOneUnion()
{
int n = (int)this->unions.size();
this->unions[n - 1].push_back(PatternGroup());
}
if (this->getQueryForm() == QueryTree::Select_Query)
{
printf("select");
if (this->getProjectionModifier() == QueryTree::Modifier_Distinct)
printf(" distinct");
printf("\n");
QueryTree::PatternGroup& QueryTree::PatternGroup::getLastUnion()
{
int n = (int)this->unions.size();
int m = (int)this->unions[n - 1].size();
return this->unions[n - 1][m - 1];
}
printf("var is : \t");
vector <string> &varvec = this->getProjection().varset;
for (int i = 0; i < (int)varvec.size(); i++)
printf("%s\t", varvec[i].c_str());
if (this->checkProjectionAsterisk())
printf("*");
printf("\n");
}
else printf("ask\n");
void QueryTree::PatternGroup::addOneOptionalOrMinus(char _type)
{
this->optionals.push_back(OptionalOrMinusPatternGroup((int)this->patterns.size() - 1, (int)this->unions.size() - 1, _type));
}
this->getGroupPattern().print(0);
QueryTree::PatternGroup& QueryTree::PatternGroup::getLastOptionalOrMinus()
{
int n = (int)this->optionals.size();
return this->optionals[n - 1].patterngroup;
}
if ((int)this->getOrder().size() > 0)
{
printf("order by : \t");
void QueryTree::PatternGroup::addOneFilterTree()
{
this->filters.push_back(FilterTree());
this->filter_exists_patterngroups.push_back(vector<PatternGroup>());
}
vector<QueryTree::Order>&order = this->getOrder();
for (int i = 0; i < (int)order.size(); i++)
{
if (!order[i].descending) printf("ASC(");
else printf("DESC(");
printf("%s) ", order[i].var.c_str());
}
printf("\n");
}
if (this->getOffset() != 0)
printf("offset : %d\n", this->getOffset());
if (this->getLimit() != -1)
printf("limit : %d\n", this->getLimit());
QueryTree::FilterTree& QueryTree::PatternGroup::getLastFilterTree()
{
return this->filters[(int)(this->filters.size()) - 1];
for (int j = 0; j < 80; j++) printf("="); printf("\n");
}
void QueryTree::PatternGroup::addOneExistsGroupPattern()
{
int n = (int)this->filter_exists_patterngroups.size();
this->filter_exists_patterngroups[n - 1].push_back(PatternGroup());
}
QueryTree::PatternGroup& QueryTree::PatternGroup::getLastExistsGroupPattern()
{
int n = (int)this->filter_exists_patterngroups.size();
int m = (int)this->filter_exists_patterngroups[n - 1].size();
return this->filter_exists_patterngroups[n - 1][m - 1];
}
void QueryTree::PatternGroup::initPatternBlockid()
{
for (int i = 0; i < (int)this->patterns.size(); i++)
this->pattern_blockid.push_back(i);
}
int QueryTree::PatternGroup::getRootPatternBlockid(int x)
{
if (this->pattern_blockid[x] == x) return x;
this->pattern_blockid[x] = getRootPatternBlockid(this->pattern_blockid[x]);
return this->pattern_blockid[x];
}
void QueryTree::PatternGroup::mergePatternBlockid(int x, int y)
{
int px = getRootPatternBlockid(x);
int py = getRootPatternBlockid(y);
this->pattern_blockid[px] = py;
}

View File

@ -1,7 +1,7 @@
/*=============================================================================
# Filename: GeneralEvaluation.cpp
# Filename: QueryTree.h
# Author: Jiaqi, Chen
# Mail: 1181955272@qq.com
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Description:
=============================================================================*/
@ -15,85 +15,51 @@
class QueryTree
{
public:
QueryTree():query_form(Select_Query), projection_modifier(Modifier_None), projection_asterisk(false), offset(0), limit(-1){}
QueryTree():
query_form(Select_Query), projection_modifier(Modifier_None), projection_asterisk(false), offset(0), limit(-1){}
enum QueryForm {Select_Query, Ask_Query};
enum ProjectionModifier {Modifier_None, Modifier_Distinct, Modifier_Reduced, Modifier_Count, Modifier_Duplicates};
enum ProjectionModifier { Modifier_None, Modifier_Distinct, Modifier_Reduced, Modifier_Count, Modifier_Duplicates };
class Element
class GroupPattern
{
public:
/*
enum Type { Variable, Literal, IRI };
enum SubType { None, CustomLanguage, CustomType };
Type type;
SubType subType;
std::string subTypeValue;
*/
std::string value;
Element(const std::string& _value):value(_value){}
};
class Pattern
{
public:
Element subject, predicate, object;
Pattern(const Element _subject, const Element _predicate,const Element _object):subject(_subject), predicate(_predicate), object(_object){}
};
class FilterTree
{
public:
enum FilterTree_Type
{
None_type, Or_type, And_type, Equal_type, NotEqual_type, Less_type, LessOrEqual_type, Greater_type, GreaterOrEqual_type,
Plus_type, Minus_type, Mul_type, Div_type, Not_type, UnaryPlus_type, UnaryMinus_type, Literal_type, Variable_type, IRI_type,
Function_type, ArgumentList_type,Builtin_str_type, Builtin_lang_type, Builtin_langmatches_type, Builtin_datatype_type, Builtin_bound_type,
Builtin_sameterm_type,Builtin_isiri_type, Builtin_isblank_type, Builtin_isliteral_type, Builtin_regex_type, Builtin_in_type, Builtin_exists_type
};
FilterTree_Type type;
class FilterTreeChild
class Pattern
{
public:
FilterTreeChild():type(' '), ptr(NULL), arg(""), pos(-1){}
FilterTreeChild(const FilterTreeChild &ftc):type(ftc.type), ptr(NULL), arg(ftc.arg), pos(ftc.pos)
class Element
{
if (ftc.ptr != NULL)
{
ptr = new FilterTree();
*ptr = *ftc.ptr;
}
}
~FilterTreeChild(){ if (ptr != NULL) delete ptr; }
char type;
FilterTree *ptr;
std::string arg;
int pos;
public:
/*
enum Type { Variable, Literal, IRI };
enum SubType { None, CustomLanguage, CustomType };
Type type;
SubType subType;
std::string subTypeValue;
*/
std::string value;
Element(const std::string& _value):
value(_value){}
};
Element subject, predicate, object;
Varset varset;
Pattern(const Element _subject, const Element _predicate,const Element _object):subject(_subject), predicate(_predicate), object(_object){}
};
std::vector<FilterTreeChild> child;
int exists_patterngroup_id;
FilterTree():type(None_type), exists_patterngroup_id(-1){};
FilterTree(const FilterTree &ft):type(ft.type), child(ft.child), exists_patterngroup_id(ft.exists_patterngroup_id) {}
};
class PatternGroup
{
public:
class OptionalOrMinusPatternGroup;
class GroupPatternUnions;
class OptionalOrMinusGroupPattern;
class FilterTreeNode;
class FilterTreeRoot;
std::vector<Pattern> patterns;
std::vector<std::vector<PatternGroup> > unions;
std::vector<OptionalOrMinusPatternGroup> optionals;
std::vector<GroupPatternUnions> unions;
std::vector<OptionalOrMinusGroupPattern> optionals;
std::vector<FilterTree> filters;
std::vector< std::vector<PatternGroup> > filter_exists_patterngroups;
std::vector<FilterTreeRoot> filters;
std::vector<std::vector<GroupPattern> > filter_exists_grouppatterns;
std::vector<Varset> pattern_varset;
Varset patterngroup_varset;
Varset grouppattern_resultset_minimal_varset, grouppattern_resultset_maximal_varset;
std::vector<int> pattern_blockid;
@ -101,29 +67,89 @@ class QueryTree
void addOneGroupUnion();
void addOneUnion();
PatternGroup& getLastUnion();
GroupPattern& getLastUnion();
void addOneOptionalOrMinus(char _type);
PatternGroup& getLastOptionalOrMinus();
GroupPattern& getLastOptionalOrMinus();
void addOneFilterTree();
FilterTree& getLastFilterTree();
FilterTreeNode& getLastFilterTree();
void addOneExistsGroupPattern();
PatternGroup& getLastExistsGroupPattern();
GroupPattern& getLastExistsGroupPattern();
void getVarset();
bool checkOnlyUnionOptionalFilterNoExists();
std::pair<Varset, Varset> checkOptionalGroupPatternVarsAndSafeFilter(Varset occur , Varset ban, bool &check_condition);
void initPatternBlockid();
int getRootPatternBlockid(int x);
void mergePatternBlockid(int x, int y);
void print(int dep);
};
class PatternGroup::OptionalOrMinusPatternGroup
class GroupPattern::GroupPatternUnions
{
public:
PatternGroup patterngroup;
std::vector<GroupPattern> grouppattern_vec;
int lastpattern;
GroupPatternUnions(int _lastpattern):
lastpattern(_lastpattern){}
};
class GroupPattern::OptionalOrMinusGroupPattern
{
public:
GroupPattern grouppattern;
int lastpattern, lastunions;
char type;
OptionalOrMinusPatternGroup(int _lastpattern, int _lastunions, char _type):
patterngroup(PatternGroup()), lastpattern(_lastpattern), lastunions(_lastunions), type(_type){}
OptionalOrMinusGroupPattern(int _lastpattern, int _lastunions, char _type):
grouppattern(GroupPattern()), lastpattern(_lastpattern), lastunions(_lastunions), type(_type){}
};
class GroupPattern::FilterTreeNode
{
public:
enum FilterType
{
None_type, Or_type, And_type, Equal_type, NotEqual_type, Less_type, LessOrEqual_type, Greater_type, GreaterOrEqual_type,
Plus_type, Minus_type, Mul_type, Div_type, Not_type, UnaryPlus_type, UnaryMinus_type, Literal_type, Variable_type, IRI_type,
Function_type, ArgumentList_type,Builtin_str_type, Builtin_lang_type, Builtin_langmatches_type, Builtin_datatype_type, Builtin_bound_type,
Builtin_sameterm_type,Builtin_isiri_type, Builtin_isblank_type, Builtin_isliteral_type, Builtin_regex_type, Builtin_in_type, Builtin_exists_type
};
FilterType type;
class FilterTreeChild;
std::vector<FilterTreeChild> child;
int exists_grouppattern_id;
FilterTreeNode():
type(None_type), exists_grouppattern_id(-1){}
void getVarset(Varset &varset);
void print(std::vector<GroupPattern> &exist_grouppatterns, int dep);
};
class GroupPattern::FilterTreeNode::FilterTreeChild
{
public:
FilterTreeChild():
type(' '), pos(-1){}
char type;
FilterTreeNode node;
std::string arg;
int pos;
};
class GroupPattern::FilterTreeRoot
{
public:
FilterTreeNode root;
Varset varset;
};
class Order
@ -131,7 +157,8 @@ class QueryTree
public:
std::string var;
bool descending;
Order(std::string &_var, bool _descending):var(_var), descending(_descending){}
Order(std::string &_var, bool _descending):
var(_var), descending(_descending){}
};
@ -143,7 +170,7 @@ class QueryTree
std::vector<Order> order;
int offset, limit;
PatternGroup patterngroup;
GroupPattern grouppattern;
public:
void setQueryForm(QueryForm _queryform);
@ -162,7 +189,11 @@ class QueryTree
void setLimit(int _limit);
int getLimit();
PatternGroup& getPatternGroup();
GroupPattern& getGroupPattern();
bool checkWellDesigned();
void print();
};
#endif // _QUERY_QUERYTREE_H

View File

@ -1,7 +1,7 @@
/*=============================================================================
# Filename: RegexExpression.cpp
# Author: Jiaqi, Chen
# Mail: 1181955272@qq.com
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:40
# Description:
=============================================================================*/

81
Query/ResultFilter.cpp Normal file
View File

@ -0,0 +1,81 @@
/*=============================================================================
# Filename: ResultFilter.cpp
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-05-03 15:36
# Description: implement functions in ResultFilter.h
=============================================================================*/
#include "ResultFilter.h"
using namespace std;
// Register variable `var`: if it has no counter table yet, create one with
// MAX_SIZE counters, all zero. An already registered variable is left as is.
void ResultFilter::addVar(string var)
{
	if (this->hash_table.find(var) != this->hash_table.end())
		return;

	const vector<int> zero_counters(this->MAX_SIZE, 0);
	this->hash_table.insert(pair<string, vector<int> >(var, zero_counters));
}
// Look up the counter table of variable `var`.
// Returns a pointer to the table stored inside hash_table, or NULL when the
// variable has not been registered with addVar().
vector<int>* ResultFilter::findVar(string var)
{
	map<string, vector<int> >::iterator pos = this->hash_table.find(var);
	if (pos == this->hash_table.end())
		return NULL;
	return &(pos->second);
}
// For every basic query of `query`, add `value` to the counter of each id that
// appears in its result rows: the id in column k of a row is hashed into the
// counter table of the k-th variable of that basic query. Variables without a
// table yet are registered first.
void ResultFilter::change(SPARQLquery& query, int value)
{
	for (int q = 0; q < query.getBasicQueryNum(); q++)
	{
		BasicQuery& basicquery = query.getBasicQuery(q);
		vector<int*>& rows = basicquery.getResultList();
		int row_num = rows.size();
		int var_num = basicquery.getVarNum();

		// make sure every variable of this basic query has a counter table
		for (int v = 0; v < var_num; v++)
			this->addVar(basicquery.getVarName(v));

		// counter table of each column, in column order
		vector<vector<int>*> tables;
		for (int v = 0; v < var_num; v++)
			tables.push_back(this->findVar(basicquery.getVarName(v)));

		for (int r = 0; r < row_num; r++)
		{
			int* row = rows[r];
			for (int v = 0; v < var_num; v++)
			{
				vector<int>& counters = *tables[v];
				counters[this->hash(row[v])] += value;
			}
		}
	}
}
// Prune the candidate lists of `query`: for every variable that has a counter
// table, keep only the candidate ids whose hashed counter is greater than 0,
// and replace the candidate list with the surviving ids. Variables without a
// table are left untouched. The list size before and after is printed.
void ResultFilter::candFilter(SPARQLquery& query)
{
	for (int q = 0; q < query.getBasicQueryNum(); q++)
	{
		BasicQuery& basicquery = query.getBasicQuery(q);
		for (int v = 0; v < basicquery.getVarNum(); v++)
		{
			vector<int>* counters = this->findVar(basicquery.getVarName(v));
			if (counters == NULL)
				continue;

			IDList& idlist = basicquery.getCandidateList(v);
			IDList kept;

			printf("candFilter on %s\n", basicquery.getVarName(v).c_str());
			printf("before candFilter, size = %d\n", idlist.size());

			for (int c = 0; c < idlist.size(); c++)
			{
				int id = idlist.getID(c);
				if ((*counters)[this->hash(id)] > 0)
					kept.addID(id);
			}
			idlist = kept;

			printf("after candFilter, size = %d\n", idlist.size());
		}
	}
}

35
Query/ResultFilter.h Normal file
View File

@ -0,0 +1,35 @@
/*=============================================================================
# Filename: ResultFilter.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-05-03 15:36
# Description:
=============================================================================*/
#ifndef _QUERY_RESULTFILTER_H
#define _QUERY_RESULTFILTER_H
#include "SPARQLquery.h"
#include "../Util/Util.h"
class ResultFilter
{
private:
static const int MAX_SIZE = 1048576;
inline int hash(int x)
{
return ((x & (MAX_SIZE - 1)) * 17) & (MAX_SIZE - 1);
}
std::map<std::string, std::vector<int> > hash_table;
public:
void addVar(std::string var);
std::vector<int>* findVar(std::string var);
void change(SPARQLquery& query, int value);
void candFilter(SPARQLquery& query);
};
#endif // _QUERY_RESULTFILTER_H

View File

@ -1,9 +1,9 @@
/*=============================================================================
# Filename: GeneralEvaluation.cpp
# Filename: Varset.cpp
# Author: Jiaqi, Chen
# Mail: 1181955272@qq.com
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Description: implement functions in varset.h
# Description: implement functions in Varset.h
=============================================================================*/
#include "Varset.h"
@ -70,6 +70,20 @@ bool Varset::operator ==(Varset &x)
return true;
}
// Return true when at least one variable of this set is found in x
// (according to x.findVar), false otherwise - in particular for an empty set.
bool Varset::hasCommonVar(Varset &x)
{
	const int var_num = (int)this->varset.size();
	int idx = 0;
	// stop at the first variable that x also contains
	while (idx < var_num && !x.findVar(this->varset[idx]))
		idx++;
	return idx < var_num;
}
// Return true when every variable of this set is found in x (according to
// x.findVar); an empty set therefore always belongs to x.
bool Varset::belongTo(Varset &x)
{
	const int var_num = (int)this->varset.size();
	int idx = 0;
	// stop at the first variable that x does not contain
	while (idx < var_num && x.findVar(this->varset[idx]))
		idx++;
	return idx == var_num;
}
vector <int> Varset::mapTo(Varset& x)
{
vector<int> r;

View File

@ -1,7 +1,7 @@
/*=============================================================================
# Filename: GeneralEvaluation.cpp
# Filename: Varset.h
# Author: Jiaqi, Chen
# Mail: 1181955272@qq.com
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Description:
=============================================================================*/
@ -27,6 +27,8 @@ class Varset
Varset operator * (Varset& x);
Varset operator - (Varset& x);
bool operator ==(Varset &x);
bool hasCommonVar(Varset &x);
bool belongTo(Varset &x);
std::vector <int> mapTo(Varset& x);

View File

@ -11,7 +11,9 @@
#include "../Util/Util.h"
#include"../Util/Bstr.h"
enum CommandType {CMD_CONNECT, CMD_EXIT, CMD_LOAD, CMD_UNLOAD, CMD_CREATE_DB, CMD_DELETE_DB,
//NOTICE:CMD_DROP is used to remove the database, and CMD_CREATE is not useful because
//we always need to import a dataset to create a gstore db
enum CommandType {CMD_CONNECT, CMD_EXIT, CMD_LOAD, CMD_UNLOAD, CMD_CREATE, CMD_DROP,
CMD_IMPORT, CMD_QUERY, CMD_SHOW, CMD_INSERT, CMD_OTHER}; // extend the operation command type here.
class Operation

View File

@ -8,6 +8,8 @@
#include "Server.h"
using namespace std;
Server::Server()
{
this->connectionPort = Socket::DEFAULT_CONNECT_PORT; // default communication port is 3305.
@ -132,6 +134,12 @@ Server::listen()
this->importRDF(db_name, "", rdf_path, ret_msg);
break;
}
case CMD_DROP:
{
string db_name = operation.getParameter(0);
this->dropDatabase(db_name, "", ret_msg);
break;
}
case CMD_QUERY:
{
string query = operation.getParameter(0);
@ -141,9 +149,9 @@ Server::listen()
case CMD_SHOW:
{
string para = operation.getParameter(0);
if (para == "databases")
if (para == "databases" || para == "all")
{
this->showDatabases("", ret_msg);
this->showDatabases(para, "", ret_msg);
}
else
{
@ -294,10 +302,20 @@ Server::createDatabase(std::string _db_name, std::string _ac_name, std::string&
}
bool
Server::deleteDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
Server::dropDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
{
// to be implemented...
return false;
//TODO
if (this->database == NULL || this->database->getName() != _db_name)
{
_ret_msg = "database:" + _db_name + " is not loaded.";
return false;
}
delete this->database;
this->database = NULL;
_ret_msg = "unload database done.";
return true;
}
bool
@ -314,6 +332,8 @@ Server::loadDatabase(std::string _db_name, std::string _ac_name, std::string& _r
else
{
_ret_msg = "load database failed.";
delete this->database;
this->database = NULL;
}
return flag;
@ -397,7 +417,7 @@ Server::query(const std::string _query, std::string& _ret_msg)
if(flag)
{
//_ret_msg = "results are too large!";
//TODO: divide and transfer if too large to be placed in memory, using Stream
//BETTER: divide and transfer if too large to be placed in memory, using Stream
_ret_msg = res_set.to_str();
}
else
@ -409,9 +429,14 @@ Server::query(const std::string _query, std::string& _ret_msg)
}
bool
Server::showDatabases(std::string _ac_name, std::string& _ret_msg)
Server::showDatabases(string _para, string _ac_name, string& _ret_msg)
{
if (this->database != NULL)
if(_para == "all")
{
_ret_msg = Util::getItemsFromDir(Util::db_home);
return true;
}
if(this->database != NULL)
{
_ret_msg = "\n" + this->database->getName() + "\n";
}

View File

@ -43,10 +43,10 @@ public:
bool response(Socket _socket, std::string& _msg);
bool parser(std::string _raw_cmd, Operation& _ret_oprt);
bool createDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool deleteDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool dropDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool loadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool unloadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool showDatabases(std::string _ac_name, std::string& _ret_msg);
bool showDatabases(std::string _para, std::string _ac_name, std::string& _ret_msg);
bool importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg);
bool insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg);
bool query(const std::string _query, std::string& _ret_msg);

View File

@ -1,14 +1,15 @@
/*
* SIGEntry.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
* Modified on: 2014-6-29
* Author: hanshuo
*/
/*=============================================================================
# Filename: SigEntry.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 13:49
# Description:
=============================================================================*/
#include "SigEntry.h"
using namespace std;
SigEntry::SigEntry()
{
(this->sig).entityBitSet.reset();
@ -33,22 +34,26 @@ SigEntry::SigEntry(const EntitySig& _sig, int _entity_id)
this->entity_id = _entity_id;
}
const EntitySig& SigEntry::getEntitySig()const
const EntitySig&
SigEntry::getEntitySig() const
{
return this->sig;
}
int SigEntry::getEntityId()const
int
SigEntry::getEntityId() const
{
return this->entity_id;
}
int SigEntry::getSigCount()const
int
SigEntry::getSigCount() const
{
return (int)this->sig.entityBitSet.count();
}
SigEntry& SigEntry::operator=(const SigEntry _sig_entry)
SigEntry&
SigEntry::operator=(const SigEntry _sig_entry)
{
this->entity_id = _sig_entry.getEntityId();
this->sig.entityBitSet.reset();
@ -56,34 +61,56 @@ SigEntry& SigEntry::operator=(const SigEntry _sig_entry)
return *this;
}
SigEntry& SigEntry::operator|=(const SigEntry _sig_entry)
SigEntry&
SigEntry::operator|=(const SigEntry _sig_entry)
{
const EntitySig& sig = (_sig_entry.getEntitySig());
(this->sig).entityBitSet |= sig.entityBitSet;
return *this;
}
bool SigEntry::cover(const SigEntry& _sig_entry)const
bool
SigEntry::cover(const SigEntry& _sig_entry) const
{
//EQUAL:this & that == that
return (this->sig.entityBitSet | _sig_entry.getEntitySig().entityBitSet)
== (this->sig.entityBitSet);
}
bool SigEntry::cover(const EntitySig& _sig)const
bool
SigEntry::cover(const EntitySig& _sig) const
{
return (this->sig.entityBitSet | _sig.entityBitSet) == (this->sig.entityBitSet);
}
int SigEntry::xEpsilen(const SigEntry& _sig_entry)const
int
SigEntry::xOR(const SigEntry& _sig_entry) const
{
EntityBitSet entityBitSet;
entityBitSet.reset();
entityBitSet |= this->sig.entityBitSet;
entityBitSet.flip();
return (entityBitSet & _sig_entry.getEntitySig().entityBitSet).count();
//NOTICE: compute the xor distince now
//a^b = (a & ~b) | (~a & b)
EntityBitSet another;
another.reset();
another |= _sig_entry.getEntitySig().entityBitSet;
return ((entityBitSet & another.flip()) | (entityBitSet.flip() & another)).count();
}
std::string SigEntry::to_str()const
//how many 1s in _sig_entry are contained ->flip-> not contained these 1s, as distince
//0s in _sig_entry is nonsense
int
SigEntry::xEpsilen(const SigEntry& _sig_entry) const
{
EntityBitSet entityBitSet;
entityBitSet.reset();
entityBitSet |= this->sig.entityBitSet;
entityBitSet.flip();
return (entityBitSet & _sig_entry.getEntitySig().entityBitSet).count();
}
string
SigEntry::to_str() const
{
std::stringstream _ss;
@ -93,4 +120,3 @@ std::string SigEntry::to_str()const
return _ss.str();
}

View File

@ -1,38 +1,39 @@
/*
* SigEntry.h
*
* Created on: 2014-6-20
* Author: liyouhuan
* Modified on: 2014-6-29
* Author: hanshuo
*/
/*=============================================================================
# Filename: SigEntry.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 13:48
# Description: written by liyouhuan and hanshuo
=============================================================================*/
#ifndef SIGENTRY_H_
#define SIGENTRY_H_
#ifndef _SIGNATURE_SIGENTRY_H
#define _SIGNATURE_SIGENTRY_H
#include "../Util/Util.h"
#include "Signature.h"
using namespace std;
class SigEntry{
class SigEntry
{
private:
EntitySig sig;
//-1 if not in leaf node
int entity_id;
public:
SigEntry();
SigEntry(int _entity_id, EntityBitSet& _bitset);
SigEntry(const SigEntry& _sig_entry);
SigEntry(const EntitySig& sig, int _entity_id);
const EntitySig& getEntitySig()const;
int getEntityId()const;
int getSigCount()const;
const EntitySig& getEntitySig() const;
int getEntityId() const;
int getSigCount() const;
SigEntry& operator=(const SigEntry _sig_entry);
SigEntry& operator|=(const SigEntry _sig_entry);
bool cover(const SigEntry& _sig_entry)const;
bool cover(const EntitySig& _sig)const;
int xEpsilen(const SigEntry& _sig_entry)const;
std::string to_str()const;
bool cover(const SigEntry& _sig_entry) const;
bool cover(const EntitySig& _sig) const;
int xEpsilen(const SigEntry& _sig_entry) const;
int xOR(const SigEntry& _sig_entry) const;
std::string to_str() const;
};
#endif /* SIGENTRY_H_ */
#endif // _SIGNATURE_SIGENTRY_H

View File

@ -1,16 +1,17 @@
/*
* Signature.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
* Implemented on: 2014-6-29
* Author: hanshuo
*/
/*=============================================================================
# Filename: Signature.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 13:18
# Description:
=============================================================================*/
#include "Signature.h"
#include "../Query/BasicQuery.h"
std::string Signature::BitSet2str(const EntityBitSet& _bitset)
using namespace std;
std::string
Signature::BitSet2str(const EntityBitSet& _bitset)
{
std::stringstream _ss;
bool any = false;
@ -30,50 +31,52 @@ std::string Signature::BitSet2str(const EntityBitSet& _bitset)
return _ss.str();
}
/* for Signature */
void Signature::encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type)
void
Signature::encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type)
{
if (Signature::PREDICATE_ENCODE_METHOD == 0)
{
//WARN:change if need to use again, because the encoding method has changed now!
int pos = ( (_pre_id+10) % Signature::EDGE_SIG_LENGTH ) + Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
}
else
{
int seed_preid = _pre_id;
int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
if(_type == BasicQuery::EDGE_OUT)
if(_type == Util::EDGE_OUT)
{
seed_preid += 101;
seed_num += Signature::EDGE_SIG_INTERVAL_NUM_HALF;
}
/*
int primeSize = 5;
int prime1[]={5003,5009,5011,5021,5023};
int prime2[]={49943,49957,49991,49993,49999};
*/
//int primeSize = 5;
//int prime1[]={5003,5009,5011,5021,5023};
//int prime2[]={49943,49957,49991,49993,49999};
// how to hash the predicate id to signature(bitset) better?
// more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
//NOTICE: more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
// when the data set is big enough, cutting down the size of candidate list should come up to our primary consideration.
// in this case we should not encode too many ones in entities' signature.
// also, when the data set is small, hash conflicts can hardly happen.
// therefore, I think using 2 primes(set up two ones in bitset) is enough.
// --by hanshuo.
int primeSize = 2;
int prime1[]={5003,5011};
int prime2[]={49957,49993};
int prime1[] = {5003, 5011};
int prime2[] = {49957, 49993};
for (int i=0;i<primeSize;i++)
{
int seed = seed_preid * prime1[i] % prime2[i];
int pos = (seed % Signature::EDGE_SIG_LENGTH ) + Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
}
//for(int i = 0; i < primeSize; i++)
//{
//int seed = _pre_id * prime1[i] % prime2[i];
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//_entity_bs.set(pos);
//}
int seed = _pre_id * 5003 % 49957;
int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
_entity_bs.set(pos);
}
}
void Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
void
Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
{
if (Signature::PREDICATE_ENCODE_METHOD == 0)
{
@ -82,233 +85,81 @@ void Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
}
else
{
/*
int primeSize = 5;
int prime1[]={5003,5009,5011,5021,5023};
int prime2[]={49943,49957,49991,49993,49999};
*/
int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
//int primeSize = 5;
//int prime1[]={5003,5009,5011,5021,5023};
//int prime2[]={49943,49957,49991,49993,49999};
int primeSize = 2;
int prime1[]={5003,5011};
int prime2[]={49957,49993};
int prime1[] = {5003,5011};
int prime2[] = {49957,49993};
for (int i=0;i<primeSize;i++)
{
int seed = _pre_id * prime1[i] % prime2[i];
int pos = seed % Signature::EDGE_SIG_LENGTH;
_edge_bs.set(pos);
}
//for (int i = 0; i < primeSize; i++)
//{
//int seed = _pre_id * prime1[i] % prime2[i];
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
//_edge_bs.set(pos);
//}
int seed = _pre_id * 5003 % 49957;
int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
_edge_bs.set(pos);
}
}
void Signature::encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs) //_str is subject or object or literal
//NOTICE: no need to encode itself because only variable in query need to be filtered!
//So only consider all neighbors!
void
Signature::encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs)
{
//_str is subject or object or literal
if(strlen(_str) >0 && _str[0] == '?')
return;
int length = (int)strlen(_str);
unsigned int hashKey = 0;
unsigned int pos = 0;
char *str2 = new char[length+1];
char *str2 = (char*)calloc(length + 1, sizeof(char));
strcpy(str2, _str);
char *str = str2;
// the same consideration as encodePredicate2Entity.
// I think we should not set too many ones in entities' signature.
hashKey = Signature::simpleHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::RSHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::JSHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::PJWHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
/*
str=str2;
hashKey = Signature::ELFHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::SDBMHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
hashKey = Signature::DJBHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::APHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
str=str2;
hashKey = Signature::BKDRHash(str);
pos = hashKey % Signature::STR_SIG_LENGTH;
_entity_bs.set(pos);
*/
unsigned base = Signature::STR_SIG_BASE * (Signature::HASH_NUM - 1);
for(int i = Signature::HASH_NUM - 1; i >= 0; --i)
{
HashFunction hf = Util::hash[i];
if(hf == NULL)
break;
hashKey = hf(str);
str=str2;
pos = base + hashKey % Signature::STR_SIG_BASE;
base -= Signature::STR_SIG_BASE;
if(_str[0] == '"')
{
pos += Signature::STR_SIG_LENGTH2;
}
else if(_str[0] != '<')
{
#ifdef DEBUG_VSTREE
cerr << "error in encodeStr2Entity(): neighbor is neither a literal or entity!" << endl;
#endif
}
_entity_bs.set(pos);
}
//BETTER: use multiple threads for different hash functions
/*
//debug
{
std::stringstream _ss;
_ss << "encodeStr2Entity:" << str2 << endl;
Util::logging(_ss.str());
}
*/
delete []str2;
#ifdef DEBUG_VSTREE
//std::stringstream _ss;
//_ss << "encodeStr2Entity:" << str2 << endl;
//Util::logging(_ss.str());
#endif
free(str2);
}
void Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs)
void
Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs)
{
//to be implement
//TODO
}
unsigned int Signature::hash(const char* _str)
{
//to be implement
return 0;
}
/* some string hash functions */
unsigned int Signature::BKDRHash(const char *_str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
unsigned int key = 0;
while (*_str)
{
key = key * seed + (*_str++);
}
return (key & 0x7FFFFFFF);
}
unsigned int Signature::simpleHash(const char *_str)
{
unsigned int key;
unsigned char *p;
for(key = 0, p = (unsigned char *)_str; *p ; p++)
key = 31 * key + *p;
return (key & 0x7FFFFFFF);
}
unsigned int Signature::RSHash(const char *_str)
{
unsigned int b = 378551;
unsigned int a = 63689;
unsigned int key = 0;
while (*_str)
{
key = key * a + (*_str++);
a *= b;
}
return (key & 0x7FFFFFFF);
}
unsigned int Signature::JSHash(const char *_str)
{
unsigned int key = 1315423911;
while (*_str)
{
key ^= ((key << 5) + (*_str++) + (key >> 2));
}
return (key & 0x7FFFFFFF);
}
unsigned int Signature::PJWHash(const char *_str)
{
unsigned int bits_in_unsigned_int = (unsigned int)(sizeof(unsigned int) * 8);
unsigned int three_quarters = (unsigned int)((bits_in_unsigned_int * 3) / 4);
unsigned int one_eighth = (unsigned int)(bits_in_unsigned_int / 8);
unsigned int high_bits = (unsigned int)(0xFFFFFFFF) << (bits_in_unsigned_int - one_eighth);
unsigned int key = 0;
unsigned int test = 0;
while (*_str)
{
key = (key << one_eighth) + (*_str++);
if ((test = key & high_bits) != 0)
{
key = ((key ^ (test >> three_quarters)) & (~high_bits));
}
}
return (key & 0x7FFFFFFF);
}
unsigned int Signature::ELFHash(const char *_str)
{
unsigned int key = 0;
unsigned int x = 0;
while (*_str)
{
key = (key << 4) + (*_str++);
if ((x = key & 0xF0000000L) != 0)
{
key ^= (x >> 24);
key &= ~x;
}
}
return (key & 0x7FFFFFFF);
}
unsigned int Signature::SDBMHash(const char *_str)
{
unsigned int key = 0;
while (*_str)
{
key = (*_str++) + (key << 6) + (key << 16) - key;
}
return (key & 0x7FFFFFFF);
}
unsigned int Signature::DJBHash(const char *_str)
{
unsigned int key = 5381;
while (*_str) {
key += (key << 5) + (*_str++);
}
return (key & 0x7FFFFFFF);
}
unsigned int Signature::APHash(const char *_str)
{
unsigned int key = 0;
int i;
for (i=0; *_str; i++)
{
if ((i & 1) == 0)
{
key ^= ((key << 7) ^ (*_str++) ^ (key >> 3));
}
else
{
key ^= (~((key << 11) ^ (*_str++) ^ (key >> 5)));
}
}
return (key & 0x7FFFFFFF);
}
/* for ENTITYsig */
EntitySig::EntitySig()
{
this->entityBitSet.reset();
@ -332,44 +183,50 @@ EntitySig::EntitySig(const EntityBitSet& _bitset)
this->entityBitSet |= _bitset;
}
EntitySig& EntitySig::operator|=(const EntitySig& _sig)
EntitySig&
EntitySig::operator|=(const EntitySig& _sig)
{
this->entityBitSet |= _sig.entityBitSet;
return *this;
}
bool EntitySig::operator==(const EntitySig& _sig)const
bool
EntitySig::operator==(const EntitySig& _sig)const
{
return (this->entityBitSet == _sig.entityBitSet);
}
bool EntitySig::operator!=(const EntitySig& _sig)const
bool
EntitySig::operator!=(const EntitySig& _sig)const
{
return (this->entityBitSet != _sig.entityBitSet);
}
EntitySig& EntitySig::operator=(const EntitySig& _sig)
EntitySig&
EntitySig::operator=(const EntitySig& _sig)
{
this->entityBitSet.reset();
this->entityBitSet |= _sig.getBitset();
return *this;
}
const EntityBitSet & EntitySig::getBitset()const
const EntityBitSet&
EntitySig::getBitset()const
{
return this->entityBitSet;
}
/* for EDGEsig */
EdgeSig::EdgeSig()
{
this->edgeBitSet.reset();
}
EdgeSig::EdgeSig(const EdgeSig* _p_sig)
{
this->edgeBitSet.reset();
this->edgeBitSet |= _p_sig->edgeBitSet;
}
EdgeSig::EdgeSig(const EdgeSig& _sig)
{
this->edgeBitSet.reset();
@ -381,8 +238,21 @@ EdgeSig::EdgeSig(const EdgeBitSet& _bitset)
this->edgeBitSet.reset();
this->edgeBitSet |= _bitset;
}
EdgeSig& EdgeSig::operator|=(const EdgeSig& _sig)
EdgeSig&
EdgeSig::operator|=(const EdgeSig& _sig)
{
this->edgeBitSet |= _sig.edgeBitSet;
return *this;
}
string
EntitySig::to_str() const
{
std::stringstream _ss;
_ss << Signature::BitSet2str(this->entityBitSet);
return _ss.str();
}

View File

@ -1,54 +1,86 @@
/*
* Signature.h
*
* Created on: 2014-6-20
* Author: liyouhuan
* Modified on: 2014-6-29
* add some private hash functions,
* fix some ill-formed function names.
* Author: hanshuo
*/
/*=============================================================================
# Filename: Signature.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 12:50
# Description: written by liyouhuan and hanshuo
=============================================================================*/
#ifndef SIGNATURE_H_
#define SIGNATURE_H_
#ifndef _SIGNATURE_SIGNATURE_H
#define _SIGNATURE_SIGNATURE_H
#include "../Util/Util.h"
using namespace std;
class Signature{
class Signature
{
public:
/* must make sure:
* ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH */
const static int ENTITY_SIG_LENGTH = 400;
const static int EDGE_SIG_LENGTH = 150;
const static int STR_SIG_LENGTH = 250;
//static HashFunction hash[HashNum];
//must make sure: ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH
//const static int ENTITY_SIG_LENGTH = 400;
static const int STR_SIG_BASE = 100;
//NOTICE: we can also use id here, but string is recommended due to special structure
//(maybe needed later, for example, wildcards)
//The hash functions are costly, so only HASH_NUM of them are used
static const int HASH_NUM = 3; //no more than Util::HashNum
//NOTICE:if using str id, we can also divide like EDGE_SIG
//here we divide as entity neighbors and literal neighbors: ENTITY, LITERAL
static const int STR_SIG_LENGTH = 2 * STR_SIG_BASE * HASH_NUM; //2 * 100 * 3 = 600
static const int STR_SIG_LENGTH2 = STR_SIG_BASE * HASH_NUM; //100 * 3 = 300
//QUERY:I think that str filter is more important in VSTree than predicate, because
//a predicate may correspond to a lot of entities and predicate num is usually small
static const int EDGE_SIG_INTERVAL_NUM_HALF = 5; //in edge or out edge
static const int EDGE_SIG_INTERVAL_NUM = 2 * EDGE_SIG_INTERVAL_NUM_HALF;
static const int EDGE_SIG_INTERVAL_BASE = 20;
static const int EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //10 * 20 = 200
static const int EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE; //5 * 20 = 100
typedef bitset<Signature::EDGE_SIG_LENGTH> EdgeBitSet;
typedef bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH;
//static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet;
typedef std::bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
static std::string BitSet2str(const EntityBitSet& _bitset);
/* there are two predicate encoding method now, see the encoding functions @Signature.cpp for details. */
//NOTICE: there are two predicate encoding method now, see the encoding functions @Signature.cpp for details
const static int PREDICATE_ENCODE_METHOD = 1;
static void encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type);
static void encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs);
static void encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs); //_str is subject or object(literal)
static void encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs);
unsigned int hash(const char* _str);
private:
static unsigned int BKDRHash(const char *_str);
static unsigned int simpleHash(const char *_str);
static unsigned int RSHash(const char *_str);
static unsigned int JSHash(const char *_str);
static unsigned int PJWHash(const char *_str);
static unsigned int ELFHash(const char *_str);
static unsigned int SDBMHash(const char *_str);
static unsigned int DJBHash(const char *_str);
static unsigned int APHash(const char *_str);
//Signature()
//{
//NOTICE:not exceed the HashNum
//this->hash = new HashFunction[HashNum];
//this->hash[0] = Util::simpleHash;
//this->hash[1] = Util::APHash;
//this->hash[2] = Util::BKDRHash;
//this->hash[3] = Util::DJBHash;
//this->hash[4] = Util::ELFHash;
//this->hash[5] = Util::DEKHash;
//this->hash[6] = Util::BPHash;
//this->hash[7] = Util::FNVHash;
//this->hash[8] = Util::HFLPHash;
//this->hash[9] = Util::HFHash;
//this->hash[10] = Util::JSHash;
//this->hash[11] = Util::PJWHash;
//this->hash[12] = Util::RSHash;
//this->hash[13] = Util::SDBMHash;
//this->hash[14] = Util::StrHash;
//this->hash[15] = Util::TianlHash;
//}
//~Signature()
//{
//delete[] this->hash;
//}
};
typedef bitset<Signature::EDGE_SIG_LENGTH> EdgeBitSet;
typedef bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
//WARN:also defined in Signature, must be same!!!
//NOTICE:EdgeBitSet is only used in Query, not for VSTree
typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet;
typedef std::bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
class EntitySig : Signature{
public:
@ -63,6 +95,7 @@ public:
EntitySig& operator=(const EntitySig& _sig);
const EntityBitSet& getBitset()const;
void encode(const char * _str, int _pre_id);
std::string to_str() const;
};
class EdgeSig : Signature{
@ -75,4 +108,5 @@ public:
EdgeSig& operator|=(const EdgeSig& _sig);
};
#endif /* SIGNATURE_H_ */
#endif // _SIGNATURE_SIGNATURE_H

View File

@ -68,9 +68,11 @@ Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rown
if(Util::memoryLeft() < size)
{
this->inMem = false;
fprintf(stderr, "Stream: memory is not enough!\n");
}
else
{
fprintf(stderr, "Stream: memory is enough!\n");
}
#ifdef DEBUG_STREAM
fprintf(stderr, "Stream:after memory check!\n");

File diff suppressed because it is too large Load Diff

View File

@ -6,6 +6,9 @@
# Description:
1. firstly written by liyouhuan, modified by zengli
2. common macros, functions, classes, etc
# Notice: we only talk about sub-graph isomorphism in the essay; however,
# this system supports homomorphism (which means that multiple variables
# in the sparql query can point to the same node in the data graph)
=============================================================================*/
#ifndef _UTIL_UTIL_H
@ -25,6 +28,8 @@
#include <errno.h>
#include <regex.h>
#include <locale.h>
#include <assert.h>
#include <libgen.h>
#include <sys/time.h>
#include <sys/types.h>
@ -47,6 +52,7 @@
#include <map>
#include <set>
#include <stack>
#include <queue>
#include <vector>
#include <list>
#include <iterator>
@ -61,6 +67,7 @@
#define STREAM_ON 1
#define READLINE_ON 1
#define MULTI_INDEX 1
//indicate that in debug mode
//#define DEBUG_STREAM
@ -68,7 +75,7 @@
//#define DEBUG_KVSTORE 1 //in KVstore
//#define DEBUG_VSTREE 1 //in Database
//#define DEBUG_DATABASE 1 //in Database
//#define DEBUG_JOIN
#define DEBUG_JOIN
#ifdef DEBUG_PRECISE
#ifndef DEBUG
@ -104,13 +111,15 @@
//#define DEBUG
#endif
#define xfree(x) free(x); x = NULL;
//NOTICE:include Util.h and below in each main function
//(the beginning position)
//#ifdef DEBUG
// Util util;
//#endif
typedef unsigned(*HashFunction)(const char*, unsigned);
typedef unsigned(*HashFunction)(const char*);
//NOTICE:hash functions for int are not so many, so we represent int by a 4-byte string instead
//(not totally change int to string, which is costly)
//http://www.cppblog.com/aurain/archive/2010/07/06/119463.html
@ -124,6 +133,8 @@ public:
static const unsigned MB = 1048576;
static const unsigned GB = 1073741824;
static const int TRIPLE_NUM_MAX = 1000*1000*1000;
static const char EDGE_IN = 'i';
static const char EDGE_OUT= 'o';
//In order to differentiate the sub-part and literal-part of object
//let subid begin with 0, while literalid begins with LITERAL_FIRST_ID
//used in Database and Join
@ -131,12 +142,14 @@ public:
//initial transfer buffer size in Tree/ and Stream/
static const unsigned TRANSFER_SIZE = 1 << 20; //1M
static std::string db_home;
static std::string tmp_path;
// these are for debugging
//to build logs-system, each class: print() in time
static std::string debug_path;
static FILE* debug_kvstore;
static FILE* debug_database;
static FILE* debug_vstree;
static int memUsedPercentage();
static int memoryLeft();
@ -148,44 +161,60 @@ public:
static std::string showtime();
static int cmp_int(const void* _i1, const void* _i2);
static void sort(int*& _id_list, int _list_len);
static int bsearch_int_uporder(int _key,int* _array,int _array_num);
static int bsearch_int_uporder(int _key, const int* _array,int _array_num);
static bool bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len);
static int bsearch_vec_uporder(int _key, const std::vector<int>& _vec);
static int bsearch_vec_uporder(int _key, const std::vector<int>* _vec);
static std::string result_id_str(std::vector<int*>& _v, int _var_num);
static bool dir_exist(const std::string _dir);
static bool create_dir(const std:: string _dir);
static long get_cur_time();
static bool save_to_file(const char*, const std::string _content);
static bool is_literal_ele(int);
static int removeDuplicate(int*, int);
static std::string getQueryFromFile(const char* _file_path);
static std::string getSystemOutput(std::string cmd);
static std::string getExactPath(const char* path);
static std::string getItemsFromDir(std::string path);
static void logging(std::string _str);
// Below are some useful hash functions for string
// NOTICE:the string is general type and maybe very large, so length
// should as parameter(caller may use Bstr to avoid compute the length each time)
static unsigned simpleHash(const char *_str, unsigned _len);
static unsigned APHash(const char *_str, unsigned _len);
static unsigned BKDRHash(const char *_str, unsigned _len);
static unsigned DJBHash(const char *_str, unsigned _len);
static unsigned ELFHash(const char *_str, unsigned _len);
static unsigned DEKHash(const char* _str, unsigned _len);
static unsigned BPHash(const char* _str, unsigned _len);
static unsigned FNVHash(const char* _str, unsigned _len);
static unsigned HFLPHash(const char* _str, unsigned _len);
static unsigned HFHash(const char* _str, unsigned _len);
static unsigned JSHash(const char *_str, unsigned _len);
static unsigned PJWHash(const char *_str, unsigned _len);
static unsigned RSHash(const char *_str, unsigned _len);
static unsigned SDBMHash(const char *_str, unsigned _len);
static unsigned StrHash(const char* _str, unsigned _len);
static unsigned TianlHash(const char* _str, unsigned _len);
static unsigned simpleHash(const char *_str);
static unsigned APHash(const char *_str);
static unsigned BKDRHash(const char *_str);
static unsigned DJBHash(const char *_str);
static unsigned ELFHash(const char *_str);
static unsigned DEKHash(const char* _str);
static unsigned BPHash(const char* _str);
static unsigned FNVHash(const char* _str);
static unsigned HFLPHash(const char* _str);
static unsigned HFHash(const char* _str);
static unsigned JSHash(const char *_str);
static unsigned PJWHash(const char *_str);
static unsigned RSHash(const char *_str);
static unsigned SDBMHash(const char *_str);
static unsigned StrHash(const char* _str);
static unsigned TianlHash(const char* _str);
static const unsigned HashNum = 16;
static HashFunction hash[];
static double logarithm(double _a, double _b);
static char* l_trim(char * szOutput, const char *szInput);
static char* r_trim(char *szOutput, const char *szInput);
static char* a_trim(char * szOutput, const char * szInput);
//NOTICE: this function must be called at the beginning of executing!
Util();
~Util();
static std::string profile;
static bool configure(); //read init.conf and set the parameters for this system
static bool config_setting();
static bool config_advanced();
static bool config_debug();
static bool gStore_mode;
};
#endif //_UTIL_UTIL_H

View File

@ -6,8 +6,8 @@
*/
#include"EntryBuffer.h"
#include"../Signature/SigEntry.h"
#include"../Database/Database.h"
using namespace std;
int EntryBuffer::DEFAULT_CAPACITY = 2*1000*1000;

View File

@ -8,8 +8,10 @@
#ifndef ENTRYBUFFER_H_
#define ENTRYBUFFER_H_
class SigEntry;
#include "../Util/Util.h"
#include "../Signature/SigEntry.h"
//class SigEntry;
/* EntryBuffer is used to loading entries from hard disk when building VSTree. */
class EntryBuffer

View File

@ -8,10 +8,13 @@
#include "LRUCache.h"
#include "VNode.h"
using namespace std;
int LRUCache::DEFAULT_CAPACITY = 1*1000*1000;
LRUCache::LRUCache(int _capacity)
{
cout << "LRUCache initial..." << endl;
this->capacity = _capacity > 0 ? _capacity : LRUCache::DEFAULT_CAPACITY;
// we should guarantee the cache is big enough.
this->capacity = std::max(this->capacity, VNode::MAX_CHILD_NUM * 2000);
@ -31,6 +34,7 @@ LRUCache::LRUCache(int _capacity)
this->prev[LRUCache::START_INDEX] = LRUCache::NULL_INDEX;
this->prev[LRUCache::END_INDEX] = LRUCache::START_INDEX;
this->size = 0;
cout << "LRUCache initial finish" << endl;
}
LRUCache::~LRUCache()

View File

@ -1,14 +1,16 @@
/*
* LRUCache.h
*
* Created on: 2014-6-30
* Author: hanshuo
*/
/*=============================================================================
# Filename: LRUCache.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 14:05
# Description: written by hanshuo
=============================================================================*/
#ifndef LRUCACHE_H_
#define LRUCACHE_H_
#include "../Util/Util.h"
class VNode;
// before using the cache, you must loadCache or createCache.

View File

@ -1,21 +1,20 @@
/*
* VNode.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
* Implement on: 2014-7-2
* Author: hanshuo
*/
/*=============================================================================
# Filename: VNode.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 14:08
# Description: by liyouhuan and hanshuo
=============================================================================*/
#include"VNode.h"
#include<iostream>
#include "VNode.h"
using namespace std;
VNode::VNode()
{
this->is_leaf = false;
this->is_root = false;
this->child_num = 0;
// the following three lines are unnecessary.
this->self_file_line = -1;
this->father_file_line = -1;
for(int i = 0; i < VNode::MAX_CHILD_NUM; i ++)

View File

@ -1,18 +1,20 @@
/*
* VNode.h
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
/*=============================================================================
# Filename: VNode.h
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 14:05
# Description: written by liyouhuan
=============================================================================*/
#ifndef VNODE_H_
#define VNODE_H_
#ifndef _VSTREE_VNODE_H
#define _VSTREE_VNODE_H
#include "../Util/Util.h"
#include "../Signature/SigEntry.h"
#include"LRUCache.h"
#include "LRUCache.h"
class VNode{
class VNode
{
public:
static const int MAX_CHILD_NUM = 151;
static const int MIN_CHILD_NUM = 60;
@ -65,9 +67,10 @@ private:
int self_file_line;
int father_file_line;
SigEntry entry;
//BETTER:is this necessary? too much memory?
SigEntry child_entries[VNode::MAX_CHILD_NUM];
int child_file_lines[VNode::MAX_CHILD_NUM];
};
#endif // _VSTREE_VNODE_H
#endif /* VNODE_H_ */

View File

@ -1,18 +1,13 @@
/*
* VSTREE.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
/*=============================================================================
# Filename: VSTree.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-04-11 14:02
# Description:
=============================================================================*/
#include "VSTree.h"
#include"VSTree.h"
#include<stdio.h>
#include<queue>
#include"../Database/Database.h"
#include"../Signature/Signature.h"
#include<algorithm>
#include<vector>
#include<iostream>
using namespace std;
string VSTree::tree_file_foler_path;
@ -27,7 +22,7 @@ VSTree::VSTree(std::string _store_path)
this->root_file_line = 0;
this->entry_buffer = NULL;
this->node_buffer = NULL;
/* set the store path */
//set the store path
VSTree::tree_file_foler_path = _store_path;
VSTree::tree_node_file_path = VSTree::tree_file_foler_path + "/tree_node_file.dat";
VSTree::tree_info_file_path = VSTree::tree_file_foler_path + "/tree_info_file.dat";
@ -39,19 +34,22 @@ VSTree::~VSTree()
delete this->entry_buffer;
}
int VSTree::getHeight()const
int
VSTree::getHeight()const
{
return this->height;
}
/* get the tree's root node pointer. */
VNode* VSTree::getRoot()
VNode*
VSTree::getRoot()
{
return (this->node_buffer)->get(this->root_file_line);
}
/* get the node pointer by its file line. */
VNode* VSTree::getNode(int _line)
VNode*
VSTree::getNode(int _line)
{
if (_line >= this->node_num)
{
@ -62,10 +60,9 @@ VNode* VSTree::getNode(int _line)
return this->node_buffer->get(_line);
}
/* retrieve candidate result set by the var_sig in the _query. */
//retrieve candidate result set by the var_sig in the _query.
void VSTree::retrieve(SPARQLquery& _query)
{
//TODO: change log to Util
Util::logging("IN retrieve");
//debug
@ -101,7 +98,7 @@ void VSTree::retrieve(SPARQLquery& _query)
for(; iter != queryList.end(); iter++)
{
int varNum = (*iter)->getVarNum();
for (int i=0;i<varNum;i++)
for (int i = 0; i < varNum; i++)
{
//debug
{
@ -109,28 +106,34 @@ void VSTree::retrieve(SPARQLquery& _query)
_ss << "retrieve of var: " << i << endl;
Util::logging(_ss.str());
}
bool flag = (*iter)->isLiteralVariable(i);
const EntityBitSet& entityBitSet = (*iter)->getVarBitSet(i);
IDList* idListPtr = &( (*iter)->getCandidateList(i) );
this->retrieveEntity(entityBitSet, idListPtr);
#ifdef DEBUG_VSTREE
stringstream _ss;
_ss << "total num: " << this->entry_num << endl;
_ss << "candidate num: " << idListPtr->size() << endl;
_ss << endl;
_ss << "isExist 473738: " << (idListPtr->isExistID(473738)?"true":"false") <<endl;
_ss << "isExist 473472: " << (idListPtr->isExistID(473472)?"true":"false") <<endl;
_ss << "isExist 473473: " << (idListPtr->isExistID(473473)?"true":"false") <<endl;
Util::logging(_ss.str());
#endif
//debug
// {
// std::stringstream _ss;
// _ss << "candidate num: " << idListPtr->size() << endl;
// _ss << endl;
// _ss << "isExist 473738: " << (idListPtr->isExistID(473738)?"true":"false") <<endl;
// _ss << "isExist 473472: " << (idListPtr->isExistID(473472)?"true":"false") <<endl;
// _ss << "isExist 473473: " << (idListPtr->isExistID(473473)?"true":"false") <<endl;
// Util::logging(_ss.str());
// }
//the basic query should end if one non-literal var has no candidates
if(idListPtr->size() == 0 && !flag)
{
break;
}
}
}
Util::logging("OUT retrieve");
}
/* build the VSTree from the _entity_signature_file. */
bool VSTree::buildTree(std::string _entry_file_path)
//build the VSTree from the _entity_signature_file.
bool
VSTree::buildTree(std::string _entry_file_path)
{
Util::logging("IN VSTree::buildTree");
@ -147,9 +150,9 @@ bool VSTree::buildTree(std::string _entry_file_path)
this->node_num ++;
this->height ++;
/* when building a new VSTree,
* we should first create a new tree node file as the external storage
* of the node buffer on hard disk.*/
//when building a new VSTree,
//we should first create a new tree node file as the external storage
//of the node buffer on hard disk.
this->node_buffer->createCache(VSTree::tree_node_file_path);
FILE* filePtr = fopen(_entry_file_path.c_str(), "rb");
@ -159,8 +162,8 @@ bool VSTree::buildTree(std::string _entry_file_path)
return false;
}
/* load the entry file to entry buffer in memory, when the entry buffer is full,
insert them into the tree. */
//load the entry file to entry buffer in memory, when the entry buffer is full,
//insert them into the tree.
int n;
n = this->entry_buffer->fillElemsFromFile(filePtr);
while (n != 0)
@ -203,7 +206,8 @@ bool VSTree::buildTree(std::string _entry_file_path)
return flag;
}
bool VSTree::deleteTree()
bool
VSTree::deleteTree()
{
this->height = 0;
this->node_num = 0;
@ -219,9 +223,9 @@ bool VSTree::deleteTree()
return false;
}
/* Incrementally update bitset of _entity_id
* conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
* Entry of _entity_id must exists */
//Incrementally update bitset of _entity_id
//conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
//Entry of _entity_id must exist
bool VSTree::updateEntry(int _entity_id, const EntityBitSet& _bitset)
{
VNode* leafNodePtr = this->getLeafNodeByEntityID(_entity_id);
@ -235,7 +239,7 @@ bool VSTree::updateEntry(int _entity_id, const EntityBitSet& _bitset)
// find the mapping child entry, update it and refresh signature.
int childNum = leafNodePtr->getChildNum();
bool findFlag = false;
for (int i=0;i<childNum;i++)
for (int i = 0; i < childNum; i++)
{
const SigEntry& entry = leafNodePtr->getChildEntry(i);
@ -273,9 +277,10 @@ bool VSTree::updateEntry(int _entity_id, const EntityBitSet& _bitset)
return true;
}
/* Replace the Entry(_enitty_id)'s EntityBitSet with _bitset
* Entry of _entity_id must exists */
bool VSTree::replaceEntry(int _entity_id, const EntityBitSet& _bitset)
//Replace the Entry(_entity_id)'s EntityBitSet with _bitset
//Entry of _entity_id must exist
bool
VSTree::replaceEntry(int _entity_id, const EntityBitSet& _bitset)
{
VNode* leafNodePtr = this->getLeafNodeByEntityID(_entity_id);
@ -288,7 +293,7 @@ bool VSTree::replaceEntry(int _entity_id, const EntityBitSet& _bitset)
// find the mapping child entry, update it and refresh signature.
int childNum = leafNodePtr->getChildNum();
bool findFlag = false;
for (int i=0;i<childNum;i++)
for (int i = 0; i < childNum; i++)
{
const SigEntry& entry = leafNodePtr->getChildEntry(i);
if (entry.getEntityId() == _entity_id)
@ -310,31 +315,31 @@ bool VSTree::replaceEntry(int _entity_id, const EntityBitSet& _bitset)
return true;
}
/* insert an new Entry, whose entity doesn't exist before */
bool VSTree::insertEntry(const SigEntry& _entry)
//insert a new Entry whose entity did not exist before
bool
VSTree::insertEntry(const SigEntry& _entry)
{
/* choose the best leaf node to insert the _entry */
//choose the best leaf node to insert the _entry
VNode* choosedNodePtr = this->chooseNode(this->getRoot(), _entry);
//debug
// {
// if (_entry.getEntityId() == 4000001)
// {
// stringstream _ss;
// if (choosedNodePtr)
// {
// _ss << "insert " << _entry.getEntityId()
// << " into [" << choosedNodePtr->getFileLine() << "],\t";
// _ss << "whose childnum is " << choosedNodePtr->getChildNum() << endl;
// }
// else
// {
// _ss << "insert " << _entry.getEntityId() << " , can not choose a leaf node to insert entry. @VSTree::insert" << endl;
// }
// Util::logging(_ss.str());
// }
// }
#ifdef DEBUG_VSTREE
if (_entry.getEntityId() == 4000001)
{
stringstream _ss;
if (choosedNodePtr)
{
_ss << "insert " << _entry.getEntityId()
<< " into [" << choosedNodePtr->getFileLine() << "],\t";
_ss << "whose childnum is " << choosedNodePtr->getChildNum() << endl;
}
else
{
_ss << "insert " << _entry.getEntityId() << " , can not choose a leaf node to insert entry. @VSTree::insert" << endl;
}
Util::logging(_ss.str());
}
#endif
if (choosedNodePtr == NULL)
{
@ -344,7 +349,7 @@ bool VSTree::insertEntry(const SigEntry& _entry)
if (choosedNodePtr->isFull())
{
/* if the choosed leaf node to insert is full, the node should be split.*/
//if the choosed leaf node to insert is full, the node should be split.
this->split(choosedNodePtr, _entry, NULL);
//debug
@ -378,8 +383,9 @@ bool VSTree::insertEntry(const SigEntry& _entry)
return true;
}
/* remove an existed Entry(_entity_id) from VSTree */
bool VSTree::removeEntry(int _entity_id)
//remove an existed Entry(_entity_id) from VSTree
bool
VSTree::removeEntry(int _entity_id)
{
VNode* leafNodePtr = this->getLeafNodeByEntityID(_entity_id);
@ -412,18 +418,18 @@ bool VSTree::removeEntry(int _entity_id)
leafNodePtr->refreshAncestorSignature(*(this->node_buffer));
this->entry_num --;
/* we do not consider the situation which the leaf node is to be empty by now...
* in a better way, if the leaf node is empty after removing entry, we should delete it. and recursively judge whether its
* father is empty, and delete its father node if true. to make the VSTree more balanced, we should combine two nodes if
* their child number are less than the MIN_CHILD_NUM. when deleting one node from the tree, we should also remove it from
* tree node file in hard disk by doing some operations on the node_buffer.
*/
//we do not consider the situation which the leaf node is to be empty by now...
//in a better way, if the leaf node is empty after removing entry, we should delete it. and recursively judge whether its
//father is empty, and delete its father node if true. to make the VSTree more balanced, we should combine two nodes if
//their child number are less than the MIN_CHILD_NUM. when deleting one node from the tree, we should also remove it from
//tree node file in hard disk by doing some operations on the node_buffer.
return true;
}
/* save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path. */
bool VSTree::saveTree()
//save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path.
bool
VSTree::saveTree()
{
bool flag = this->saveTreeInfo();
@ -436,8 +442,10 @@ bool VSTree::saveTree()
return flag;
}
bool VSTree::loadTree()
bool
VSTree::loadTree()
{
cout << "load VSTree..." << endl;
(this->node_buffer) = new LRUCache(LRUCache::DEFAULT_CAPACITY);
bool flag = this->loadTreeInfo();
@ -451,61 +459,67 @@ bool VSTree::loadTree()
if (flag)
{
this->node_buffer->loadCache(VSTree::tree_node_file_path);
cout << "finish loadCache" << endl;
}
if (flag)
{
flag = loadEntityID2FileLineMap();
cout << "finish loadEntityID2FileLineMap" << endl;
}
return flag;
}
/* choose the best leaf node to insert the _entry,
* return the choosed leaf node's pointer.
* Recursion function! */
VNode* VSTree::chooseNode(VNode* _p_node, const SigEntry& _entry)
//choose the best leaf node to insert the _entry, return the choosed leaf node's pointer. Recursion!
VNode*
VSTree::chooseNode(VNode* _p_node, const SigEntry& _entry)
{
if (_p_node->isLeaf())
if(_p_node->isLeaf())
{
return _p_node;
}
else
{
int minDis = Signature::ENTITY_SIG_LENGTH + 1;
int minDis = Signature::ENTITY_SIG_LENGTH + 1;
//int maxDis = Signature::ENTITY_SIG_LENGTH + 1;
int candidateIndex[VNode::MAX_CHILD_NUM];
int candidateNum = 0;
int childNum = _p_node->getChildNum();
for (int i=0;i<childNum;i++)
for(int i = 0; i < childNum; i++)
{
int curDis = _p_node->getChildEntry(i).xEpsilen(_entry);
if (minDis >= curDis)
if(minDis >= curDis)
{
if (minDis > curDis)
if(minDis > curDis)
{
minDis = curDis;
candidateNum = 0;
}
candidateIndex[candidateNum ++] = i;
candidateIndex[candidateNum++] = i;
}
}
minDis = Signature::ENTITY_SIG_LENGTH + 1;
//NOTICE: the basic idea is to place similar signatures together?(the smaller num?)
//BETTER: recursion is too costly , and the performance maybe not so good
minDis = Signature::ENTITY_SIG_LENGTH + 1;
//maxDis = Signature::ENTITY_SIG_LENGTH + 1;
VNode* ret = NULL;
for (int i=0;i<candidateNum;i++)
for(int i = 0; i < candidateNum; i++)
{
int child_i = candidateIndex[i];
VNode* p_child = _p_node->getChild(child_i, *(this->node_buffer));
/* Recursion */
//Recursion
VNode *candidateLeafPtr = this->chooseNode(p_child, _entry);
int curDis = candidateLeafPtr->getEntry().xEpsilen(_entry);
if (curDis == 0)
if(curDis == 0)
{
return candidateLeafPtr;
}
if (minDis > curDis)
if(minDis > curDis)
{
minDis = curDis;
ret = candidateLeafPtr;
@ -516,19 +530,19 @@ VNode* VSTree::chooseNode(VNode* _p_node, const SigEntry& _entry)
}
}
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node)
void
VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node)
{
//debug
// {
// stringstream _ss;
// _ss << "**********************split happen at "
// << _p_node_being_split->getFileLine() << endl;
// _ss << _p_node_being_split->to_str() << endl;
// Util::logging(_ss.str());
// }
#ifdef DEBUG_VSTREE
stringstream _ss;
_ss << "**********************split happen at "
<< _p_node_being_split->getFileLine() << endl;
_ss << _p_node_being_split->to_str() << endl;
Util::logging(_ss.str());
#endif
// first, add the new child node(if not leaf) or child entry(if leaf) to the full node.
bool just_insert_entry = (_p_insert_node == NULL);
if (just_insert_entry)
if(just_insert_entry)
{
_p_node_being_split->addChildEntry(_insert_entry, true);
}
@ -538,32 +552,38 @@ void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VN
}
SigEntry entryA, entryB;
/* two seeds to generate two new nodes.
* seedA kernel: the SigEntry with the minimal count of signature.
* seedB kernel: the SigEntry with the second minimal count of signature.
* */
int minCount = 0; // record the minimal signature count.
//BETTER: use the Hamming distance, the xor result, or the angle between the vectors to estimate the distance,
//and then use the two farthest entries as seeds.
//
//two seeds to generate two new nodes.
//seedA kernel: the SigEntry with the maximal signature count.
//seedB kernel: the SigEntry (other than seedA) with the maximal xOR value against seedA.
int maxCount = 0; // record the minimal signature count.
int entryA_index = 0; // record the seedA kernel index.
for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
for(int i = 0; i < VNode::MAX_CHILD_NUM; i++)
{
int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount();
if (minCount < currentCount)
if(maxCount < currentCount)
{
minCount = currentCount;
maxCount = currentCount;
entryA_index = i;
}
}
entryA = _p_node_being_split->getChildEntry(entryA_index);
minCount = 0;
maxCount = 0;
int entryB_index = 0; // record the seedB kernel index.
for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
for(int i = 0; i < VNode::MAX_CHILD_NUM; i++)
{
int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
if (i != entryA_index && minCount <= currentCount)
//NOTICE: I think xOR should be used here to choose the farthest two
int currentCount = entryA.xOR(_p_node_being_split->getChildEntry(i));
//int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
if(i != entryA_index && maxCount <= currentCount)
{
minCount = currentCount;
maxCount = currentCount;
entryB_index = i;
}
}
@ -577,21 +597,20 @@ void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VN
entryIndex_nearA.push_back(entryA_index);
entryIndex_nearB.push_back(entryB_index);
/* just tmp variables, for more readibility */
int nearA_max_size, nearB_max_size;
bool nearA_tooSmall, nearB_tooSmall;
for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
for(int i = 0; i < VNode::MAX_CHILD_NUM; i++)
{
if (i == entryA_index || i == entryB_index) continue;
if(i == entryA_index || i == entryB_index) continue;
/* should guarantee that each new node has at least MIN_CHILD_NUM children. */
//should guarantee that each new node has at least MIN_CHILD_NUM children.
nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size();
nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM);
if (nearA_tooSmall)
if(nearA_tooSmall)
{
for (;i<VNode::MAX_CHILD_NUM;i++)
for(; i < VNode::MAX_CHILD_NUM; i++)
{
if (i == entryA_index || i == entryB_index) continue;
entryIndex_nearA.push_back(i);
@ -601,94 +620,93 @@ void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VN
nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size();
nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM);
if (nearB_tooSmall)
if(nearB_tooSmall)
{
for (;i<VNode::MAX_CHILD_NUM;i++)
for(; i < VNode::MAX_CHILD_NUM; i++)
{
if (i == entryA_index || i == entryB_index) continue;
if(i == entryA_index || i == entryB_index) continue;
entryIndex_nearB.push_back(i);
}
break;
}
/* calculate the distance from
* the i-th child entry signature to seedA(or seedB).*/
//calculate the distance from
//the i-th child entry signature to seedA(or seedB).
/*debug target 1*/
//NOTICE:we should expect that the candidate can be almost contained!
//However, the precondition there are not too many 1s
int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i));
// choose the near one seed to add into
if (disToSeedA <= disToSeedB)
if(disToSeedA <= disToSeedB)
{
entryIndex_nearA.push_back(i);
entryIndex_nearA.push_back(i);
}
else
{
entryIndex_nearB.push_back(i);
entryIndex_nearB.push_back(i);
}
}
// then create a new node to act as BEntryIndex's father.
VNode* newNodePtr = this->createNode();
//debug
// {
// stringstream _ss;
// _ss << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl;
// Util::logging(_ss.str());
// }
#ifdef DEBUG_VSTREE
stringstream _ss2;
_ss2 << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl;
Util::logging(_ss2.str());
#endif
// the old one acts as AEntryIndex's father.
VNode* oldNodePtr = _p_node_being_split;
// if the old node is leaf, set the new node as a leaf.
if (oldNodePtr->isLeaf())
if(oldNodePtr->isLeaf())
{
newNodePtr->setAsLeaf(true);
}
/* add all the entries in BEntryIndex into the new node child entry array,
and calculate the new node's entry.*/
for (unsigned i=0;i<entryIndex_nearB.size();i++)
//add all the entries in BEntryIndex into the new node child entry array,
//and calculate the new node's entry.
for(unsigned i = 0; i < entryIndex_nearB.size(); i++)
{
if (oldNodePtr->isLeaf())
if(oldNodePtr->isLeaf())
{
newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false);
}
else
{
/*debug target 2*/
//debug target 2
VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer));
newNodePtr->addChildNode(childPtr);
}
}
newNodePtr->refreshSignature();
/* label the child being removed with -1,
* and update the old node's entry.*/
std::sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>());
//label the child being removed with -1,
//and update the old node's entry.
sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>());
//debug
// {
// stringstream _ss;
// {
// _ss << "nearA: ";
// for(int i = 0; i < entryIndex_nearA.size(); i ++)
// {
// _ss << entryIndex_nearA[i] << " ";
// }
// _ss << endl;
//
// _ss << "nearB: ";
// for(int i = 0; i < entryIndex_nearB.size(); i ++)
// {
// _ss << entryIndex_nearB[i] << " ";
// }
// _ss << endl;
// }
// Util::logging(_ss.str());
// }
#ifdef DEBUG_VSTREE
stringstream _ss1;
{
_ss1 << "nearA: ";
for(unsigned i = 0; i < entryIndex_nearA.size(); i++)
{
_ss1 << entryIndex_nearA[i] << " ";
}
_ss1 << endl;
for (unsigned i=0;i<entryIndex_nearA.size();i++)
_ss1 << "nearB: ";
for(unsigned i = 0; i < entryIndex_nearB.size(); i++)
{
_ss1 << entryIndex_nearB[i] << " ";
}
_ss1 << endl;
}
Util::logging(_ss1.str());
#endif
for(unsigned i = 0; i < entryIndex_nearA.size(); i++)
{
oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i]));
oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i]));
@ -699,20 +717,20 @@ void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VN
int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer));
// full node's father pointer.
VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer));
if (oldNodePtr->isRoot())
if(oldNodePtr->isRoot())
{
/* if the old node is root,
* split the root, create a new root,
* and the tree height will be increased.*/
//if the old node is root,
//split the root, create a new root,
//and the tree height will be increased.
VNode* RootNewPtr = this->createNode();
/* change the old root node to not-root node,
* and set the RootNew to root node.*/
//change the old root node to not-root node,
//and set the RootNew to root node.
oldNodePtr->setAsRoot(false);
RootNewPtr->setAsRoot(true);
/* set the split two node(old node and new node) as the new root's child,
* and update signatures.*/
//set the split two node(old node and new node) as the new root's child,
//and update signatures.
RootNewPtr->addChildNode(oldNodePtr);
RootNewPtr->addChildNode(newNodePtr);
RootNewPtr->refreshSignature();
@ -725,10 +743,10 @@ void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VN
// Util::logging(_ss.str());
// }
/* should keep the root node always being
* at the first line(line zero) of the tree node file.*/
//should keep the root node always being
//at the first line(line zero) of the tree node file.
this->swapNodeFileLine(RootNewPtr, oldNodePtr);
this->height ++;
this->height++;
//debug
// {
@ -742,12 +760,12 @@ void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VN
}
else
{
/* if the (OldNode) is not Root,
* change the old node's signature to A's signature.*/
//if the (OldNode) is not Root,
//change the old node's signature to A's signature.
oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry());
if (oldNodeFatherPtr->isFull())
if(oldNodeFatherPtr->isFull())
{
oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer));
this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr);
@ -778,8 +796,9 @@ void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VN
this->updateEntityID2FileLineMap(newNodePtr);
}
/* create a new node when one node need splitting. */
VNode* VSTree::createNode()
//create a new node when one node need splitting.
VNode*
VSTree::createNode()
{
VNode* newNodePtr = new VNode();
newNodePtr->setFileLine(this->node_num);
@ -790,7 +809,8 @@ VNode* VSTree::createNode()
}
/* swap two nodes' file line, their related nodes(father and children nodes) will also be updated. */
void VSTree::swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b)
void
VSTree::swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b)
{
int oldNodeAFileLine = _p_node_a->getFileLine();
int oldNodeBFileLine = _p_node_b->getFileLine();
@ -851,8 +871,9 @@ void VSTree::swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b)
this->node_buffer->update(newNodeBFileLine, _p_node_b);
}
/* save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc. */
bool VSTree::saveTreeInfo()
//save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc.
bool
VSTree::saveTreeInfo()
{
FILE* filePtr = fopen(VSTree::tree_info_file_path.c_str(), "wb");
@ -883,8 +904,9 @@ bool VSTree::saveTreeInfo()
return true;
}
/* load VSTree's information from tree_info_file_path. */
bool VSTree::loadTreeInfo()
//load VSTree's information from tree_info_file_path.
bool
VSTree::loadTreeInfo()
{
FILE* filePtr = fopen(VSTree::tree_info_file_path.c_str(), "rb");
@ -924,8 +946,9 @@ bool VSTree::loadTreeInfo()
return true;
}
/* traverse the tree_node_file_path file, load the mapping from entity id to file line. */
bool VSTree::loadEntityID2FileLineMap()
//traverse the tree_node_file_path file, load the mapping from entity id to file line.
bool
VSTree::loadEntityID2FileLineMap()
{
FILE* filePtr = fopen(VSTree::tree_node_file_path.c_str(), "rb");
@ -976,8 +999,9 @@ bool VSTree::loadEntityID2FileLineMap()
return true;
}
/* update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node. */
void VSTree::updateEntityID2FileLineMap(VNode* _p_node)
//update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node.
void
VSTree::updateEntityID2FileLineMap(VNode* _p_node)
{
if (_p_node->isLeaf())
{
@ -1001,8 +1025,9 @@ void VSTree::updateEntityID2FileLineMap(VNode* _p_node)
}
}
/* get the leaf node pointer by the given _entityID */
VNode* VSTree::getLeafNodeByEntityID(int _entityID)
//get the leaf node pointer by the given _entityID
VNode*
VSTree::getLeafNodeByEntityID(int _entityID)
{
map<int,int>::iterator iter = this->entityID2FileLineMap.find(_entityID);
@ -1018,12 +1043,16 @@ VNode* VSTree::getLeafNodeByEntityID(int _entityID)
return this->getNode(line);
}
/* retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list. */
void VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list)
//retrieve the candidate entity ID which signature can cover the _entity_bit_set, and add them to the _p_id_list.
void
VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list)
{
Util::logging("IN retrieveEntity");
EntitySig filterSig(_entity_bit_set);
std::queue<int>nodeFileFileQueue; //searching node file line queue.
#ifdef DEBUG_VSTREE
cerr << "the filter signature: " << filterSig.to_str() << endl;
#endif
queue<int> nodeQueue; //searching node file line queue.
//debug
{
@ -1037,7 +1066,7 @@ void VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_l
if(root_entry.cover(filterSig))
{
nodeFileFileQueue.push(this->getRoot()->getFileLine());
nodeQueue.push(this->getRoot()->getFileLine());
Util::logging("root cover the filter_sig");
}
else
@ -1051,11 +1080,11 @@ void VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_l
// Util::logging("Before BFS");
// }
/* using BFS algorithm to traverse the VSTree and retrieve the entry.*/
while (!nodeFileFileQueue.empty())
//using BFS algorithm to traverse the VSTree and retrieve the entry.
while (!nodeQueue.empty())
{
int currentNodeFileLine = nodeFileFileQueue.front();
nodeFileFileQueue.pop();
int currentNodeFileLine = nodeQueue.front();
nodeQueue.pop();
VNode* currentNodePtr = this->getNode(currentNodeFileLine);
int childNum = currentNodePtr->getChildNum();
@ -1077,10 +1106,14 @@ void VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_l
// }
int valid = 0;
for (int i=0;i<childNum;i++)
for (int i = 0; i < childNum; i++)
{
const SigEntry& entry = currentNodePtr->getChildEntry(i);
#ifdef DEBUG_VSTREE
//cerr << "current entry: " << entry.to_str() << endl;
#endif
if (entry.cover(filterSig))
{
valid++;
@ -1103,7 +1136,7 @@ void VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_l
//VNode* childPtr = currentNodePtr->getChild(i, *(this->node_buffer));
// if non-leaf node, add the child node file line to the searching queue.
int childNodeFileLine = currentNodePtr->getChildFileLine(i);
nodeFileFileQueue.push(childNodeFileLine);
nodeQueue.push(childNodeFileLine);
//debug
// {
@ -1114,12 +1147,15 @@ void VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_l
}
}
}
#ifdef DEBUG_VSTREE
//cerr << "child num: " << childNum << " valid num: " << valid << endl;
#endif
}
Util::logging("OUT retrieveEntity");
}
std::string VSTree::to_str()
string
VSTree::to_str()
{
//debug
{
@ -1154,3 +1190,4 @@ std::string VSTree::to_str()
return _ss.str();
}

View File

@ -3,17 +3,17 @@
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-11-01 13:02
# Description:
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/
#ifndef _VSTREE_VSTREE_H
#define _VSTREE_VSTREE_H
#include "../Util/Util.h"
#include"../Query/SPARQLquery.h"
#include"VNode.h"
#include"LRUCache.h"
#include"EntryBuffer.h"
#include "../Query/SPARQLquery.h"
#include "VNode.h"
#include "LRUCache.h"
#include "EntryBuffer.h"
class VSTree
{
@ -22,35 +22,36 @@ public:
VSTree(std::string _store_path);
~VSTree();
int getHeight()const;
/* build the VSTree from the _entity_signature_file. */
//build the VSTree from the _entity_signature_file.
bool buildTree(std::string _entity_signature_file);
bool deleteTree();
/* Incrementally update bitset of _entity_id
* conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
* Entry of _entity_id must exists */
//Incrementally update the bitset of _entity_id: conduct an OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
//Entry of _entity_id must exist
bool updateEntry(int _entity_id, const EntityBitSet& _bitset);
/* Replace the Entry(_enitty_id)'s EntityBitSet with _bitset
* Entry of _entity_id must exists */
//Replace the Entry(_entity_id)'s EntityBitSet with _bitset; the Entry of _entity_id must exist
bool replaceEntry(int _entity_id, const EntityBitSet& _bitset);
/* insert an new Entry, whose entity doesn't exist before */
//insert an new Entry, whose entity doesn't exist before
bool insertEntry(const SigEntry& _entry);
/* remove an existed Entry(_entity_id) from VSTree */
//remove an existed Entry(_entity_id) from VSTree
bool removeEntry(int _entity_id);
/* save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path. */
//save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path.
bool saveTree();
/* load tree from tree_info_file_path and tree_node_file_path files. */
//load tree from tree_info_file_path and tree_node_file_path files.
bool loadTree();
/* get the tree's root node pointer. */
//get the tree's root node pointer.
VNode* getRoot();
/* get the node pointer by its file line. */
//get the node pointer by its file line.
VNode* getNode(int _line);
/* retrieve candidate result set by the var_sig in the _query. */
//retrieve candidate result set by the var_sig in the _query.
void retrieve(SPARQLquery& _query);
//retrieve the candidate entity IDs whose signatures can cover the _entity_bit_set, and add them to the _p_id_list.
void retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list);
private:
int root_file_line;
int node_num;
@ -63,32 +64,29 @@ private:
static std::string tree_node_file_path;
static std::string tree_info_file_path;
/* choose the best leaf node to insert the _entry, return the choosed leaf node's pointer. */
//choose the best leaf node to insert the _entry, return the choosed leaf node's pointer.
VNode* chooseNode(VNode* _p_node, const SigEntry& _entry);
/* split the _p_full_node to two new node when it is full.
* the parameter _insert_entry and _p_insert_node are the entry/node
* need to be insert to the _p_full_node.
*/
//split the _p_full_node to two new node when it is full.
//the parameter _insert_entry and _p_insert_node are the entry/node
//need to be insert to the _p_full_node.
void split(VNode* _p_full_node, const SigEntry& _insert_entry, VNode* _p_insert_node);
/* create a new node when one node need splitting. */
//create a new node when one node need splitting.
VNode* createNode();
/* swap two nodes' file line, their related nodes(father and children nodes) will also be updated. */
//swap two nodes' file line, their related nodes(father and children nodes) will also be updated.
void swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b);
/* save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc. */
//save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc.
bool saveTreeInfo();
/* load VSTree's information from tree_info_file_path. */
//load VSTree's information from tree_info_file_path.
bool loadTreeInfo();
/* traverse the tree_node_file_path file, load the mapping from entity id to file line. */
//traverse the tree_node_file_path file, load the mapping from entity id to file line.
bool loadEntityID2FileLineMap();
/* update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node. */
//update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node.
void updateEntityID2FileLineMap(VNode* _p_node);
/* get the leaf node pointer by the given _entityID */
//get the leaf node pointer by the given _entityID
VNode* getLeafNodeByEntityID(int _entityID);
/* retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list. */
void retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list);
std::string to_str();
};
#endif // _VSTREE_VSTREE_H
#endif /* VSTREE_H_ */

View File

@ -141,6 +141,37 @@ GstoreConnector::build(string _db_name, string _rdf_file_path)
return false;
}
//Ask the server to drop the database named _db_name.
//Connects, sends the command "drop <_db_name>", reads one reply, disconnects,
//and echoes the reply to stdout.
//Returns false if connecting or sending fails, true otherwise. The reply text
//is not inspected, so true only means the command was sent and a reply was read.
bool
GstoreConnector::drop(string _db_name)
{
	bool connect_return = this->connect();
	if (!connect_return)
	{
		cerr << "connect to server error. @GstoreConnector::drop" << endl;
		return false;
	}

	string cmd = "drop " + _db_name;
	bool send_return = this->socket.send(cmd);
	if (!send_return)
	{
		//NOTE(review): the connection is left open on this path - confirm
		//whether disconnect() should be called before returning.
		cerr << "send drop command error. @GstoreConnector::drop" << endl;
		return false;
	}

	string recv_msg;
	this->socket.recv(recv_msg);
	this->disconnect();

	cout << recv_msg << endl; //debug

	//NOTE(review): the server's success reply for "drop" is not visible here, so
	//recv_msg is not checked; once the reply text is known, compare against it
	//and return false on mismatch.
	return true;
}
string
GstoreConnector::query(string _sparql)
{
@ -168,7 +199,7 @@ GstoreConnector::query(string _sparql)
}
string
GstoreConnector::show()
GstoreConnector::show(bool _type)
{
bool connect_return = this->connect();
if (!connect_return)
@ -177,7 +208,15 @@ GstoreConnector::show()
return "connect to server error.";
}
string cmd = "show databases";
string cmd;
if(_type)
{
cmd = "show all";
}
else
{
cmd = "show databases";
}
bool send_return = this->socket.send(cmd);
if (!send_return)
{

View File

@ -24,8 +24,9 @@ public:
bool load(std::string _db_name);
bool unload(std::string _db_name);
bool build(std::string _db_name, std::string _rdf_file_path);
bool drop(std::string _db_name);
std::string query(std::string _sparql);
std::string show();
std::string show(bool _type=false); //show current or all databases
private:
static std::string defaultServerIP;

View File

@ -1 +0,0 @@
*.pyc

View File

@ -1,27 +0,0 @@
# coding: utf-8
# zhangxiaoyang.hit#gmail.com
# github.com/zhangxiaoyang
import sys
sys.path.append('../src')
from GstoreConnector import GstoreConnector
if __name__ == '__main__':
    # Demo: build a database from the LUBM sample, run one query against it,
    # then unload/load the database and run the same query again.
    connector = GstoreConnector('127.0.0.1', 3305)
    connector.build('LUBM10.db', 'data/LUBM_10.n3')

    lubm_query = '''select ?x where {
?x <rdf:type> <ub:UndergraduateStudent>.
?y <ub:name> <Course1>.
?x <ub:takesCourse> ?y.
?z <ub:teacherOf> ?y.
?z <ub:name> <FullProfessor1>.
?z <ub:worksFor> ?w.
?w <ub:name> <Department0>.
}'''

    # First run, right after the build.
    first_answer = connector.query(lubm_query)
    print(first_answer)

    # Second run, after unloading and loading the database again.
    connector.unload('LUBM10.db')
    connector.load('LUBM10.db')
    second_answer = connector.query(lubm_query)
    print(second_answer)

View File

@ -1,3 +0,0 @@
*
!.gitignore

View File

@ -1,124 +0,0 @@
# coding: utf-8
# zhangxiaoyang.hit#gmail.com
# github.com/zhangxiaoyang
import socket
import traceback
class GstoreConnector:
    """Client for the gStore server's length-prefixed socket protocol.

    Each public command (load / unload / build / query / show) opens a new
    TCP connection, sends one command string, reads one reply and
    disconnects.

    Wire format, as implemented by _pack/_recv: a 4-byte little-endian length
    followed by that many payload bytes; the payload is the text plus one
    trailing NUL byte (the length counts the NUL).
    """

    def _connect(self):
        """Open a TCP connection to (self.ip, self.port). Return True on success."""
        try:
            self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self._sock.connect((self.ip, self.port))
            return True
        except Exception:
            print('socket connection error. @GstoreConnector.connect')
            traceback.print_exc()
            return False

    def _disconnect(self):
        """Close the current socket. Return True on success, False on error."""
        try:
            self._sock.close()
            return True
        except Exception:
            print('socket disconnection error. @GstoreConnector.disconnect')
            traceback.print_exc()
            return False

    def _send(self, msg):
        """Frame msg with _pack and write all of it to the socket."""
        data = self._pack(msg)
        # sendall() keeps writing until the whole frame is out; a plain
        # send() may write only a prefix and silently truncate the command.
        self._sock.sendall(data)
        return True

    def _recv_exact(self, size):
        """Read exactly `size` bytes from the socket and return them as a bytearray.

        Raises socket.error if the peer closes the connection first.
        """
        buf = bytearray()
        while len(buf) < size:
            chunk = self._sock.recv(size - len(buf))
            if not chunk:
                # An empty read means the peer closed the connection; without
                # this check the loop would never terminate.
                raise socket.error(
                    'connection closed after %d of %d bytes' % (len(buf), size))
            buf.extend(chunk)
        return buf

    def _recv(self):
        """Read one framed reply and return its text with trailing NULs stripped."""
        head = self._recv_exact(4)
        context_len = 0
        for i in range(4):
            # little-endian: byte i carries bits [8*i, 8*i + 7]
            context_len |= head[i] << (i * 8)
        data = self._recv_exact(context_len)
        return data.rstrip(b'\x00').decode('utf-8')

    def _pack(self, msg):
        """Return msg framed as: 4-byte little-endian length + bytes + NUL."""
        if not isinstance(msg, bytes):
            msg = msg.encode('utf-8')
        data_context = bytearray(msg)
        context_len = len(data_context) + 1  # +1 for the trailing NUL
        data = bytearray(context_len + 4)    # zero-filled, so the NUL is already in place
        for i in range(4):
            data[i] = (context_len >> (i * 8)) & 0xFF
        data[4:4 + len(data_context)] = data_context
        return data

    def _communicate(f):
        """Decorator that turns an empty method into a server round-trip.

        The command word is derived from the method name ('build' is sent as
        'import', 'show' as 'show databases'); positional arguments are
        appended separated by spaces. The wrapper returns True when the reply
        equals the command's known success message, otherwise the reply text.
        It returns False when the connection cannot be opened.
        """
        def wrapper(self, *args, **kwargs):
            if not self._connect():
                print('connect to server error. @GstoreConnector.%s' % f.__name__)
                return False

            if f.__name__ == 'build':
                cmd = 'import'
            elif f.__name__ == 'show':
                cmd = 'show databases'
            else:
                cmd = f.__name__

            params = ' '.join(str(x) for x in args)
            full_cmd = ' '.join([cmd, params]).strip()

            try:
                if not self._send(full_cmd):
                    print('send %s command error. @GstoreConnector.%s'
                          % (cmd, f.__name__))
                    return False
                recv_msg = self._recv()
            finally:
                # Always release the socket, including when send/recv raises.
                self._disconnect()

            succ = {
                'load': 'load database done.',
                'unload': 'unload database done.',
                'import': 'import RDF file to database done.',
                'query': None,
                'show databases': None,
            }
            if cmd in succ:
                if succ[cmd] == recv_msg:
                    return True
                else:
                    return recv_msg
            return False
        return wrapper

    def __init__(self, ip='127.0.0.1', port=3305):
        self.ip = ip
        self.port = port

    @_communicate
    def load(self, db_name):
        """Send 'load <db_name>'; True on the success reply, else the reply text."""

    @_communicate
    def unload(self, db_name):
        """Send 'unload <db_name>'; True on the success reply, else the reply text."""

    @_communicate
    def build(self, db_name, rdf_file_path):
        """Send 'import <db_name> <rdf_file_path>'; True on success, else the reply text."""

    @_communicate
    def query(self, sparql):
        """Send 'query <sparql>' and return the server's reply text."""

    @_communicate
    def show(self):
        """Send 'show databases' and return the server's reply text."""

4
data/all.sql Normal file
View File

@ -0,0 +1,4 @@
select ?s ?p ?o where
{
?s ?p ?o .
}

5
data/dist.sql Normal file
View File

@ -0,0 +1,5 @@
select ?s1 ?s2 where
{
?s1 <close> <point7> .
?s2 <own> <point7> .
}

5
data/error.sql Normal file
View File

@ -0,0 +1,5 @@
select ?s1 ?s2 ?o where
{
?s1 ?p ?o .
?s2 ?p ?o .
}

33
data/num.nt Normal file
View File

@ -0,0 +1,33 @@
<http://example/alice> <http://xmlns.com/foaf/0.1/givenName> "Alice" .
<http://example/alice> <http://xmlns.com/foaf/0.1/familyName> "Smith" .
<http://example/alice> <http://xmlns.com/foaf/0.1/mbox> <mailto:alice@example.com> .
<http://example/alice> <http://xmlns.com/foaf/0.1/mbox> <mailto:alice@work.example> .
<http://example/alice> <http://xmlns.com/foaf/0.1/salary> "2500"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://example/alice> <http://xmlns.com/foaf/0.1/height> "161.5"^^<http://www.w3.org/2001/XMLSchema#float> .
<http://example/alice> <http://xmlns.com/foaf/0.1/knows> <http://example/david> .
<http://example/alice> <http://xmlns.com/foaf/0.1/knows> <http://example/bob> .
<http://example/bob> <http://xmlns.com/foaf/0.1/givenName> "Bob" .
<http://example/bob> <http://xmlns.com/foaf/0.1/familyName> "Jones" .
<http://example/bob> <http://xmlns.com/foaf/0.1/mbox> <mailto:bob.Jones@example.com> .
<http://example/bob> <http://xmlns.com/foaf/0.1/salary> "5000"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://example/bob> <http://xmlns.com/foaf/0.1/height> "171.0"^^<http://www.w3.org/2001/XMLSchema#float> .
<http://example/bob> <http://xmlns.com/foaf/0.1/knows> <http://example/alice> .
<http://example/bob> <http://xmlns.com/foaf/0.1/knows> <http://example/carol> .
<http://example/carol> <http://xmlns.com/foaf/0.1/givenName> "Carol" .
<http://example/carol> <http://xmlns.com/foaf/0.1/familyName> "Smith" .
<http://example/carol> <http://xmlns.com/foaf/0.1/mbox> <mailto:carol@gmail.com> .
<http://example/carol> <http://xmlns.com/foaf/0.1/salary> "2000"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://example/carol> <http://xmlns.com/foaf/0.1/height> "155.5"^^<http://www.w3.org/2001/XMLSchema#float> .
<http://example/carol> <http://xmlns.com/foaf/0.1/knows> <http://example/bob> .
<http://example/carol> <http://xmlns.com/foaf/0.1/knows> <http://example/david> .
<http://example/david> <http://xmlns.com/foaf/0.1/givenName> "David" .
<http://example/david> <http://xmlns.com/foaf/0.1/familyName> "Williams" .
<http://example/david> <http://xmlns.com/foaf/0.1/mbox> <mailto:david_Williams@microsoft.com> .
<http://example/david> <http://xmlns.com/foaf/0.1/salary> "10000"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://example/david> <http://xmlns.com/foaf/0.1/height> "178.0"^^<http://www.w3.org/2001/XMLSchema#float> .
<http://example/david> <http://xmlns.com/foaf/0.1/knows> <http://example/carol> .
<http://example/david> <http://xmlns.com/foaf/0.1/knows> <http://example/alice> .

12
data/num0.sql Normal file
View File

@ -0,0 +1,12 @@
PREFIX : <http://example/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?nx ?ny ?sx ?sy
{
?x foaf:knows ?y.
?x foaf:givenName ?nx.
?y foaf:givenName ?ny.
?x foaf:salary ?sx.
?y foaf:salary ?sy.
}

13
data/num1.sql Normal file
View File

@ -0,0 +1,13 @@
PREFIX : <http://example/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?nx ?ny ?sx ?sy
{
?x foaf:knows ?y.
?x foaf:givenName ?nx.
?y foaf:givenName ?ny.
?x foaf:salary ?sx.
?y foaf:salary ?sy.
FILTER(?sx < ?sy && abs(?sx - ?sy) < "3000"^^xsd:integer)
}

15
data/num2.sql Normal file
View File

@ -0,0 +1,15 @@
PREFIX : <http://example/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?nx ?ny ?sx ?sy ?hx ?hy
{
?x foaf:knows ?y.
?x foaf:givenName ?nx.
?y foaf:givenName ?ny.
?x foaf:salary ?sx.
?y foaf:salary ?sy.
?x foaf:height ?hx.
?y foaf:height ?hy.
FILTER(?sx > ?sy && (?hx > ?hy || ?hx >= "170.0"^^xsd:float))
}

4
data/num_pre.sql Normal file
View File

@ -0,0 +1,4 @@
select ?p where
{
<http://example/bob> ?p "Bob" .
}

24
data/small.nt Normal file
View File

@ -0,0 +1,24 @@
<root> <name> "Bookug Lobert" .
<root> <contain> <node0> .
<root> <contain> <node1> .
<root> <contain> <node2> .
<root> <contain> <node3> .
<root> <contain> <node4> .
<node1> <own> <point0> .
<node1> <own> <point1> .
<node2> <own> <point2> .
<node2> <own> <point3> .
<node2> <own> <point4> .
<node3> <own> <point5> .
<node4> <own> <point6> .
<node4> <own> <point7> .
<point0> <from> <point1> .
<point1> <to> <point2> .
<point1> <from> <point3> .
<point4> <similar> <point5> .
<point5> <inherit> <point6> .
<point3> <close> <point7> .
<point6> <close> <point7> .
<point7> <name> "sadhjb wdhcbd sdcjhdsjkc1" .
<point1> <name> "sbsb 12.879" .
<point5> <name> "tobe NO1." .

4
data/small_all.sql Normal file
View File

@ -0,0 +1,4 @@
select ?s ?o where
{
?s ?p ?o .
}

4
data/small_q0.sql Normal file
View File

@ -0,0 +1,4 @@
select ?v0 ?v1 where
{
?v0 <contain> ?v1 .
}

4
data/small_q1.sql Normal file
View File

@ -0,0 +1,4 @@
select ?v0 where
{
?v0 <contain> ?v1 .
}

4
data/small_q2.sql Normal file
View File

@ -0,0 +1,4 @@
select ?s where
{
?s ?p "Bookug Lobert" .
}

6
data/small_q3.sql Normal file
View File

@ -0,0 +1,6 @@
select ?s1 ?s2 ?o1 ?o2 where
{
?s1 ?p1 ?o1 .
?s2 ?p1 ?o2 .
?o1 ?p1 ?o2 .
}

6
data/small_q4.sql Normal file
View File

@ -0,0 +1,6 @@
select ?v0 ?v1 ?v2 where
{
?v0 <own> ?v1 .
?v0 <own> ?v2 .
?v1 <close> ?v2 .
}

6
data/special.sql Normal file
View File

@ -0,0 +1,6 @@
select ?s where
{
?s <to> <point2> .
?s <from> <point3> .
?s <name> "sbsb 12.879" .
}

View File

@ -5,7 +5,7 @@
ybar,
ymajorgrids = true,
ylabel = {Query Response Time (in ms)},
xlabel = {Queries},
%xlabel = {Queries},
symbolic x coords = {$Q_{0}$,$Q_{1}$,$Q_{2}$,$Q_{3}$,$Q_{4}$,$Q_{5}$,$Q_{6}$,$Q_{7}$,$Q_{8}$,$Q_{9}$,$Q_{10}$,$Q_{11}$,$Q_{12}$,$Q_{13}$,$Q_{14}$,$Q_{15}$,$Q_{16}$,$Q_{17}$,$Q_{18}$,$Q_{19}$,$Q_{20}$,$Q_{21}$},
bar width=5pt,
enlarge x limits=0.02,

View File

@ -6,7 +6,7 @@
ybar,
ymajorgrids = true,
ylabel = {Query Response Time (in ms)},
xlabel = {Queries},
%xlabel = {Queries},
symbolic x coords = {$Q_{0}$,$Q_{1}$,$Q_{2}$,$Q_{3}$,$Q_{4}$,$Q_{5}$,$Q_{6}$,$Q_{7}$,$Q_{8}$,$Q_{9}$,$Q_{10}$,$Q_{11}$,$Q_{12}$,$Q_{13}$,$Q_{14}$,$Q_{15}$,$Q_{16}$,$Q_{17}$,$Q_{18}$,$Q_{19}$,$Q_{20}$,$Q_{21}$},
bar width=5pt,
enlarge x limits=0.02,

View File

@ -5,7 +5,7 @@
ybar,
ymajorgrids = true,
ylabel = {Query Response Time (in ms)},
xlabel = {Queries},
%xlabel = {Queries},
symbolic x coords = {$C_{1}$,$C_{2}$,$C_{3}$,$F_{1}$,$F_{2}$,$F_{3}$,$L_{1}$,$L_{2}$,$L_{3}$,$S_{1}$,$S_{2}$,$S_{3}$},
bar width=5pt,
enlarge x limits=0.04,

View File

@ -6,7 +6,7 @@
ybar,
ymajorgrids = true,
ylabel = {Query Response Time (in ms)},
xlabel = {Queries},
%xlabel = {Queries},
symbolic x coords = {$C_{1}$,$C_{2}$,$C_{3}$,$F_{1}$,$F_{2}$,$F_{3}$,$L_{1}$,$L_{2}$,$L_{3}$,$S_{1}$,$S_{2}$,$S_{3}$},
bar width=5pt,
enlarge x limits=0.04,

View File

@ -5,7 +5,7 @@
ybar,
ymajorgrids = true,
ylabel = {Query Response Time (in ms)},
xlabel = {Queries},
%xlabel = {Queries},
symbolic x coords = {$C_{1}$,$C_{2}$,$C_{3}$,$F_{1}$,$F_{2}$,$F_{3}$,$L_{1}$,$L_{2}$,$L_{3}$,$S_{1}$,$S_{2}$,$S_{3}$},
bar width=5pt,
enlarge x limits=0.04,

View File

@ -0,0 +1,25 @@
\begin{tikzpicture} [font=\Large]
\begin{semilogyaxis} [
width = 10cm,
height = 9cm ,
ymax=900000,
ybar,
ymajorgrids = true,
ylabel = {Query Response Time (in ms)},
%xlabel = {Queries},
symbolic x coords = {$Q_{1}$,$Q_{2}$,$Q_{3}$,$Q_{4}$,$Q_{5}$,$Q_{6}$,$Q_{7}$,$Q_{8}$},
bar width=5pt,
enlarge x limits=0.10,
scaled y ticks = true,
legend pos= north west,
legend cell align=left
]
\addplot coordinates {($Q_{1}$, 34) ($Q_{2}$, 31) ($Q_{3}$, 49) ($Q_{4}$, 92688) ($Q_{5}$, 3480) ($Q_{6}$, 30020) ($Q_{7}$, 409) ($Q_{8}$, 109) };
\addplot coordinates {($Q_{1}$, 939) ($Q_{2}$, 26888) ($Q_{3}$, 926) ($Q_{4}$, 50256) ($Q_{5}$, 249) ($Q_{6}$, 2061) ($Q_{7}$, 437) ($Q_{8}$, 837756) };
\addplot coordinates {($Q_{1}$, 1122) ($Q_{2}$, 47059) ($Q_{3}$, 146) ($Q_{4}$, 66916) ($Q_{5}$, 105) ($Q_{6}$, 5654) ($Q_{7}$, 1364) ($Q_{8}$, 392) };
\legend{gStore,Jena,Virtuoso,Sesame}
\end{semilogyaxis}
\end{tikzpicture}

View File

@ -6,7 +6,7 @@
ybar,
ymajorgrids = true,
ylabel = {Query Response Time (in ms)},
xlabel = {Queries},
%xlabel = {Queries},
symbolic x coords = {$Q_{1}$,$Q_{2}$,$Q_{3}$,$Q_{4}$,$Q_{5}$,$Q_{6}$,$Q_{7}$,$Q_{8}$,$Q_{9}$,$Q_{10}$},
bar width=5pt,
enlarge x limits=0.07,

View File

@ -1,6 +1,6 @@
\begin{tikzpicture}[font=\large]
\begin{semilogyaxis}[
width = 10cm,
width = 12cm,
height = 8cm,
ybar,
%ymin = 1,
@ -8,8 +8,8 @@
% ytick = {1,10,100,1000,10000,100000,1000000,10000000},
ymajorgrids = true,
ylabel = {Query Response Time (in ms)},
xlabel = {Queries},
symbolic x coords = {$Q_0$,$Q_1$,$Q_2$,$Q_3$,$Q_4$,$Q_5$,$Q_9$},
%xlabel = {Queries},
symbolic x coords = {$Q_0$,$Q_1$,$Q_2$,$Q_3$,$Q_4$,$Q_5$,$Q_9$},
scaled y ticks = true,
legend pos= north west,
legend cell align=left
@ -18,7 +18,9 @@
\addplot coordinates {($Q_0$, 490) ($Q_1$, 165) ($Q_2$, 166) ($Q_3$, 3727) ($Q_4$, 3847) ($Q_5$, 393) ($Q_9$, 1309775)};
\addplot coordinates {($Q_0$, 50) ($Q_1$, 217) ($Q_2$, 210) ($Q_3$, 23797) ($Q_4$, 5536) ($Q_5$, 2736) ($Q_9$, 9515231)};
\legend{gStore,Jena,Jena}
\legend{gStore,Jena,Virtuoso}
\end{semilogyaxis}
\end{tikzpicture}

View File

@ -0,0 +1,120 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\HyPL@Entry{1<</S/D>>}
\@writefile{toc}{\contentsline {section}{\numberline {1}Preface}{4}{section.1}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Environment Setup}{5}{section.2}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Experiment Result}{7}{section.3}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Offline Performance}}{7}{table.1}}
\newlabel{table:loading}{{1}{7}{Offline Performance}{table.1}{}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {BSBM 10000}}}{7}{figure.2}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {BSBM 100000}}}{7}{figure.2}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {LUBM 500}}}{7}{figure.3}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {LUBM 5000}}}{7}{figure.3}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {WatDiv 10M}}}{7}{figure.4}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {WatDiv 100M}}}{7}{figure.4}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {WatDiv 300M}}}{7}{figure.4}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Query Performance over DBpedia 2014}}{7}{figure.1}}
\newlabel{fig:dbpedia2014Performance}{{1}{7}{Query Performance over DBpedia 2014}{figure.1}{}}
\newlabel{fig:Bsbm10000Performance}{{2(a)}{8}{Subfigure 2(a)}{subfigure.2.1}{}}
\newlabel{sub@fig:Bsbm10000Performance}{{(a)}{8}{Subfigure 2(a)\relax }{subfigure.2.1}{}}
\newlabel{fig:BSBM100000Performance}{{2(b)}{8}{Subfigure 2(b)}{subfigure.2.2}{}}
\newlabel{sub@fig:BSBM100000Performance}{{(b)}{8}{Subfigure 2(b)\relax }{subfigure.2.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Query Performance over Bsbm 10000}}{8}{figure.2}}
\newlabel{fig:BSBMPerformance}{{2}{8}{Query Performance over Bsbm 10000}{figure.2}{}}
\newlabel{fig:LUBM500Performance}{{3(a)}{9}{Subfigure 3(a)}{subfigure.3.1}{}}
\newlabel{sub@fig:LUBM500Performance}{{(a)}{9}{Subfigure 3(a)\relax }{subfigure.3.1}{}}
\newlabel{fig:LUBM5000Performance}{{3(b)}{9}{Subfigure 3(b)}{subfigure.3.2}{}}
\newlabel{sub@fig:LUBM5000Performance}{{(b)}{9}{Subfigure 3(b)\relax }{subfigure.3.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Query Performance over LUBM}}{9}{figure.3}}
\newlabel{fig:LUBMPerformance}{{3}{9}{Query Performance over LUBM}{figure.3}{}}
\newlabel{fig:WatDiv10MPerformance}{{4(a)}{10}{Subfigure 4(a)}{subfigure.4.1}{}}
\newlabel{sub@fig:WatDiv10MPerformance}{{(a)}{10}{Subfigure 4(a)\relax }{subfigure.4.1}{}}
\newlabel{fig:WatDiv100MPerformance}{{4(b)}{10}{Subfigure 4(b)}{subfigure.4.2}{}}
\newlabel{sub@fig:WatDiv100MPerformance}{{(b)}{10}{Subfigure 4(b)\relax }{subfigure.4.2}{}}
\newlabel{fig:WatDiv300MPerformance}{{4(c)}{10}{Subfigure 4(c)}{subfigure.4.3}{}}
\newlabel{sub@fig:WatDiv300MPerformance}{{(c)}{10}{Subfigure 4(c)\relax }{subfigure.4.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Query Performance over WatDiv}}{10}{figure.4}}
\newlabel{fig:WatDivPerformance}{{4}{10}{Query Performance over WatDiv}{figure.4}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Comparative Analysis}{11}{section.4}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces WatDiv series}}{11}{table.2}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces LUBM series}}{12}{table.3}}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces BSBM series}}{12}{table.4}}
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces DBpedia series}}{13}{table.5}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion}{14}{section.5}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Prospective}{15}{section.6}}
\@writefile{toc}{\contentsline {section}{\numberline {7}Appendix}{16}{section.7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.1}WatDiv queries}{16}{subsection.7.1}}
\newlabel{watdiv}{{7.1}{16}{WatDiv queries}{subsection.7.1}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.1}C1.sql}{16}{subsubsection.7.1.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.2}C2.sql}{16}{subsubsection.7.1.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.3}C3.sql}{17}{subsubsection.7.1.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.4}F1.sql}{17}{subsubsection.7.1.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.5}F2.sql}{17}{subsubsection.7.1.5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.6}F3.sql}{18}{subsubsection.7.1.6}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.7}L1.sql}{18}{subsubsection.7.1.7}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.8}L2.sql}{18}{subsubsection.7.1.8}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.9}L3.sql}{19}{subsubsection.7.1.9}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.10}S1.sql}{19}{subsubsection.7.1.10}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.11}S2.sql}{20}{subsubsection.7.1.11}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.1.12}S3.sql}{20}{subsubsection.7.1.12}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.2}LUBM queries}{20}{subsection.7.2}}
\newlabel{lubm}{{7.2}{20}{LUBM queries}{subsection.7.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.1}q0.sql}{20}{subsubsection.7.2.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.2}q1.sql}{21}{subsubsection.7.2.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.3}q2.sql}{21}{subsubsection.7.2.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.4}q3.sql}{21}{subsubsection.7.2.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.5}q4.sql}{22}{subsubsection.7.2.5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.6}q5.sql}{22}{subsubsection.7.2.6}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.7}q6.sql}{22}{subsubsection.7.2.7}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.8}q7.sql}{22}{subsubsection.7.2.8}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.9}q8.sql}{23}{subsubsection.7.2.9}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.10}q9.sql}{23}{subsubsection.7.2.10}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.11}q10.sql}{23}{subsubsection.7.2.11}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.12}q11.sql}{24}{subsubsection.7.2.12}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.13}q12.sql}{24}{subsubsection.7.2.13}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.14}q13.sql}{24}{subsubsection.7.2.14}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.15}q14.sql}{25}{subsubsection.7.2.15}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.16}q15.sql}{25}{subsubsection.7.2.16}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.17}q16.sql}{25}{subsubsection.7.2.17}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.18}q17.sql}{26}{subsubsection.7.2.18}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.19}q18.sql}{26}{subsubsection.7.2.19}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.20}q19.sql}{26}{subsubsection.7.2.20}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.21}q20.sql}{26}{subsubsection.7.2.21}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.22}q21.sql}{27}{subsubsection.7.2.22}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.3}BSBM queries}{27}{subsection.7.3}}
\newlabel{bsbm}{{7.3}{27}{BSBM queries}{subsection.7.3}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.1}self0.sql}{27}{subsubsection.7.3.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.2}self1.sql}{27}{subsubsection.7.3.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.3}self2.sql}{27}{subsubsection.7.3.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.4}self3.sql}{28}{subsubsection.7.3.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.5}self4.sql}{28}{subsubsection.7.3.5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.6}self5.sql}{28}{subsubsection.7.3.6}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.7}self6.sql}{29}{subsubsection.7.3.7}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.8}self7.sql}{29}{subsubsection.7.3.8}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.9}self8.sql}{29}{subsubsection.7.3.9}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.3.10}self9.sql}{30}{subsubsection.7.3.10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {7.4}DBpedia queries}{30}{subsection.7.4}}
\newlabel{dbpedia}{{7.4}{30}{DBpedia queries}{subsection.7.4}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.4.1}q0.sql}{30}{subsubsection.7.4.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.4.2}q1.sql}{31}{subsubsection.7.4.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.4.3}q2.sql}{31}{subsubsection.7.4.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.4.4}q3.sql}{31}{subsubsection.7.4.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.4.5}q4.sql}{31}{subsubsection.7.4.5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.4.6}q5.sql}{32}{subsubsection.7.4.6}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {7.4.7}q9.sql}{32}{subsubsection.7.4.7}}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,62 @@
\BOOKMARK [1][-]{section.1}{Preface}{}% 1
\BOOKMARK [1][-]{section.2}{Environment\040Setup}{}% 2
\BOOKMARK [1][-]{section.3}{Experiment\040Result}{}% 3
\BOOKMARK [1][-]{section.4}{Comparative\040Analysis}{}% 4
\BOOKMARK [1][-]{section.5}{Conclusion}{}% 5
\BOOKMARK [1][-]{section.6}{Prospective}{}% 6
\BOOKMARK [1][-]{section.7}{Appendix}{}% 7
\BOOKMARK [2][-]{subsection.7.1}{WatDiv\040queries}{section.7}% 8
\BOOKMARK [3][-]{subsubsection.7.1.1}{C1.sql}{subsection.7.1}% 9
\BOOKMARK [3][-]{subsubsection.7.1.2}{C2.sql}{subsection.7.1}% 10
\BOOKMARK [3][-]{subsubsection.7.1.3}{C3.sql}{subsection.7.1}% 11
\BOOKMARK [3][-]{subsubsection.7.1.4}{F1.sql}{subsection.7.1}% 12
\BOOKMARK [3][-]{subsubsection.7.1.5}{F2.sql}{subsection.7.1}% 13
\BOOKMARK [3][-]{subsubsection.7.1.6}{F3.sql}{subsection.7.1}% 14
\BOOKMARK [3][-]{subsubsection.7.1.7}{L1.sql}{subsection.7.1}% 15
\BOOKMARK [3][-]{subsubsection.7.1.8}{L2.sql}{subsection.7.1}% 16
\BOOKMARK [3][-]{subsubsection.7.1.9}{L3.sql}{subsection.7.1}% 17
\BOOKMARK [3][-]{subsubsection.7.1.10}{S1.sql}{subsection.7.1}% 18
\BOOKMARK [3][-]{subsubsection.7.1.11}{S2.sql}{subsection.7.1}% 19
\BOOKMARK [3][-]{subsubsection.7.1.12}{S3.sql}{subsection.7.1}% 20
\BOOKMARK [2][-]{subsection.7.2}{LUBM\040queries}{section.7}% 21
\BOOKMARK [3][-]{subsubsection.7.2.1}{q0.sql}{subsection.7.2}% 22
\BOOKMARK [3][-]{subsubsection.7.2.2}{q1.sql}{subsection.7.2}% 23
\BOOKMARK [3][-]{subsubsection.7.2.3}{q2.sql}{subsection.7.2}% 24
\BOOKMARK [3][-]{subsubsection.7.2.4}{q3.sql}{subsection.7.2}% 25
\BOOKMARK [3][-]{subsubsection.7.2.5}{q4.sql}{subsection.7.2}% 26
\BOOKMARK [3][-]{subsubsection.7.2.6}{q5.sql}{subsection.7.2}% 27
\BOOKMARK [3][-]{subsubsection.7.2.7}{q6.sql}{subsection.7.2}% 28
\BOOKMARK [3][-]{subsubsection.7.2.8}{q7.sql}{subsection.7.2}% 29
\BOOKMARK [3][-]{subsubsection.7.2.9}{q8.sql}{subsection.7.2}% 30
\BOOKMARK [3][-]{subsubsection.7.2.10}{q9.sql}{subsection.7.2}% 31
\BOOKMARK [3][-]{subsubsection.7.2.11}{q10.sql}{subsection.7.2}% 32
\BOOKMARK [3][-]{subsubsection.7.2.12}{q11.sql}{subsection.7.2}% 33
\BOOKMARK [3][-]{subsubsection.7.2.13}{q12.sql}{subsection.7.2}% 34
\BOOKMARK [3][-]{subsubsection.7.2.14}{q13.sql}{subsection.7.2}% 35
\BOOKMARK [3][-]{subsubsection.7.2.15}{q14.sql}{subsection.7.2}% 36
\BOOKMARK [3][-]{subsubsection.7.2.16}{q15.sql}{subsection.7.2}% 37
\BOOKMARK [3][-]{subsubsection.7.2.17}{q16.sql}{subsection.7.2}% 38
\BOOKMARK [3][-]{subsubsection.7.2.18}{q17.sql}{subsection.7.2}% 39
\BOOKMARK [3][-]{subsubsection.7.2.19}{q18.sql}{subsection.7.2}% 40
\BOOKMARK [3][-]{subsubsection.7.2.20}{q19.sql}{subsection.7.2}% 41
\BOOKMARK [3][-]{subsubsection.7.2.21}{q20.sql}{subsection.7.2}% 42
\BOOKMARK [3][-]{subsubsection.7.2.22}{q21.sql}{subsection.7.2}% 43
\BOOKMARK [2][-]{subsection.7.3}{BSBM\040queries}{section.7}% 44
\BOOKMARK [3][-]{subsubsection.7.3.1}{self0.sql}{subsection.7.3}% 45
\BOOKMARK [3][-]{subsubsection.7.3.2}{self1.sql}{subsection.7.3}% 46
\BOOKMARK [3][-]{subsubsection.7.3.3}{self2.sql}{subsection.7.3}% 47
\BOOKMARK [3][-]{subsubsection.7.3.4}{self3.sql}{subsection.7.3}% 48
\BOOKMARK [3][-]{subsubsection.7.3.5}{self4.sql}{subsection.7.3}% 49
\BOOKMARK [3][-]{subsubsection.7.3.6}{self5.sql}{subsection.7.3}% 50
\BOOKMARK [3][-]{subsubsection.7.3.7}{self6.sql}{subsection.7.3}% 51
\BOOKMARK [3][-]{subsubsection.7.3.8}{self7.sql}{subsection.7.3}% 52
\BOOKMARK [3][-]{subsubsection.7.3.9}{self8.sql}{subsection.7.3}% 53
\BOOKMARK [3][-]{subsubsection.7.3.10}{self9.sql}{subsection.7.3}% 54
\BOOKMARK [2][-]{subsection.7.4}{DBpedia\040queries}{section.7}% 55
\BOOKMARK [3][-]{subsubsection.7.4.1}{q0.sql}{subsection.7.4}% 56
\BOOKMARK [3][-]{subsubsection.7.4.2}{q1.sql}{subsection.7.4}% 57
\BOOKMARK [3][-]{subsubsection.7.4.3}{q2.sql}{subsection.7.4}% 58
\BOOKMARK [3][-]{subsubsection.7.4.4}{q3.sql}{subsection.7.4}% 59
\BOOKMARK [3][-]{subsubsection.7.4.5}{q4.sql}{subsection.7.4}% 60
\BOOKMARK [3][-]{subsubsection.7.4.6}{q5.sql}{subsection.7.4}% 61
\BOOKMARK [3][-]{subsubsection.7.4.7}{q9.sql}{subsection.7.4}% 62

Binary file not shown.

Binary file not shown.

View File

@ -168,7 +168,7 @@ for different systems.
\begin{threeparttable}
\begin{tabular}{|c||c|c|c||c|c|c|}
\hline
& \multicolumn{3}{c||}{Index Size(KB)}& \multicolumn{3}{c|}{Loading Time(second)}\\
& \multicolumn{3}{c||}{Index Size(KB)}& \multicolumn{3}{c|}{Loading Time(ms)}\\
\hline
\hline
Datasets & gStore & Jena& Virtuoso& gStore & Jena& Virtuoso\\
@ -177,6 +177,8 @@ for different systems.
\hline
Bsbm 10000 & 1,814,480 & 718,024 & 2,080,000 & 244,153 & 76,000 & 59,999 \\
\hline
Bsbm 100000 & 12,369,232 & 7,007,988 & 4,390,000 & 2,259,036 & 681,000 & 507,647 \\
\hline
LUBM 500 &2,171,084 &1,022,528 & 38,000,000 & 291,382& 94,000 &100,532 \\
\hline
LUBM 5000 & 23,397,548& 10,262,524 & - & 3,767,764 &1,098,000 & - \\
@ -190,14 +192,14 @@ for different systems.
\end{tabular}
\begin{tablenotes}
\small
\item[$1$] ``-'' means that loading does not terminate in 10 hour
\item[$1$] ``-'' means that loading does not terminate within 20 hours
\end{tablenotes}
\end{threeparttable}
\caption{Offline Performance}
\label{table:loading}
\end{table}
The performance of different database management systems is shown in Figures \ref{fig:dbpedia2014Performance}, \ref{fig:BsbmPerformance}, \ref{fig:LUBMPerformance} and \ref{fig:WatDivPerformance}.
The performance of different database management systems is shown in Figures \ref{fig:dbpedia2014Performance}, \ref{fig:BSBMPerformance}, \ref{fig:LUBMPerformance} and \ref{fig:WatDivPerformance}.
\begin{figure}[b]%
\resizebox{0.48\columnwidth}{!}{
@ -208,11 +210,21 @@ The performance of different database management systems is shown in Figures \re
\end{figure}
\begin{figure}%
\resizebox{0.8\columnwidth}{!}{
\input{bsbm_comparison}
\subfigure[BSBM 10000]{%
\resizebox{0.8\columnwidth}{!}{
\input{bsbm10000_comparison}
}
\label{fig:Bsbm10000Performance}%
}
\caption{Query Performance over Bsbm 10000}%
\label{fig:BsbmPerformance}
\\
\subfigure[BSBM 100000]{%
\resizebox{0.8\columnwidth}{!}{
\input{bsbm100000_comparison}
}
\label{fig:BSBM100000Performance}%
}
\caption{Query Performance over Bsbm}%
\label{fig:BSBMPerformance}
\end{figure}
%\clearpage

View File

@ -0,0 +1,62 @@
\contentsline {section}{\numberline {1}Preface}{4}{section.1}
\contentsline {section}{\numberline {2}Environment Setup}{5}{section.2}
\contentsline {section}{\numberline {3}Experiment Result}{7}{section.3}
\contentsline {section}{\numberline {4}Comparative Analysis}{11}{section.4}
\contentsline {section}{\numberline {5}Conclusion}{14}{section.5}
\contentsline {section}{\numberline {6}Prospective}{15}{section.6}
\contentsline {section}{\numberline {7}Appendix}{16}{section.7}
\contentsline {subsection}{\numberline {7.1}WatDiv queries}{16}{subsection.7.1}
\contentsline {subsubsection}{\numberline {7.1.1}C1.sql}{16}{subsubsection.7.1.1}
\contentsline {subsubsection}{\numberline {7.1.2}C2.sql}{16}{subsubsection.7.1.2}
\contentsline {subsubsection}{\numberline {7.1.3}C3.sql}{17}{subsubsection.7.1.3}
\contentsline {subsubsection}{\numberline {7.1.4}F1.sql}{17}{subsubsection.7.1.4}
\contentsline {subsubsection}{\numberline {7.1.5}F2.sql}{17}{subsubsection.7.1.5}
\contentsline {subsubsection}{\numberline {7.1.6}F3.sql}{18}{subsubsection.7.1.6}
\contentsline {subsubsection}{\numberline {7.1.7}L1.sql}{18}{subsubsection.7.1.7}
\contentsline {subsubsection}{\numberline {7.1.8}L2.sql}{18}{subsubsection.7.1.8}
\contentsline {subsubsection}{\numberline {7.1.9}L3.sql}{19}{subsubsection.7.1.9}
\contentsline {subsubsection}{\numberline {7.1.10}S1.sql}{19}{subsubsection.7.1.10}
\contentsline {subsubsection}{\numberline {7.1.11}S2.sql}{20}{subsubsection.7.1.11}
\contentsline {subsubsection}{\numberline {7.1.12}S3.sql}{20}{subsubsection.7.1.12}
\contentsline {subsection}{\numberline {7.2}LUBM queries}{20}{subsection.7.2}
\contentsline {subsubsection}{\numberline {7.2.1}q0.sql}{20}{subsubsection.7.2.1}
\contentsline {subsubsection}{\numberline {7.2.2}q1.sql}{21}{subsubsection.7.2.2}
\contentsline {subsubsection}{\numberline {7.2.3}q2.sql}{21}{subsubsection.7.2.3}
\contentsline {subsubsection}{\numberline {7.2.4}q3.sql}{21}{subsubsection.7.2.4}
\contentsline {subsubsection}{\numberline {7.2.5}q4.sql}{22}{subsubsection.7.2.5}
\contentsline {subsubsection}{\numberline {7.2.6}q5.sql}{22}{subsubsection.7.2.6}
\contentsline {subsubsection}{\numberline {7.2.7}q6.sql}{22}{subsubsection.7.2.7}
\contentsline {subsubsection}{\numberline {7.2.8}q7.sql}{22}{subsubsection.7.2.8}
\contentsline {subsubsection}{\numberline {7.2.9}q8.sql}{23}{subsubsection.7.2.9}
\contentsline {subsubsection}{\numberline {7.2.10}q9.sql}{23}{subsubsection.7.2.10}
\contentsline {subsubsection}{\numberline {7.2.11}q10.sql}{23}{subsubsection.7.2.11}
\contentsline {subsubsection}{\numberline {7.2.12}q11.sql}{24}{subsubsection.7.2.12}
\contentsline {subsubsection}{\numberline {7.2.13}q12.sql}{24}{subsubsection.7.2.13}
\contentsline {subsubsection}{\numberline {7.2.14}q13.sql}{24}{subsubsection.7.2.14}
\contentsline {subsubsection}{\numberline {7.2.15}q14.sql}{25}{subsubsection.7.2.15}
\contentsline {subsubsection}{\numberline {7.2.16}q15.sql}{25}{subsubsection.7.2.16}
\contentsline {subsubsection}{\numberline {7.2.17}q16.sql}{25}{subsubsection.7.2.17}
\contentsline {subsubsection}{\numberline {7.2.18}q17.sql}{26}{subsubsection.7.2.18}
\contentsline {subsubsection}{\numberline {7.2.19}q18.sql}{26}{subsubsection.7.2.19}
\contentsline {subsubsection}{\numberline {7.2.20}q19.sql}{26}{subsubsection.7.2.20}
\contentsline {subsubsection}{\numberline {7.2.21}q20.sql}{26}{subsubsection.7.2.21}
\contentsline {subsubsection}{\numberline {7.2.22}q21.sql}{27}{subsubsection.7.2.22}
\contentsline {subsection}{\numberline {7.3}BSBM queries}{27}{subsection.7.3}
\contentsline {subsubsection}{\numberline {7.3.1}self0.sql}{27}{subsubsection.7.3.1}
\contentsline {subsubsection}{\numberline {7.3.2}self1.sql}{27}{subsubsection.7.3.2}
\contentsline {subsubsection}{\numberline {7.3.3}self2.sql}{27}{subsubsection.7.3.3}
\contentsline {subsubsection}{\numberline {7.3.4}self3.sql}{28}{subsubsection.7.3.4}
\contentsline {subsubsection}{\numberline {7.3.5}self4.sql}{28}{subsubsection.7.3.5}
\contentsline {subsubsection}{\numberline {7.3.6}self5.sql}{28}{subsubsection.7.3.6}
\contentsline {subsubsection}{\numberline {7.3.7}self6.sql}{29}{subsubsection.7.3.7}
\contentsline {subsubsection}{\numberline {7.3.8}self7.sql}{29}{subsubsection.7.3.8}
\contentsline {subsubsection}{\numberline {7.3.9}self8.sql}{29}{subsubsection.7.3.9}
\contentsline {subsubsection}{\numberline {7.3.10}self9.sql}{30}{subsubsection.7.3.10}
\contentsline {subsection}{\numberline {7.4}DBpedia queries}{30}{subsection.7.4}
\contentsline {subsubsection}{\numberline {7.4.1}q0.sql}{30}{subsubsection.7.4.1}
\contentsline {subsubsection}{\numberline {7.4.2}q1.sql}{31}{subsubsection.7.4.2}
\contentsline {subsubsection}{\numberline {7.4.3}q2.sql}{31}{subsubsection.7.4.3}
\contentsline {subsubsection}{\numberline {7.4.4}q3.sql}{31}{subsubsection.7.4.4}
\contentsline {subsubsection}{\numberline {7.4.5}q4.sql}{31}{subsubsection.7.4.5}
\contentsline {subsubsection}{\numberline {7.4.6}q5.sql}{32}{subsubsection.7.4.6}
\contentsline {subsubsection}{\numberline {7.4.7}q9.sql}{32}{subsubsection.7.4.7}

188
docs/latex/gStore_help.aux Normal file
View File

@ -0,0 +1,188 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\HyPL@Entry{1<</S/D>>}
\@writefile{toc}{\contentsline {section}{Preface}{5}{section*.2}}
\@writefile{toc}{\contentsline {part}{I\hspace {1em}Start}{7}{part.1}}
\@writefile{toc}{\contentsline {subsection}{Chapter 00: A Quick Tour}{7}{section*.3}}
\newlabel{chapter00}{{I}{7}{Chapter 00: A Quick Tour}{section*.3}{}}
\@writefile{toc}{\contentsline {subsubsection}{Getting Started}{7}{section*.4}}
\newlabel{getting-started}{{I}{7}{Getting Started}{section*.4}{}}
\@writefile{toc}{\contentsline {subsubsection}{Advanced Help}{8}{section*.5}}
\newlabel{advanced-help}{{I}{8}{Advanced Help}{section*.5}{}}
\@writefile{toc}{\contentsline {subsubsection}{Other Business}{8}{section*.6}}
\newlabel{other-business}{{I}{8}{Other Business}{section*.6}{}}
\gdef \LT@i {\LT@entry
{2}{93.61201pt}\LT@entry
{2}{223.72803pt}}
\gdef \FBLTpage@i {\gdef\flrow@LTlastpage{1}}
\@writefile{toc}{\contentsline {subsection}{Chapter 01: System Requirements}{10}{section*.7}}
\newlabel{chapter01}{{I}{10}{Chapter 01: System Requirements}{section*.7}{}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{software requirement}}{10}{table.1}}
\@writefile{toc}{\contentsline {subsection}{Chapter 02: Basic Introduction}{12}{section*.8}}
\newlabel{chapter02}{{I}{12}{Chapter 02: Basic Introduction}{section*.8}{}}
\@writefile{toc}{\contentsline {subsubsection}{What Is gStore}{12}{section*.9}}
\newlabel{what-is-gstore}{{I}{12}{What Is gStore}{section*.9}{}}
\@writefile{toc}{\contentsline {subsubsection}{Why gStore}{12}{section*.10}}
\newlabel{why-gstore}{{I}{12}{Why gStore}{section*.10}{}}
\@writefile{toc}{\contentsline {subsubsection}{Open Source}{13}{section*.11}}
\newlabel{open-source}{{I}{13}{Open Source}{section*.11}{}}
\@writefile{toc}{\contentsline {subsection}{Chapter 03: Install Guide}{14}{section*.12}}
\newlabel{chapter03}{{I}{14}{Chapter 03: Install Guide}{section*.12}{}}
\@writefile{toc}{\contentsline {subsection}{Chapter 04: How To Use}{15}{section*.13}}
\newlabel{chapter04}{{I}{15}{Chapter 04: How To Use}{section*.13}{}}
\newlabel{0-gconsole}{{I}{15}{0. gconsole}{section*.14}{}}
\@writefile{toc}{\contentsline {paragraph}{0. gconsole}{15}{section*.14}}
\newlabel{1-gload}{{I}{18}{1. gload}{section*.15}{}}
\@writefile{toc}{\contentsline {paragraph}{1. gload}{18}{section*.15}}
\newlabel{2-gquery}{{I}{18}{2. gquery}{section*.16}{}}
\@writefile{toc}{\contentsline {paragraph}{2. gquery}{18}{section*.16}}
\newlabel{3-gserver}{{I}{20}{3. gserver}{section*.17}{}}
\@writefile{toc}{\contentsline {paragraph}{3. gserver}{20}{section*.17}}
\newlabel{4-gclient}{{I}{21}{4. gclient}{section*.18}{}}
\@writefile{toc}{\contentsline {paragraph}{4. gclient}{21}{section*.18}}
\newlabel{5-test-utilities}{{I}{22}{5. test utilities}{section*.19}{}}
\@writefile{toc}{\contentsline {paragraph}{5. test utilities}{22}{section*.19}}
\@writefile{toc}{\contentsline {part}{II\hspace {1em}Advanced}{24}{part.2}}
\@writefile{toc}{\contentsline {subsection}{Chapter 05: API Explanation}{24}{section*.20}}
\newlabel{chapter05}{{II}{24}{Chapter 05: API Explanation}{section*.20}{}}
\@writefile{toc}{\contentsline {subsubsection}{Easy Examples}{24}{section*.21}}
\newlabel{easy-examples}{{II}{24}{Easy Examples}{section*.21}{}}
\@writefile{toc}{\contentsline {subsubsection}{API structure}{25}{section*.22}}
\newlabel{api-structure}{{II}{25}{API structure}{section*.22}{}}
\@writefile{toc}{\contentsline {subsubsection}{C++ API}{26}{section*.23}}
\newlabel{c-api}{{II}{26}{C++ API}{section*.23}{}}
\newlabel{interface}{{II}{26}{Interface}{section*.24}{}}
\@writefile{toc}{\contentsline {paragraph}{Interface}{26}{section*.24}}
\newlabel{compile}{{II}{27}{Compile}{section*.25}{}}
\@writefile{toc}{\contentsline {paragraph}{Compile}{27}{section*.25}}
\@writefile{toc}{\contentsline {subsubsection}{Java API}{28}{section*.26}}
\newlabel{java-api}{{II}{28}{Java API}{section*.26}{}}
\newlabel{interface-1}{{II}{28}{Interface}{section*.27}{}}
\@writefile{toc}{\contentsline {paragraph}{Interface}{28}{section*.27}}
\newlabel{compile-1}{{II}{30}{Compile}{section*.28}{}}
\@writefile{toc}{\contentsline {paragraph}{Compile}{30}{section*.28}}
\@writefile{toc}{\contentsline {subsection}{Chapter 06: Project Structure}{31}{section*.29}}
\newlabel{chapter06}{{II}{31}{Chapter 06: Project Structure}{section*.29}{}}
\newlabel{the-core-source-codes}{{II}{31}{The core source codes are listed below:}{section*.30}{}}
\@writefile{toc}{\contentsline {paragraph}{The core source codes are listed below:}{31}{section*.30}}
\newlabel{the-parser-part}{{II}{33}{The parser part is listed below:}{section*.31}{}}
\@writefile{toc}{\contentsline {paragraph}{The parser part is listed below:}{33}{section*.31}}
\newlabel{the-utilities}{{II}{34}{The utilities are listed below:}{section*.32}{}}
\@writefile{toc}{\contentsline {paragraph}{The utilities are listed below:}{34}{section*.32}}
\newlabel{the-interface-part}{{II}{34}{The interface part is listed below:}{section*.33}{}}
\@writefile{toc}{\contentsline {paragraph}{The interface part is listed below:}{34}{section*.33}}
\newlabel{more-details}{{II}{35}{More details}{section*.34}{}}
\@writefile{toc}{\contentsline {paragraph}{More details}{35}{section*.34}}
\newlabel{others}{{II}{36}{Others}{section*.35}{}}
\@writefile{toc}{\contentsline {paragraph}{Others}{36}{section*.35}}
\@writefile{toc}{\contentsline {subsection}{Chapter 07: Publications}{38}{section*.36}}
\newlabel{chapter07}{{II}{38}{Chapter 07: Publications}{section*.36}{}}
\newlabel{publications-related-with-gstore-are-listed-here}{{II}{38}{Publications related with gStore are listed here:}{section*.37}{}}
\@writefile{toc}{\contentsline {paragraph}{Publications related with gStore are listed here:}{38}{section*.37}}
\@writefile{toc}{\contentsline {subsection}{Chapter 08: Limitations}{39}{section*.38}}
\newlabel{chapter08}{{II}{39}{Chapter 08: Limitations}{section*.38}{}}
\@writefile{toc}{\contentsline {subsection}{Chapter 09: Frequently Asked Questions}{40}{section*.39}}
\newlabel{chapter09}{{II}{40}{Chapter 09: Frequently Asked Questions}{section*.39}{}}
\newlabel{when-i-use-the-newer-gstore-system-to-query-the-original-database-why-error}{{II}{40}{When I use the newer gStore system to query the original database, why error?}{section*.40}{}}
\@writefile{toc}{\contentsline {paragraph}{When I use the newer gStore system to query the original database, why error?}{40}{section*.40}}
\newlabel{why-error-when-i-try-to-write-programs-based-on-gstore-just-like-the-maingconsolecpp}{{II}{40}{Why error when I try to write programs based on gStore, just like the Main/gconsole.cpp?}{section*.41}{}}
\@writefile{toc}{\contentsline {paragraph}{Why error when I try to write programs based on gStore, just like the Main/gconsole.cpp?}{40}{section*.41}}
\newlabel{why-does-gstore-report-garbage-collection-failed-error-when-i-use-teh-java-api}{{II}{40}{\texorpdfstring {Why does gStore report ``garbage collection failed'' error when I use the Java API?}{Why does gStore report garbage collection failed error when I use teh Java API?}}{section*.42}{}}
\@writefile{toc}{\contentsline {paragraph}{Why does gStore report ``garbage collection failed'' error when I use the Java API?}{40}{section*.42}}
\newlabel{when-i-compile-the-code-in-archlinux-why-the-error-that-no-ltermcap-is-reported}{{II}{40}{\texorpdfstring {When I compile the code in ArchLinux, why the error that ``no -ltermcap'' is reported?}{When I compile the code in ArchLinux, why the error that no -ltermcap is reported?}}{section*.43}{}}
\@writefile{toc}{\contentsline {paragraph}{When I compile the code in ArchLinux, why the error that ``no -ltermcap'' is reported?}{40}{section*.43}}
\newlabel{why-does-gstore-report-errors-that-the-format-of-some-rdf-datasets-are-not-supported}{{II}{40}{Why does gStore report errors that the format of some RDF datasets are not supported?}{section*.44}{}}
\@writefile{toc}{\contentsline {paragraph}{Why does gStore report errors that the format of some RDF datasets are not supported?}{40}{section*.44}}
\newlabel{when-i-read-on-github-why-are-some-documents-unable-to-be-opened}{{II}{41}{When I read on GitHub, why are some documents unable to be opened?}{section*.45}{}}
\@writefile{toc}{\contentsline {paragraph}{When I read on GitHub, why are some documents unable to be opened?}{41}{section*.45}}
\newlabel{why-sometimes-strange-characters-appear-when-i-use-gstore}{{II}{41}{Why sometimes strange characters appear when I use gStore?}{section*.46}{}}
\@writefile{toc}{\contentsline {paragraph}{Why sometimes strange characters appear when I use gStore?}{41}{section*.46}}
\newlabel{in-centos7-if-the-watdivdba-generated-database-after-gload-is-copied-or-compresseduncompressed-the-size-of-watdivdb-will-be-differentgenerally-increasing-if-using-du-h-command-to-check}{{II}{41}{\texorpdfstring {In centos7, if the watdiv.db(a generated database after gload) is copied or compressed/uncompressed, the size of watdiv.db will be different(generally increasing) if using \texttt {du\ -h} command to check?}{In centos7, if the watdiv.db(a generated database after gload) is copied or compressed/uncompressed, the size of watdiv.db will be different(generally increasing) if using du -h command to check?}}{section*.47}{}}
\@writefile{toc}{\contentsline {paragraph}{In centos7, if the watdiv.db(a generated database after gload) is copied or compressed/uncompressed, the size of watdiv.db will be different(generally increasing) if using \texttt {du\ -h} command to check?}{41}{section*.47}}
\newlabel{in-gclient-console-a-database-is-built-queried-and-then-i-quit-the-console-next-time-i-enter-the-console-load-the-originally-imported-database-but-no-output-for-any-queriesoriginally-the-output-is-not-empty}{{II}{42}{In gclient console, a database is built, queried, and then I quit the console. Next time I enter the console, load the originally imported database, but no output for any queries(originally the output is not empty)?}{section*.48}{}}
\@writefile{toc}{\contentsline {paragraph}{In gclient console, a database is built, queried, and then I quit the console. Next time I enter the console, load the originally imported database, but no output for any queries(originally the output is not empty)?}{42}{section*.48}}
\newlabel{if-query-results-contain-null-value-how-can-i-use-the-fulltest-utility-tab-separated-method-will-cause-problem-here-because-null-value-cannot-be-checked}{{II}{42}{\texorpdfstring {If query results contain null value, how can I use the \href {run:../../test/full_test.sh}{full\_test} utility? Tab separated method will cause problem here because null value cannot be checked!}{If query results contain null value, how can I use the full\_test utility? Tab separated method will cause problem here because null value cannot be checked!}}{section*.49}{}}
\@writefile{toc}{\contentsline {paragraph}{If query results contain null value, how can I use the \href {run:../../test/full_test.sh}{full\_test} utility? Tab separated method will cause problem here because null value cannot be checked!}{42}{section*.49}}
\newlabel{when-i-compile-and-run-the-api-examples-it-reports-the-unable-to-connect-to-server-error}{{II}{42}{\texorpdfstring {When I compile and run the API examples, it reports the ``unable to connect to server'' error?}{When I compile and run the API examples, it reports the unable to connect to server error?}}{section*.50}{}}
\@writefile{toc}{\contentsline {paragraph}{When I compile and run the API examples, it reports the ``unable to connect to server'' error?}{42}{section*.50}}
\newlabel{when-i-use-the-java-api-to-write-my-own-program-it-reports-not-found-main-class-error}{{II}{42}{\texorpdfstring {When I use the Java API to write my own program, it reports ``not found main class'' error?}{When I use the Java API to write my own program, it reports not found main class error?}}{section*.51}{}}
\@writefile{toc}{\contentsline {paragraph}{When I use the Java API to write my own program, it reports ``not found main class'' error?}{42}{section*.51}}
\@writefile{toc}{\contentsline {subsection}{Chapter 10: Recipe Book}{43}{section*.52}}
\newlabel{chapter10}{{II}{43}{Chapter 10: Recipe Book}{section*.52}{}}
\@writefile{toc}{\contentsline {part}{III\hspace {1em}Others}{44}{part.3}}
\@writefile{toc}{\contentsline {subsection}{Chapter 11: Contributors}{44}{section*.53}}
\newlabel{chapter11}{{III}{44}{Chapter 11: Contributors}{section*.53}{}}
\newlabel{faculty}{{III}{44}{Faculty}{section*.54}{}}
\@writefile{toc}{\contentsline {paragraph}{Faculty}{44}{section*.54}}
\newlabel{students}{{III}{44}{Students}{section*.55}{}}
\@writefile{toc}{\contentsline {paragraph}{Students}{44}{section*.55}}
\newlabel{alumni}{{III}{45}{Alumni}{section*.56}{}}
\@writefile{toc}{\contentsline {paragraph}{Alumni}{45}{section*.56}}
\@writefile{toc}{\contentsline {subsection}{Chapter 12: Updated Logs}{46}{section*.57}}
\newlabel{chapter12}{{III}{46}{Chapter 12: Updated Logs}{section*.57}{}}
\@writefile{toc}{\contentsline {subsubsection}{Apr 01, 2016}{46}{section*.58}}
\newlabel{apr-01-2016}{{III}{46}{Apr 01, 2016}{section*.58}{}}
\@writefile{toc}{\contentsline {subsubsection}{Nov 06, 2015}{46}{section*.59}}
\newlabel{nov-06-2015}{{III}{46}{Nov 06, 2015}{section*.59}{}}
\@writefile{toc}{\contentsline {subsubsection}{Oct 20, 2015}{47}{section*.60}}
\newlabel{oct-20-2015}{{III}{47}{Oct 20, 2015}{section*.60}{}}
\@writefile{toc}{\contentsline {subsubsection}{Sep 25, 2015}{47}{section*.61}}
\newlabel{sep-25-2015}{{III}{47}{Sep 25, 2015}{section*.61}{}}
\@writefile{toc}{\contentsline {subsubsection}{Feb 2, 2015}{47}{section*.62}}
\newlabel{feb-2-2015}{{III}{47}{Feb 2, 2015}{section*.62}{}}
\@writefile{toc}{\contentsline {subsubsection}{Dec 11, 2014}{48}{section*.63}}
\newlabel{dec-11-2014}{{III}{48}{Dec 11, 2014}{section*.63}{}}
\@writefile{toc}{\contentsline {subsubsection}{Nov 20, 2014}{48}{section*.64}}
\newlabel{nov-20-2014}{{III}{48}{Nov 20, 2014}{section*.64}{}}
\@writefile{toc}{\contentsline {subsection}{Chapter 13: Test Result}{49}{section*.65}}
\newlabel{chapter13}{{III}{49}{Chapter 13: Test Result}{section*.65}{}}
\@writefile{toc}{\contentsline {subsubsection}{Preparation}{49}{section*.66}}
\newlabel{preparation}{{III}{49}{Preparation}{section*.66}{}}
\@writefile{toc}{\contentsline {subsubsection}{Result}{49}{section*.67}}
\newlabel{result}{{III}{49}{Result}{section*.67}{}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {LUBM 5000}}}{50}{figure.3}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {WatDiv 300M}}}{50}{figure.4}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Datasets}}{50}{table.2}}
\newlabel{table:datasets}{{2}{50}{Datasets}{table.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Query Performance over DBpedia 2014}}{51}{figure.1}}
\newlabel{fig:dbpedia2014Performance}{{1}{51}{Query Performance over DBpedia 2014}{figure.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Query Performance over Bsbm 10000}}{51}{figure.2}}
\newlabel{fig:Bsbm10000Performance}{{2}{51}{Query Performance over Bsbm 10000}{figure.2}{}}
\newlabel{fig:LUBM5000Performance}{{3(a)}{51}{Subfigure 3(a)}{subfigure.3.1}{}}
\newlabel{sub@fig:LUBM5000Performance}{{(a)}{51}{Subfigure 3(a)\relax }{subfigure.3.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Query Performance over LUBM}}{51}{figure.3}}
\newlabel{fig:LUBMPerformance}{{3}{51}{Query Performance over LUBM}{figure.3}{}}
\newlabel{fig:WatDiv300MPerformance}{{4(a)}{52}{Subfigure 4(a)}{subfigure.4.1}{}}
\newlabel{sub@fig:WatDiv300MPerformance}{{(a)}{52}{Subfigure 4(a)\relax }{subfigure.4.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Query Performance over WatDiv}}{52}{figure.4}}
\newlabel{fig:WatDivPerformance}{{4}{52}{Query Performance over WatDiv}{figure.4}{}}
\@writefile{toc}{\contentsline {subsection}{Chapter 14: Future Plan}{53}{section*.68}}
\newlabel{chapter14}{{III}{53}{Chapter 14: Future Plan}{section*.68}{}}
\@writefile{toc}{\contentsline {subsubsection}{Improve The Core}{53}{section*.69}}
\newlabel{improve-the-core}{{III}{53}{Improve The Core}{section*.69}{}}
\@writefile{toc}{\contentsline {subsubsection}{Better The Interface}{53}{section*.70}}
\newlabel{better-the-interface}{{III}{53}{Better The Interface}{section*.70}{}}
\@writefile{toc}{\contentsline {subsubsection}{Idea Collection Box}{53}{section*.71}}
\newlabel{idea-collection-box}{{III}{53}{Idea Collection Box}{section*.71}{}}
\@writefile{toc}{\contentsline {subsection}{Chapter 15: Thanks List}{55}{section*.72}}
\newlabel{chapter15}{{III}{55}{Chapter 15: Thanks List}{section*.72}{}}
\@writefile{toc}{\contentsline {paragraph}{GitHub user zhangxiaoyang \\ https://github.com/zhangxiaoyang \\ 1. add python api \\ 2. fix logger message}{55}{section*.73}}
\@writefile{toc}{\contentsline {subsection}{Chapter 16: Legal Issues}{56}{section*.74}}
\newlabel{chapter16}{{III}{56}{Chapter 16: Legal Issues}{section*.74}{}}
\@writefile{toc}{\contentsline {section}{End}{58}{section*.75}}

2203
docs/latex/gStore_help.log Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,77 @@
\BOOKMARK [1][-]{section*.2}{Preface}{}% 1
\BOOKMARK [0][-]{part.1}{I\040Start}{}% 2
\BOOKMARK [1][-]{section*.3}{Chapter\04000:\040A\040Quick\040Tour}{part.1}% 3
\BOOKMARK [2][-]{section*.4}{Getting\040Started}{section*.3}% 4
\BOOKMARK [3][-]{section*.5}{Advanced\040Help}{section*.4}% 5
\BOOKMARK [3][-]{section*.6}{Other\040Business}{section*.4}% 6
\BOOKMARK [2][-]{section*.7}{Chapter\04001:\040System\040Requirements}{section*.3}% 7
\BOOKMARK [2][-]{section*.8}{Chapter\04002:\040Basic\040Introduction}{section*.3}% 8
\BOOKMARK [3][-]{section*.9}{What\040Is\040gStore}{section*.8}% 9
\BOOKMARK [3][-]{section*.10}{Why\040gStore}{section*.8}% 10
\BOOKMARK [3][-]{section*.11}{Open\040Source}{section*.8}% 11
\BOOKMARK [2][-]{section*.12}{Chapter\04003:\040Install\040Guide}{section*.3}% 12
\BOOKMARK [2][-]{section*.13}{Chapter\04004:\040How\040To\040Use}{section*.3}% 13
\BOOKMARK [3][-]{section*.14}{0.\040gconsole}{section*.13}% 14
\BOOKMARK [4][-]{section*.15}{1.\040gload}{section*.14}% 15
\BOOKMARK [4][-]{section*.16}{2.\040gquery}{section*.14}% 16
\BOOKMARK [4][-]{section*.17}{3.\040gserver}{section*.14}% 17
\BOOKMARK [4][-]{section*.18}{4.\040gclient}{section*.14}% 18
\BOOKMARK [4][-]{section*.19}{5.\040test\040utilities}{section*.14}% 19
\BOOKMARK [0][-]{part.2}{II\040Advanced}{}% 20
\BOOKMARK [1][-]{section*.20}{Chapter\04005:\040API\040Explanation}{part.2}% 21
\BOOKMARK [2][-]{section*.21}{Easy\040Examples}{section*.20}% 22
\BOOKMARK [3][-]{section*.22}{API\040structure}{section*.21}% 23
\BOOKMARK [3][-]{section*.23}{C++\040API}{section*.21}% 24
\BOOKMARK [4][-]{section*.24}{Interface}{section*.23}% 25
\BOOKMARK [4][-]{section*.25}{Compile}{section*.23}% 26
\BOOKMARK [3][-]{section*.26}{Java\040API}{section*.21}% 27
\BOOKMARK [4][-]{section*.27}{Interface}{section*.26}% 28
\BOOKMARK [4][-]{section*.28}{Compile}{section*.26}% 29
\BOOKMARK [2][-]{section*.29}{Chapter\04006:\040Project\040Structure}{section*.20}% 30
\BOOKMARK [3][-]{section*.30}{The\040core\040source\040codes\040are\040listed\040below:}{section*.29}% 31
\BOOKMARK [4][-]{section*.31}{The\040parser\040part\040is\040listed\040below:}{section*.30}% 32
\BOOKMARK [4][-]{section*.32}{The\040utilities\040are\040listed\040below:}{section*.30}% 33
\BOOKMARK [4][-]{section*.33}{The\040interface\040part\040is\040listed\040below:}{section*.30}% 34
\BOOKMARK [4][-]{section*.34}{More\040details}{section*.30}% 35
\BOOKMARK [4][-]{section*.35}{Others}{section*.30}% 36
\BOOKMARK [2][-]{section*.36}{Chapter\04007:\040Publications}{section*.20}% 37
\BOOKMARK [3][-]{section*.37}{Publications\040related\040with\040gStore\040are\040listed\040here:}{section*.36}% 38
\BOOKMARK [2][-]{section*.38}{Chapter\04008:\040Limitations}{section*.20}% 39
\BOOKMARK [2][-]{section*.39}{Chapter\04009:\040Frequently\040Asked\040Questions}{section*.20}% 40
\BOOKMARK [3][-]{section*.40}{When\040I\040use\040the\040newer\040gStore\040system\040to\040query\040the\040original\040database,\040why\040error?}{section*.39}% 41
\BOOKMARK [4][-]{section*.41}{Why\040error\040when\040I\040try\040to\040write\040programs\040based\040on\040gStore,\040just\040like\040the\040Main/gconsole.cpp?}{section*.40}% 42
\BOOKMARK [4][-]{section*.42}{Why\040does\040gStore\040report\040garbage\040collection\040failed\040error\040when\040I\040use\040teh\040Java\040API?}{section*.40}% 43
\BOOKMARK [4][-]{section*.43}{When\040I\040compile\040the\040code\040in\040ArchLinux,\040why\040the\040error\040that\040no\040-ltermcap\040is\040reported?}{section*.40}% 44
\BOOKMARK [4][-]{section*.44}{Why\040does\040gStore\040report\040errors\040that\040the\040format\040of\040some\040RDF\040datasets\040are\040not\040supported?}{section*.40}% 45
\BOOKMARK [4][-]{section*.45}{When\040I\040read\040on\040GitHub,\040why\040are\040some\040documents\040unable\040to\040be\040opened?}{section*.40}% 46
\BOOKMARK [4][-]{section*.46}{Why\040sometimes\040strange\040characters\040appear\040when\040I\040use\040gStore?}{section*.40}% 47
\BOOKMARK [4][-]{section*.47}{In\040centos7,\040if\040the\040watdiv.db\(a\040generated\040database\040after\040gload\)\040is\040copied\040or\040compressed/uncompressed,\040the\040size\040of\040watdiv.db\040will\040be\040different\(generally\040increasing\)\040if\040using\040du\040-h\040command\040to\040check?}{section*.40}% 48
\BOOKMARK [4][-]{section*.48}{In\040gclient\040console,\040a\040database\040is\040built,\040queried,\040and\040then\040I\040quit\040the\040console.\040Next\040time\040I\040enter\040the\040console,\040load\040the\040originally\040imported\040database,\040but\040no\040output\040for\040any\040queries\(originally\040the\040output\040is\040not\040empty\)?}{section*.40}% 49
\BOOKMARK [4][-]{section*.49}{If\040query\040results\040contain\040null\040value,\040how\040can\040I\040use\040the\040full_test\040utility?\040Tab\040separated\040method\040will\040cause\040problem\040here\040because\040null\040value\040cannot\040be\040checked!}{section*.40}% 50
\BOOKMARK [4][-]{section*.50}{When\040I\040compile\040and\040run\040the\040API\040examples,\040it\040reports\040the\040unable\040to\040connect\040to\040server\040error?}{section*.40}% 51
\BOOKMARK [4][-]{section*.51}{When\040I\040use\040the\040Java\040API\040to\040write\040my\040own\040program,\040it\040reports\040not\040found\040main\040class\040error?}{section*.40}% 52
\BOOKMARK [2][-]{section*.52}{Chapter\04010:\040Recipe\040Book}{section*.20}% 53
\BOOKMARK [0][-]{part.3}{III\040Others}{}% 54
\BOOKMARK [1][-]{section*.53}{Chapter\04011:\040Contributors}{part.3}% 55
\BOOKMARK [2][-]{section*.54}{Faculty}{section*.53}% 56
\BOOKMARK [3][-]{section*.55}{Students}{section*.54}% 57
\BOOKMARK [4][-]{section*.56}{Alumni}{section*.55}% 58
\BOOKMARK [2][-]{section*.57}{Chapter\04012:\040Updated\040Logs}{section*.53}% 59
\BOOKMARK [3][-]{section*.58}{Apr\04001,\0402016}{section*.57}% 60
\BOOKMARK [3][-]{section*.59}{Nov\04006,\0402015}{section*.57}% 61
\BOOKMARK [3][-]{section*.60}{Oct\04020,\0402015}{section*.57}% 62
\BOOKMARK [3][-]{section*.61}{Sep\04025,\0402015}{section*.57}% 63
\BOOKMARK [3][-]{section*.62}{Feb\0402,\0402015}{section*.57}% 64
\BOOKMARK [3][-]{section*.63}{Dec\04011,\0402014}{section*.57}% 65
\BOOKMARK [3][-]{section*.64}{Nov\04020,\0402014}{section*.57}% 66
\BOOKMARK [2][-]{section*.65}{Chapter\04013:\040Test\040Result}{section*.53}% 67
\BOOKMARK [3][-]{section*.66}{Preparation}{section*.65}% 68
\BOOKMARK [3][-]{section*.67}{Result}{section*.65}% 69
\BOOKMARK [2][-]{section*.68}{Chapter\04014:\040Future\040Plan}{section*.53}% 70
\BOOKMARK [3][-]{section*.69}{Improve\040The\040Core}{section*.68}% 71
\BOOKMARK [3][-]{section*.70}{Better\040The\040Interface}{section*.68}% 72
\BOOKMARK [3][-]{section*.71}{Idea\040Collection\040Box}{section*.68}% 73
\BOOKMARK [2][-]{section*.72}{Chapter\04015:\040Thanks\040List}{section*.53}% 74
\BOOKMARK [3][-]{section*.73}{GitHub\040user\040zhangxiaoyang\040\040https://github.com/zhangxiaoyang\040\0401.\040add\040python\040api\040\0402.\040fix\040logger\040message}{section*.72}% 75
\BOOKMARK [2][-]{section*.74}{Chapter\04016:\040Legal\040Issues}{section*.53}% 76
\BOOKMARK [1][-]{section*.75}{End}{part.3}% 77

Binary file not shown.

Binary file not shown.

View File

@ -1,5 +1,6 @@
\documentclass[titlepage, a4paper, 12pt]{article}
%\usepackage{ctex}
\usepackage{lmodern}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e}
@ -115,7 +116,7 @@
\begin{document}
\title{\includegraphics[scale=0.3, bb=0 0 385 567]{../logo.png} \\
The handbook of gStore System}
The handbook of gStore System}
%\author{Bookug Lobert\footnote{EECS of Peking University, zengli-bookug@pku.edu.cn}\\[2ex]}
\author{Edited by gStore team \footnote{The mailing list is given in Chapter 11.}}
\date{\today}
@ -153,7 +154,6 @@ matching semantics of SPARQL. \textbf{Here, we implement a graph-based RDF tripl
\part{Start}
\hyperdef{}{chapter00}{\subsection{Chapter 00: A Quick Tour}\label{chapter00}}
gStore System (also called gStore) is a graph database engine for managing large graph-structured data, which is open-source and targets Linux operating systems. The whole project is written in C++, with the help of some libraries such as readline, antlr, and so on. Only source tarballs are provided currently, which means you have to compile the source code if you want to use our system.
\hyperdef{}{getting-started}{\subsubsection{Getting
@ -1347,7 +1347,7 @@ I use the newer gStore system to query the original database, why
error?}\label{when-i-use-the-newer-gstore-system-to-query-the-original-database-why-error}}
\quad\\
The database produced by gStore contains several indexes, whose
structures may have been chnaged in the new gStore version. So, please
structures may have been changed in the new gStore version. So, please
rebuild your dataset just in case.
\hyperdef{}{why-error-when-i-try-to-write-programs-based-on-gstore-just-like-the-maingconsolecpp}{\paragraph{Why
@ -1611,7 +1611,7 @@ system think that the debugging log is still open)
We implement a new version of the B+Tree, which replaces the old one.
After testing on DBpedia, LUBM, and WatDiv benchmark, we conclude that
the new BTree performs more efficient than\\ the old version. For the
the new BTree performs more efficiently than the old version. For the
same triple file, the new version takes less time to execute the gload
command.
@ -1718,7 +1718,7 @@ for different systems.
\begin{threeparttable}
\begin{tabular}{|c||c|c|c||c|c|c|}
\hline
& \multicolumn{3}{c||}{Index Size(KB)}& \multicolumn{3}{c|}{Loading Time(second)}\\
& \multicolumn{3}{c||}{Index Size(KB)}& \multicolumn{3}{c|}{Loading Time(ms)}\\
\hline
\hline
Datasets & gStore & Jena& Virtuoso& gStore & Jena& Virtuoso\\
@ -1748,7 +1748,7 @@ for different systems.
\end{table}
\end{comment}
The performance of different database management systems is shown in Figures \ref{fig:dbpedia2014Performance}, \ref{fig:BsbmPerformance}, \ref{fig:LUBMPerformance} and \ref{fig:WatDivPerformance}.
The performance of different database management systems is shown in Figures \ref{fig:dbpedia2014Performance}, \ref{fig:Bsbm10000Performance}, \ref{fig:LUBMPerformance} and \ref{fig:WatDivPerformance}.
Notice that Sesame and Virtuoso are unable to operate on DBpedia 2014 and
WatDiv 300M, because the size is too large. In addition, we do not use
@ -1765,10 +1765,10 @@ Generally speaking, Virtuoso is not scalable, and Sesame is so weak. \\
\begin{figure}%
\resizebox{0.8\columnwidth}{!}{
\input{bsbm_comparison}
\input{bsbm10000_comparison}
}
\caption{Query Performance over Bsbm 10000}%
\label{fig:BsbmPerformance}
\label{fig:Bsbm10000Performance}
\end{figure}
\begin{figure}[h]%
@ -1906,7 +1906,10 @@ Box}\label{idea-collection-box}}
\textit{This chapter lists people who inspire us or contribute to this project.}
\emph{nobody now}
\paragraph{GitHub user zhangxiaoyang \\
https://github.com/zhangxiaoyang \\
1. add python api \\
2. fix logger message}
%\begin{center}\rule{0.5\linewidth}{\linethickness}\end{center}
\clearpage

View File

@ -0,0 +1,77 @@
\contentsline {section}{Preface}{5}{section*.2}
\contentsline {part}{I\hspace {1em}Start}{7}{part.1}
\contentsline {subsection}{Chapter 00: A Quick Tour}{7}{section*.3}
\contentsline {subsubsection}{Getting Started}{7}{section*.4}
\contentsline {subsubsection}{Advanced Help}{8}{section*.5}
\contentsline {subsubsection}{Other Business}{8}{section*.6}
\contentsline {subsection}{Chapter 01: System Requirements}{10}{section*.7}
\contentsline {subsection}{Chapter 02: Basic Introduction}{12}{section*.8}
\contentsline {subsubsection}{What Is gStore}{12}{section*.9}
\contentsline {subsubsection}{Why gStore}{12}{section*.10}
\contentsline {subsubsection}{Open Source}{13}{section*.11}
\contentsline {subsection}{Chapter 03: Install Guide}{14}{section*.12}
\contentsline {subsection}{Chapter 04: How To Use}{15}{section*.13}
\contentsline {paragraph}{0. gconsole}{15}{section*.14}
\contentsline {paragraph}{1. gload}{18}{section*.15}
\contentsline {paragraph}{2. gquery}{18}{section*.16}
\contentsline {paragraph}{3. gserver}{20}{section*.17}
\contentsline {paragraph}{4. gclient}{21}{section*.18}
\contentsline {paragraph}{5. test utilities}{22}{section*.19}
\contentsline {part}{II\hspace {1em}Advanced}{24}{part.2}
\contentsline {subsection}{Chapter 05: API Explanation}{24}{section*.20}
\contentsline {subsubsection}{Easy Examples}{24}{section*.21}
\contentsline {subsubsection}{API structure}{25}{section*.22}
\contentsline {subsubsection}{C++ API}{26}{section*.23}
\contentsline {paragraph}{Interface}{26}{section*.24}
\contentsline {paragraph}{Compile}{27}{section*.25}
\contentsline {subsubsection}{Java API}{28}{section*.26}
\contentsline {paragraph}{Interface}{28}{section*.27}
\contentsline {paragraph}{Compile}{30}{section*.28}
\contentsline {subsection}{Chapter 06: Project Structure}{31}{section*.29}
\contentsline {paragraph}{The core source codes are listed below:}{31}{section*.30}
\contentsline {paragraph}{The parser part is listed below:}{33}{section*.31}
\contentsline {paragraph}{The utilities are listed below:}{34}{section*.32}
\contentsline {paragraph}{The interface part is listed below:}{34}{section*.33}
\contentsline {paragraph}{More details}{35}{section*.34}
\contentsline {paragraph}{Others}{36}{section*.35}
\contentsline {subsection}{Chapter 07: Publications}{38}{section*.36}
\contentsline {paragraph}{Publications related with gStore are listed here:}{38}{section*.37}
\contentsline {subsection}{Chapter 08: Limitations}{39}{section*.38}
\contentsline {subsection}{Chapter 09: Frequently Asked Questions}{40}{section*.39}
\contentsline {paragraph}{When I use the newer gStore system to query the original database, why error?}{40}{section*.40}
\contentsline {paragraph}{Why error when I try to write programs based on gStore, just like the Main/gconsole.cpp?}{40}{section*.41}
\contentsline {paragraph}{Why does gStore report ``garbage collection failed'' error when I use the Java API?}{40}{section*.42}
\contentsline {paragraph}{When I compile the code in ArchLinux, why the error that ``no -ltermcap'' is reported?}{40}{section*.43}
\contentsline {paragraph}{Why does gStore report errors that the format of some RDF datasets are not supported?}{40}{section*.44}
\contentsline {paragraph}{When I read on GitHub, why are some documents unable to be opened?}{41}{section*.45}
\contentsline {paragraph}{Why sometimes strange characters appear when I use gStore?}{41}{section*.46}
\contentsline {paragraph}{In centos7, if the watdiv.db(a generated database after gload) is copied or compressed/uncompressed, the size of watdiv.db will be different(generally increasing) if using \texttt {du\ -h} command to check?}{41}{section*.47}
\contentsline {paragraph}{In gclient console, a database is built, queried, and then I quit the console. Next time I enter the console, load the originally imported database, but no output for any queries(originally the output is not empty)?}{42}{section*.48}
\contentsline {paragraph}{If query results contain null value, how can I use the \href {run:../../test/full_test.sh}{full\_test} utility? Tab separated method will cause problem here because null value cannot be checked!}{42}{section*.49}
\contentsline {paragraph}{When I compile and run the API examples, it reports the ``unable to connect to server'' error?}{42}{section*.50}
\contentsline {paragraph}{When I use the Java API to write my own program, it reports ``not found main class'' error?}{42}{section*.51}
\contentsline {subsection}{Chapter 10: Recipe Book}{43}{section*.52}
\contentsline {part}{III\hspace {1em}Others}{44}{part.3}
\contentsline {subsection}{Chapter 11: Contributors}{44}{section*.53}
\contentsline {paragraph}{Faculty}{44}{section*.54}
\contentsline {paragraph}{Students}{44}{section*.55}
\contentsline {paragraph}{Alumni}{45}{section*.56}
\contentsline {subsection}{Chapter 12: Updated Logs}{46}{section*.57}
\contentsline {subsubsection}{Apr 01, 2016}{46}{section*.58}
\contentsline {subsubsection}{Nov 06, 2015}{46}{section*.59}
\contentsline {subsubsection}{Oct 20, 2015}{47}{section*.60}
\contentsline {subsubsection}{Sep 25, 2015}{47}{section*.61}
\contentsline {subsubsection}{Feb 2, 2015}{47}{section*.62}
\contentsline {subsubsection}{Dec 11, 2014}{48}{section*.63}
\contentsline {subsubsection}{Nov 20, 2014}{48}{section*.64}
\contentsline {subsection}{Chapter 13: Test Result}{49}{section*.65}
\contentsline {subsubsection}{Preparation}{49}{section*.66}
\contentsline {subsubsection}{Result}{49}{section*.67}
\contentsline {subsection}{Chapter 14: Future Plan}{53}{section*.68}
\contentsline {subsubsection}{Improve The Core}{53}{section*.69}
\contentsline {subsubsection}{Better The Interface}{53}{section*.70}
\contentsline {subsubsection}{Idea Collection Box}{53}{section*.71}
\contentsline {subsection}{Chapter 15: Thanks List}{55}{section*.72}
\contentsline {paragraph}{GitHub user zhangxiaoyang \\ https://github.com/zhangxiaoyang \\ 1. add python api \\ 2. fix logger message}{55}{section*.73}
\contentsline {subsection}{Chapter 16: Legal Issues}{56}{section*.74}
\contentsline {section}{End}{58}{section*.75}

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

17
init.conf Normal file
View File

@ -0,0 +1,17 @@
# NOTICE: this file configures the gStore system, please remember to edit it before using gStore
# NOTICE: each line should not exceed 500 characters
[setting]
# Basic settings for gStore system to run correctly as you want
# MUST: choose whether to run gStore on a single machine (mode = single) or as distributed gStore (mode = distribute)
mode = single
[option]
#BETTER:the position is the root of Gstore by default
#(or change to a specified folder later)
#DBpath = .

Some files were not shown because too many files have changed in this diff Show More