move the oldest gStore to the backup branch
This commit is contained in:
parent
1fce0907bb
commit
7bd3d5145c
|
@ -1,3 +0,0 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Bstr.cpp
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#include "Bstr.h"
|
||||
|
||||
/*
 * Build a Bstr holding a private copy of the first _len bytes of _str.
 *
 * _str: source bytes; may be NULL, in which case the result is empty.
 * _len: number of bytes to copy; a negative value is treated as 0.
 *
 * The buffer is always allocated with one extra byte and NUL-terminated,
 * so this->str is never NULL after construction.
 */
Bstr::Bstr(const char* _str, const int _len)
{
    // A negative length would make str[_len] write before the buffer, and a
    // NULL source must never reach memcpy: fall back to the empty string.
    const int safe_len = (_str != NULL && _len > 0) ? _len : 0;

    this->len = safe_len;
    this->str = new char[safe_len + 1];
    if(safe_len > 0)
    {
        memcpy(this->str, _str, sizeof(char) * safe_len);
    }
    this->str[safe_len] = '\0';
}
|
||||
bool Bstr::operator > (const Bstr& _b_str)
|
||||
{
|
||||
|
||||
return true;
|
||||
}
|
||||
bool Bstr::operator < (const Bstr& _b_str)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
bool Bstr::operator == (const Bstr& _b_str)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
/*
 * Placeholder for loading this Bstr from _fp.
 * Nothing is read yet: the stream is left untouched and true is returned.
 */
bool Bstr::read(FILE* _fp)
{
    (void)_fp;  // not used until the real implementation exists
    return true;
}
|
||||
/*
 * Placeholder for storing this Bstr to _fp.
 * Nothing is written yet: the stream is left untouched and 0 is returned.
 */
int Bstr::write(FILE* _fp)
{
    (void)_fp;  // not used until the real implementation exists
    return 0;
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Bstr.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef BSTR_H_
|
||||
#define BSTR_H_
|
||||
#include<iostream>
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<string.h>
|
||||
using namespace std;
|
||||
/*
 * Bstr: a length-prefixed byte string that owns its buffer.
 *
 * The constructor allocates the buffer with new[]; this class therefore
 * releases it in the destructor and performs deep copies, so that two
 * Bstr objects never share (and never double-free) the same buffer.
 */
class Bstr{
private:
    char* str;  /* owned buffer, allocated with new[] */
    int len;    /* number of payload bytes in str */

    /* Replace str/len with a private copy of _other's bytes.
     * The old buffer is NOT freed here; callers decide when to release it. */
    void copy_from(const Bstr& _other)
    {
        const int n = (_other.str != NULL && _other.len > 0) ? _other.len : 0;
        char* copy = new char[n + 1];   /* if this throws, *this is untouched */
        if(n > 0)
        {
            memcpy(copy, _other.str, sizeof(char) * n);
        }
        copy[n] = '\0';
        this->str = copy;
        this->len = n;
    }

public:
    /* copy the first _len bytes of _str */
    Bstr(const char* _str, const int _len);

    /* deep copy */
    Bstr(const Bstr& _other) : str(NULL), len(0)
    {
        this->copy_from(_other);
    }

    /* deep copy assignment; safe for self-assignment */
    Bstr& operator = (const Bstr& _other)
    {
        if(this != &_other)
        {
            char* old = this->str;
            this->copy_from(_other);
            delete[] old;
        }
        return *this;
    }

    /* release the owned buffer */
    ~Bstr()
    {
        delete[] this->str;
    }

    bool operator > (const Bstr& _b_str);
    bool operator < (const Bstr& _b_str);
    bool operator == (const Bstr& _b_str);

    /* load from / store to an already opened stream */
    bool read(FILE* _fp);
    int write(FILE* _fp);
};
|
||||
|
||||
|
||||
#endif /* BSTR_H_ */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,57 +1,73 @@
|
|||
/*=============================================================================
|
||||
# Filename: Database.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-23 14:20
|
||||
# Description: originally written by liyouhuan, modified by zengli and chenjiaqi
|
||||
=============================================================================*/
|
||||
/*
|
||||
* database.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef _DATABASE_DATABASE_H
|
||||
#define _DATABASE_DATABASE_H
|
||||
#ifndef DATABASE_H_
|
||||
#define DATABASE_H_
|
||||
#include<iostream>
|
||||
#include<string.h>
|
||||
using namespace std;
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Util/Triple.h"
|
||||
#include "Join.h"
|
||||
#include "../Query/IDList.h"
|
||||
#include "../Query/ResultSet.h"
|
||||
#include "../Query/SPARQLquery.h"
|
||||
#include "../Query/BasicQuery.h"
|
||||
#include "../Triple/Triple.h"
|
||||
#include "../Signature/SigEntry.h"
|
||||
#include "../KVstore/KVstore.h"
|
||||
#include "../VSTree/VSTree.h"
|
||||
#include "../Parser/DBparser.h"
|
||||
#include "../Parser/RDFParser.h"
|
||||
#include "../Parser/SparqlParser.h"
|
||||
#include "../Query/GeneralEvaluation.h"
|
||||
#include "../util/util.h"
|
||||
#include<stdio.h>
|
||||
#include<sys/time.h>
|
||||
#include "fstream"
|
||||
|
||||
|
||||
class Database{
|
||||
|
||||
class Database
|
||||
{
|
||||
public:
|
||||
|
||||
static const bool debug_1 = false;
|
||||
static const bool debug_2 = false;
|
||||
static const bool only_sub2idpre2id = true;
|
||||
static const bool debug_vstree = false;
|
||||
static const int internal = 100*1000;
|
||||
static FILE* fp_debug;
|
||||
static void log(std::string _str);
|
||||
void test();
|
||||
void test_build_sig();
|
||||
void test_join();
|
||||
void printIDlist(int _i, int* _list, int _len, std::string _log);
|
||||
void printPairList(int _i, int* _list, int _len, std::string _log);
|
||||
|
||||
//when encode EntitySig, one way uses STRING-hash, the other one uses ID-hash
|
||||
//depending on this->encode_mode
|
||||
static const int TRIPLE_NUM_MAX = 1000*1000*1000;
|
||||
|
||||
/* In order to differentiate the sub-part and literal-part of object
|
||||
* let subid begin with 0, while literalid begins with LITERAL_FIRST_ID */
|
||||
static const int LITERAL_FIRST_ID = 1000*1000*1000;
|
||||
|
||||
/* when encode EntitySig, one way uses STRING-hash, the other one uses ID-hash
|
||||
* depending on this->encode_mode */
|
||||
static const int STRING_MODE = 1;
|
||||
static const int ID_MODE = 2;
|
||||
Database();
|
||||
Database(std::string _name);
|
||||
void release(FILE* fp0);
|
||||
~Database();
|
||||
|
||||
bool load();
|
||||
bool unload();
|
||||
bool query(const string _query, ResultSet& _result_set, FILE* _fp = stdout);
|
||||
bool query(const string _query, ResultSet& _result_set);
|
||||
|
||||
//1. if subject of _triple doesn't exist,
|
||||
//then assign a new subid, and insert a new SigEntry
|
||||
//2. assign new tuple_id to tuple, if predicate or object doesn't exist before too;
|
||||
//3. if subject exist, update SigEntry, and update spo, ops... etc. if needed
|
||||
/*
|
||||
* 1. if subject of _triple doesn't exist,
|
||||
* then assign a new subid, and insert a new SigEntry
|
||||
* 2. assign new tuple_id to tuple, if predicate or object doesn't exist before too;
|
||||
* 3. if subject exist, update SigEntry, and update spo, ops... etc. if needed
|
||||
* 4.
|
||||
* */
|
||||
|
||||
bool insert(const string& _insert_rdf_file);
|
||||
bool remove(const string& _rdf_file);
|
||||
|
@ -81,14 +97,13 @@ private:
|
|||
|
||||
VSTree* vstree;
|
||||
KVstore* kvstore;
|
||||
Join* join;
|
||||
|
||||
//metadata of this database: sub_num, pre_num, obj_num, literal_num, etc.
|
||||
/* metadata of this database: sub_num, pre_num, obj_num, literal_num, etc. */
|
||||
string db_info_file;
|
||||
|
||||
//six tuples: <sub pre obj sid pid oid>
|
||||
/* six tuples: <sub pre obj sid pid oid> */
|
||||
string six_tuples_file;
|
||||
//B means binary
|
||||
/* B means binary */
|
||||
string signature_binary_file;
|
||||
|
||||
bool saveDBInfoFile();
|
||||
|
@ -96,28 +111,30 @@ private:
|
|||
|
||||
string getStorePath();
|
||||
|
||||
//encode relative signature data of all Basic Graph Query, who union together into SPARQLquery
|
||||
/* encode relative signature data of all Basic Graph Query, who union together into SPARQLquery */
|
||||
void buildSparqlSignature(SPARQLquery & _sparql_q);
|
||||
|
||||
//encode Triple into Subject EntityBitSet
|
||||
/* encode Triple into Subject EntityBitSet */
|
||||
bool encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
|
||||
//encode Triple into Object EntityBitSet
|
||||
/* encode Triple into Object EntityBitSet */
|
||||
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
|
||||
|
||||
bool calculateEntityBitSet(int _sub_id, EntityBitSet & _bitset);
|
||||
|
||||
//check whether the relative 3-tuples exist
|
||||
//usually, through sp2olist
|
||||
/* check whether the relative 3-tuples exist
|
||||
* usually, through sp2olist */
|
||||
bool exist_triple(int _sub_id, int _pre_id, int _obj_id);
|
||||
|
||||
//* _rdf_file denotes the path of the RDF file, where stores the rdf data
|
||||
//* there are many steps in this function, each one corresponds to a sub-function
|
||||
//* 1. map sub2id and pre2id
|
||||
//* 2. map literal2id and encode RDF data into signature,
|
||||
//* storing in binary file: this->getSignatureBFile(), the order responds to subID
|
||||
//* also, store six_tuples in file: this->getSixTuplesFile()
|
||||
//* 3. build: subID2objIDlist, <subIDpreID>2objIDlist subID2<preIDobjID>list
|
||||
//* 4. build: objID2subIDlist, <objIDpreID>2subIDlist objID2<preIDsubID>list
|
||||
/*
|
||||
* _rdf_file denotes the path of the RDF file, where stores the rdf data
|
||||
* there are many steps in this function, each one corresponds to a sub-function
|
||||
* 1. map sub2id and pre2id
|
||||
* 2. map literal2id and encode RDF data into signature,
|
||||
* storing in binary file: this->getSignatureBFile(), the order responds to subID
|
||||
* also, store six_tuples in file: this->getSixTuplesFile()
|
||||
* 3. build: subID2objIDlist, <subIDpreID>2objIDlist subID2<preIDobjID>list
|
||||
* 4. build: objID2subIDlist, <objIDpreID>2subIDlist objID2<preIDsubID>list
|
||||
* */
|
||||
//encodeRDF_new invoke new rdfParser to solve task 1 & 2 in one time scan.
|
||||
bool encodeRDF(const string _rdf_file);
|
||||
bool encodeRDF_new(const string _rdf_file);
|
||||
|
@ -129,34 +146,43 @@ private:
|
|||
bool sub2id_pre2id(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max);
|
||||
bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max);
|
||||
|
||||
bool s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
|
||||
bool o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max);
|
||||
//NOTICE: below is the new one
|
||||
//bool s2p_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
|
||||
bool s2p_s2o_s2po_sp2o(int** _p_id_tuples, int _id_tuples_max);
|
||||
//bool s2p_s2o_s2po_sp2o_sp2n(int** _p_id_tuples, int _id_tuples_max);
|
||||
bool o2p_o2s_o2ps_op2s(int** _p_id_tuples, int _id_tuples_max);
|
||||
//bool o2p_o2s_o2ps_op2s_op2n(int** _p_id_tuples, int _id_tuples_max);
|
||||
bool p2s_p2o_p2so(int** _p_id_tuples, int _id_tuples_max);
|
||||
//bool p2s_p2o_p2so_p2n(int** _p_id_tuples, int _id_tuples_max);
|
||||
bool so2p_s2o(int** _p_id_tuples, int _id_tuples_max);
|
||||
|
||||
bool s2o_sp2o_s2po(int** _p_id_tuples, int _id_tuples_max);
|
||||
bool o2s_op2s_o2ps(int** _p_id_tuples, int _id_tuples_max);
|
||||
static int _spo_cmp(const void* _a, const void* _b);
|
||||
static int _ops_cmp(const void* _a, const void* _b);
|
||||
static int _pso_cmp(const void* _a, const void* _b);
|
||||
static int _sop_cmp(const void* _a, const void* _b);
|
||||
bool objIDIsEntityID(int _id);
|
||||
/*
|
||||
* join on the vector of CandidateList, available after retrieve from the VSTREE
|
||||
* and store the result in _result_set
|
||||
* */
|
||||
bool join(SPARQLquery& _sparql_query);
|
||||
|
||||
//* join on the vector of CandidateList, available after retrieve from the VSTREE
|
||||
//* and store the result in _result_set
|
||||
void filter_before_join(BasicQuery* basic_query);
|
||||
void literal_edge_filter(BasicQuery* basic_query, int _var_i);
|
||||
void preid_filter(BasicQuery* basic_query, int _var_i);
|
||||
void only_pre_filter_after_join(BasicQuery* basic_query);
|
||||
void add_literal_candidate(BasicQuery* basic_query);
|
||||
bool join_basic(BasicQuery* _basic_query);
|
||||
bool join(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type,
|
||||
int _var_num, bool shouldAddLiteral, IDList& _can_list);
|
||||
|
||||
//bool join(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type, int _var_num, bool shouldAddLiteral, IDList& _can_list);
|
||||
bool select(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type, int _var_num);
|
||||
|
||||
//bool select(vector<int*>& _result_list, int _var_id, int _pre_id, int _var_id2, const char _edge_type, int _var_num);
|
||||
void mapVarVec(vector<string> &a, vector<string> &b, vector<int> &mapvar, int &total_vars);
|
||||
void mergeJoin(SPARQLquery::TempResult &a, SPARQLquery::TempResult &b, SPARQLquery::TempResult &r);
|
||||
void mergeUnion(SPARQLquery::TempResult &a, SPARQLquery::TempResult &b, SPARQLquery::TempResult &r);
|
||||
void mergeLeftOuterJoin(SPARQLquery::TempResult &a, SPARQLquery::TempResult &b, SPARQLquery::TempResult &r);
|
||||
void doFilter(SPARQLquery::TempResult &a, SPARQLquery::FilterTree *ft, SPARQLquery::TempResult &r);
|
||||
void getFilterStr(string &str , vector<int> &r, map<string, int> &dict, string &ftarg);
|
||||
bool matchFilter(vector<int> &r,map<string, int> &dict, SPARQLquery::FilterTree *ft);
|
||||
|
||||
//get the final string result_set from SPARQLquery
|
||||
int genEvaPlan(SPARQLquery::PatternGroup& pg, SPARQLquery& query, int id);
|
||||
void doEvaPlan(SPARQLquery &query);
|
||||
|
||||
/* get the final string result_set from SPARQLquery */
|
||||
bool getFinalResult(SPARQLquery& _sparql_q, ResultSet& _result_set);
|
||||
};
|
||||
|
||||
#endif //_DATABASE_DATABASE_H
|
||||
|
||||
|
||||
#endif /* DATABASE_H_ */
|
||||
|
|
2612
Database/Join.cpp
2612
Database/Join.cpp
File diff suppressed because it is too large
Load Diff
238
Database/Join.h
238
Database/Join.h
|
@ -1,238 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Join.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-12-13 16:05
|
||||
# Description: design join strategies and select/cost modules
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _JOIN_JOIN_H
|
||||
#define _JOIN_JOIN_H
|
||||
|
||||
#include "../Query/IDList.h"
|
||||
#include "../Query/BasicQuery.h"
|
||||
#include "../Query/SPARQLquery.h"
|
||||
#include "../KVstore/KVstore.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
//BETTER?:place multi_join and index_join in separated files
|
||||
|
||||
//one row of the intermediate table
typedef vector<int> RecordType;
typedef RecordType::iterator RecordIterator;

//the intermediate table: a list of rows, with forward and reverse cursors
typedef list<RecordType> TableType;
typedef TableType::iterator TableIterator;
typedef TableType::reverse_iterator TableReverseIterator;

//per-edge id lists (raw arrays) and their matching lengths
typedef vector< vector<int*> > IdLists;
typedef vector<RecordType> IdListsLen;
|
||||
|
||||
//One candidate value inside an IndexList, together with its links.
struct IndexItem
{
    int value;
    //validity flag, needed for the final travelling
    bool isValid;
    //NOTICE: both vectors are expected to stay small, and their order
    //must be the same as the order used in the owning IndexList
    vector< list< list<struct IndexItem>::iterator > > travel;
    vector< set<int> > check;

    //default item: value -1, marked invalid
    IndexItem() : value(-1), isValid(false)
    {
    }

    //item holding _val, marked valid
    IndexItem(int _val) : value(_val), isValid(true)
    {
    }
};
|
||||
|
||||
typedef struct IndexList
|
||||
{
|
||||
//int next;
|
||||
//NOTICE:the list should be ordered at the beginning
|
||||
list<IndexItem> candidates;
|
||||
list<IndexItem>::iterator border; //used to divide valid and invalid area
|
||||
bool prepared; //find and set all invalid eles restricted by subtree in travelling
|
||||
int position; //current neighbor to travel
|
||||
vector<int> travel_map; //the mapping between links position and IndexList id
|
||||
vector<int> check_map; //the mapping between check position and IndexList id
|
||||
IndexList()
|
||||
{
|
||||
//this->next = -1;
|
||||
this->prepared = false;
|
||||
this->position = 0;
|
||||
}
|
||||
bool end()
|
||||
{
|
||||
return this->position == (int)this->travel_map.size();
|
||||
}
|
||||
int next()
|
||||
{
|
||||
return this->travel_map[this->position++];
|
||||
}
|
||||
//NOTICE:we can not use binary-search in list, but this search method maybe slow
|
||||
//BETTER?:adjust the list to binary-tree or other struture?
|
||||
list<IndexItem>::iterator search(int _val)
|
||||
{
|
||||
for(list<IndexItem>::iterator it = this->candidates.begin(); it != this->border; ++it)
|
||||
{
|
||||
if(it->value == _val)
|
||||
return it;
|
||||
}
|
||||
return this->border;
|
||||
}
|
||||
}IndexList;
|
||||
|
||||
//A satellite node: its id plus a borrowed id list and that list's length.
//The pointer is stored as given; no copy of the list is made here.
struct Satellite
{
    int id;
    int* idlist;
    int idlist_len;

    Satellite(int _id, int* _idlist, int _idlist_len)
        : id(_id), idlist(_idlist), idlist_len(_idlist_len)
    {
    }
};
|
||||
|
||||
typedef list<IndexItem> ItemList;
|
||||
typedef list<IndexItem>::iterator ItemListIterator;
|
||||
typedef list< list<struct IndexItem>::iterator > IteratorList;
|
||||
|
||||
//Database new Join and pass something like kvstore
|
||||
class Join
|
||||
{
|
||||
private:
|
||||
int start_id;
|
||||
int var_num;
|
||||
//bool* dealed_triple;
|
||||
BasicQuery* basic_query;
|
||||
KVstore* kvstore;
|
||||
//used by score_node for parameters
|
||||
static const unsigned PARAM_DEGREE = 1;
|
||||
static const unsigned PARAM_SIZE = 100000;
|
||||
static const unsigned PARAM_DENSE = 1;
|
||||
static const double JUDGE_LIMIT = 0.5;
|
||||
static const int LIMIT_CANDIDATE_LIST_SIZE = 1000;
|
||||
//BETTER?:predefine size to avoid copy cost
|
||||
TableType current_table;
|
||||
TableIterator new_start; //keep to end() as default
|
||||
//list<bool> table_row_new;
|
||||
|
||||
//keep the mapping for disordered ids in vector<int> table
|
||||
int* id2pos;
|
||||
int id_pos; //the num of id put into id2pos currently
|
||||
int* pos2id;
|
||||
bool* dealed_triple;
|
||||
stack<int> mystack;
|
||||
|
||||
vector<int*>* result_list;
|
||||
vector<Satellite> satellites;
|
||||
int* record;
|
||||
int record_len;
|
||||
|
||||
void init(BasicQuery* _basic_query);
|
||||
void clear();
|
||||
void add_id_pos_mapping(int _id);
|
||||
void reset_id_pos_mapping();
|
||||
|
||||
//judge which method should be used according to
|
||||
//the size of candidates and structure of quering graph
|
||||
int judge(int _smallest, int _biggest);
|
||||
|
||||
//select the start point and search order
|
||||
void select();
|
||||
|
||||
//score the cost to link two tables and the efficience
|
||||
//of filtering
|
||||
//int score(List1, List2);
|
||||
|
||||
//score the node according to degree and size
|
||||
double score_node(unsigned _degree, unsigned _size);
|
||||
|
||||
void toStartJoin();
|
||||
|
||||
bool filter_before_join();
|
||||
bool constant_edge_filter(int _var_i);
|
||||
void preid_filter(int _var_i);
|
||||
bool only_pre_filter_after_join();
|
||||
void add_literal_candidate();
|
||||
bool pre_var_handler();
|
||||
//bool filterBySatellites(int _var, int _ele);
|
||||
bool filterBySatellites(int _var);
|
||||
bool allFilterByPres();
|
||||
void generateAllSatellites();
|
||||
void cartesian(int pos, int end);
|
||||
|
||||
//functions for help
|
||||
//copy/add to the end of current_table and set true
|
||||
void add_new_to_results(TableIterator it, int id);
|
||||
|
||||
//void set_results_old(list<bool>::iterator it);
|
||||
int choose_next_node(int id);
|
||||
|
||||
bool is_literal_var(int id);
|
||||
bool is_literal_ele(int _id);
|
||||
|
||||
void copyToResult();
|
||||
|
||||
//BETTER?:change these params to members in class
|
||||
void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, int _can_list_size);
|
||||
bool if_prepare_idlist(int _can_list_size, bool _is_literal);
|
||||
bool new_join_with_multi_vars_prepared(IdLists& _id_lists, IdListsLen& _id_lists_len, vector<int>& _edges, IDList& _can_list, int _can_list_size);
|
||||
bool new_join_with_multi_vars_not_prepared(vector<int>& _edges, IDList& _can_list, int _can_list_size, int _id, bool _is_literal);
|
||||
|
||||
bool multi_join();
|
||||
|
||||
|
||||
|
||||
//================================================================================================
|
||||
//The index join method saves the memory cost because 2m+2mn < 3mn,
|
||||
//and time may be reduced if the pre-process is not too costly
|
||||
//because we can reuse the links other than recompute in temporal table
|
||||
//New struct is needed for node, i.e. list<bool, int, list<iterator> >,
|
||||
//because we may have to delete, but how can we know if an iterator
|
||||
//is valid if the one it points to is removed?(remove if the other is removed; using end())
|
||||
//1. based on edges: process each time only in valid area(already
|
||||
//macthed with others, invalid is removed), and finally it must be
|
||||
//all ok, just copy to result_list. We should select the edge order
|
||||
//to better the efficiency, but how can we keep only a neighbor links
|
||||
//set if we want to save memory?(ensure all can be linked later)
|
||||
//2. based on points: search deeply like multi-index-join, only a
|
||||
//neighbor links set is kept for a node(not every edge), so memory
|
||||
//cost is low. Finally, travel around along valid iterator, copy...
|
||||
|
||||
IndexList* index_lists;
|
||||
|
||||
void buildIndexLists();
|
||||
bool travel_init(int _lid);
|
||||
|
||||
bool index_link(int _nid, int _idx);
|
||||
bool index_filter(int _nid, int _idx);
|
||||
bool table_travel(int _id1, int _id2);
|
||||
bool table_check(int _id1, int _id2);
|
||||
|
||||
bool index_travel_one();
|
||||
bool index_travel_two();
|
||||
bool index_travel();
|
||||
bool index_join();
|
||||
|
||||
//NOTICE:this is only used to join a BasicQuery
|
||||
bool join();
|
||||
|
||||
public:
|
||||
Join();
|
||||
Join(KVstore* _kvstore);
|
||||
//these functions can be called by Database
|
||||
bool join_sparql(SPARQLquery& _sparql_query);
|
||||
bool join_basic(BasicQuery* _basic_query);
|
||||
~Join();
|
||||
};
|
||||
|
||||
#endif //_JOIN_JOIN_H
|
||||
|
|
@ -1,363 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Strategy.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-05-07 16:31
|
||||
# Description: implement functions in Strategy.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "Strategy.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//Default construction: no stores attached yet, method 0 selected.
Strategy::Strategy()
{
    kvstore = NULL;
    vstree = NULL;
    method = 0;
    //the handler table setup (prepare_handler) stays disabled
}
|
||||
|
||||
//Construct with the given stores; the pointers are stored as passed in
//and method 0 is selected.
Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree)
{
    kvstore = _kvstore;
    vstree = _vstree;
    method = 0;
    //the handler table setup (prepare_handler) stays disabled
}
|
||||
|
||||
//Intentionally empty: the cleanup of the handler table
//(delete[] this->dispatch) is disabled together with prepare_handler.
Strategy::~Strategy()
{
}
|
||||
|
||||
//void
|
||||
//Strategy::prepare_handler()
|
||||
//{
|
||||
//this->dispatch = new QueryHandler[Strategy::QUERY_HANDLER_NUM];
|
||||
//this->dispatch[0] = Strategy::handler0;
|
||||
//}
|
||||
|
||||
//NOTICE: 2-triple case ?s1 p1 c0 ?s2 p2 c0 is viewed as an unconnected graph
|
||||
//however, this can be dealed due to several basicquery and linking
|
||||
|
||||
bool
|
||||
Strategy::handle(SPARQLquery& _query)
|
||||
{
|
||||
#ifdef MULTI_INDEX
|
||||
Util::logging("IN GeneralEvaluation::handle");
|
||||
|
||||
vector<BasicQuery*>& queryList = _query.getBasicQueryVec();
|
||||
// enumerate each BasicQuery and retrieve their variables' mapping entity in the VSTree.
|
||||
vector<BasicQuery*>::iterator iter=queryList.begin();
|
||||
for(; iter != queryList.end(); iter++)
|
||||
{
|
||||
this->method = 0;
|
||||
|
||||
vector<int*>& result_list = (*iter)->getResultList();
|
||||
int select_var_num = (*iter)->getSelectVarNum();
|
||||
int varNum = (*iter)->getVarNum(); //the num of vars needing to be joined
|
||||
int total_num = (*iter)->getTotalVarNum();
|
||||
int pre_varNum = (*iter)->getPreVarNum();
|
||||
|
||||
if((*iter)->getTripleNum() == 1 && pre_varNum == 1)
|
||||
{
|
||||
Triple triple = (*iter)->getTriple(0);
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
result_list.clear();
|
||||
|
||||
if(total_num == 2)
|
||||
{
|
||||
//TODO:consider special case, select ?s (?p) ?o where { ?s ?p ?o . }
|
||||
//filter and join is too costly, should enum all predicates and use p2so
|
||||
//maybe the selected vars are ?s (?p) or ?o (?p)
|
||||
cerr << "not supported now!" << endl;
|
||||
}
|
||||
else if(total_num == 1)
|
||||
{
|
||||
//TODO:if just select s/o, use o2s/s2o
|
||||
//if only p is selected, use s2p or o2p
|
||||
//only if both s/o and p are selected, use s2po or o2ps
|
||||
|
||||
if(triple.subject[0] != '?') //constant
|
||||
{
|
||||
int sid = (this->kvstore)->getIDByEntity(triple.subject);
|
||||
this->kvstore->getpreIDobjIDlistBysubID(sid, id_list, id_list_len);
|
||||
}
|
||||
else if(triple.object[0] != '?') //constant
|
||||
{
|
||||
int oid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
if(oid == -1)
|
||||
{
|
||||
oid = (this->kvstore)->getIDByLiteral(triple.object);
|
||||
}
|
||||
this->kvstore->getpreIDsubIDlistByobjID(oid, id_list, id_list_len);
|
||||
}
|
||||
|
||||
//always place s/o before p in result list
|
||||
for(int i = 0; i < id_list_len; i += 2)
|
||||
{
|
||||
int* record = new int[2]; //2 vars selected
|
||||
record[1] = id_list[i]; //for the pre var
|
||||
record[0] = id_list[i+1]; //for the s/o var
|
||||
result_list.push_back(record);
|
||||
}
|
||||
}
|
||||
else if(total_num == 0) //only ?p
|
||||
{
|
||||
//just use so2p
|
||||
int sid = (this->kvstore)->getIDByEntity(triple.subject);
|
||||
int oid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
if(oid == -1)
|
||||
{
|
||||
oid = (this->kvstore)->getIDByLiteral(triple.object);
|
||||
}
|
||||
|
||||
this->kvstore->getpreIDlistBysubIDobjID(sid, oid, id_list, id_list_len);
|
||||
//copy to result list
|
||||
for(int i = 0; i < id_list_len; ++i)
|
||||
{
|
||||
int* record = new int[1];
|
||||
record[0] = id_list[i];
|
||||
result_list.push_back(record);
|
||||
}
|
||||
}
|
||||
|
||||
delete[] id_list;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(pre_varNum == 0 && (*iter)->getTripleNum() == 1) //only one triple and no predicates
|
||||
{
|
||||
//only one variable and one triple: ?s pre obj or sub pre ?o
|
||||
if(total_num == 1)
|
||||
{
|
||||
this->method = 1;
|
||||
}
|
||||
//only two vars: ?s pre ?o
|
||||
else if(total_num == 2)
|
||||
{
|
||||
if(varNum == 1) //the selected id should be 0
|
||||
{
|
||||
this->method = 2;
|
||||
}
|
||||
else //==2
|
||||
{
|
||||
this->method = 3;
|
||||
}
|
||||
}
|
||||
//cerr << "this BasicQuery use query strategy 2" << endl;
|
||||
//cerr<<"Final result size: "<<(*iter)->getResultList().size()<<endl;
|
||||
//continue;
|
||||
}
|
||||
|
||||
//QueryHandler dispatch;
|
||||
//dispatch[0] = handler0;
|
||||
switch(this->method)
|
||||
{
|
||||
case 0:
|
||||
this->handler0(*iter, result_list);
|
||||
break;
|
||||
case 1:
|
||||
this->handler1(*iter, result_list);
|
||||
break;
|
||||
case 2:
|
||||
this->handler2(*iter, result_list);
|
||||
break;
|
||||
case 3:
|
||||
this->handler3(*iter, result_list);
|
||||
break;
|
||||
default:
|
||||
cerr << "not support this method" << endl;
|
||||
|
||||
}
|
||||
cerr<<"Final result size: "<<(*iter)->getResultList().size()<<endl;
|
||||
//BETTER: use function pointer array in C++ class
|
||||
}
|
||||
#else
|
||||
cerr << "this BasicQuery use original query strategy" << endl;
|
||||
long tv_handle = Util::get_cur_time();
|
||||
(this->vstree)->retrieve(_query);
|
||||
long tv_retrieve = Util::get_cur_time();
|
||||
cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;
|
||||
|
||||
this->join = new Join(kvstore);
|
||||
this->join->join_sparql(_query);
|
||||
delete this->join;
|
||||
|
||||
long tv_join = Util::get_cur_time();
|
||||
cout << "after Join, used " << (tv_join - tv_retrieve) << "ms." << endl;
|
||||
#endif
|
||||
Util::logging("OUT Strategy::handle");
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
{
|
||||
long before_filter = Util::get_cur_time();
|
||||
cerr << "this BasicQuery use query strategy 0" << endl;
|
||||
|
||||
//BETTER:not all vars in join filtered by vstree
|
||||
//(A)-B-c: B should by vstree, then by c, but A should be generated in join(first set A as not)
|
||||
//if A not in join, just filter B by pre
|
||||
//divided into star graphs, join core vertices, generate satellites
|
||||
//join should also start from a core vertex(neighbor can be constants or vars) if available
|
||||
//
|
||||
//QUERY: is there any case that a node should be retrieved by other index?(instead of vstree or generate whne join)
|
||||
//
|
||||
//we had better treat 1-triple case(no ?p) as special, and then in other cases, core vertex exist(if connected)
|
||||
//However, if containing ?p and 1-triple, we should treat it also as a special case, or select a variable as core vertex
|
||||
//and retrieved (for example, ?s ?p o or s ?p ?o, generally no core vertex in these cases)
|
||||
|
||||
long tv_handle = Util::get_cur_time();
|
||||
int varNum = _bq->getVarNum(); //the num of vars needing to be joined
|
||||
for(int i = 0; i < varNum; ++i)
|
||||
{
|
||||
if(_bq->if_need_retrieve(i) == false)
|
||||
continue;
|
||||
bool flag = _bq->isLiteralVariable(i);
|
||||
const EntityBitSet& entityBitSet = _bq->getVarBitSet(i);
|
||||
IDList* idListPtr = &( _bq->getCandidateList(i) );
|
||||
this->vstree->retrieveEntity(entityBitSet, idListPtr);
|
||||
if(!flag)
|
||||
{
|
||||
_bq->setReady(i);
|
||||
}
|
||||
//the basic query should end if one non-literal var has no candidates
|
||||
if(idListPtr->size() == 0 && !flag)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//if(_bq->isReady(0))
|
||||
//cout<<"error: var 0 is ready?"<<endl;
|
||||
//TODO:end directly if one is empty!
|
||||
|
||||
long tv_retrieve = Util::get_cur_time();
|
||||
cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;
|
||||
Join *join = new Join(kvstore);
|
||||
join->join_basic(_bq);
|
||||
delete join;
|
||||
|
||||
long tv_join = Util::get_cur_time();
|
||||
cout << "after Join, used " << (tv_join - tv_retrieve) << "ms." << endl;
|
||||
}
|
||||
|
||||
void
|
||||
Strategy::handler1(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
{
|
||||
long before_filter = Util::get_cur_time();
|
||||
cerr << "this BasicQuery use query strategy 1" << endl;
|
||||
//int neighbor_id = (*_bq->getEdgeNeighborID(0, 0); //constant, -1
|
||||
char edge_type = _bq->getEdgeType(0, 0);
|
||||
int triple_id = _bq->getEdgeID(0, 0);
|
||||
Triple triple = _bq->getTriple(triple_id);
|
||||
int pre_id = _bq->getEdgePreID(0, 0);
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
if(edge_type == Util::EDGE_OUT)
|
||||
{
|
||||
//cerr<<"edge out!!!"<<endl;
|
||||
int nid = (this->kvstore)->getIDByEntity(triple.object);
|
||||
if(nid == -1)
|
||||
{
|
||||
nid = (this->kvstore)->getIDByLiteral(triple.object);
|
||||
}
|
||||
this->kvstore->getsubIDlistByobjIDpreID(nid, pre_id, id_list, id_list_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
//cerr<<"edge in!!!"<<endl;
|
||||
this->kvstore->getobjIDlistBysubIDpreID(this->kvstore->getIDByEntity(triple.subject), pre_id, id_list, id_list_len);
|
||||
}
|
||||
|
||||
long after_filter = Util::get_cur_time();
|
||||
cerr << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
|
||||
_result_list.clear();
|
||||
//cerr<<"now to copy result to list"<<endl;
|
||||
for(int i = 0; i < id_list_len; ++i)
|
||||
{
|
||||
int* record = new int[1]; //only this var is selected
|
||||
record[0] = id_list[i];
|
||||
//cerr<<this->kvstore->getEntityByID(record[0])<<endl;
|
||||
_result_list.push_back(record);
|
||||
}
|
||||
long after_copy = Util::get_cur_time();
|
||||
cerr<<"after copy to result list: used "<<(after_copy-after_filter)<<" ms"<<endl;
|
||||
delete[] id_list;
|
||||
cerr<<"Final result size: "<<_result_list.size()<<endl;
|
||||
}
|
||||
|
||||
void
|
||||
Strategy::handler2(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
{
|
||||
long before_filter = Util::get_cur_time();
|
||||
cerr << "this BasicQuery use query strategy 2" << endl;
|
||||
int triple_id = _bq->getEdgeID(0, 0);
|
||||
Triple triple = _bq->getTriple(triple_id);
|
||||
int pre_id = _bq->getEdgePreID(0, 0);
|
||||
int var1_id = _bq->getIDByVarName(triple.subject);
|
||||
int var2_id = _bq->getIDByVarName(triple.object);
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
if(var1_id == 0) //subject var selected
|
||||
{
|
||||
//use p2s directly
|
||||
this->kvstore->getsubIDlistBypreID(pre_id, id_list, id_list_len);
|
||||
}
|
||||
else if(var2_id == 0) //object var selected
|
||||
{
|
||||
//use p2o directly
|
||||
this->kvstore->getobjIDlistBypreID(pre_id, id_list, id_list_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "ERROR in Database::handle(): no selected var!"<<endl;
|
||||
}
|
||||
long after_filter = Util::get_cur_time();
|
||||
cerr << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
|
||||
_result_list.clear();
|
||||
for(int i = 0; i < id_list_len; ++i)
|
||||
{
|
||||
int* record = new int[1]; //only one var
|
||||
record[0] = id_list[i];
|
||||
_result_list.push_back(record);
|
||||
}
|
||||
long after_copy = Util::get_cur_time();
|
||||
cerr<<"after copy to result list: used "<<(after_copy-after_filter)<<" ms"<<endl;
|
||||
delete[] id_list;
|
||||
cerr<<"Final result size: "<<_result_list.size()<<endl;
|
||||
}
|
||||
|
||||
void
|
||||
Strategy::handler3(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
{
|
||||
long before_filter = Util::get_cur_time();
|
||||
cerr << "this BasicQuery use query strategy 3" << endl;
|
||||
int triple_id = _bq->getEdgeID(0, 0);
|
||||
Triple triple = _bq->getTriple(triple_id);
|
||||
int pre_id = _bq->getEdgePreID(0, 0);
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
this->kvstore->getsubIDobjIDlistBypreID(pre_id, id_list, id_list_len);
|
||||
int var1_id = _bq->getIDByVarName(triple.subject);
|
||||
int var2_id = _bq->getIDByVarName(triple.object);
|
||||
long after_filter = Util::get_cur_time();
|
||||
cerr << "after filter, used " << (after_filter - before_filter) << "ms" << endl;
|
||||
_result_list.clear();
|
||||
for(int i = 0; i < id_list_len; i += 2)
|
||||
{
|
||||
int* record = new int[2]; //2 vars and selected
|
||||
record[var1_id] = id_list[i];
|
||||
record[var2_id] = id_list[i+1];
|
||||
_result_list.push_back(record);
|
||||
}
|
||||
long after_copy = Util::get_cur_time();
|
||||
cerr<<"after copy to result list: used "<<(after_copy-after_filter)<<" ms"<<endl;
|
||||
delete[] id_list;
|
||||
cerr<<"Final result size: "<<_result_list.size()<<endl;
|
||||
}
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Strategy.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-05-07 16:28
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _DATABASE_STRATEGY_H
|
||||
#define _DATABASE_STRATEGY_H
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Util/Triple.h"
|
||||
#include "Join.h"
|
||||
#include "../Query/IDList.h"
|
||||
#include "../Query/SPARQLquery.h"
|
||||
#include "../Query/BasicQuery.h"
|
||||
#include "../KVstore/KVstore.h"
|
||||
#include "../VSTree/VSTree.h"
|
||||
|
||||
class Strategy
|
||||
{
|
||||
public:
|
||||
Strategy();
|
||||
Strategy(KVstore*, VSTree*);
|
||||
~Strategy();
|
||||
//select efficient strategy to do the sparql query
|
||||
bool handle(SPARQLquery&);
|
||||
|
||||
private:
|
||||
int method;
|
||||
KVstore* kvstore;
|
||||
VSTree* vstree;
|
||||
void handler0(BasicQuery*, vector<int*>&);
|
||||
void handler1(BasicQuery*, vector<int*>&);
|
||||
void handler2(BasicQuery*, vector<int*>&);
|
||||
void handler3(BasicQuery*, vector<int*>&);
|
||||
//QueryHandler *dispatch;
|
||||
//void prepare_handler();
|
||||
};
|
||||
|
||||
static const unsigned QUERY_HANDLER_NUM = 4;
|
||||
typedef void (Strategy::*QueryHandler[QUERY_HANDLER_NUM]) (BasicQuery*, vector<int*>&);
|
||||
//QueryHandler dispatch;
|
||||
|
||||
#endif //_DATABASE_STRATEGY_H
|
||||
|
|
@ -15,7 +15,7 @@ string filePath_sID2s;
|
|||
string filePath_o2sID;
|
||||
string filePath_opID2sID;
|
||||
FILE * _log_btree;
|
||||
// 在中间结点中插入键
|
||||
// 在中间结点中插入键
|
||||
bool mItnlNode::Insert( mNode * pNode)
|
||||
{
|
||||
if(getCount() >= MAXNUM_KEY)
|
||||
|
@ -32,7 +32,7 @@ bool mItnlNode::Insert( mNode * pNode)
|
|||
printf("err in insert itnl\n");
|
||||
system("pause"); exit(0);
|
||||
}
|
||||
// 在要插入的点是在最右端时要特殊处理, solved
|
||||
// 在要插入的点是在最右端时要特殊处理, solved
|
||||
for(int i = getCount() + 1; i > _ikey; i --)
|
||||
{
|
||||
this ->setElement(i, this ->getElement(i - 1) );
|
||||
|
@ -50,13 +50,13 @@ bool mItnlNode::Insert( mNode * pNode)
|
|||
return true;
|
||||
}
|
||||
|
||||
// 在中间结点中删除键,以及该键后的指针
|
||||
// 在中间结点中删除键,以及该键后的指针
|
||||
int mItnlNode::Delete(const KeyType & _keytype)
|
||||
{
|
||||
int _index = -1;
|
||||
int _ibegin = 1, _iend = getCount();
|
||||
int _imiddle;
|
||||
// 二分查找index
|
||||
// 二分查找index
|
||||
while(_ibegin < _iend)
|
||||
{
|
||||
_imiddle = (_ibegin + _iend) / 2;
|
||||
|
@ -81,9 +81,9 @@ int mItnlNode::Delete(const KeyType & _keytype)
|
|||
}
|
||||
}
|
||||
|
||||
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
|
||||
// 对于insert, index = 1 的时候一定是整棵树的最右边!!!
|
||||
// delete则不同
|
||||
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
|
||||
// 对于insert, index = 1 的时候一定是整棵树的最右边!!!
|
||||
// delete则不同
|
||||
if(_index == 1 && this ->getFather() != NULL)
|
||||
{
|
||||
mItnlNode * itnl_father = (mItnlNode *)(this ->getFather() );
|
||||
|
@ -91,7 +91,7 @@ int mItnlNode::Delete(const KeyType & _keytype)
|
|||
KeyType & i_data = this ->getKey(2);
|
||||
while(itnl_father != NULL)
|
||||
{
|
||||
// 此处同insert不同,审慎其不同之处
|
||||
// 此处同insert不同,审慎其不同之处
|
||||
int tmp_key = itnl_father ->iExist(f_data);
|
||||
if(tmp_key < 1)
|
||||
{
|
||||
|
@ -142,7 +142,7 @@ KeyType & mItnlNode::Split(mItnlNode* pNode)
|
|||
return (pNode ->getElement(1)).mKey;
|
||||
}
|
||||
|
||||
// 结合结点,把指定中间结点的数据全部剪切到本中间结点
|
||||
// 结合结点,把指定中间结点的数据全部剪切到本中间结点
|
||||
bool mItnlNode::Combine(mItnlNode * pNode)
|
||||
{
|
||||
if(this ->getCount() + pNode ->getCount() > MAXNUM_KEY)
|
||||
|
@ -156,7 +156,7 @@ bool mItnlNode::Combine(mItnlNode * pNode)
|
|||
return true;
|
||||
}
|
||||
|
||||
// 从另一结点移一个元素到本结点
|
||||
// 从另一结点移一个元素到本结点
|
||||
bool mItnlNode::MoveOneElement(mNode* pNode)
|
||||
{
|
||||
|
||||
|
@ -164,9 +164,9 @@ bool mItnlNode::MoveOneElement(mNode* pNode)
|
|||
return false;
|
||||
}
|
||||
|
||||
// 清除叶子结点中的数据
|
||||
// 清除叶子结点中的数据
|
||||
|
||||
// 在叶子结点中插入数据
|
||||
// 在叶子结点中插入数据
|
||||
bool mLeafNode::Insert(const mleafdata & _leafdata)
|
||||
{
|
||||
const KeyType & data = _leafdata.mData;
|
||||
|
@ -179,9 +179,9 @@ bool mLeafNode::Insert(const mleafdata & _leafdata)
|
|||
printf("err count too large\n");
|
||||
return false;
|
||||
}
|
||||
// 返回i, data 介于i - 1 与 i 之间, 要放在i 上
|
||||
// 返回i, data 介于i - 1 与 i 之间, 要放在i 上
|
||||
int _i_insert = this ->iInsert(data);
|
||||
// 还要考虑仅根节点是叶子节点的情况, 需要再加个条件
|
||||
// 还要考虑仅根节点是叶子节点的情况, 需要再加个条件
|
||||
if(_i_insert == 1 && this ->getFather() != NULL)
|
||||
{
|
||||
mItnlNode * _pFather = (mItnlNode *)(this ->getFather());
|
||||
|
@ -196,7 +196,7 @@ bool mLeafNode::Insert(const mleafdata & _leafdata)
|
|||
system("pause");
|
||||
exit(0);
|
||||
}
|
||||
//同步使得内存位等其它位失效, 策略失策, 弥补之。。
|
||||
//同步使得内存位等其它位失效, 策略失策, 弥补之。。
|
||||
_pFather ->setKey(_ikey, _leafdata.mData);
|
||||
_pFather ->setMemory(_ikey);
|
||||
_pFather ->setModify();
|
||||
|
@ -216,10 +216,10 @@ bool mLeafNode::Insert(const mleafdata & _leafdata)
|
|||
}
|
||||
|
||||
/*
|
||||
* 删除成功返回删除的key的下标, 失败则返回-1
|
||||
* 设置修改位
|
||||
* 若是最左端的元素则向上修改对应需要修改的父节点, 此处与insert不同
|
||||
* insert若出现最左端必然是整棵树的最左端
|
||||
* 删除成功返回删除的key的下标, 失败则返回-1
|
||||
* 设置修改位
|
||||
* 若是最左端的元素则向上修改对应需要修改的父节点, 此处与insert不同
|
||||
* insert若出现最左端必然是整棵树的最左端
|
||||
*/
|
||||
int mLeafNode::Delete(KeyType & _keytype)
|
||||
{
|
||||
|
@ -249,9 +249,9 @@ int mLeafNode::Delete(KeyType & _keytype)
|
|||
_ibegin = _imiddle;
|
||||
}
|
||||
}
|
||||
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
|
||||
// 对于insert, index = 1 的时候一定是整棵树的最右边!!!
|
||||
// delete则不同
|
||||
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
|
||||
// 对于insert, index = 1 的时候一定是整棵树的最右边!!!
|
||||
// delete则不同
|
||||
if(_index == 1 && this ->getFather() != NULL)
|
||||
{
|
||||
mItnlNode * itnl_father = (mItnlNode *)(this ->getFather() );
|
||||
|
@ -259,7 +259,7 @@ int mLeafNode::Delete(KeyType & _keytype)
|
|||
KeyType & i_data = this ->getKey(2);
|
||||
while(itnl_father != NULL)
|
||||
{
|
||||
// 此处同insert不同,审慎其不同之处
|
||||
// 此处同insert不同,审慎其不同之处
|
||||
int tmp_key = itnl_father ->iExist(f_data);
|
||||
if(tmp_key < 1)
|
||||
{
|
||||
|
@ -292,7 +292,7 @@ int mLeafNode::Delete(KeyType & _keytype)
|
|||
}
|
||||
return -1;
|
||||
}
|
||||
//重载delete of leaf
|
||||
//重载delete of leaf
|
||||
int mLeafNode::Delete(KeyType & _keytype, char partval[], int & pvFlag)
|
||||
{
|
||||
int _index = -1;
|
||||
|
@ -334,12 +334,12 @@ int mLeafNode::Delete(KeyType & _keytype, char partval[], int & pvFlag)
|
|||
if(pvFlag == FLAG_NO_ZERO)
|
||||
return _index;
|
||||
|
||||
//如果删除成功并且元素变为空, 则继续删除对应的key
|
||||
//如果删除成功并且元素变为空, 则继续删除对应的key
|
||||
|
||||
|
||||
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
|
||||
// 对于insert, index = 1 的时候一定是整棵树的最右边!!!
|
||||
// delete则不同
|
||||
//任何一个不是根节点或不是叶子节点的节点必须保证包含至少两个元素
|
||||
// 对于insert, index = 1 的时候一定是整棵树的最右边!!!
|
||||
// delete则不同
|
||||
if(_index == 1 && this ->getFather() != NULL)
|
||||
{
|
||||
mItnlNode * itnl_father = (mItnlNode *)(this ->getFather() );
|
||||
|
@ -347,7 +347,7 @@ int mLeafNode::Delete(KeyType & _keytype, char partval[], int & pvFlag)
|
|||
KeyType & i_data = this ->getKey(2);
|
||||
while(itnl_father != NULL)
|
||||
{
|
||||
// 此处同insert不同,审慎其不同之处
|
||||
// 此处同insert不同,审慎其不同之处
|
||||
int tmp_key = itnl_father ->iExist(f_data);
|
||||
if(tmp_key < 1)
|
||||
{
|
||||
|
@ -379,7 +379,7 @@ int mLeafNode::Delete(KeyType & _keytype, char partval[], int & pvFlag)
|
|||
|
||||
|
||||
|
||||
// 分裂叶子结点,把本叶子结点的后一半数据剪切到指定的叶子结点中
|
||||
// 分裂叶子结点,把本叶子结点的后一半数据剪切到指定的叶子结点中
|
||||
KeyType & mLeafNode::Split(mLeafNode * pNode)
|
||||
{
|
||||
for(int i = ORDER_V + 1; i <= MAXNUM_KEY; i ++)
|
||||
|
@ -393,7 +393,7 @@ KeyType & mLeafNode::Split(mLeafNode * pNode)
|
|||
return (pNode ->getElement(1)).mData;
|
||||
}
|
||||
|
||||
// 结合结点,把指定叶子结点的数据全部剪切到本叶子结点
|
||||
// 结合结点,把指定叶子结点的数据全部剪切到本叶子结点
|
||||
bool mLeafNode::Combine(mLeafNode * pNode)
|
||||
{
|
||||
int this_count = this ->getCount();
|
||||
|
@ -405,7 +405,7 @@ bool mLeafNode::Combine(mLeafNode * pNode)
|
|||
}
|
||||
return false;
|
||||
}
|
||||
// 查找对应的叶子结点
|
||||
// 查找对应的叶子结点
|
||||
mLeafNode* BPlusTree::SearchLeafNode(const KeyType & data)const
|
||||
{
|
||||
mNode * pNode = mRoot;
|
||||
|
@ -415,15 +415,15 @@ mLeafNode* BPlusTree::SearchLeafNode(const KeyType & data)const
|
|||
}
|
||||
else// no check
|
||||
{
|
||||
/*
|
||||
* while 循环
|
||||
* 当前还是中间节点
|
||||
* 找到下层入口下标
|
||||
* 判断是否超出左边界, 是的话直接到底, pNode 指向叶子节点, break
|
||||
* 总之, 最后肯定是pNode指向叶子节点
|
||||
* 所以, 插入时还要判断是否超过左边界, 是的话要修改上层父节点的左边界
|
||||
* Search 的时候都要注意边界的率先判断, 还有当前叶节点可能是根节点
|
||||
*/
|
||||
/*
|
||||
* while 循环
|
||||
* 当前还是中间节点
|
||||
* 找到下层入口下标
|
||||
* 判断是否超出左边界, 是的话直接到底, pNode 指向叶子节点, break
|
||||
* 总之, 最后肯定是pNode指向叶子节点
|
||||
* 所以, 插入时还要判断是否超过左边界, 是的话要修改上层父节点的左边界
|
||||
* Search 的时候都要注意边界的率先判断, 还有当前叶节点可能是根节点
|
||||
*/
|
||||
// int _floor = 1;
|
||||
while(pNode ->getType() == NODE_TYPE_INTERNAL)
|
||||
{
|
||||
|
@ -461,8 +461,8 @@ mLeafNode* BPlusTree::SearchLeafNode(const KeyType & data)const
|
|||
}
|
||||
return NULL;
|
||||
}
|
||||
// 在树中查找数据
|
||||
bool BPlusTree::Search(KeyType & data, mleafdata & _ret)//增加一参数, 用于接收查找过程中进入的叶子节点
|
||||
// 在树中查找数据
|
||||
bool BPlusTree::Search(KeyType & data, mleafdata & _ret)//增加一参数, 用于接收查找过程中进入的叶子节点
|
||||
{
|
||||
mLeafNode * _pLeaf = SearchLeafNode(data);
|
||||
int _ikey = _pLeaf ->iExist(data);
|
||||
|
@ -521,7 +521,7 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
|
|||
long long int _addr_newleaf = mblockQueue.Pop();
|
||||
_pNewLeaf ->setAddrFB(_addr_newleaf);
|
||||
|
||||
// _key_tmp 也就将是_pnewleaf的第一个元素的key
|
||||
// _key_tmp 也就将是_pnewleaf的第一个元素的key
|
||||
// set modified in split;
|
||||
if(! _pOldLeaf ->getModify())
|
||||
{
|
||||
|
@ -535,8 +535,8 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
|
|||
|
||||
if(_pFather == NULL)
|
||||
{
|
||||
// _pOldLeaf以前是根节点,要把offset = 0 让出来
|
||||
// 还存在占用外存链的情况下则需要释放外存链
|
||||
// _pOldLeaf以前是根节点,要把offset = 0 让出来
|
||||
// 还存在占用外存链的情况下则需要释放外存链
|
||||
// if(!preModified)
|
||||
// {
|
||||
// DelDisk(mfp, 0l, mblockQueue);
|
||||
|
@ -545,7 +545,7 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
|
|||
_pOldLeaf ->setAddrFB(_addr_new);
|
||||
// setmodified in initial;
|
||||
mItnlNode * _pItnl = new mItnlNode;
|
||||
// 分配新的首地址ַ
|
||||
// 分配新的首地址
|
||||
long long int _addr_root = 0;
|
||||
long long int _addr_2 = _pNewLeaf ->getAddrFB();
|
||||
long long int _addr_1 = _pOldLeaf ->getAddrFB();
|
||||
|
@ -587,15 +587,15 @@ bool mLeafNode :: dupInsert(const mleafdata & _mleafdata, int _index_insert)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* 删除某数据
|
||||
* 涉及删除节点时注意回收外存~~
|
||||
/* 删除某数据
|
||||
* 涉及删除节点时注意回收外存~~
|
||||
*/
|
||||
bool BPlusTree::Delete(KeyType & data)
|
||||
{
|
||||
mLeafNode * _pOldLeaf = SearchLeafNode(data);
|
||||
// for when _ikey = 1
|
||||
//if ok , return the index of the deleted
|
||||
//如果idelete是1的话则函数里会是否进行向上删除 考虑
|
||||
//如果idelete是1的话则函数里会是否进行向上删除 考虑
|
||||
int _idelete = _pOldLeaf ->Delete(data);
|
||||
if(_idelete < 0) return false;
|
||||
|
||||
|
@ -607,13 +607,13 @@ bool BPlusTree::Delete(KeyType & data)
|
|||
|
||||
if(_pOldLeaf ->getCount() >= ORDER_V)
|
||||
{
|
||||
//如果idelete = 1 向上删除 不需要
|
||||
//如果idelete = 1 向上删除 不需要
|
||||
return true;
|
||||
}
|
||||
|
||||
// count < 50%
|
||||
int flag = FLAG_LEFT;
|
||||
// 右兄弟优先
|
||||
// 右兄弟优先
|
||||
mLeafNode * _pBrother = (mLeafNode*)(_pOldLeaf ->getBrother(flag));
|
||||
|
||||
//brother > 50%
|
||||
|
@ -661,13 +661,13 @@ bool BPlusTree::Delete(KeyType & data)
|
|||
cout << "bug run" << endl;
|
||||
return false;
|
||||
}
|
||||
//重载删除函数
|
||||
//重载删除函数
|
||||
bool BPlusTree :: Delete(KeyType & data, char PartVal[])
|
||||
{
|
||||
mLeafNode * _pOldLeaf = SearchLeafNode(data);
|
||||
// for when _ikey = 1
|
||||
//if ok , return the index of the deleted
|
||||
//如果idelete是1的话则函数里会是否进行向上删除 考虑
|
||||
// for when _ikey = 1
|
||||
//if ok , return the index of the deleted
|
||||
//如果idelete是1的话则函数里会是否进行向上删除 考虑
|
||||
int pvFlag = FLAG_ZERO;
|
||||
int _idelete = _pOldLeaf ->Delete(data, PartVal, pvFlag);
|
||||
if(_idelete < 0) return false;
|
||||
|
@ -681,13 +681,13 @@ bool BPlusTree :: Delete(KeyType & data, char PartVal[])
|
|||
|
||||
if(_pOldLeaf ->getCount() >= ORDER_V)
|
||||
{
|
||||
//如果idelete = 1 向上删除 不需要
|
||||
//如果idelete = 1 向上删除 不需要
|
||||
return true;
|
||||
}
|
||||
|
||||
// count < 50%
|
||||
int flag = FLAG_LEFT;
|
||||
// <EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
// 右兄弟优先
|
||||
mLeafNode * _pBrother = (mLeafNode*)(_pOldLeaf ->getBrother(flag));
|
||||
|
||||
//brother > 50%
|
||||
|
@ -740,7 +740,7 @@ void BPlusTree::Flush(){
|
|||
this ->StoreTree();
|
||||
this ->ClearTree();
|
||||
}
|
||||
// 清除整个树,删除所有结点
|
||||
// 清除整个树,删除所有结点
|
||||
void BPlusTree :: ClearTree()
|
||||
{
|
||||
queue<mNode *> pQueue[100];
|
||||
|
@ -799,26 +799,26 @@ void BPlusTree :: ClearTree()
|
|||
return;
|
||||
}
|
||||
|
||||
// 检查树是否满足B+树的定义
|
||||
// 检查树是否满足B+树的定义
|
||||
//bool BPlusTree::CheckTree()
|
||||
//{}
|
||||
|
||||
// 递归检查结点及其子树是否满足B+树的定义
|
||||
// 递归检查结点及其子树是否满足B+树的定义
|
||||
//bool BPlusTree::CheckNode(mNode* pNode)
|
||||
//{}
|
||||
|
||||
// 打印整个树
|
||||
// 打印整个树
|
||||
//void BPlusTree::PrintTree(FILE * ifp)
|
||||
//{}
|
||||
|
||||
// 打印某结点
|
||||
// 打印某结点
|
||||
//void BPlusTree::PrintNode(mNode* pNode, FILE * ifp)
|
||||
//{}
|
||||
|
||||
|
||||
|
||||
//递归函数:插入键到中间结点
|
||||
//key即为pNode中的首个key
|
||||
//递归函数:插入键到中间结点
|
||||
//key即为pNode中的首个key
|
||||
bool BPlusTree::InsertItnlNode(mItnlNode* pNode, mNode* pSon)
|
||||
{
|
||||
if(pNode == NULL || pNode ->getType() == NODE_TYPE_LEAF)
|
||||
|
@ -878,8 +878,8 @@ bool BPlusTree::InsertItnlNode(mItnlNode* pNode, mNode* pSon)
|
|||
|
||||
if(_pFather == NULL)
|
||||
{
|
||||
// 原offset = 0处块以及相应后续链接块先清除
|
||||
// 判断是否需要删除的外存链
|
||||
// 原offset = 0处块以及相应后续链接块先清除
|
||||
// 判断是否需要删除的外存链
|
||||
// if(!preModified)
|
||||
// {
|
||||
// DelDisk(mfp, 0l, mblockQueue);
|
||||
|
@ -890,7 +890,7 @@ bool BPlusTree::InsertItnlNode(mItnlNode* pNode, mNode* pSon)
|
|||
_pOldItnl ->setAddrFB(_addr_new);
|
||||
// setmodified in mnode_initial;
|
||||
mItnlNode * _pItnl = new mItnlNode;
|
||||
//分配新的首地址
|
||||
// 分配新的首地址
|
||||
long long int _addr_root = 0l;
|
||||
long long int _addr_1 = _pOldItnl ->getAddrFB();
|
||||
long long int _addr_2 = _pNewItnl ->getAddrFB();
|
||||
|
@ -921,11 +921,11 @@ bool BPlusTree::InsertItnlNode(mItnlNode* pNode, mNode* pSon)
|
|||
return false;
|
||||
}
|
||||
/*
|
||||
* 删除key对应的元素并在delete函数中完成对父节点的修改, 注意内存位的同步修改
|
||||
* 若节点 > 50% 或为根节点, 则结束
|
||||
* 删除key对应的元素并在delete函数中完成对父节点的修改, 注意内存位的同步修改
|
||||
* 若节点 > 50% 或为根节点, 则结束
|
||||
*
|
||||
*/
|
||||
// 递归函数:在中间结点中删除键
|
||||
// 递归函数:在中间结点中删除键
|
||||
bool BPlusTree::DeleteItnlNode(mItnlNode* pItnl, KeyType & key)
|
||||
{
|
||||
int _idelete = pItnl ->Delete(key);
|
||||
|
@ -984,11 +984,11 @@ bool BPlusTree::DeleteItnlNode(mItnlNode* pItnl, KeyType & key)
|
|||
cout << "run bug" << endl;
|
||||
return false;
|
||||
}
|
||||
//保存树结构
|
||||
//保存树结构
|
||||
/*
|
||||
* 保证两点:
|
||||
* 1、内存位标记都正确
|
||||
* 2、修改位标记都正确
|
||||
* 保证两点:
|
||||
* 1、内存位标记都正确
|
||||
* 2、修改位标记都正确
|
||||
*/
|
||||
void BPlusTree :: StoreTree()
|
||||
{
|
||||
|
@ -1004,7 +1004,7 @@ void BPlusTree :: StoreTree()
|
|||
{
|
||||
pNode = pQueue.front();
|
||||
pQueue.pop();
|
||||
// 内有unmodify
|
||||
// 内有unmodify
|
||||
if(pNode->getModify()){
|
||||
any = true;
|
||||
}
|
||||
|
@ -1026,7 +1026,7 @@ void BPlusTree :: StoreTree()
|
|||
}
|
||||
}
|
||||
cout << endl;
|
||||
char _queuefile[1024];
|
||||
char _queuefile[50];
|
||||
strcpy(_queuefile, mTreeName);
|
||||
strcat(_queuefile, "_queue.btree");
|
||||
this ->mblockQueue.WriteQueue(_queuefile);
|
||||
|
@ -1039,7 +1039,7 @@ void BPlusTree :: StoreTree()
|
|||
}
|
||||
|
||||
//
|
||||
//打印树
|
||||
//打印树
|
||||
void BPlusTree :: PrintTree()
|
||||
{
|
||||
queue<mNode *> pQueue[100];
|
||||
|
@ -1083,18 +1083,18 @@ void BPlusTree :: PrintTree()
|
|||
}
|
||||
|
||||
/*
|
||||
* 新建节点需要提供的信息:
|
||||
* 节点的首块地址
|
||||
* 读出type int
|
||||
* 读出count int
|
||||
* 读出count_block int
|
||||
* 读出blocklink mBlockLink
|
||||
* 新建节点需要提供的信息:
|
||||
* 节点的首块地址
|
||||
* 读出type int
|
||||
* 读出count int
|
||||
* 读出count_block int
|
||||
* 读出blocklink mBlockLink
|
||||
*
|
||||
* 读出各元素后, 通过下块地址转移, 转以后读入块元素个数, 再读入块链接
|
||||
* 读出各元素后, 通过下块地址转移, 转以后读入块元素个数, 再读入块链接
|
||||
*
|
||||
*/
|
||||
|
||||
mNode * ReadNode(FILE * fp, long long _addrfb)//每个新建的节点都有分配的或上层节点给予的首块存储区地址
|
||||
mNode * ReadNode(FILE * fp, long long _addrfb)//每个新建的节点都有分配的或上层节点给予的首块存储区地址
|
||||
{
|
||||
fseek(fp, _addrfb, SEEK_SET);
|
||||
int _type_tmp;
|
||||
|
@ -1118,25 +1118,25 @@ mNode * ReadNode(FILE * fp, long long _addrfb)//每个新建的节点都有分
|
|||
_pLeaf ->unModify();
|
||||
_pLeaf ->setAddrFB(_addrfb);
|
||||
fread(&(_pLeaf ->leafLink), sizeof(_pLeaf ->leafLink), 1, fp);
|
||||
/*
|
||||
* while cycle里
|
||||
* for cycle
|
||||
* 通过块内元素个数读入
|
||||
*/
|
||||
/*
|
||||
* while cycle里
|
||||
* for cycle
|
||||
* 通过块内元素个数读入
|
||||
*/
|
||||
|
||||
/*
|
||||
* 读完后, 先记录当前独到节点的第几个元素
|
||||
* 文件指针偏移至指示的下一块
|
||||
* 再读入块内元素个数及上下块结构体变量
|
||||
* 继续读入, 直到元素个数达到count值
|
||||
*/
|
||||
/*
|
||||
* 先把块的剩余空间写入 -1, 再写入len(便于准确申请空间), sizeleft 减去三个4(12字节, “-1”“len”“sizeleft自身”),
|
||||
* 写入sizeleft: 到时需要分段读入 然后申请块, 在将块链接写入之前位置
|
||||
* 写完剩余的空间后, 调整buffer及其size
|
||||
* 然后每多申请一块则先跳过一个整型以及快链接结构体的大小, 以备写入当前块存放的长度buffer(考虑最后一块好处理),以及块链接
|
||||
* 写入剩余的buffer后回写长度及块链接, 若是最后一块, 要注意整理: fp指针 & sizeleft;
|
||||
*/
|
||||
/*
|
||||
* 读完后, 先记录当前独到节点的第几个元素
|
||||
* 文件指针偏移至指示的下一块
|
||||
* 再读入块内元素个数及上下块结构体变量
|
||||
* 继续读入, 直到元素个数达到count值
|
||||
*/
|
||||
/*
|
||||
* 先把块的剩余空间写入 -1, 再写入len(便于准确申请空间), sizeleft 减去三个4(12字节, “-1”“len”“sizeleft自身”),
|
||||
* 写入sizeleft: 到时需要分段读入 然后申请块, 在将块链接写入之前位置
|
||||
* 写完剩余的空间后, 调整buffer及其size
|
||||
* 然后每多申请一块则先跳过一个整型以及快链接结构体的大小, 以备写入当前块存放的长度buffer(考虑最后一块好处理),以及块链接
|
||||
* 写入剩余的buffer后回写长度及块链接, 若是最后一块, 要注意整理: fp指针 & sizeleft;
|
||||
*/
|
||||
bool ReadIn = true;
|
||||
while(_i_tmp <= _count_tmp)
|
||||
{
|
||||
|
@ -1199,7 +1199,7 @@ mNode * ReadNode(FILE * fp, long long _addrfb)//每个新建的节点都有分
|
|||
mItnlNode * _pItnl = new mItnlNode;
|
||||
_pItnl ->setCount(_count_tmp);
|
||||
_pItnl ->unModify();
|
||||
// 后加, 未知之前没有设地址的原因
|
||||
// 后加, 未知之前没有设地址的原因
|
||||
_pItnl ->setAddrFB(_addrfb);
|
||||
while(_i_tmp <= _count_tmp)
|
||||
{
|
||||
|
@ -1262,7 +1262,7 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
fwrite(&(_count_tmp), size_count, 1, fp);
|
||||
_size_left -= size_count;//--------------------------------int
|
||||
|
||||
long long int _addr_blocklink = ftell(fp); // 可能产生问题
|
||||
long long int _addr_blocklink = ftell(fp); // 可能产生问题
|
||||
fseek(fp, size_int + size_blocklink, SEEK_CUR);
|
||||
_size_left -= size_int + size_blocklink;//------------------int + long_long_int*2
|
||||
|
||||
|
@ -1284,20 +1284,20 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
mBlockLink _blocklink;
|
||||
for(; _i_tmp <= _count; _i_tmp ++)
|
||||
{
|
||||
/*
|
||||
* 传入剩余块大小, 若当前元素能够继续写入则写入并返回true, 否则不写入并返回false;
|
||||
* 如果返回false则先返写块容元素个数 及上下块的联系_ _addr_blocklink,
|
||||
* 再申请新块并继续写入: 恢复_size_left, 写入mblocklink
|
||||
* 修改上下块链接
|
||||
* 改成块内元素个数更好
|
||||
* 调整文件指针, 块内元素归零
|
||||
*/
|
||||
/*
|
||||
* 传入剩余块大小, 若当前元素能够继续写入则写入并返回true, 否则不写入并返回false;
|
||||
* 如果返回false则先返写块容元素个数 及上下块的联系_ _addr_blocklink,
|
||||
* 再申请新块并继续写入: 恢复_size_left, 写入mblocklink
|
||||
* 修改上下块链接
|
||||
* 改成块内元素个数更好
|
||||
* 调整文件指针, 块内元素归零
|
||||
*/
|
||||
int size_key = -1;
|
||||
int size_val = -1;
|
||||
int _tmp_i = -1;
|
||||
|
||||
bool_WriteIn = _pLeaf ->LeafData[_i_tmp].Write_mleafdata(fp, _size_left, size_key, size_val);
|
||||
if(!bool_WriteIn) //确定不可能有单一元素超过4K, 可能产生问题
|
||||
if(!bool_WriteIn) //确定不可能有单一元素超过4K, 可能产生问题
|
||||
{
|
||||
if(size_key + size_val > BLOCKSIZE - size_int*3 - size_lli*4
|
||||
&& _size_left > size_key + size_int * 3 + 1)
|
||||
|
@ -1324,9 +1324,9 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
// _curblockaddr = _mqueue.Pop();
|
||||
_blocklink.mBlockLink_Initial();
|
||||
_blocklink.preBlockAddr = _preblockaddr;
|
||||
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1)
|
||||
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
|
||||
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
|
||||
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1)
|
||||
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
|
||||
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
|
||||
|
||||
fseek(fp, _addr_blocklink, SEEK_SET);
|
||||
nEle_inBlock ++;
|
||||
|
@ -1335,7 +1335,7 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
fwrite(&_blocklink, size_blocklink, 1, fp);
|
||||
|
||||
fseek(fp, _curblockaddr, SEEK_SET);
|
||||
_addr_blocklink = _curblockaddr;//调整回写偏移
|
||||
_addr_blocklink = _curblockaddr;//调整回写偏移
|
||||
fseek(fp, size_blocklink + size_int, SEEK_CUR);
|
||||
|
||||
//write back blocklink
|
||||
|
@ -1349,16 +1349,16 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
|
||||
_blocklink.mBlockLink_Initial();
|
||||
_blocklink.preBlockAddr = _preblockaddr;
|
||||
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1)
|
||||
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
|
||||
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
|
||||
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1)
|
||||
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
|
||||
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
|
||||
|
||||
fseek(fp, _addr_blocklink, SEEK_SET);
|
||||
fwrite(&_block_hold, size_int, 1, fp);
|
||||
fwrite(&_blocklink, size_blocklink, 1, fp);
|
||||
|
||||
fseek(fp, _curblockaddr, SEEK_SET);
|
||||
_addr_blocklink = _curblockaddr;//调整回写偏移
|
||||
_addr_blocklink = _curblockaddr;//调整回写偏移
|
||||
fseek(fp, size_blocklink + size_int, SEEK_CUR);
|
||||
}
|
||||
|
||||
|
@ -1372,36 +1372,36 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
// _size_left = BLOCKSIZE - _size_buffer - size_int - size_blocklink;
|
||||
_size_left = -1;
|
||||
first_int = _size_buffer;
|
||||
/*
|
||||
* 先把块的剩余空间写入 -1, 再写入len(便于准确申请空间), sizeleft 减去三个4(12字节, “-1”“len”“sizeleft自身”),
|
||||
* 写入sizeleft: 到时需要分段读入 然后申请块, 在将块链接写入之前位置
|
||||
* 写完剩余的空间后, 调整buffer及其size
|
||||
* 然后每多申请一块则先跳过一个整型以及快链接结构体的大小, 以备写入当前块存放的长度buffer(考虑最后一块好处理),以及块链接
|
||||
* 写入剩余的buffer后回写长度及块链接, 若是最后一块, 要注意整理: fp指针 & sizeleft;
|
||||
*/
|
||||
/*
|
||||
* 先把块的剩余空间写入 -1, 再写入len(便于准确申请空间), sizeleft 减去三个4(12字节, “-1”“len”“sizeleft自身”),
|
||||
* 写入sizeleft: 到时需要分段读入 然后申请块, 在将块链接写入之前位置
|
||||
* 写完剩余的空间后, 调整buffer及其size
|
||||
* 然后每多申请一块则先跳过一个整型以及快链接结构体的大小, 以备写入当前块存放的长度buffer(考虑最后一块好处理),以及块链接
|
||||
* 写入剩余的buffer后回写长度及块链接, 若是最后一块, 要注意整理: fp指针 & sizeleft;
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
_i_tmp --;
|
||||
_size_left = BLOCKSIZE; //<EFBFBD>ظ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
_size_left = BLOCKSIZE; //回复块内容量
|
||||
|
||||
|
||||
|
||||
_blocklink.mBlockLink_Initial();
|
||||
_blocklink.preBlockAddr = _preblockaddr;
|
||||
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1)
|
||||
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
|
||||
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
|
||||
/*
|
||||
* 写回块内元素个数以及快链接
|
||||
*/
|
||||
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1)
|
||||
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
|
||||
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
|
||||
/*
|
||||
* 写回块内元素个数以及快链接
|
||||
*/
|
||||
fseek(fp, _addr_blocklink, SEEK_SET);
|
||||
int size_ele_inblock = sizeof(nEle_inBlock);
|
||||
fwrite(&first_int, size_ele_inblock, 1, fp);
|
||||
fwrite(&_blocklink, size_blocklink, 1, fp);
|
||||
|
||||
fseek(fp, _curblockaddr, SEEK_SET);
|
||||
_addr_blocklink = _curblockaddr;//调整回写偏移
|
||||
_addr_blocklink = _curblockaddr;//调整回写偏移
|
||||
fseek(fp, size_ele_inblock + size_blocklink, SEEK_CUR);
|
||||
_size_left -= size_ele_inblock + size_blocklink;//===========int + lli * 2
|
||||
|
||||
|
@ -1415,13 +1415,13 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
}// if write_in
|
||||
|
||||
}// for count
|
||||
/*
|
||||
* 最后一块的处理
|
||||
*/
|
||||
/*
|
||||
* 最后一块的处理
|
||||
*/
|
||||
|
||||
_blocklink.mBlockLink_Initial();
|
||||
_blocklink.preBlockAddr = _preblockaddr;
|
||||
_blocklink.nextBlockAddr = -1; // 上一块的下一块即为新块
|
||||
_blocklink.nextBlockAddr = -1; // 上一块的下一块即为新块
|
||||
fseek(fp, _addr_blocklink, SEEK_SET);
|
||||
fwrite(&first_int, size_int, 1, fp);
|
||||
fwrite(&_blocklink, size_blocklink, 1, fp);
|
||||
|
@ -1440,37 +1440,37 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
mBlockLink _blocklink;
|
||||
for(; _i_tmp <= _count; _i_tmp ++)
|
||||
{
|
||||
/*
|
||||
* 传入剩余块大小, 若当前元素能够继续写入则写入并返回true, 否则不写入并返回false;
|
||||
* 如果返回false则先返写块容元素个数 及上下块的联系_ _addr_blocklink,
|
||||
* 再申请新块并继续写入: 恢复_size_left, 写入mblocklink
|
||||
* 修改上下块链接
|
||||
* 改成块内元素个数更好
|
||||
* 调整文件指针, 块内元素归零
|
||||
*/
|
||||
/*
|
||||
* 传入剩余块大小, 若当前元素能够继续写入则写入并返回true, 否则不写入并返回false;
|
||||
* 如果返回false则先返写块容元素个数 及上下块的联系_ _addr_blocklink,
|
||||
* 再申请新块并继续写入: 恢复_size_left, 写入mblocklink
|
||||
* 修改上下块链接
|
||||
* 改成块内元素个数更好
|
||||
* 调整文件指针, 块内元素归零
|
||||
*/
|
||||
bool_WriteIn = _pItnl ->ItnlData[_i_tmp].Write_mitnldata(fp, _size_left);
|
||||
if(!bool_WriteIn) //确定不可能有单一元素超过4K, 可能产生问题
|
||||
if(!bool_WriteIn) //确定不可能有单一元素超过4K, 可能产生问题
|
||||
{
|
||||
_i_tmp --;
|
||||
_size_left = BLOCKSIZE; //回复块内容量
|
||||
_size_left = BLOCKSIZE; //回复块内容量
|
||||
|
||||
|
||||
|
||||
_blocklink.mBlockLink_Initial();
|
||||
_blocklink.preBlockAddr = _preblockaddr;
|
||||
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1)
|
||||
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
|
||||
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
|
||||
/*
|
||||
* 写回块内元素个数以及块链接
|
||||
*/
|
||||
_preblockaddr = _curblockaddr; // 新块的上一块就是当前块(首块指向-1)
|
||||
_curblockaddr = _mqueue.Pop(); //当前块变量再调整为指向新块
|
||||
_blocklink.nextBlockAddr = _curblockaddr; // 上一块的下一块即为新块
|
||||
/*
|
||||
* 写回块内元素个数以及块链接
|
||||
*/
|
||||
fseek(fp, _addr_blocklink, SEEK_SET);
|
||||
int size_ele_inblock = sizeof(nEle_inBlock);
|
||||
fwrite(&nEle_inBlock, size_ele_inblock, 1, fp);
|
||||
fwrite(&_blocklink, size_blocklink, 1, fp);
|
||||
|
||||
fseek(fp, _curblockaddr, SEEK_SET);
|
||||
_addr_blocklink = _curblockaddr;//调整回写偏移
|
||||
_addr_blocklink = _curblockaddr;//调整回写偏移
|
||||
fseek(fp, size_ele_inblock + size_blocklink, SEEK_CUR);
|
||||
_size_left -= size_ele_inblock + size_blocklink;
|
||||
|
||||
|
@ -1481,13 +1481,13 @@ void WriteNode(mNode * pNode, FILE * fp, mQueue & _mqueue)
|
|||
nEle_inBlock ++;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* 最后一块的处理
|
||||
*/
|
||||
/*
|
||||
* 最后一块的处理
|
||||
*/
|
||||
|
||||
_blocklink.mBlockLink_Initial();
|
||||
_blocklink.preBlockAddr = _preblockaddr;
|
||||
_blocklink.nextBlockAddr = -1; // 上一块的下一块即为新块
|
||||
_blocklink.nextBlockAddr = -1; // 上一块的下一块即为新块
|
||||
fseek(fp, _addr_blocklink, SEEK_SET);
|
||||
int size_ele_inblock = sizeof(nEle_inBlock);
|
||||
fwrite(&nEle_inBlock, size_ele_inblock, 1, fp);
|
||||
|
@ -1517,7 +1517,7 @@ void DelDisk(FILE * fp, long long int _addrfb, mQueue & _mqueue)
|
|||
long long int BackAddr[1000] = {};
|
||||
vector<long long int> BackVec;
|
||||
// BackAddr[_i_tmp] = _addrfb;
|
||||
// _i_tmp ++; 首块地址绝非要一起回收, 在delete实现之后首块地址在适时push的!!
|
||||
// _i_tmp ++; 首块地址绝非要一起回收, 在delete实现之后首块地址在适时push的!!
|
||||
while(_curblocklink.nextBlockAddr != -1)
|
||||
{
|
||||
{
|
||||
|
@ -1582,7 +1582,7 @@ bool Delete_Key_PartVal(char keyStr[], int keyLen, char partVal[], BPlusTree * _
|
|||
return dRet;
|
||||
}
|
||||
|
||||
// 以下为四个对应的删除实现, 调用了上面两个函数;
|
||||
// 以下为四个对应的删除实现, 调用了上面两个函数;
|
||||
bool Delete_sID2sub(int _sID, BPlusTree * _p_sID2sub)
|
||||
{
|
||||
char * sid2str = new char[5];
|
||||
|
@ -1741,9 +1741,9 @@ void mitnldata::Read_mitnldata(FILE * fp)
|
|||
}
|
||||
bool mitnldata::Write_mitnldata( FILE * fp, int & _size_left )
|
||||
{
|
||||
/*
|
||||
* KeyType 需要由函数返回写入的空间大小
|
||||
*/
|
||||
/*
|
||||
* KeyType 需要由函数返回写入的空间大小
|
||||
*/
|
||||
int size_lli = sizeof(long long int);
|
||||
int size_mKey = mKey.WriteSize();
|
||||
if(_size_left < size_lli + size_mKey) return false;
|
||||
|
@ -1815,7 +1815,7 @@ BPlusTree::BPlusTree(const char * const_tree_name, const char * _build_or_open)
|
|||
}
|
||||
this ->Initial();
|
||||
insert_count = 0;
|
||||
char _tree_name[1024];
|
||||
char _tree_name[1000];
|
||||
memcpy(_tree_name, const_tree_name, (int)strlen(const_tree_name));
|
||||
_tree_name[(int)strlen(const_tree_name)] = '\0';
|
||||
int _key_chose;
|
||||
|
@ -1843,7 +1843,7 @@ BPlusTree::BPlusTree(const char * const_tree_name, const char * _build_or_open)
|
|||
mRoot = new mLeafNode;
|
||||
mRoot ->setAddrFB( 0 );
|
||||
printf("the tree called: %s\n", mTreeName);
|
||||
char _treefile[1024];
|
||||
char _treefile[500];
|
||||
strcpy(_treefile, mTreeName);
|
||||
strcat(_treefile, ".btree");
|
||||
if((mfp = fopen(_treefile, "wb+")) == NULL)
|
||||
|
@ -1874,7 +1874,7 @@ BPlusTree::BPlusTree(const char * const_tree_name, const char * _build_or_open)
|
|||
break;
|
||||
}
|
||||
}
|
||||
char _queuefile[1024];
|
||||
char _queuefile[500];
|
||||
strcpy(_queuefile, mTreeName);
|
||||
strcat(_queuefile, "_queue.btree");
|
||||
mblockQueue.ReadQueue(_queuefile);
|
|
@ -25,10 +25,10 @@
|
|||
#include<set>
|
||||
|
||||
#define BLOCKSIZE (1 << 14)/* 16K */
|
||||
#define ORDER_V 128 /* 为简单起见,把v固定为2,实际的B+树v值应该是可配的 */
|
||||
#define MAXNUM_KEY (ORDER_V * 2) /* 内部结点中最多键个数,为2v ( 1~2v )*/
|
||||
#define MAXNUM_POINTER (ORDER_V * 2 + 1) /* 内部结点中最多指向子树的指针个数,为2v ( 1~2v )*/
|
||||
#define MAXNUM_DATA (ORDER_V * 2 + 1) /* 结点中用作定义,为2v ( 1~2v )*/
|
||||
#define ORDER_V 128 /* 为简单起见,把v固定为2,实际的B+树v值应该是可配的 */
|
||||
#define MAXNUM_KEY (ORDER_V * 2) /* 内部结点中最多键个数,为2v ( 1~2v )*/
|
||||
#define MAXNUM_POINTER (ORDER_V * 2 + 1) /* 内部结点中最多指向子树的指针个数,为2v ( 1~2v )*/
|
||||
#define MAXNUM_DATA (ORDER_V * 2 + 1) /* 结点中用作定义,为2v ( 1~2v )*/
|
||||
#define TERM_NUMBER 1
|
||||
|
||||
#define FLAG_LEFT 5
|
||||
|
@ -49,8 +49,8 @@ using namespace std;
|
|||
|
||||
enum NODE_TYPE
|
||||
{
|
||||
NODE_TYPE_INTERNAL = 2, // 内部结点
|
||||
NODE_TYPE_LEAF = 3, // 叶子结点
|
||||
NODE_TYPE_INTERNAL = 2, // 内部结点
|
||||
NODE_TYPE_LEAF = 3, // 叶子结点
|
||||
};
|
||||
enum eletype
|
||||
{
|
||||
|
@ -73,7 +73,7 @@ public:
|
|||
bool is_AtMem;
|
||||
char* sKey;
|
||||
int iKey;
|
||||
int mLenKey; //关于是否读入\n的试验
|
||||
int mLenKey; //关于是否读入\n的试验
|
||||
KeyType()
|
||||
{
|
||||
KeyType_Initial();
|
||||
|
@ -179,9 +179,9 @@ public:
|
|||
void ReadKey(FILE * fp);
|
||||
|
||||
void WriteKey(FILE * fp);
|
||||
/*
|
||||
* 合适的功能函数
|
||||
*/
|
||||
/*
|
||||
* 合适的功能函数
|
||||
*/
|
||||
int WriteSize()
|
||||
{
|
||||
int size_mLenKey = sizeof(mLenKey);
|
||||
|
@ -524,7 +524,7 @@ public:
|
|||
{
|
||||
Term[0][_tag] = '\0';
|
||||
lenTerm[0] -= sizeof(int) + sizeof(char);
|
||||
//只有一个元素时很特殊, 长度是减少4, 其余减少5
|
||||
//只有一个元素时很特殊, 长度是减少4, 其余减少5
|
||||
if(lenTerm[0] <= 0) flag = FLAG_ZERO;
|
||||
else flag = FLAG_NO_ZERO;
|
||||
return true;
|
||||
|
@ -630,10 +630,9 @@ public:
|
|||
class mQueue
|
||||
{
|
||||
public:
|
||||
// static const int qLenth = 5243005; /* 5*1024*1024 */
|
||||
static const int qLenth = 20971520; /* 20*1024*1024 */
|
||||
static const int qLenth = 5243005;
|
||||
int qUsed;
|
||||
bool qAvailable[mQueue::qLenth];
|
||||
bool qAvailable[mQueue::qLenth];/* 5*1024*1024 */
|
||||
|
||||
public:
|
||||
mQueue()
|
||||
|
@ -788,7 +787,7 @@ public:
|
|||
mNode * _pBrother = NULL;
|
||||
for(int i = 1; i <= _pFather ->getCount(); i ++)
|
||||
{
|
||||
//ָ<EFBFBD><EFBFBD>ƥ<EFBFBD><EFBFBD>
|
||||
//指针匹配
|
||||
if(_pFather ->getPointer(i) == this)
|
||||
{
|
||||
if(i == (_pFather ->getCount()) + 1)
|
||||
|
@ -974,7 +973,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
//此函数需仔细考虑~~!!
|
||||
//此函数需仔细考虑~~!!
|
||||
int iExist(const KeyType &_keytype)
|
||||
{
|
||||
int _ibegin = 1, _iend = getCount();
|
||||
|
@ -992,7 +991,7 @@ public:
|
|||
}
|
||||
|
||||
if(_ibegin == _iend - 1) return -1;
|
||||
// 后加可能有风险
|
||||
// 后加可能有风险
|
||||
|
||||
if(ItnlData[_imiddle].mKey > _keytype)
|
||||
{
|
||||
|
@ -1130,17 +1129,17 @@ public:
|
|||
printf(" == ");
|
||||
}
|
||||
|
||||
// 插入键
|
||||
// 最左端递归向上
|
||||
bool Insert(mNode* _pmnode);
|
||||
// 删除键
|
||||
int Delete(const KeyType & _keytype);
|
||||
// 分裂结点
|
||||
KeyType & Split(mItnlNode* _mitnlnode);
|
||||
// 结合结点
|
||||
bool Combine(mItnlNode * _pmnode);
|
||||
// 从另一结点移一个元素到本结点
|
||||
bool MoveOneElement(mNode * _pmnode);
|
||||
// 插入键
|
||||
// 最左端递归向上
|
||||
bool Insert(mNode* _pmnode);
|
||||
// 删除键
|
||||
int Delete(const KeyType & _keytype);
|
||||
// 分裂结点
|
||||
KeyType & Split(mItnlNode* _mitnlnode);
|
||||
// 结合结点
|
||||
bool Combine(mItnlNode * _pmnode);
|
||||
// 从另一结点移一个元素到本结点
|
||||
bool MoveOneElement(mNode * _pmnode);
|
||||
|
||||
};
|
||||
|
||||
|
@ -1434,7 +1433,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// 此两个函数对叶节点无意义
|
||||
// 此两个函数对叶节点无意义
|
||||
mNode * getPointer(int _i)
|
||||
{
|
||||
return NULL;
|
||||
|
@ -1480,7 +1479,7 @@ public:
|
|||
}
|
||||
return -1;
|
||||
}
|
||||
// 考虑cout = 0的情况
|
||||
// 考虑cout = 0的情况
|
||||
int iInsert(const KeyType & _keytype)
|
||||
{
|
||||
int _ibegin = 1, _iend = getCount();
|
||||
|
@ -1555,19 +1554,19 @@ public:
|
|||
|
||||
|
||||
|
||||
// 插入数据
|
||||
// 最左端递归向上
|
||||
bool Insert(const mleafdata & _leafdata);
|
||||
// 删除数据
|
||||
int Delete(KeyType & _keytype);
|
||||
//重载delete partval
|
||||
int Delete(KeyType & _keytype, char partval[], int & pvFlag);
|
||||
// 分裂结点
|
||||
KeyType & Split(mLeafNode* _mpnode);
|
||||
// 结合结点
|
||||
bool Combine(mLeafNode* _mpnode);
|
||||
// 重复插入
|
||||
bool dupInsert(const mleafdata & _leafdata, int _index_insert);
|
||||
// 插入数据
|
||||
// 最左端递归向上
|
||||
bool Insert(const mleafdata & _leafdata);
|
||||
// 删除数据
|
||||
int Delete(KeyType & _keytype);
|
||||
//重载delete partval
|
||||
int Delete(KeyType & _keytype, char partval[], int & pvFlag);
|
||||
// 分裂结点
|
||||
KeyType & Split(mLeafNode* _mpnode);
|
||||
// 结合结点
|
||||
bool Combine(mLeafNode* _mpnode);
|
||||
// 重复插入
|
||||
bool dupInsert(const mleafdata & _leafdata, int _index_insert);
|
||||
};
|
||||
|
||||
extern mNode* ReadNode(FILE * fp, long long int _addr);
|
||||
|
@ -1584,19 +1583,19 @@ extern bool Delete_obj2sID(char _obj_str[], int _del_sID, BPlusTree * _p_obj2sID
|
|||
extern bool Delete_objpID2sID(char _obj_str[], int _pID, int _del_sID, BPlusTree * _p_objpID2sID);
|
||||
|
||||
|
||||
/* B+树数据结构 */
|
||||
/* B+树数据结构 */
|
||||
class BPlusTree
|
||||
{
|
||||
public:
|
||||
|
||||
// 以下两个变量用于实现双向链表
|
||||
mLeafNode* pmLeafHead; // 头结点
|
||||
mLeafNode* pmLeafTail; // 尾结点
|
||||
mNode * mRoot; // 根结点
|
||||
// 以下两个变量用于实现双向链表
|
||||
mLeafNode* pmLeafHead; // 头结点
|
||||
mLeafNode* pmLeafTail; // 尾结点
|
||||
mNode * mRoot; // 根结点
|
||||
mQueue mblockQueue;
|
||||
FILE * mfp;
|
||||
char mTreeName[1024];
|
||||
int mDepth; // 树的深度
|
||||
char mTreeName[55];
|
||||
int mDepth; // 树的深度
|
||||
int insert_count;
|
||||
|
||||
void Initial();
|
||||
|
@ -1606,33 +1605,33 @@ public:
|
|||
void log(const char* _log)const;
|
||||
void forcheck();
|
||||
|
||||
// 获取和设置根结点
|
||||
mNode * getRoot() { return mRoot; }
|
||||
// 获取和设置根结点
|
||||
mNode * getRoot() { return mRoot; }
|
||||
|
||||
void setRoot(mNode * root) { mRoot = root; }
|
||||
void Flush();
|
||||
// 为插入而查找叶子结点
|
||||
mLeafNode * SearchLeafNode(const KeyType & data)const;
|
||||
//插入键到中间结点
|
||||
bool InsertItnlNode(mItnlNode * pNode, mNode * pSon);
|
||||
// 在中间结点中删除键
|
||||
bool DeleteItnlNode(mItnlNode * pNode, KeyType & key);
|
||||
// 查找指定的数据
|
||||
bool Search(KeyType & data, mleafdata & _ret);
|
||||
// 插入指定的数据
|
||||
bool Insert(const mleafdata & _mleafdata);
|
||||
// 删除指定的数据
|
||||
bool Delete(KeyType & data);
|
||||
// 重载删除函数
|
||||
bool Delete(KeyType & data, char PartVal[]);
|
||||
// 清除树
|
||||
void ClearTree();
|
||||
// 打印树
|
||||
void PrintTree();
|
||||
//读出根节点
|
||||
void ReadRoot();
|
||||
//保存树结构
|
||||
void StoreTree();
|
||||
void setRoot(mNode * root) { mRoot = root; }
|
||||
void Flush();
|
||||
// 为插入而查找叶子结点
|
||||
mLeafNode * SearchLeafNode(const KeyType & data)const;
|
||||
//插入键到中间结点
|
||||
bool InsertItnlNode(mItnlNode * pNode, mNode * pSon);
|
||||
// 在中间结点中删除键
|
||||
bool DeleteItnlNode(mItnlNode * pNode, KeyType & key);
|
||||
// 查找指定的数据
|
||||
bool Search(KeyType & data, mleafdata & _ret);
|
||||
// 插入指定的数据
|
||||
bool Insert(const mleafdata & _mleafdata);
|
||||
// 删除指定的数据
|
||||
bool Delete(KeyType & data);
|
||||
// 重载删除函数
|
||||
bool Delete(KeyType & data, char PartVal[]);
|
||||
// 清除树
|
||||
void ClearTree();
|
||||
// 打印树
|
||||
void PrintTree();
|
||||
//读出根节点
|
||||
void ReadRoot();
|
||||
//保存树结构
|
||||
void StoreTree();
|
||||
|
||||
};
|
||||
#endif /* CBTREE_H_ */
|
1007
KVstore/KVstore.cpp
1007
KVstore/KVstore.cpp
File diff suppressed because it is too large
Load Diff
|
@ -1,23 +1,22 @@
|
|||
/*=============================================================================
|
||||
# Filename: KVstore.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-23 14:23
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
/*
|
||||
* KVstore.h
|
||||
*
|
||||
* Created on: 2014-5-8
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef _KVSTORE_KVSTORE_H
|
||||
#define _KVSTORE_KVSTORE_H
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "tree/Tree.h"
|
||||
|
||||
//TODO:add debug instruction, control if using the so2p index and p2so index
|
||||
//these are really costly
|
||||
|
||||
class KVstore
|
||||
{
|
||||
#ifndef KVSTORE_H_
|
||||
#define KVSTORE_H_
|
||||
#include<iostream>
|
||||
#include<string.h>
|
||||
#include<sys/stat.h>
|
||||
#include<dirent.h>
|
||||
#include"Btree.h"
|
||||
using namespace std;
|
||||
class KVstore{
|
||||
public:
|
||||
static const bool debug_mode = false;
|
||||
static const bool test = false;
|
||||
static const int READ_WRITE_MODE = 1;
|
||||
static const int CREATE_MODE = 2;
|
||||
|
||||
|
@ -47,118 +46,73 @@ private:
|
|||
bool remove_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id);
|
||||
|
||||
public:
|
||||
//for entity2id
|
||||
|
||||
/* for entity2id */
|
||||
bool open_entity2id(const int _mode);
|
||||
int getIDByEntity(const std::string _entity);
|
||||
bool setIDByEntity(const std::string _entity, int _id);
|
||||
int getIDByEntity(const string _entity);
|
||||
bool setIDByEntity(const string _entity, int _id);
|
||||
|
||||
|
||||
//for id2entity
|
||||
/* for id2entity */
|
||||
bool open_id2entity(const int _mode);
|
||||
std::string getEntityByID(int _id);
|
||||
bool setEntityByID(int _id, std::string _entity);
|
||||
string getEntityByID(int _id);
|
||||
bool setEntityByID(int _id, string _entity);
|
||||
|
||||
//for predicate2id
|
||||
/* for predicate2id */
|
||||
bool open_predicate2id(const int _mode);
|
||||
int getIDByPredicate(const std::string _predicate);
|
||||
bool setIDByPredicate(const std::string _predicate, int _id);
|
||||
int getIDByPredicate(const string _predicate);
|
||||
bool setIDByPredicate(const string _predicate, int _id);
|
||||
|
||||
//for id2predicate
|
||||
/* for id2predicate */
|
||||
bool open_id2predicate(const int _mode);
|
||||
std::string getPredicateByID(int _id);
|
||||
bool setPredicateByID(const int _id, std::string _predicate);
|
||||
string getPredicateByID(int _id);
|
||||
bool setPredicateByID(const int _id, string _predicate);
|
||||
|
||||
//for id2literal
|
||||
/* for id2literal */
|
||||
bool open_id2literal(const int _mode);
|
||||
std::string getLiteralByID(int _id);
|
||||
bool setLiteralByID(const int _id, std::string _literal);
|
||||
string getLiteralByID(int _id);
|
||||
bool setLiteralByID(const int _id, string _literal);
|
||||
|
||||
//for literal2id
|
||||
|
||||
/* for literal2id */
|
||||
bool open_literal2id(const int _mode);
|
||||
int getIDByLiteral(std::string _literal);
|
||||
bool setIDByLiteral(const std::string _literal, int _id);
|
||||
int getIDByLiteral(string _literal);
|
||||
bool setIDByLiteral(const string _literal, int _id);
|
||||
|
||||
//for subID 2 objIDlist
|
||||
bool open_subID2objIDlist(const int _mode);
|
||||
/* for subID 2 objIDlist */
|
||||
bool open_subid2objidlist(const int _mode);
|
||||
bool getobjIDlistBysubID(int _subid, int*& _objidlist, int& _list_len);
|
||||
bool setobjIDlistBysubID(int _subid, const int* _objidlist, int _list_len);
|
||||
|
||||
//for objID 2 subIDlist
|
||||
bool open_objID2subIDlist(const int _mode);
|
||||
/* for objID 2 subIDlist */
|
||||
bool open_objid2subidlist(const int _mode);
|
||||
bool getsubIDlistByobjID(int _objid, int*& _subidlist, int& _list_len);
|
||||
bool setsubIDlistByobjID(int _objid, const int* _subidlist, int _list_len);
|
||||
|
||||
//for subID&preID 2 objIDlist
|
||||
/* for subID&preID 2 objIDlist */
|
||||
bool open_subIDpreID2objIDlist(const int _mode);
|
||||
bool getobjIDlistBysubIDpreID(int _subid, int _preid, int*& _objidlist, int& _list_len);
|
||||
bool setobjIDlistBysubIDpreID(int _subid, int _preid, const int* _objidlist, int _list_len);
|
||||
|
||||
//for objID&preID 2 subIDlist
|
||||
/* for objID&preID 2 subIDlist */
|
||||
bool open_objIDpreID2subIDlist(const int _mode);
|
||||
bool getsubIDlistByobjIDpreID(int _objid, int _preid, int*& _subidlist, int& _list_len);
|
||||
bool setsubIDlistByobjIDpreID(int _objid, int _preid, const int* _subidlist, int _list_len);
|
||||
|
||||
//for subID 2 preID&objIDlist
|
||||
/* for subID 2 preID&objIDlist */
|
||||
bool open_subID2preIDobjIDlist(const int _mode);
|
||||
bool getpreIDobjIDlistBysubID(int _subid, int*& _preid_objidlist, int& _list_len);
|
||||
bool setpreIDobjIDlistBysubID(int _subid, const int* _preid_objidlist, int _list_len);
|
||||
|
||||
//for objID 2 preID&subIDlist
|
||||
/* for objID 2 preID&subIDlist */
|
||||
bool open_objID2preIDsubIDlist(const int _mode);
|
||||
bool getpreIDsubIDlistByobjID(int _objid, int*& _preid_subidlist, int& _list_len);
|
||||
bool setpreIDsubIDlistByobjID(int _objid, const int* _preid_subidlist, int _list_len);
|
||||
|
||||
//for subID 2 preIDlist
|
||||
bool open_subID2preIDlist(const int _mode);
|
||||
bool getpreIDlistBysubID(int _subid, int*& _preidlist, int& _list_len);
|
||||
bool setpreIDlistBysubID(int _subid, const int* _preidlist, int _list_len);
|
||||
|
||||
//for preID 2 subIDlist
|
||||
bool open_preID2subIDlist(const int _mode);
|
||||
bool getsubIDlistBypreID(int _preid, int*& _subidlist, int& _list_len);
|
||||
bool setsubIDlistBypreID(int _preid, const int* _subidlist, int _list_len);
|
||||
|
||||
//for objID 2 preIDlist
|
||||
bool open_objID2preIDlist(const int _mode);
|
||||
bool getpreIDlistByobjID(int _objid, int*& _preidlist, int& _list_len);
|
||||
bool setpreIDlistByobjID(int _objid, const int* _preidlist, int _list_len);
|
||||
|
||||
//for preID 2 objIDlist
|
||||
bool open_preID2objIDlist(const int _mode);
|
||||
bool getobjIDlistBypreID(int _preid, int*& _objidlist, int& _list_len);
|
||||
bool setobjIDlistBypreID(int _preid, const int* _objidlist, int _list_len);
|
||||
|
||||
//for subID&objID 2 preIDlist
|
||||
bool open_subIDobjID2preIDlist(const int _mode);
|
||||
bool getpreIDlistBysubIDobjID(int _subID, int _objID, int*& _preidlist, int& _list_len);
|
||||
bool setpreIDlistBysubIDobjID(int _subID, int _objID, const int* _preidlist, int _list_len);
|
||||
|
||||
//for preID 2 subID&objIDlist
|
||||
bool open_preID2subIDobjIDlist(const int _mode);
|
||||
bool getsubIDobjIDlistBypreID(int _preid, int*& _subid_objidlist, int& _list_len);
|
||||
bool setsubIDobjIDlistBypreID(int _preid, const int* _subid_objidlist, int _list_len);
|
||||
|
||||
//QUERY:is the below 3 indexes needed?
|
||||
//In fact, p2so can compute the num of triples if dividing so_len by 2
|
||||
//However, sometimes this can be very large and costly
|
||||
//For example, the predicate is <rdf:type>
|
||||
|
||||
//for predicate 2 triple num
|
||||
bool open_preID2num(const int _mode);
|
||||
int getNumBypreID(int _preid);
|
||||
bool setNumBypreID(int _preid, int _tripleNum);
|
||||
|
||||
//for subject&predicate 2 triple num
|
||||
bool open_subIDpreID2num(const int _mode);
|
||||
int getNumBysubIDpreID(int _subID, int _preID);
|
||||
bool setNumBysubIDpreID(int _subID, int _preID, int _tripleNum);
|
||||
|
||||
//for object&predicate 2 triple num
|
||||
bool open_objIDpreID2num(const int _mode);
|
||||
int getNumByobjIDpreID(int _objid, int _preid);
|
||||
bool setNumByobjIDpreID(int _objid, int _preid, int _tripleNum);
|
||||
|
||||
KVstore(std::string _store_path = ".");
|
||||
/*
|
||||
* _store_path denotes where to store the data
|
||||
*/
|
||||
KVstore(string _store_path = ".");
|
||||
~KVstore();
|
||||
void flush();
|
||||
void release();
|
||||
|
@ -166,74 +120,57 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
std::string store_path;
|
||||
//map entity to its id, and id to the entity
|
||||
//s_entity2id is relative store file name
|
||||
Tree* entity2id;
|
||||
Tree* id2entity;
|
||||
static std::string s_entity2id;
|
||||
static std::string s_id2entity;
|
||||
string store_path;
|
||||
/*
|
||||
*
|
||||
* map entity to its id, and id to the entity
|
||||
* s_entity2id is relative store file name
|
||||
*/
|
||||
Btree* entity2id;
|
||||
Btree* id2entity;
|
||||
static string s_entity2id;
|
||||
static string s_id2entity;
|
||||
|
||||
Tree* predicate2id;
|
||||
Tree* id2predicate;
|
||||
static std::string s_predicate2id;
|
||||
static std::string s_id2predicate;
|
||||
Btree* predicate2id;
|
||||
Btree* id2predicate;
|
||||
static string s_predicate2id;
|
||||
static string s_id2predicate;
|
||||
|
||||
Tree* literal2id;
|
||||
Tree* id2literal;
|
||||
static std::string s_literal2id;
|
||||
static std::string s_id2literal;
|
||||
Btree* literal2id;
|
||||
Btree* id2literal;
|
||||
static string s_literal2id;
|
||||
static string s_id2literal;
|
||||
|
||||
|
||||
Tree* subID2objIDlist;
|
||||
Tree* objID2subIDlist;
|
||||
static std::string s_sID2oIDlist;
|
||||
static std::string s_oID2sIDlist;
|
||||
Btree* subID2objIDlist;
|
||||
Btree* objID2subIDlist;
|
||||
static string s_sID2oIDlist;
|
||||
static string s_oID2sIDlist;
|
||||
|
||||
//lack exist in update tuple
|
||||
Tree* subIDpreID2objIDlist;
|
||||
Tree* objIDpreID2subIDlist;
|
||||
static std::string s_sIDpID2oIDlist;
|
||||
static std::string s_oIDpID2sIDlist;
|
||||
/* lack exist in update tuple */
|
||||
Btree* subIDpreID2objIDlist;
|
||||
Btree* objIDpreID2subIDlist;
|
||||
static string s_sIDpID2oIDlist;
|
||||
static string s_oIDpID2sIDlist;
|
||||
|
||||
Tree* subID2preIDobjIDlist;
|
||||
Tree* objID2preIDsubIDlist;
|
||||
static std::string s_sID2pIDoIDlist;
|
||||
static std::string s_oID2pIDsIDlist;
|
||||
Btree* subID2preIDobjIDlist;
|
||||
Btree* objID2preIDsubIDlist;
|
||||
static string s_sID2pIDoIDlist;
|
||||
static string s_oID2pIDsIDlist;
|
||||
|
||||
Tree* subID2preIDlist;
|
||||
Tree* preID2subIDlist;
|
||||
static std::string s_sID2pIDlist;
|
||||
static std::string s_pID2sIDlist;
|
||||
|
||||
Tree* objID2preIDlist;
|
||||
Tree* preID2objIDlist;
|
||||
static std::string s_oID2pIDlist;
|
||||
static std::string s_pID2oIDlist;
|
||||
|
||||
Tree* subIDobjID2preIDlist;
|
||||
Tree* preID2subIDobjIDlist;
|
||||
static std::string s_sIDoID2pIDlist;
|
||||
static std::string s_pID2sIDoIDlist;
|
||||
|
||||
Tree* preID2num;
|
||||
Tree* subIDpreID2num;
|
||||
Tree* objIDpreID2num;
|
||||
static std::string s_pID2num;
|
||||
static std::string s_sIDpID2num;
|
||||
static std::string s_oIDpID2num;
|
||||
|
||||
void flush(Tree* _p_btree);
|
||||
bool setValueByKey(Tree* _p_btree, const char* _key, int _klen, const char* _val, int _vlen);
|
||||
bool getValueByKey(Tree* _p_btree, const char* _key, int _klen, char*& _val, int& _vlen);
|
||||
int getIDByStr(Tree* _p_btree, const char* _key, int _klen);
|
||||
bool removeKey(Tree* _p_btree, const char* _key, int _klen);
|
||||
void flush(Btree* _p_btree);
|
||||
void release(Btree* _p_btree);
|
||||
bool setValueByKey(Btree* _p_btree, const char* _key, int _klen, const char* _val, int _vlen);
|
||||
bool getValueByKey(Btree* _p_btree, const char* _key, int _klen, char*& _val, int& _vlen);
|
||||
int getIDByStr(Btree* _p_btree, const char* _key, int _klen);
|
||||
bool removeKey(Btree* _p_btree, const char* _key, int _klen);
|
||||
|
||||
/* Open a btree according the mode */
|
||||
/* CREATE_MODE: build a new btree and delete if exist */
|
||||
/* READ_WRITE_MODE: open a btree, btree must exist */
|
||||
bool open(Tree* & _p_btree, const std::string _tree_name, const int _mode);
|
||||
bool open(Btree* & _p_btree, const string _tree_name, const int _mode);
|
||||
};
|
||||
|
||||
#endif //_KVSTORE_KVSTORE_H
|
||||
|
||||
#endif /* KVSTORE_H_ */
|
||||
|
|
|
@ -1,187 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Heap.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:37
|
||||
# Description: achieve functions in Heap.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "Heap.h"

#include <new>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Default constructor: an empty heap that owns no storage yet.
Heap::Heap()
{
	heap = NULL;
	size = 0;
	length = 0;
}
|
||||
|
||||
// Construct an empty heap with room for _size node pointers.
// _size: initial capacity (number of Node* slots).
// On allocation failure an error is printed and the process exits with status 1.
Heap::Heap(unsigned _size)
{
	this->length = 0;
	this->size = _size;
	// Plain new[] reports failure by throwing std::bad_alloc and never yields
	// NULL, which made the check below unreachable. The nothrow form returns
	// NULL on failure, so the error path is actually taken. The array must stay
	// a new[] allocation because the destructor releases it with delete[].
	this->heap = new (std::nothrow) Node*[this->size];
	if(this->heap == NULL)
	{
		this->print("error in Heap: Allocation fail!");
		exit(1);
	}
}
|
||||
|
||||
// Return the element stored at the root of the heap (index 0),
// or NULL when the heap holds no elements.
Node*
Heap::getTop() const
{
	return (this->length > 0) ? this->heap[0] : NULL;
}
|
||||
|
||||
unsigned
|
||||
Heap::getLen() const
|
||||
{
|
||||
return this->length;
|
||||
}
|
||||
|
||||
unsigned
|
||||
Heap::getSize() const
|
||||
{
|
||||
return this->size;
|
||||
}
|
||||
|
||||
bool
|
||||
Heap::isEmpty() const
|
||||
{
|
||||
return this->length == 0;
|
||||
}
|
||||
|
||||
bool
|
||||
Heap::insert(Node* _np)
|
||||
{
|
||||
if(this->length == this->size) //when full, reallocate
|
||||
{
|
||||
this->heap = (Node**)realloc(this->heap, 2 * this->size * sizeof(Node*));
|
||||
if(this->heap == NULL)
|
||||
{
|
||||
print("error in isert: Reallocation fail!");
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
this->npmap = (struct Map*)realloc(this->npmap, 2 * this->size * sizeof(struct Map));
|
||||
if(this->npmap == NULL)
|
||||
{
|
||||
print("error in insert: Reallocation fail!");
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
this->size = 2 * this->size;
|
||||
}
|
||||
unsigned i = this->length, j;
|
||||
while(i != 0)
|
||||
{
|
||||
j = (i-1)/2;
|
||||
if(_np->getRank() >= this->heap[j]->getRank())
|
||||
break;
|
||||
heap[i] = heap[j];
|
||||
//this->npmap[k].pos = i; //adjust the position
|
||||
i = j;
|
||||
}
|
||||
this->heap[i] = _np;
|
||||
this->length++;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Heap::remove()
|
||||
{
|
||||
if(this->length == 0)
|
||||
{
|
||||
print("error in remove: remove from empty heap!");
|
||||
return false;
|
||||
}
|
||||
//Node* tp = this->heap[0];
|
||||
this->length--;
|
||||
if(this->length == 0)
|
||||
return true;
|
||||
Node* xp = this->heap[this->length];
|
||||
unsigned i = 0, j = 1;
|
||||
while(j < this->length)
|
||||
{
|
||||
if(j < this->length-1 && this->heap[j]->getRank() > this->heap[j+1]->getRank())
|
||||
j++;
|
||||
if(xp->getRank() <= this->heap[j]->getRank())
|
||||
break;
|
||||
this->heap[i] = this->heap[j];
|
||||
i = j;
|
||||
j = 2 * i + 1;
|
||||
}
|
||||
this->heap[i] = xp;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Heap::modify(Node* _np, bool _flag) //control direction
|
||||
{
|
||||
//search and adjust
|
||||
unsigned i, j;
|
||||
for(i = 0; i < this->length; ++i)
|
||||
if(this->heap[i] == _np)
|
||||
break;
|
||||
if(_flag == true) //move up
|
||||
{
|
||||
while(i != 0)
|
||||
{
|
||||
j = (i-1)/2;
|
||||
if(_np->getRank() < heap[j]->getRank())
|
||||
{
|
||||
heap[i] = heap[j];
|
||||
heap[j] = _np;
|
||||
i = j;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
else //move down
|
||||
{
|
||||
j = 2 * i + 1;
|
||||
while(j < this->length)
|
||||
{
|
||||
if(j < this->length - 1 && heap[j]->getRank() > heap[j+1]->getRank())
|
||||
j++;
|
||||
if(heap[j]->getRank() < _np->getRank())
|
||||
{
|
||||
heap[i] = heap[j];
|
||||
heap[j] = _np;
|
||||
i = j;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Release the pointer array (the nodes themselves are not deleted here)
// and reset the bookkeeping fields.
Heap::~Heap()
{
	delete[] heap;
	heap = NULL;
	size = 0;
	length = 0;
}
|
||||
|
||||
void
|
||||
Heap::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
#endif
|
||||
}
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Heap.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:37
|
||||
# Description: set and deal of Node*s in memory
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_HEAP_HEAP_H
|
||||
#define _KVSTORE_HEAP_HEAP_H
|
||||
|
||||
#include "../../Util/Util.h"
|
||||
#include "../node/Node.h"
|
||||
|
||||
/* add, sub, modify: all can be done within O(logn) using adjust-function */
|
||||
//QUERY: when modified, finding right position consumes O(n). How about keeping smallest?
|
||||
//(add O(1), sub O(2n), modify O(n)
|
||||
//TODO: to solve this probem, use another hash: (pointer, pos), to find the right position of
|
||||
//given p in O(lgn) time
|
||||
|
||||
class Heap
|
||||
{
|
||||
private:
|
||||
Node** heap; //dynamic array
|
||||
unsigned length; //valid elements num
|
||||
unsigned size; //max-size of heap
|
||||
public:
|
||||
Heap();
|
||||
Heap(unsigned _size);
|
||||
Node* getTop() const; //return the top element
|
||||
unsigned getLen() const;
|
||||
unsigned getSize() const;
|
||||
bool isEmpty() const;
|
||||
bool insert(Node* _np); //insert and adjust
|
||||
bool remove(); //remove top and adjust
|
||||
bool modify(Node* _np, bool _flag); //searech modified element and adjust
|
||||
~Heap();
|
||||
void print(std::string s); //DEBUG
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -1,294 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: IntlNode.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:40
|
||||
# Description: achieve functions in IntlNode.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "IntlNode.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/*
|
||||
void
|
||||
IntlNode::AllocChilds()
|
||||
{
|
||||
childs = (Node**)malloc(sizeof(Node*) * MAX_CHILD_NUM);
|
||||
}
|
||||
*/
|
||||
|
||||
// Default constructor: start with every child pointer set to NULL.
IntlNode::IntlNode()
{
	// childs is a member array of MAX_CHILD_NUM+1 pointers; sizing the memset
	// by MAX_CHILD_NUM left the last slot uninitialized. sizeof(childs) covers
	// the whole array (valid only while childs stays an array, not a Node**).
	memset(childs, 0, sizeof(childs));
	//this->AllocChilds();
}
|
||||
|
||||
// Constructor taking a flag; the flag is not used in this body.
// Starts with every child pointer set to NULL.
IntlNode::IntlNode(bool isVirtual)	//call father-class's constructor automaticlly
{
	// childs is a member array of MAX_CHILD_NUM+1 pointers; sizing the memset
	// by MAX_CHILD_NUM left the last slot uninitialized. sizeof(childs) covers
	// the whole array (valid only while childs stays an array, not a Node**).
	memset(childs, 0, sizeof(childs));
	//this->AllocChilds();
}
|
||||
|
||||
/*
|
||||
IntlNode::IntlNode(Storage* TSM) //QUERY
|
||||
{
|
||||
TSM->readNode(this, Storage::OVER);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
IntlNode::Virtual()
|
||||
{
|
||||
//this->FreeKeys();
|
||||
this->release();
|
||||
this->delMem();
|
||||
}
|
||||
|
||||
void
|
||||
IntlNode::Normal()
|
||||
{
|
||||
this->AllocKeys();
|
||||
this->setMem();
|
||||
}
|
||||
|
||||
// Return the child pointer at _index, or NULL when _index lies outside
// [0, num] (a node with num keys has num+1 children).
Node*
IntlNode::getChild(int _index) const
{
	const int keyCount = this->getNum();
	if(_index >= 0 && _index <= keyCount)
		return childs[_index];
	//print(string("error in getChild: Invalid index ") + Util::int2string(_index));
	return NULL;
}
|
||||
|
||||
bool
|
||||
IntlNode::setChild(Node* _child, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in setChild: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
this->childs[_index] = _child;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IntlNode::addChild(Node* _child, int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index > num+1)
|
||||
{
|
||||
print(string("error in addChild: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for(i = num; i >= _index; --i) //DEBUG: right bounder!!!
|
||||
childs[i+1] = childs[i];
|
||||
childs[_index] = _child;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IntlNode::subChild(int _index)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in subchild: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for(i = _index; i < num; ++i) //DEBUG: right bounder!!!
|
||||
childs[i] = childs[i+1];
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
IntlNode::getSize() const
|
||||
{
|
||||
unsigned sum = INTL_SIZE, num = this->getNum(), i;
|
||||
for(i = 0; i < num; ++i)
|
||||
sum += keys[i].getLen();
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Split this node in two.
// The keys and children from position MIN_CHILD_NUM onwards are handed to a
// newly created right sibling, this node is cut back to MIN_KEY_NUM keys, and
// the key at position MIN_KEY_NUM is added to _father at _index with the new
// sibling as the child at _index+1.
// _father: parent node receiving the separator key and the new child.
// _index:  position of this node among the parent's children.
// Returns the new right sibling. All three nodes are marked dirty.
Node*
IntlNode::split(Node* _father, int _index)
{
	const int keyCount = this->getNum();
	Node* right = new IntlNode;		//right child
	right->setHeight(this->getHeight());
	int src = MIN_CHILD_NUM;
	int dst = 0;
	while(src < keyCount)
	{
		right->addKey(this->keys + src, dst);
		right->addChild(this->childs[src], dst);
		right->addNum();
		++src;
		++dst;
	}
	//one more child than keys
	right->addChild(this->childs[src], dst);
	const Bstr* middle = this->keys + MIN_KEY_NUM;
	this->setNum(MIN_KEY_NUM);
	_father->addKey(middle, _index);
	_father->addChild(right, _index+1);	//DEBUG(check the index)
	_father->addNum();
	_father->setDirty();
	right->setDirty();
	this->setDirty();
	return right;
}
|
||||
|
||||
// Rebalance this node with one of its neighbours under _father.
// _father: parent node.
// _index:  position of this node among the parent's children.
// One of four cases is chosen:
//   1: merge the right neighbour into this node
//   2: move one key/child over from the right neighbour
//   3: merge the left neighbour into this node
//   4: move one key/child over from the left neighbour
// The right neighbour is preferred when it has more than MIN_KEY_NUM keys;
// otherwise the left neighbour is used for a move if it has spare keys.
// Returns the neighbour that was merged away (cases 1 and 3, its key count is
// set to 0 and the caller is expected to dispose of it), or NULL when keys
// were only moved or when no neighbour exists.
Node*
IntlNode::coalesce(Node* _father, int _index)
{
	//int num = this->getNum();
	int i, j = _father->getNum(), k = 0;	//BETTER: unsigned?
	// p stays NULL when this node has neither a left nor a right neighbour;
	// previously it was left uninitialized and dereferenced below in that case.
	Node* p = NULL;
	int ccase = 0;
	const Bstr* bstr;
	if(_index < j)	//the right neighbor
	{
		p = _father->getChild(_index+1);
		k = p->getNum();
		if((unsigned)k > MIN_KEY_NUM)
			ccase = 2;
		else		//==MIN_KEY_NUM
			ccase = 1;
	}
	if(_index > 0)	//the left neighbor
	{
		Node* tp = _father->getChild(_index-1);
		unsigned tk = tp->getNum();
		if(ccase < 2)
		{
			if(ccase == 0)
				ccase = 3;
			if(tk > MIN_KEY_NUM)
				ccase = 4;
		}
		if(ccase > 2)
		{
			p = tp;
			k = tk;
		}
	}
	switch(ccase)
	{
	case 1:					//union right to this
		this->addKey(_father->getKey(_index), this->getNum());
		this->addNum();
		for(i = 0; i < k; ++i)
		{
			this->addKey(p->getKey(i), this->getNum());
			this->addChild(p->getChild(i), this->getNum());
			this->addNum();
		}
		this->setChild(p->getChild(i), this->getNum());
		_father->subKey(_index);
		_father->subChild(_index+1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 2:					//move one form right
		this->addKey(_father->getKey(_index), this->getNum());
		_father->setKey(p->getKey(0), _index);
		p->subKey(0);
		this->addChild(p->getChild(0), this->getNum()+1);
		p->subChild(0);
		this->addNum();
		p->subNum();
		break;
	case 3:					//union left to this
		this->addKey(_father->getKey(_index-1), 0);
		this->addNum();
		for(i = k; i > 0; --i)
		{
			int t = i - 1;
			this->addKey(p->getKey(t), 0);
			this->addChild(p->getChild(i), 0);
			this->addNum();
		}
		this->addChild(p->getChild(0), 0);
		_father->subKey(_index-1);
		_father->subChild(_index-1);
		_father->subNum();
		p->setNum(0);
		//delete p;
		break;
	case 4:					//move one from left
		bstr = p->getKey(k-1);
		p->subKey(k-1);
		this->addKey(_father->getKey(_index-1), 0);
		_father->setKey(bstr, _index-1);
		this->addChild(p->getChild(k), 0);
		p->subChild(k);
		this->addNum();
		p->subNum();
		break;
	default:
		print("error in coalesce: Invalid case!");
		//printf("error in coalesce: Invalid case!");
	}
	_father->setDirty();
	if(p != NULL)	//no neighbour was selected in the invalid case
		p->setDirty();
	this->setDirty();
	if(ccase == 1 || ccase == 3)
		return p;
	else
		return NULL;
}
|
||||
|
||||
void
|
||||
IntlNode::release()
|
||||
{
|
||||
if(!this->inMem())
|
||||
return;
|
||||
unsigned num = this->getNum();
|
||||
//delete[] keys; //this will release all!!!
|
||||
for(unsigned i = num; i < MAX_KEY_NUM; ++i)
|
||||
keys[i].clear();
|
||||
delete[] keys;
|
||||
}
|
||||
|
||||
// Destructor: delegates to release(), which frees the key array when the
// node is flagged in-memory. The child pointer array is a plain member.
IntlNode::~IntlNode()
{
	//free(childs);
	this->release();
}
|
||||
|
||||
void
|
||||
IntlNode::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
int num = this->getNum();
|
||||
fputs(Util::showtime().c_str(), Util::debug_kvstore);
|
||||
fputs("Class IntlNode\n", Util::debug_kvstore);
|
||||
fputs("Message: ", Util::debug_kvstore);
|
||||
fputs(s.c_str(), Util::debug_kvstore);
|
||||
fputs("\n", Util::debug_kvstore);
|
||||
if(s == "node" || s == "NODE")
|
||||
{
|
||||
fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
|
||||
int i;
|
||||
for(i = 0; i < num; ++i)
|
||||
{
|
||||
if(s == "node")
|
||||
this->keys[i].print("bstr");
|
||||
else
|
||||
this->keys[i].print("BSTR");
|
||||
}
|
||||
}
|
||||
else if(s == "check node")
|
||||
{
|
||||
//TODO(check node, if satisfy B+ definition)
|
||||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: IntlNode.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:40
|
||||
# Description: the internal-node of a B+ tree
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_NODE_INTLNODE_H
|
||||
#define _KVSTORE_NODE_INTLNODE_H
|
||||
|
||||
#include "Node.h"
|
||||
|
||||
class IntlNode: public Node
|
||||
{
|
||||
protected:
|
||||
Node* childs[MAX_CHILD_NUM+1];
|
||||
//Node** childs;
|
||||
//void AllocChilds();
|
||||
public:
|
||||
IntlNode();
|
||||
IntlNode(bool isVirtual);
|
||||
//IntlNode(Storage* TSM);
|
||||
void Virtual();
|
||||
void Normal();
|
||||
Node* getChild(int _index) const;
|
||||
bool setChild(Node* _child, int _index);
|
||||
bool addChild(Node* _child, int _index);
|
||||
bool subChild(int _index);
|
||||
unsigned getSize() const;
|
||||
Node* split(Node* _father, int _index);
|
||||
Node* coalesce(Node* _father, int _index);
|
||||
void release();
|
||||
~IntlNode();
|
||||
void print(std::string s); //DEBUG
|
||||
/*non-sense functions: polymorphic
|
||||
Node* getPrev() const;
|
||||
Node* getNext() const;
|
||||
const Bstr* getValue(int _index) const;
|
||||
bool setValue(const Bstr* _value, int _index);
|
||||
bool addValue(const Bstr* _value, int _index);
|
||||
bool subValue(int _index);
|
||||
void setPrev(Node* _prev);
|
||||
void setNext(Node* _next);
|
||||
*/
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -1,377 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: LeafNode.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:40
|
||||
# Description: achieve functions in LeafNode.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "LeafNode.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
LeafNode::AllocValues()
|
||||
{
|
||||
values = new Bstr[MAX_KEY_NUM];
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
LeafNode::FreeValues()
|
||||
{
|
||||
delete[] values;
|
||||
}
|
||||
*/
|
||||
|
||||
// Build an in-memory leaf: mark it as a leaf, start unlinked from the
// leaf list, and allocate the value array.
LeafNode::LeafNode()
{
	this->prev = NULL;
	this->next = NULL;
	this->flag |= NF_IL;		//leaf flag
	this->AllocValues();
}
|
||||
|
||||
// Build a leaf that is either in-memory (isVirtual == false) or a "virtual"
// placeholder that owns no key/value arrays (isVirtual == true).
//
// The base is constructed through Node(isVirtual) explicitly. Without the
// forward, the default Node() ran, which always allocated the key array and
// set the in-memory flag, even for a virtual leaf.
LeafNode::LeafNode(bool isVirtual)
	: Node(isVirtual)
{
	flag |= NF_IL;		//leaf flag
	prev = next = NULL;
	if(!isVirtual)
		AllocValues();
	else
		values = NULL;	//never leave the pointer uninitialised
}
|
||||
|
||||
/*
|
||||
LeafNode::LeafNode(Storage* TSM)
|
||||
{
|
||||
AllocValues();
|
||||
TSM->readNode(this, Storage::OVER);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
LeafNode::Virtual()
|
||||
{
|
||||
//this->FreeKeys();
|
||||
//this->FreeValues();
|
||||
this->release();
|
||||
this->delMem();
|
||||
}
|
||||
|
||||
void
|
||||
LeafNode::Normal()
|
||||
{
|
||||
this->AllocKeys();
|
||||
this->AllocValues();
|
||||
this->setMem();
|
||||
}
|
||||
|
||||
// Previous leaf in the leaf list (NULL when there is none).
Node*
LeafNode::getPrev() const
{
	return this->prev;
}
|
||||
|
||||
// Next leaf in the leaf list (NULL when there is none).
Node*
LeafNode::getNext() const
{
	return this->next;
}
|
||||
|
||||
// Return a pointer to the value stored at _index, or NULL when _index is
// outside [0, getNum()). No error message is printed for a bad index.
const Bstr*
LeafNode::getValue(int _index) const
{
	const int cnt = this->getNum();
	if(_index >= 0 && _index < cnt)
		return &this->values[_index];
	//print(string("error in getValue: Invalid index ") + Util::int2string(_index));
	return NULL;
}
|
||||
|
||||
bool
|
||||
LeafNode::setValue(const Bstr* _value, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in setValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
this->values[_index].release(); //NOTICE: only used in modify
|
||||
if(ifcopy)
|
||||
this->values[_index].copy(_value);
|
||||
else
|
||||
this->values[_index] = *_value;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
LeafNode::addValue(const Bstr* _value, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
for(i = num-1; i >= _index; --i)
|
||||
this->values[i+1] = this->values[i];
|
||||
if(ifcopy)
|
||||
this->values[_index].copy(_value);
|
||||
else
|
||||
this->values[_index] = *_value;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
LeafNode::subValue(int _index, bool ifdel)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in subValue: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
if(ifdel)
|
||||
values[_index].release();
|
||||
for(i = _index; i < num-1; ++i)
|
||||
this->values[i] = this->values[i+1];
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
LeafNode::setPrev(Node* _prev)
|
||||
{
|
||||
this->prev = _prev;
|
||||
}
|
||||
|
||||
void
|
||||
LeafNode::setNext(Node* _next)
|
||||
{
|
||||
this->next = _next;
|
||||
}
|
||||
|
||||
unsigned
|
||||
LeafNode::getSize() const
|
||||
{
|
||||
unsigned sum = LEAF_SIZE, num = this->getNum(), i;
|
||||
for(i = 0; i < num; ++i)
|
||||
{
|
||||
sum += keys[i].getLen();
|
||||
sum += values[i].getLen();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Split this leaf in two.
//   _father : parent node; receives a copy of the separator key and the new child
//   _index  : position of this leaf among _father's children
// The entries at [MIN_KEY_NUM, num) move (by plain Bstr assignment) into a
// newly allocated right sibling, which is linked into the leaf list directly
// after this node. This node keeps the first MIN_KEY_NUM entries.
// Returns the new right sibling.
Node*
LeafNode::split(Node* _father, int _index)
{
	int num = this->getNum();
	Node* p = new LeafNode;		//right child
	p->setHeight(this->getHeight());	//NOTICE: assign height for new node

	// Insert p between this node and its old successor. The old successor's
	// prev link must be re-pointed at p as well, otherwise backward links go
	// stale and a later "union left" in coalesce() can drop leaves.
	// The successor is not marked dirty here, mirroring coalesce(), which
	// also relinks neighbours without dirtying them.
	Node* old_next = this->next;
	p->setNext(old_next);
	p->setPrev(this);
	if(old_next != NULL)
		old_next->setPrev(p);
	this->setNext(p);

	int i, k;
	for(i = MIN_KEY_NUM, k = 0; i < num; ++i, ++k)
	{
		p->addKey(this->keys+i, k);
		p->addValue(this->values+i, k);
		p->addNum();
	}
	// the first key of the right half becomes the separator in the father
	const Bstr* tp = this->keys + MIN_KEY_NUM;
	this->setNum(MIN_KEY_NUM);
	_father->addKey(tp, _index, true);
	_father->addChild(p, _index+1);	//DEBUG(check the index)
	_father->addNum();
	_father->setDirty();
	p->setDirty();
	this->setDirty();
	return p;
}
|
||||
|
||||
// Repair this leaf by borrowing one entry from a sibling or by merging a
// sibling into it.
//   _father : parent node
//   _index  : position of this leaf among _father's children
// Case selection (ccase):
//   1  merge the right sibling into this node
//   2  move one entry over from the right sibling (preferred when possible)
//   3  merge the left sibling into this node
//   4  move one entry over from the left sibling
// Returns the sibling that was emptied (cases 1 and 3) so the caller can
// dispose of it, otherwise NULL.
// If no sibling exists (ccase stays 0) an error is logged and NULL is
// returned. The sibling pointer is NULL in that situation and is no longer
// dereferenced.
Node*
LeafNode::coalesce(Node* _father, int _index)
{	//add a key or coalesce a neighbor to this
	int i, j = _father->getNum(), k = 0;	//BETTER: unsigned?
	Node* p = NULL;
	int ccase = 0;
	const Bstr* bstr;
	if(_index < j)	//the right neighbor
	{
		p = _father->getChild(_index+1);
		k = p->getNum();
		if((unsigned)k > MIN_KEY_NUM)
			ccase = 2;
		else					//==MIN_KEY_NUM
			ccase = 1;
	}
	if(_index > 0)	//the left neighbor
	{
		Node* tp = _father->getChild(_index-1);
		unsigned tk = tp->getNum();
		if(ccase < 2)
		{
			if(ccase == 0)
				ccase = 3;
			if(tk > MIN_KEY_NUM)
				ccase = 4;
		}
		if(ccase > 2)
		{
			p = tp;
			k = tk;
		}
	}
	switch(ccase)
	{
	case 1:					//union right to this
		for(i = 0; i < k; ++i)
		{
			this->addKey(p->getKey(i), this->getNum());
			this->addValue(p->getValue(i), this->getNum());
			this->addNum();
		}
		_father->subKey(_index, true);
		_father->subChild(_index+1);
		_father->subNum();
		this->next = p->getNext();
		if(this->next != NULL)
			this->next->setPrev(this);
		p->setNum(0);		//NOTICE: adjust num before delete!
		//delete p;
		break;
	case 2:					//move one from right
		this->addKey(p->getKey(0), this->getNum());
		_father->setKey(p->getKey(1), _index, true);
		p->subKey(0);
		this->addValue(p->getValue(0), this->getNum());
		p->subValue(0);
		this->addNum();
		p->subNum();
		break;
	case 3:					//union left to this
		//BETTER: move all keys/etc one time
		for(i = k; i > 0; --i)
		{
			int t = i - 1;
			this->addKey(p->getKey(t), 0);
			this->addValue(p->getValue(t), 0);
			this->addNum();
		}
		_father->subKey(_index-1, true);
		_father->subChild(_index-1);
		_father->subNum();
		this->prev = p->getPrev();
		if(this->prev != NULL)		//else: leaves-list
			this->prev->setNext(this);
		p->setNum(0);
		//delete p;
		break;
	case 4:					//move one from left
		bstr = p->getKey(k-1);
		p->subKey(k-1);
		this->addKey(bstr, 0);
		_father->setKey(bstr, _index-1, true);
		this->addValue(p->getValue(k-1), 0);
		p->subValue(k-1);
		this->addNum();
		p->subNum();
		break;
	default:
		print("error in coalesce: Invalid case!");
		//printf("error in coalesce: Invalid case!");
	}
	_father->setDirty();
	if(p != NULL)		//p stays NULL when no sibling was found
		p->setDirty();
	this->setDirty();
	if(ccase == 1 || ccase == 3)
		return p;
	else
		return NULL;
}
|
||||
|
||||
void
|
||||
LeafNode::release()
|
||||
{
|
||||
if(!this->inMem())
|
||||
return;
|
||||
unsigned num = this->getNum();
|
||||
/*
|
||||
for(int i = 0; i < num; ++i)
|
||||
{
|
||||
keys[i].release();
|
||||
values[i].release();
|
||||
}
|
||||
*/
|
||||
for(unsigned i = num; i < MAX_KEY_NUM; ++i)
|
||||
{
|
||||
keys[i].clear();
|
||||
values[i].clear();
|
||||
}
|
||||
delete[] keys;
|
||||
delete[] values;
|
||||
}
|
||||
|
||||
// Destructor: delegates to release(), which frees the key/value arrays when
// the leaf is flagged in-memory.
LeafNode::~LeafNode()
{
	this->release();
}
|
||||
|
||||
void
|
||||
LeafNode::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
unsigned num = this->getNum();
|
||||
fputs(Util::showtime().c_str(), Util::debug_kvstore);
|
||||
fputs("Class LeafNode\n", Util::debug_kvstore);
|
||||
fputs("Message: ", Util::debug_kvstore);
|
||||
fputs(s.c_str(), Util::debug_kvstore);
|
||||
fputs("\n", Util::debug_kvstore);
|
||||
unsigned i;
|
||||
if(s == "NODE")
|
||||
{
|
||||
fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
|
||||
fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next);
|
||||
for(i = 0; i < num; ++i)
|
||||
{
|
||||
this->keys[i].print("BSTR");
|
||||
this->values[i].print("BSTR");
|
||||
}
|
||||
}
|
||||
else if(s == "node")
|
||||
{
|
||||
fprintf(Util::debug_kvstore, "store: %u\tnum: %u\tflag: %u\n", this->store, num, this->flag);
|
||||
fprintf(Util::debug_kvstore, "prev: %p\tnext: %p\n", this->prev, this->next);
|
||||
}
|
||||
else if(s == "check node")
|
||||
{
|
||||
//check the node, if satisfy B+ definition
|
||||
bool flag = true;
|
||||
if(num < MIN_KEY_NUM || num > MAX_KEY_NUM)
|
||||
flag = false;
|
||||
if(flag)
|
||||
{
|
||||
for(i = 1; i < num; ++i)
|
||||
{
|
||||
if(keys[i] > keys[i-1])
|
||||
continue;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if( i < num)
|
||||
flag = false;
|
||||
}
|
||||
this->print("node");
|
||||
if(flag)
|
||||
fprintf(Util::debug_kvstore, "This node is good\n");
|
||||
else
|
||||
fprintf(Util::debug_kvstore, "This node is bad\n");
|
||||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: LeafNode.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:39
|
||||
# Description: the leaf-node of a B+ tree
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_NODE_LEAFNODE_H
|
||||
#define _KVSTORE_NODE_LEAFNODE_H
|
||||
|
||||
#include "Node.h"
|
||||
|
||||
class LeafNode: public Node
|
||||
{
|
||||
protected:
|
||||
Node* prev; //LeafNode
|
||||
Node* next;
|
||||
Bstr* values;
|
||||
void AllocValues();
|
||||
//void FreeValues();
|
||||
public:
|
||||
LeafNode();
|
||||
LeafNode(bool isVirtual);
|
||||
//LeafNode(Storage* TSM);
|
||||
void Virtual();
|
||||
void Normal();
|
||||
Node* getPrev() const;
|
||||
Node* getNext() const;
|
||||
const Bstr* getValue(int _index) const;
|
||||
bool setValue(const Bstr* _value, int _index, bool ifcopy = false);
|
||||
bool addValue(const Bstr* _value, int _index, bool ifcopy = false);
|
||||
bool subValue(int _index, bool ifdel = false);
|
||||
void setPrev(Node* _prev);
|
||||
void setNext(Node* _next);
|
||||
unsigned getSize() const;
|
||||
Node* split(Node* _father, int _index);
|
||||
Node* coalesce(Node* _father, int _index);
|
||||
void release();
|
||||
~LeafNode();
|
||||
void print(std::string s); //DEBUG
|
||||
/*non-sense virtual function
|
||||
Node* getChild(int _index) const;
|
||||
bool addChild(Node* _child, int _index);
|
||||
bool subChild(int _index);
|
||||
*/
|
||||
};
|
||||
//BETTER: prev isn't a must, and reverse-range can be achieved using recursive-next
|
||||
|
||||
#endif
|
||||
|
|
@ -1,329 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Node.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:39
|
||||
# Description: achieve functions in Node.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "Node.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
Node::AllocKeys()
|
||||
{
|
||||
keys = new Bstr[MAX_KEY_NUM];
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
Node::FreeKeys()
|
||||
{
|
||||
delete[] keys;
|
||||
}
|
||||
*/
|
||||
|
||||
// Build an in-memory node: store address 0, only the in-memory flag set,
// key array allocated.
Node::Node()
{
	this->store = 0;
	this->flag = 0;
	this->flag |= NF_IM;
	this->AllocKeys();
}
|
||||
|
||||
// Build either an in-memory node (isVirtual == false: in-memory flag set, key
// array allocated) or a virtual placeholder (isVirtual == true: no flags, no
// key array).
// For a virtual node, keys is set to NULL rather than left uninitialised.
Node::Node(bool isVirtual)
{
	store = flag = 0;
	keys = NULL;
	if(!isVirtual)
	{
		flag |= NF_IM;
		AllocKeys();
	}
}
|
||||
|
||||
/*
|
||||
Node::Node(Storage* TSM)
|
||||
{
|
||||
AllocKeys();
|
||||
TSM->readNode(this, Storage::OVER);
|
||||
}
|
||||
*/
|
||||
bool
|
||||
Node::isLeaf() const
|
||||
{
|
||||
return this->flag & NF_IL;
|
||||
}
|
||||
|
||||
bool
|
||||
Node::isDirty() const
|
||||
{
|
||||
return this->flag & NF_ID;
|
||||
}
|
||||
|
||||
void
|
||||
Node::setDirty()
|
||||
{
|
||||
this->flag |= NF_ID;
|
||||
}
|
||||
|
||||
void
|
||||
Node::delDirty()
|
||||
{
|
||||
this->flag &= ~NF_ID;
|
||||
}
|
||||
|
||||
bool
|
||||
Node::inMem() const
|
||||
{
|
||||
return this->flag & NF_IM;
|
||||
}
|
||||
|
||||
void
|
||||
Node::setMem()
|
||||
{
|
||||
this->flag |= NF_IM;
|
||||
}
|
||||
|
||||
void
|
||||
Node::delMem()
|
||||
{
|
||||
this->flag &= ~NF_IM;
|
||||
}
|
||||
|
||||
/*
|
||||
bool
|
||||
Node::isVirtual() const
|
||||
{
|
||||
return this->flag & NF_IV;
|
||||
}
|
||||
|
||||
void
|
||||
Node::setVirtual()
|
||||
{
|
||||
this->flag |= NF_IV;
|
||||
}
|
||||
|
||||
void
|
||||
Node::delVirtual()
|
||||
{
|
||||
this->flag &= ~NF_IV;
|
||||
}
|
||||
*/
|
||||
|
||||
unsigned
|
||||
Node::getRank() const
|
||||
{
|
||||
return this->flag & NF_RK;
|
||||
}
|
||||
|
||||
void
|
||||
Node::setRank(unsigned _rank)
|
||||
{
|
||||
this->flag &= ~NF_RK;
|
||||
this->flag |= _rank;
|
||||
}
|
||||
|
||||
unsigned
|
||||
Node::getHeight() const
|
||||
{
|
||||
return (this->flag & NF_HT)>>20;
|
||||
}
|
||||
|
||||
void
|
||||
Node::setHeight(unsigned _h)
|
||||
{
|
||||
this->flag &= ~NF_HT;
|
||||
this->flag |= (_h<<20);
|
||||
}
|
||||
|
||||
unsigned
|
||||
Node::getNum() const
|
||||
{
|
||||
return (this->flag & NF_KN)>>12;
|
||||
}
|
||||
|
||||
bool
|
||||
Node::setNum(int _num)
|
||||
{
|
||||
if(_num < 0 || (unsigned)_num > MAX_KEY_NUM)
|
||||
{
|
||||
print(string("error in setNum: Invalid num ") + Util::int2string(_num));
|
||||
return false;
|
||||
}
|
||||
this->flag &= ~NF_KN;
|
||||
this->flag |= (_num<<12);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Node::addNum()
|
||||
{
|
||||
if(this->getNum() + 1 > MAX_KEY_NUM)
|
||||
{
|
||||
print("error in addNum: Invalid!");
|
||||
return false;
|
||||
}
|
||||
this->flag += (1<<12);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Node::subNum()
|
||||
{
|
||||
if(this->getNum() < 1)
|
||||
{
|
||||
print("error in subNum: Invalid!");
|
||||
return false;
|
||||
}
|
||||
this->flag -= (1<<12);
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
Node::getStore() const
|
||||
{
|
||||
return this->store;
|
||||
}
|
||||
|
||||
void
|
||||
Node::setStore(unsigned _store)
|
||||
{
|
||||
this->store = _store;
|
||||
}
|
||||
|
||||
unsigned
|
||||
Node::getFlag() const
|
||||
{
|
||||
return flag;
|
||||
}
|
||||
|
||||
void
|
||||
Node::setFlag(unsigned _flag)
|
||||
{
|
||||
this->flag = _flag;
|
||||
}
|
||||
|
||||
// Return a pointer to the key at _index.
// For an index outside [0, getNum()) a message is written to stdout and
// NULL is returned.
const Bstr*
Node::getKey(int _index) const
{
	const int cnt = this->getNum();
	if(_index >= 0 && _index < cnt)
		return &this->keys[_index];
	//print(string("error in getKey: Invalid index ") + Util::int2string(_index));
	printf("error in getKey: Invalid index\n");
	return NULL;
}
|
||||
|
||||
bool
|
||||
Node::setKey(const Bstr* _key, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in setKey: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
if(ifcopy)
|
||||
keys[_index].copy(_key);
|
||||
else
|
||||
keys[_index] = *_key;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Node::addKey(const Bstr* _key, int _index, bool ifcopy)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index > num)
|
||||
{
|
||||
print(string("error in addKey: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
//NOTICE: if num == MAX_KEY_NUM, will visit keys[MAX_KEY_NUM], not legal!!!
|
||||
//however. tree operations ensure that: when node is full, not add but split first!
|
||||
for(i = num - 1; i >= _index; --i)
|
||||
keys[i+1] = keys[i];
|
||||
if(ifcopy)
|
||||
keys[_index].copy(_key);
|
||||
else
|
||||
keys[_index] = *_key;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Node::subKey(int _index, bool ifdel)
|
||||
{
|
||||
int num = this->getNum();
|
||||
if(_index < 0 || _index >= num)
|
||||
{
|
||||
print(string("error in subKey: Invalid index ") + Util::int2string(_index));
|
||||
return false;
|
||||
}
|
||||
int i;
|
||||
if(ifdel)
|
||||
keys[_index].release();
|
||||
for(i = _index; i < num - 1; ++i)
|
||||
keys[i] = keys[i+1];
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
Node::searchKey_less(const Bstr& _bstr) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
|
||||
int low = 0, high = num - 1, mid = -1;
|
||||
while(low <= high)
|
||||
{
|
||||
mid = (low + high) / 2;
|
||||
if(this->keys[mid] > _bstr)
|
||||
{
|
||||
if(low == mid)
|
||||
break;
|
||||
high = mid;
|
||||
}
|
||||
else
|
||||
{
|
||||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
return low;
|
||||
}
|
||||
|
||||
int
|
||||
Node::searchKey_equal(const Bstr& _bstr) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
// if(bstr == *(p->getKey(i)))
|
||||
// {
|
||||
|
||||
int ret = this->searchKey_less(_bstr);
|
||||
if(ret > 0 && this->keys[ret-1] == _bstr)
|
||||
return ret - 1;
|
||||
else
|
||||
return num;
|
||||
}
|
||||
|
||||
int
|
||||
Node::searchKey_lessEqual(const Bstr& _bstr) const
|
||||
{
|
||||
int num = this->getNum();
|
||||
//for(i = 0; i < num; ++i)
|
||||
//if(bstr <= *(p->getKey(i)))
|
||||
//break;
|
||||
|
||||
int ret = this->searchKey_less(_bstr);
|
||||
if(ret > 0 && this->keys[ret-1] == _bstr)
|
||||
return ret - 1;
|
||||
else
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Node.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:38
|
||||
# Description: basic Node class, father of IntlNode and LeafNode
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_NODE_H
|
||||
#define _KVSTORE_NODE_H
|
||||
|
||||
#include "../../Util/Util.h"
|
||||
#include "../../Util/Bstr.h"
|
||||
|
||||
class Node //abstract basic class
|
||||
{
|
||||
public:
|
||||
static const unsigned DEGREE = 2 * 63; //the degree of B+ tree
|
||||
static const unsigned MAX_CHILD_NUM = DEGREE;
|
||||
static const unsigned MIN_CHILD_NUM = DEGREE >> 1;
|
||||
static const unsigned MAX_KEY_NUM = MAX_CHILD_NUM - 1; //max key-num
|
||||
static const unsigned MIN_KEY_NUM = MIN_CHILD_NUM - 1; //min key-num
|
||||
/* diffrent flags for tree-nodes, 32-bit put rank in low-bits means no need to move*/
|
||||
static const unsigned NF_IL = 0x80000000; //is leaf
|
||||
static const unsigned NF_ID = 0x00080000; //is dirty, in rank-area
|
||||
static const unsigned NF_IM = 0x20000000; //in memory, not virtual
|
||||
//static const unsigned NF_IV = 0x10000000; //is virtual
|
||||
static const unsigned NF_RK = 0x00ffffff; //select-rank, in Storage
|
||||
static const unsigned NF_HT = 0xf00000; //height area in rank
|
||||
static const unsigned NF_KN = 0x07f000; //NOTICE: decided by DEGREE
|
||||
static const unsigned INTL_SIZE = sizeof(Bstr) * MAX_KEY_NUM;
|
||||
static const unsigned LEAF_SIZE = 2 * INTL_SIZE;
|
||||
protected:
|
||||
unsigned store; //store address, the BLock index
|
||||
unsigned flag; //NF_RK, NF_IL,NF_ID, NF_IV, propety
|
||||
//int num; //totle keys num
|
||||
//Node* father; //point to father-node, which must be IntlNode
|
||||
Bstr* keys;
|
||||
void AllocKeys();
|
||||
//void FreeKeys();
|
||||
public:
|
||||
Node();
|
||||
Node(bool isVirtual);
|
||||
bool isLeaf() const;
|
||||
bool isDirty() const;
|
||||
void setDirty();
|
||||
void delDirty();
|
||||
bool inMem() const;
|
||||
void setMem();
|
||||
void delMem();
|
||||
//bool isVirtual() const;
|
||||
//void setVirtual();
|
||||
//void delVirtual();
|
||||
unsigned getRank() const;
|
||||
void setRank(unsigned _rank);
|
||||
unsigned getHeight() const;
|
||||
void setHeight(unsigned _h);
|
||||
unsigned getNum() const;
|
||||
bool setNum(int _num);
|
||||
bool addNum();
|
||||
bool subNum();
|
||||
unsigned getStore() const;
|
||||
void setStore(unsigned _store);
|
||||
unsigned getFlag() const;
|
||||
void setFlag(unsigned _flag);
|
||||
const Bstr* getKey(int _index) const; //need to check the index
|
||||
bool setKey(const Bstr* _key, int _index, bool ifcopy = false);
|
||||
bool addKey(const Bstr* _key, int _index, bool ifcopy = false);
|
||||
bool subKey(int _index, bool ifdel = false);
|
||||
|
||||
//several binary key search utilities
|
||||
int searchKey_less(const Bstr& _bstr) const;
|
||||
int searchKey_equal(const Bstr& _bstr) const;
|
||||
int searchKey_lessEqual(const Bstr& _bstr) const;
|
||||
|
||||
//virtual functions: polymorphic
|
||||
virtual Node* getChild(int _index) const { return NULL; };
|
||||
virtual bool setChild(Node* _child, int _index) { return true; };
|
||||
virtual bool addChild(Node* _child, int _index) { return true; };
|
||||
virtual bool subChild(int _index) { return true; };
|
||||
virtual Node* getPrev() const { return NULL; };
|
||||
virtual Node* getNext() const { return NULL; };
|
||||
virtual const Bstr* getValue(int _index) const { return NULL; };
|
||||
virtual bool setValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool addValue(const Bstr* _value, int _index, bool ifcopy = false) { return true; };
|
||||
virtual bool subValue(int _index, bool ifdel = false) { return true;};
|
||||
virtual void setPrev(Node* _prev) {};
|
||||
virtual void setNext(Node* _next) {};
|
||||
virtual void Virtual() = 0;
|
||||
virtual void Normal() = 0;
|
||||
virtual unsigned getSize() const = 0; //return all memory owned
|
||||
virtual Node* split(Node* _father, int _index) = 0;
|
||||
virtual Node* coalesce(Node* _father, int _index) = 0;
|
||||
virtual void release() = 0; //release the node, only remain necessary information
|
||||
virtual ~Node() {};
|
||||
virtual void print(std::string s) = 0; //DEBUG(print the Node)
|
||||
};
|
||||
|
||||
/*NOTICE(operations in release())
|
||||
*To save memory, we can only remain store and flag(childs added for Leaf).
|
||||
*However, in this way childs'pointers is ok to change, use Node** or Node*& is also nonsense
|
||||
*because the pointer variable may die.
|
||||
*Another way is only to release dynamic memory, and store the whole, read the Bstr only to
|
||||
*build. In this way nodes'pointer doesn't change, and operation is simplified, while memory
|
||||
*is consumed a bit more. Because Bstrs consume the most memory, and memory-disk swapping is
|
||||
*the most time-consuming thing, it seems to be a better way.
|
||||
*WARN:when a node is in memory and not deleted, its basic content is always being! If nodes are
|
||||
*really too many, this will cause disaster because we can't swap them out until tree is closed!
|
||||
*To solve this problem, there should be two types of release-function: one to release Bstr, one
|
||||
*to release the whole(pointer is invalid and rebuild problem)
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
|
@ -1,637 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Storage.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:43
|
||||
# Description: achieve functions in Storage.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "Storage.h"

#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Default constructor: no file attached, no free list, no heap, full buffer
// budget. treeheight is set to NULL as well, so no pointer member is left
// uninitialised.
Storage::Storage()
{							//not use ../logs/, notice the location of program
	cur_block_num = SET_BLOCK_NUM;
	filepath = "";
	freelist = NULL;
	treefp = NULL;
	treeheight = NULL;
	minheap = NULL;
	freemem = MAX_BUFFER_SIZE;
}
|
||||
|
||||
// Open or create the tree file and build the in-memory list of free blocks.
//   _filepath : path of the tree file
//   _mode     : "build" creates/truncates the file ("w+b");
//               "open" opens an existing file ("r+b")
//   _height   : location where the tree height is kept; in "open" mode the
//               stored height is read into it
// Every member is put into a defined state before anything can fail, so the
// two early error returns (invalid mode, fopen failure) no longer leave
// treefp / freelist / minheap / treeheight / freemem uninitialised.
// In "open" mode the file position is left at the root block; when the stored
// root number is 0 it is left at offset 0.
Storage::Storage(string& _filepath, string& _mode, unsigned* _height)
{
	cur_block_num = SET_BLOCK_NUM;		//initialize
	this->filepath = _filepath;
	this->treefp = NULL;
	this->freelist = NULL;
	this->minheap = NULL;
	this->treeheight = _height;			//originally set to 0
	this->freemem = MAX_BUFFER_SIZE;

	if(_mode == string("build"))
		treefp = fopen(_filepath.c_str(), "w+b");
	else if(_mode == string("open"))
		treefp = fopen(_filepath.c_str(), "r+b");
	else
	{
		print(string("error in Storage: Invalid mode ") + _mode);
		return;
	}
	if(treefp == NULL)
	{
		print(string("error in Storage: Open error ") + _filepath);
		return;
	}

	this->freelist = new BlockInfo;		//null-head
	unsigned i, j, k;					//j = (SuperNum-1)*BLOCK_SIZE
	BlockInfo* bp;
	if(_mode == "build")
	{	//write basic information
		i = 0;
		fwrite(&i, sizeof(unsigned), 1, this->treefp);				//height
		fwrite(&i, sizeof(unsigned), 1, this->treefp);				//rootnum
		fwrite(&cur_block_num, sizeof(unsigned), 1, this->treefp);	//current block num
		fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
		bp = this->freelist;
		j = cur_block_num / 8;
		// one zero byte per 8 blocks; every block starts out free
		for(i = 0; i < j; ++i)
		{
			fputc(0, this->treefp);
			for(k = 0; k < 8; ++k)
			{
				bp->next = new BlockInfo(i*8+k+1, NULL);
				bp = bp->next;
			}
		}
	}
	else	//_mode == "open"
	{
		//read basic information
		unsigned rootnum = 0;	//stays 0 (null root) if the read below fails
		char c;
		fread(this->treeheight, sizeof(unsigned), 1, this->treefp);
		fread(&rootnum, sizeof(unsigned), 1, this->treefp);
		fread(&cur_block_num, sizeof(unsigned), 1, this->treefp);
		fseek(this->treefp, BLOCK_SIZE, SEEK_SET);
		bp = this->freelist;
		j = cur_block_num / 8;
		// a cleared bit k in byte i puts block i*8+7-k+1 on the free list
		for(i = 0; i < j; ++i)
		{
			c = fgetc(treefp);
			for(k = 0; k < 8; ++k)
			{
				if((c & (1 << k)) == 0)
				{
					bp->next = new BlockInfo(i*8+7-k+1, NULL);
					bp = bp->next;
				}
			}
		}
		fseek(treefp, Address(rootnum), SEEK_SET);
		//treefp is now ahead of root-block
	}
	this->minheap = new Heap(HEAP_SIZE);
}
|
||||
|
||||
bool
|
||||
Storage::preRead(Node*& _root, Node*& _leaves_head, Node*& _leaves_tail) //pre-read and build whole tree
|
||||
{ //set root(in memory) and leaves_head
|
||||
//TODO: false when exceed memory
|
||||
_leaves_tail = _leaves_head = _root = NULL;
|
||||
if(ftell(this->treefp) == 0) //root is null
|
||||
{
|
||||
return true;
|
||||
}
|
||||
unsigned next, store, j, pos = 0;
|
||||
unsigned h = *this->treeheight;
|
||||
Node* p;
|
||||
//read root node
|
||||
this->createNode(p);
|
||||
_root = p;
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
//use stack to achieve
|
||||
long address[h]; //current address
|
||||
unsigned used[h]; //used child num
|
||||
unsigned total[h]; //total child num
|
||||
unsigned block[h]; //next block num
|
||||
Node* nodes[h];
|
||||
address[pos] = ftell(treefp);
|
||||
used[pos] = 0;
|
||||
total[pos]= p->getNum() + 1;
|
||||
block[pos] = next;
|
||||
nodes[pos] = p;
|
||||
pos++;
|
||||
Node* prev = NULL;
|
||||
while(pos > 0)
|
||||
{
|
||||
j = pos - 1;
|
||||
if(nodes[j]->isLeaf() || used[j] == total[j]) //LeafNode or ready IntlNode
|
||||
{
|
||||
if(nodes[j]->isLeaf())
|
||||
{
|
||||
if(prev != NULL)
|
||||
{
|
||||
prev->setNext(nodes[j]);
|
||||
nodes[j]->setPrev(prev);
|
||||
}
|
||||
prev = nodes[j];
|
||||
}
|
||||
pos--;
|
||||
continue;
|
||||
}
|
||||
fseek(this->treefp, address[j], SEEK_SET);
|
||||
fread(&store, sizeof(unsigned), 1, treefp);
|
||||
this->ReadAlign(block + j);
|
||||
address[j] = ftell(treefp);
|
||||
fseek(treefp, Address(store), SEEK_SET);
|
||||
this->createNode(p);
|
||||
nodes[j]->setChild(p, used[j]);
|
||||
used[j]++;
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
address[pos] = ftell(treefp);
|
||||
used[pos] = 0;
|
||||
total[pos] = p->getNum() + 1;
|
||||
block[pos] = next;
|
||||
nodes[pos] = p;
|
||||
pos++;
|
||||
}
|
||||
//set leaves and read root, which is always keeped in-mem
|
||||
p = _root;
|
||||
while(!p->isLeaf())
|
||||
{
|
||||
p = p->getChild(0);
|
||||
}
|
||||
_leaves_head = p;
|
||||
p = _root;
|
||||
while(!p->isLeaf())
|
||||
{
|
||||
p = p->getChild(p->getNum());
|
||||
}
|
||||
_leaves_tail = p;
|
||||
int memory = 0;
|
||||
this->readNode(_root, &memory);
|
||||
this->request(memory);
|
||||
return true;
|
||||
}
|
||||
|
||||
long //8-byte in 64-bit machine
|
||||
Storage::Address(unsigned _blocknum) const //BETTER: inline function
|
||||
{
|
||||
if(_blocknum == 0)
|
||||
return 0;
|
||||
else if(_blocknum > cur_block_num)
|
||||
{
|
||||
//print(string("error in Address: Invalid blocknum ") + Util::int2string(_blocknum));
|
||||
return -1; //address should be non-negative
|
||||
}
|
||||
//NOTICE: here should explictly use long
|
||||
return (long)(this->SuperNum+_blocknum-1) * (long)BLOCK_SIZE;
|
||||
}
|
||||
|
||||
unsigned
|
||||
Storage::Blocknum(long address) const
|
||||
{
|
||||
return (address/BLOCK_SIZE) + 1 - this->SuperNum;
|
||||
}
|
||||
|
||||
unsigned
|
||||
Storage::AllocBlock()
|
||||
{
|
||||
BlockInfo* p = this->freelist->next;
|
||||
if(p == NULL)
|
||||
{
|
||||
for(unsigned i = 0; i < SET_BLOCK_INC; ++i)
|
||||
{
|
||||
cur_block_num++; //BETTER: check if > MAX_BLOCK_NUM
|
||||
this->FreeBlock(cur_block_num);
|
||||
}
|
||||
p = this->freelist->next;
|
||||
}
|
||||
unsigned t = p->num;
|
||||
this->freelist->next = p->next;
|
||||
delete p;
|
||||
return t;
|
||||
}
|
||||
|
||||
void
|
||||
Storage::FreeBlock(unsigned _blocknum)
|
||||
{ //QUERY: head-sub and tail-add will be better?
|
||||
BlockInfo* bp = new BlockInfo(_blocknum, this->freelist->next);
|
||||
this->freelist->next = bp;
|
||||
}
|
||||
|
||||
void
|
||||
Storage::ReadAlign(unsigned* _next)
|
||||
{
|
||||
if(ftell(treefp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
fseek(treefp, Address(*_next), SEEK_SET);
|
||||
fread(_next, sizeof(unsigned), 1, treefp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Storage::WriteAlign(unsigned* _curnum, bool& _SpecialBlock)
|
||||
{
|
||||
if(ftell(treefp) % BLOCK_SIZE == 0)
|
||||
{
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
fseek(treefp, Address(*_curnum), SEEK_SET);
|
||||
if(_SpecialBlock)
|
||||
{
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
_SpecialBlock = false;
|
||||
}
|
||||
fwrite(&blocknum, sizeof(unsigned), 1, treefp);
|
||||
fseek(treefp, Address(blocknum)+4, SEEK_SET);
|
||||
*_curnum = blocknum;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Storage::readNode(Node* _np, int* _request)
|
||||
{
|
||||
if(_np == NULL || _np->inMem())
|
||||
return false; //can't read or needn't
|
||||
fseek(treefp, Address(_np->getStore()), SEEK_SET);
|
||||
bool flag = _np->isLeaf();
|
||||
unsigned next;
|
||||
unsigned i, num = _np->getNum();
|
||||
Bstr bstr;
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
//read data, use readBstr...
|
||||
//fread(treefp, "%u", &num);
|
||||
//_np->setNum(num);
|
||||
if(flag)
|
||||
*_request += Node::LEAF_SIZE;
|
||||
else
|
||||
*_request += Node::INTL_SIZE;
|
||||
_np->Normal();
|
||||
if(!flag)
|
||||
fseek(treefp, 4 * (num + 1), SEEK_CUR);
|
||||
for(i = 0; i < num; ++i)
|
||||
{
|
||||
this->readBstr(&bstr, &next);
|
||||
_np->setKey(&bstr, i);
|
||||
}
|
||||
if(flag)
|
||||
{
|
||||
for(i = 0; i < num; ++i)
|
||||
{
|
||||
this->readBstr(&bstr, &next);
|
||||
*_request += bstr.getLen();
|
||||
_np->setValue(&bstr, i);
|
||||
}
|
||||
}
|
||||
//_np->setFlag((_np->getFlag() & ~Node::NF_IV & ~Node::NF_ID) | Node::NF_IM);
|
||||
//_np->delVirtual();
|
||||
_np->delDirty();
|
||||
//_np->setMem();
|
||||
this->updateHeap(_np, _np->getRank(), false);
|
||||
bstr.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Storage::createNode(Node*& _np) //cretae virtual nodes, not in-mem
|
||||
{
|
||||
/*
|
||||
if(ftell(this->treefp)== 0) //null root
|
||||
{
|
||||
_np = NULL;
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
unsigned t; //QUERY: maybe next-flag... will be better-storage?
|
||||
bool flag = false; //IntlNode
|
||||
fread(&t, sizeof(unsigned), 1, treefp);
|
||||
if((t & Node::NF_IL) > 0) //WARN: according to setting
|
||||
flag = true; //LeafNode
|
||||
if(flag)
|
||||
{
|
||||
//this->request(sizeof(LeafNode));
|
||||
_np = new LeafNode(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
//this->request(sizeof(IntlNode));
|
||||
_np = new IntlNode(true);
|
||||
}
|
||||
//fseek(treefp, -4, SEEK_CUR);
|
||||
//_np->setFlag(_np->getFlag() | (t & Node::NF_RK));
|
||||
//_np->setRank(t);
|
||||
_np->setFlag(t);
|
||||
_np->delDirty();
|
||||
_np->delMem();
|
||||
_np->setStore(Blocknum(ftell(treefp)-4));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Storage::writeNode(Node* _np)
|
||||
{
|
||||
if(_np == NULL || !_np->inMem() || (_np->getRank() > 0 && !_np->isDirty()))
|
||||
return false; //not need to write back
|
||||
unsigned num = _np->getNum(), i;
|
||||
bool flag = _np->isLeaf(), SpecialBlock = true;
|
||||
/*
|
||||
if(!flag)
|
||||
{
|
||||
for(i = 0; i <= num; ++i)
|
||||
if(_np->getChild(i)->isDirty())
|
||||
return false; //NOTICE: all childs must be clean!
|
||||
}
|
||||
*/
|
||||
//to release original blocks
|
||||
unsigned store = _np->getStore(), next;
|
||||
//if first store is 0, meaning a new node
|
||||
fseek(this->treefp, Address(store)+4, SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
while(store != 0)
|
||||
{
|
||||
this->FreeBlock(store);
|
||||
store = next;
|
||||
fseek(treefp, Address(store), SEEK_SET);
|
||||
fread(&next, sizeof(unsigned), 1, treefp);
|
||||
}
|
||||
if(num == 0)
|
||||
return true; //node is empty!
|
||||
unsigned t;
|
||||
//write Node information
|
||||
unsigned blocknum = this->AllocBlock();
|
||||
_np->setStore(blocknum);
|
||||
long address = this->Address(blocknum);
|
||||
fseek(this->treefp, address, SEEK_SET);
|
||||
t = _np->getFlag();
|
||||
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
if(!flag)
|
||||
{
|
||||
for(i = 0; i <= num; ++i)
|
||||
{
|
||||
t = _np->getChild(i)->getStore();
|
||||
fwrite(&t, sizeof(unsigned), 1, treefp); //DEBUG
|
||||
this->WriteAlign(&blocknum, SpecialBlock);
|
||||
}
|
||||
}
|
||||
for(i = 0; i < num; ++i)
|
||||
this->writeBstr(_np->getKey(i), &blocknum, SpecialBlock);
|
||||
if(flag)
|
||||
{
|
||||
for(i = 0; i < num; ++i)
|
||||
this->writeBstr(_np->getValue(i), &blocknum, SpecialBlock);
|
||||
}
|
||||
fseek(treefp, Address(blocknum), SEEK_SET);
|
||||
if(SpecialBlock)
|
||||
fseek(treefp, 4, SEEK_CUR);
|
||||
t = 0;
|
||||
fwrite(&t, sizeof(unsigned), 1, treefp); //the end-block
|
||||
//_np->setFlag(_np->getFlag() & ~Node::NF_ID);
|
||||
_np->delDirty();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Storage::readBstr(Bstr* _bp, unsigned* _next)
|
||||
{
|
||||
//long address;
|
||||
unsigned len, i, j;
|
||||
fread(&len, sizeof(unsigned), 1, this->treefp);
|
||||
this->ReadAlign(_next);
|
||||
//this->request(len);
|
||||
char* s = (char*)malloc(len);
|
||||
_bp->setLen(len);
|
||||
for(i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fread(s+i, sizeof(char), 4, treefp);
|
||||
this->ReadAlign(_next);
|
||||
}
|
||||
while(i < len)
|
||||
{
|
||||
fread(s+i, sizeof(char), 1, treefp); //BETTER
|
||||
i++;
|
||||
}
|
||||
j = len % 4;
|
||||
if(j > 0)
|
||||
j = 4 - j;
|
||||
fseek(treefp, j, SEEK_CUR);
|
||||
this->ReadAlign(_next);
|
||||
_bp->setStr(s);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Storage::writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock)
|
||||
{
|
||||
unsigned i, j, len = _bp->getLen();
|
||||
fwrite(&len, sizeof(unsigned), 1, treefp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
char* s = _bp->getStr();
|
||||
for(i = 0; i + 4 < len; i += 4)
|
||||
{
|
||||
fwrite(s+i, sizeof(char), 4, treefp);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
}
|
||||
while(i < len)
|
||||
{
|
||||
fwrite(s+i, sizeof(char), 1, treefp);
|
||||
i++;
|
||||
}
|
||||
j = len % 4;
|
||||
if(j > 0)
|
||||
j = 4 - j;
|
||||
fseek(treefp, j, SEEK_CUR);
|
||||
this->WriteAlign(_curnum, _SpecialBlock);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Storage::writeTree(Node* _root) //write the whole tree back and close treefp
|
||||
{
|
||||
fseek(this->treefp, 0, SEEK_SET);
|
||||
fwrite(this->treeheight, sizeof(unsigned), 1, treefp);
|
||||
//delete all nonsense-node in heap, otherwise will waste storage permanently
|
||||
Node* p;
|
||||
while(1)
|
||||
{ //all non-sense nodes will be in-head-area, due to minimal rank
|
||||
p = minheap->getTop();
|
||||
if(p == NULL) //heap is empty, only when root==NULL
|
||||
break;
|
||||
if(p->getRank() == 0) //indicate non-sense node
|
||||
{
|
||||
this->minheap->remove();
|
||||
this->writeNode(p);
|
||||
delete p;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned i, j, t;
|
||||
//QUERY: another way to write all nodes back is to print out all nodes in heap
|
||||
//but this method will cause no node in heap any more, while operations may be
|
||||
//afetr tree-saving. Which method is better?
|
||||
//write nodes recursively using stack, including root-num
|
||||
if(_root != NULL)
|
||||
{
|
||||
Node* p = _root;
|
||||
unsigned h = *this->treeheight, pos = 0;
|
||||
Node* ns[h];
|
||||
int ni[h];
|
||||
ns[pos] = p;
|
||||
ni[pos] = p->getNum();
|
||||
pos++;
|
||||
while(pos > 0)
|
||||
{
|
||||
j = pos - 1;
|
||||
p = ns[j];
|
||||
if(p->isLeaf() || ni[j] < 0) //leaf or all childs are ready
|
||||
{
|
||||
this->writeNode(p);
|
||||
pos--;
|
||||
continue;
|
||||
}
|
||||
ns[pos] = p->getChild(ni[j]);
|
||||
ni[pos] = ns[pos]->getNum();
|
||||
pos++;
|
||||
ni[j]--;
|
||||
}
|
||||
t = _root->getStore();
|
||||
}
|
||||
else
|
||||
t = 0;
|
||||
fseek(this->treefp, 4, SEEK_SET);
|
||||
fwrite(&t, sizeof(unsigned), 1, treefp); //write the root num
|
||||
fwrite(&cur_block_num, sizeof(unsigned), 1, treefp);//write current blocks num
|
||||
fseek(treefp, BLOCK_SIZE, SEEK_SET);
|
||||
j = cur_block_num / 8; //(SuperNum-1)*BLOCK_SIZE;
|
||||
//reset to 1 first
|
||||
for(i = 0; i < j; ++i)
|
||||
{
|
||||
fputc(0xff, treefp);
|
||||
}
|
||||
char c;
|
||||
BlockInfo* bp = this->freelist->next;
|
||||
while(bp != NULL)
|
||||
{
|
||||
//if not-use then set 0, aligned to byte!
|
||||
#ifdef DEBUG_KVSTORE
|
||||
if(bp->num > cur_block_num)
|
||||
{
|
||||
printf("blocks num exceed, cur_block_num: %u\n", cur_block_num);
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
j = bp->num - 1;
|
||||
i = j / 8;
|
||||
j = 7 - j % 8;
|
||||
fseek(treefp, BLOCK_SIZE+i, SEEK_SET);
|
||||
c = fgetc(treefp);
|
||||
fseek(treefp, -1, SEEK_CUR);
|
||||
fputc(c & ~(1 << j), treefp);
|
||||
bp = bp->next;
|
||||
}
|
||||
//fclose(this->treefp);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Storage::updateHeap(Node* _np, unsigned _rank, bool _inheap) const
|
||||
{
|
||||
if(_inheap) //already in heap, to modify
|
||||
{
|
||||
unsigned t = _np->getRank();
|
||||
_np->setRank(_rank);
|
||||
if(t < _rank)
|
||||
this->minheap->modify(_np, false);
|
||||
else if(t > _rank)
|
||||
this->minheap->modify(_np, true);
|
||||
else;
|
||||
}
|
||||
else //not in heap, to add
|
||||
{
|
||||
_np->setRank(_rank);
|
||||
this->minheap->insert(_np);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Storage::request(int _needmem) //aligned to byte
|
||||
{ //NOTICE: <0 means release
|
||||
if(_needmem > 0 && this->freemem < (unsigned)_needmem)
|
||||
if(!this->handler(_needmem - freemem)) //disaster in buffer memory
|
||||
{
|
||||
print(string("error in request: out of buffer-mem, now to exit"));
|
||||
exit(1);
|
||||
}
|
||||
this->freemem -= _needmem;
|
||||
}
|
||||
|
||||
bool
|
||||
Storage::handler(unsigned _needmem) //>0
|
||||
{
|
||||
Node* p;
|
||||
unsigned size;
|
||||
//if(_needmem < SET_BUFFER_SIZE) //to recover to SET_BUFFER_SIZE buffer
|
||||
// _needmem = SET_BUFFER_SIZE;
|
||||
while(1)
|
||||
{
|
||||
p = this->minheap->getTop();
|
||||
if(p == NULL)
|
||||
return false; //can't satisfy or can't recover to SET_BUFFER_SIZE
|
||||
this->minheap->remove();
|
||||
size = p->getSize();
|
||||
this->freemem += size;
|
||||
this->writeNode(p);
|
||||
if(p->getNum() > 0)
|
||||
p->Virtual();
|
||||
else
|
||||
delete p; //non-sense node
|
||||
if(_needmem > size)
|
||||
_needmem -= size;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//Release the free list (including its head entry) and the buffer heap,
//then close the tree file.
Storage::~Storage()
{
	//release heap and freelist...
#ifdef DEBUG_KVSTORE
	printf("now to release the kvstore!\n");
#endif
	for(BlockInfo* entry = this->freelist; entry != NULL; )
	{
		BlockInfo* following = entry->next;
		delete entry;
		entry = following;
	}
#ifdef DEBUG_KVSTORE
	printf("already empty the freelist!\n");
#endif
	delete this->minheap;
#ifdef DEBUG_KVSTORE
	printf("already empty the buffer heap!\n");
#endif
	fclose(this->treefp);
//#ifdef DEBUG_KVSTORE
//	//NOTICE:there is more than one tree
//	fclose(Util::debug_kvstore);	//NULL is ok!
//	Util::debug_kvstore = NULL;
//#endif
}
|
||||
|
||||
void
|
||||
Storage::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
fputs(Util::showtime().c_str(), Util::debug_kvstore);
|
||||
fputs("Class Storage\n", Util::debug_kvstore);
|
||||
fputs("Message: ", Util::debug_kvstore);
|
||||
fputs(s.c_str(), Util::debug_kvstore);
|
||||
fputs("\n", Util::debug_kvstore);
|
||||
#endif
|
||||
}
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Storage.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:43
|
||||
# Description: swap between memory and disk, achieving system-like method
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_STORAGE_STORAGE_H
|
||||
#define _KVSTORE_STORAGE_STORAGE_H
|
||||
|
||||
#include "../node/IntlNode.h"
|
||||
#include "../node/LeafNode.h"
|
||||
#include "../heap/Heap.h"
|
||||
#include "file.h"
|
||||
|
||||
//It controls read, write, swap
|
||||
class Storage
|
||||
{
|
||||
public:
|
||||
static const unsigned BLOCK_SIZE = 1 << 16; //fixed size of disk-block
|
||||
//there are 18 B+Tree indexes and one vstree index, so set 3G buffer size
|
||||
//static const unsigned long long MAX_BUFFER_SIZE = 0xC0000000; //max buffer size
|
||||
//static const unsigned long long MAX_BUFFER_SIZE = 0x1ffffffff; //max buffer size
|
||||
static const unsigned long long MAX_BUFFER_SIZE = 0xffffffff; //max buffer size
|
||||
//static const unsigned SET_BUFFER_SIZE = 1 << 30; //set buffer size
|
||||
static const unsigned HEAP_SIZE = MAX_BUFFER_SIZE/Node::INTL_SIZE;
|
||||
static const unsigned MAX_BLOCK_NUM = 1 << 24; //max block-num
|
||||
//below two constants: must can be exactly divided by 8
|
||||
static const unsigned SET_BLOCK_NUM = 1 << 8; //initial blocks num
|
||||
static const unsigned SET_BLOCK_INC = SET_BLOCK_NUM; //base of blocks-num inc
|
||||
static const unsigned SuperNum = MAX_BLOCK_NUM/(8*BLOCK_SIZE)+1;
|
||||
//static const unsigned TRANSFER_CAPACITY = BLOCK_SIZE;
|
||||
//enum ReadType { OVER = 0, EXPAND, NORMAL };
|
||||
private:
|
||||
unsigned cur_block_num;
|
||||
std::string filepath;
|
||||
unsigned* treeheight;
|
||||
BlockInfo* freelist;
|
||||
FILE* treefp; //file: tree nodes
|
||||
Heap* minheap; //heap of Nodes's pointer, sorted in NF_RK
|
||||
//NOTICE: freemem's type is long long here, due to large memory in server.
|
||||
//However, needmem in handler() and request() is ok to be int/unsigned.
|
||||
//Because the bstr' size is controlled, so is the node.
|
||||
unsigned long long freemem; //free memory to use, non-negative
|
||||
//unsigned long long time; //QUERY(achieving an old-swap startegy?)
|
||||
long Address(unsigned _blocknum) const;
|
||||
unsigned Blocknum(long address) const;
|
||||
unsigned AllocBlock();
|
||||
void FreeBlock(unsigned _blocknum);
|
||||
void ReadAlign(unsigned* _next);
|
||||
void WriteAlign(unsigned* _next, bool& _SpecialBlock);
|
||||
|
||||
public:
|
||||
Storage();
|
||||
Storage(std::string& _filepath, std::string& _mode, unsigned* _height);//create a fixed-size file or open an existence
|
||||
bool preRead(Node*& _root, Node*& _leaves_head, Node*& _leaves_tail); //read and build all nodes, only root in memory
|
||||
bool readNode(Node* _np, int* _request); //read, if virtual
|
||||
bool createNode(Node*& _np); //use fp to create a new node
|
||||
//NOTICE(if children and child not exist, build children's Nodes)
|
||||
bool writeNode(Node* _np);
|
||||
bool readBstr(Bstr* _bp, unsigned* _next);
|
||||
bool writeBstr(const Bstr* _bp, unsigned* _curnum, bool& _SpecialBlock);
|
||||
bool writeTree(Node* _np);
|
||||
void updateHeap(Node* _np, unsigned _rank, bool _inheap) const;
|
||||
void request(int _needmem); //deal with memory request
|
||||
bool handler(unsigned _needmem); //swap some nodes out
|
||||
//bool update(); //update InMem Node's rank, with clock
|
||||
~Storage();
|
||||
void print(std::string s); //DEBUG
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -1,90 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: file.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:42
|
||||
# Description: disk file memlayout
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_STORAGE_FILE_H
|
||||
#define _KVSTORE_STORAGE_FILE_H
|
||||
|
||||
#include "../../Util/Util.h"
|
||||
|
||||
/******** manage the disk-file as Blocks-list ********/
|
||||
/* All blocks of given file are viewed as an array, extended dynamically
|
||||
* The first block(0, super-block) includes the information
|
||||
* about the whole file(for example, the root Node's Block),
|
||||
* especially a bitset use[MAXBN] which is used to recognize which
|
||||
* block is in use
|
||||
* In practice, the normal blocks are numbered from 1 to MAXBN-1,
|
||||
* so 0 can be used as division.
|
||||
* (normal block is made of header and data)
|
||||
* When file opened, program must read this bitset and create an
|
||||
* freelist(several 10Ms memory),
|
||||
* and remember to write back the bitset when closing.
|
||||
* We store each Tree-Node as a unit, which may contain several
|
||||
* blocks, not requiring continuous.
|
||||
* While the tree is not closed, better to keep root Node in
|
||||
* memory all the time.
|
||||
*/
|
||||
|
||||
/*
|
||||
struct Header
|
||||
{ //this is the header information at the
|
||||
//beginning of each block, then the data
|
||||
//
|
||||
//If this the first block of a node, we must also store
|
||||
* the necessary information about the node. For example,
|
||||
* a bit indicates whether a leaf-node, deciding how it
|
||||
* should be read. The first block's prev and the
|
||||
//final block's next should be 0
|
||||
//blockaddr_t prev;
|
||||
blockaddr_t next; //WARN(maybe larger type!)
|
||||
//unsigned short end; //valid data:0~end
|
||||
};
|
||||
|
||||
struct SuperBlock //SuperNum blocks, numbered 0
|
||||
{
|
||||
unsigned height;
|
||||
unsigned rootnum; //use a whole block, may store other information
|
||||
//for example, nodes's num
|
||||
char use[BNWD]; //exactly SuperNum-1 blocks
|
||||
};
|
||||
|
||||
//numbered from 1 to MAX_BLOCK_NUM
|
||||
struct Node //may use several blocks, not must continuously
|
||||
{
|
||||
unsigned flag; //only in first block, special-block
|
||||
unsigned next; //each real data-block, 0 means the end
|
||||
information:
|
||||
unsigned num;
|
||||
unsigned childs[]; //only in IntlNodes
|
||||
Bstr keys[];
|
||||
Bstr values[]; //only in LeafNodes
|
||||
};
|
||||
*/
|
||||
|
||||
//When stored in disk, every Node* pointer should be changed to block-address
|
||||
//(a bit indicates whether a leaf!),
|
||||
//and char* should be changed to the real string.
|
||||
|
||||
//One entry of a singly linked list of block numbers.
class BlockInfo
{
public:
	unsigned num;		//block number held by this entry
	BlockInfo* next;	//following entry, NULL at the end of the list

	//entry with number 0 and no successor
	BlockInfo() : num(0), next(NULL)
	{
	}

	//entry holding _num, linked in front of _bp
	BlockInfo(unsigned _num, BlockInfo* _bp) : num(_num), next(_bp)
	{
	}
};
|
||||
|
||||
#endif
|
||||
|
|
@ -1,689 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Tree.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:45
|
||||
# Description: achieve functions in Tree.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "Tree.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//tree operations should be atomic (if they read nodes)
|
||||
//sum the request and send to Storage at last
|
||||
//ensure that all nodes operated are in memory
|
||||
//Memory delta (in bytes) accumulated by the Tree operation in progress:
//search/insert/modify/remove reset it to 0 when they start, prepare() lets
//Storage::readNode add to it, and its value is passed to TSM->request().
int request = 0;
|
||||
|
||||
//Default constructor: an empty tree without storage manager, paths,
//transfer buffers or result stream.
Tree::Tree()
{
	this->height = 0;
	this->mode = "";
	this->root = NULL;
	this->leaves_head = NULL;
	this->leaves_tail = NULL;
	this->TSM = NULL;
	this->storepath = "";
	this->filename = "";
	for(int k = 0; k < 3; ++k)
	{
		this->transfer_size[k] = 0;
	}
	this->stream = NULL;
}
|
||||
|
||||
//Build a tree stored in the file _storepath/_filename.
//_mode is passed on to Storage; in mode "open" the existing nodes are
//pre-read so that root, leaves_head and leaves_tail refer to the stored tree.
//In any other mode the tree starts empty.
//The three transfer buffers are allocated with Util::TRANSFER_SIZE bytes each.
Tree::Tree(const string& _storepath, const string& _filename, const char* _mode)
{
	storepath = _storepath;
	filename = _filename;
	this->height = 0;
	this->mode = string(_mode);
	//Start from a well-defined empty tree: previously leaves_head and
	//leaves_tail were left uninitialised unless preRead() assigned them.
	this->root = NULL;
	this->leaves_head = NULL;
	this->leaves_tail = NULL;
	string filepath = this->getFilePath();
	TSM = new Storage(filepath, this->mode, &this->height);
	if(this->mode == "open")
		this->TSM->preRead(this->root, this->leaves_head, this->leaves_tail);
	this->transfer[0].setStr((char*)malloc(Util::TRANSFER_SIZE));
	this->transfer[1].setStr((char*)malloc(Util::TRANSFER_SIZE));
	this->transfer[2].setStr((char*)malloc(Util::TRANSFER_SIZE));
	this->transfer_size[0] = this->transfer_size[1] = this->transfer_size[2] = Util::TRANSFER_SIZE;	//initialied to 1M
	this->stream = NULL;
}
|
||||
|
||||
//Path of the tree file: storepath and filename joined by '/'.
string
Tree::getFilePath()
{
	string path = storepath;
	path += "/";
	path += filename;
	return path;
}
|
||||
|
||||
void //WARN: not check _str and _len
|
||||
Tree::CopyToTransfer(const char* _str, unsigned _len, unsigned _index)
|
||||
{
|
||||
if(_index > 2)
|
||||
return;
|
||||
/*
|
||||
if(_str == NULL || _len == 0)
|
||||
{
|
||||
printf("error in CopyToTransfer: empty string\n");
|
||||
return;
|
||||
}
|
||||
*/
|
||||
//unsigned length = _bstr->getLen();
|
||||
unsigned length = _len;
|
||||
if(length + 1 > this->transfer_size[_index])
|
||||
{
|
||||
transfer[_index].release();
|
||||
transfer[_index].setStr((char*)malloc(length+1));
|
||||
this->transfer_size[_index] = length + 1; //one more byte: convenient to add \0
|
||||
}
|
||||
memcpy(this->transfer[_index].getStr(), _str, length);
|
||||
this->transfer[_index].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
this->transfer[_index].setLen(length);
|
||||
}
|
||||
|
||||
unsigned
|
||||
Tree::getHeight() const
|
||||
{
|
||||
return this->height;
|
||||
}
|
||||
|
||||
void
|
||||
Tree::setHeight(unsigned _h)
|
||||
{
|
||||
this->height = _h;
|
||||
}
|
||||
|
||||
//Root node of the tree, NULL when the tree is empty.
Node*
Tree::getRoot() const
{
	Node* const top = this->root;
	return top;
}
|
||||
|
||||
void
|
||||
Tree::prepare(Node* _np) const
|
||||
{
|
||||
bool flag = _np->inMem();
|
||||
if(!flag)
|
||||
this->TSM->readNode(_np, &request); //readNode deal with request
|
||||
}
|
||||
|
||||
bool
|
||||
Tree::search(const char* _str1, unsigned _len1, char*& _str2, int& _len2)
|
||||
{
|
||||
const Bstr* value = NULL;
|
||||
if(_str1 == NULL || _len1 == 0)
|
||||
{
|
||||
printf("error in Tree-search: empty string\n");
|
||||
return false;
|
||||
}
|
||||
this->CopyToTransfer(_str1, _len1, 1);
|
||||
bool ret = this->search(&transfer[1], value);
|
||||
if(ret)
|
||||
{
|
||||
_str2 = value->getStr();
|
||||
_len2 = value->getLen();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool
|
||||
Tree::search(const Bstr* _key, const Bstr*& _value)
|
||||
{
|
||||
request = 0;
|
||||
Bstr bstr = *_key; //not to modify its memory
|
||||
int store;
|
||||
Node* ret = this->find(_key, &store, false);
|
||||
if(ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
|
||||
{
|
||||
bstr.clear();
|
||||
return false;
|
||||
}
|
||||
const Bstr* val = ret->getValue(store);
|
||||
this->CopyToTransfer(val->getStr(), val->getLen(), 0); //not sum to request
|
||||
_value = &transfer[0];
|
||||
this->TSM->request(request);
|
||||
bstr.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Tree::insert(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2)
|
||||
{
|
||||
if(_str1 == NULL || _len1 == 0)
|
||||
{
|
||||
printf("error in Tree-insert: empty string\n");
|
||||
return false;
|
||||
}
|
||||
this->CopyToTransfer(_str1, _len1, 1);
|
||||
this->CopyToTransfer(_str2, _len2, 2); //not check value
|
||||
bool ret = this->insert(&transfer[1], &transfer[2]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool
|
||||
Tree::insert(const Bstr* _key, const Bstr* _value)
|
||||
{
|
||||
request = 0;
|
||||
Node* ret;
|
||||
if(this->root == NULL) //tree is empty
|
||||
{
|
||||
leaves_tail = leaves_head = root = new LeafNode;
|
||||
request += Node::LEAF_SIZE;
|
||||
this->height = 1;
|
||||
root->setHeight(1); //add to heap later
|
||||
}
|
||||
//this->prepare(this->root); //root must be in-mem
|
||||
if(root->getNum() == Node::MAX_KEY_NUM)
|
||||
{
|
||||
Node* father = new IntlNode;
|
||||
request += Node::INTL_SIZE;
|
||||
father->addChild(root, 0);
|
||||
ret = root->split(father, 0);
|
||||
if(ret->isLeaf() && ret->getNext() == NULL)
|
||||
this->leaves_tail = ret;
|
||||
if(ret->isLeaf())
|
||||
request += Node::LEAF_SIZE;
|
||||
else
|
||||
request += Node::INTL_SIZE;
|
||||
this->height++; //height rises only when root splits
|
||||
//WARN: height area in Node: 4 bit!
|
||||
father->setHeight(this->height); //add to heap later
|
||||
this->TSM->updateHeap(ret, ret->getRank(), false);
|
||||
this->root = father;
|
||||
}
|
||||
Node* p = this->root;
|
||||
Node* q;
|
||||
int i, j;
|
||||
Bstr bstr = *_key;
|
||||
while(!p->isLeaf())
|
||||
{
|
||||
//j = p->getNum();
|
||||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
//NOTICE: using binary search is better here
|
||||
i = p->searchKey_less(bstr);
|
||||
|
||||
q = p->getChild(i);
|
||||
this->prepare(q);
|
||||
if(q->getNum() == Node::MAX_KEY_NUM)
|
||||
{
|
||||
ret = q->split(p, i);
|
||||
if(ret->isLeaf() && ret->getNext() == NULL)
|
||||
this->leaves_tail = ret;
|
||||
if(ret->isLeaf())
|
||||
request += Node::LEAF_SIZE;
|
||||
else
|
||||
request += Node::INTL_SIZE;
|
||||
//BETTER: in loop may update multiple times
|
||||
this->TSM->updateHeap(ret, ret->getRank(), false);
|
||||
this->TSM->updateHeap(q, q->getRank(), true);
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
if(bstr < *(p->getKey(i)))
|
||||
p = q;
|
||||
else
|
||||
p = ret;
|
||||
}
|
||||
else
|
||||
{
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
p = q;
|
||||
}
|
||||
}
|
||||
//j = p->getNum();
|
||||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_less(bstr);
|
||||
|
||||
//insert existing key is ok, but not inserted in
|
||||
//however, the tree-shape may change due to possible split in former code
|
||||
bool ifexist = false;
|
||||
if(i > 0 && bstr == *(p->getKey(i-1)))
|
||||
ifexist = true;
|
||||
else
|
||||
{
|
||||
p->addKey(_key, i, true);
|
||||
p->addValue(_value, i, true);
|
||||
p->addNum();
|
||||
request += (_key->getLen() + _value->getLen());
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
//_key->clear();
|
||||
//_value->clear();
|
||||
}
|
||||
this->TSM->request(request);
|
||||
bstr.clear(); //NOTICE: must be cleared!
|
||||
return !ifexist; //QUERY(which case:return false)
|
||||
}
|
||||
|
||||
bool
|
||||
Tree::modify(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2)
|
||||
{
|
||||
if(_str1 == NULL || _len1 == 0)
|
||||
{
|
||||
printf("error in Tree-modify: empty string\n");
|
||||
return false;
|
||||
}
|
||||
this->CopyToTransfer(_str1, _len1, 1);
|
||||
this->CopyToTransfer(_str2, _len2, 2); //not check value
|
||||
bool ret = this->modify(&transfer[1], &transfer[2]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool
|
||||
Tree::modify(const Bstr* _key, const Bstr* _value)
|
||||
{
|
||||
request = 0;
|
||||
Bstr bstr = *_key;
|
||||
int store;
|
||||
Node* ret = this->find(_key, &store, true);
|
||||
if(ret == NULL || store == -1 || bstr != *(ret->getKey(store))) //tree is empty or not found
|
||||
{
|
||||
bstr.clear();
|
||||
return false;
|
||||
}
|
||||
unsigned len = ret->getValue(store)->getLen();
|
||||
ret->setValue(_value, store, true);
|
||||
request += (_value->getLen()-len);
|
||||
//_value->clear();
|
||||
ret->setDirty();
|
||||
this->TSM->request(request);
|
||||
bstr.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
//this function is useful for search and modify, and range-query
|
||||
Node* //return the first key's position that >= *_key
|
||||
Tree::find(const Bstr* _key, int* _store, bool ifmodify) const
|
||||
{ //to assign value for this->bstr, function shouldn't be const!
|
||||
if(this->root == NULL)
|
||||
return NULL; //Tree Is Empty
|
||||
Node* p = root;
|
||||
int i, j;
|
||||
Bstr bstr = *_key; //local Bstr: multiple delete
|
||||
while(!p->isLeaf())
|
||||
{
|
||||
if(ifmodify)
|
||||
p->setDirty();
|
||||
//j = p->getNum();
|
||||
//for(i = 0; i < j; ++i) //BETTER(Binary-Search)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_less(bstr);
|
||||
|
||||
p = p->getChild(i);
|
||||
this->prepare(p);
|
||||
}
|
||||
|
||||
j = p->getNum();
|
||||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr <= *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_lessEqual(bstr);
|
||||
|
||||
if(i == j)
|
||||
*_store = -1; //Not Found
|
||||
else
|
||||
*_store = i;
|
||||
bstr.clear();
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
Node*
|
||||
Tree::find(unsigned _len, const char* _str, int* store) const
|
||||
{
|
||||
}
|
||||
*/
|
||||
|
||||
bool
|
||||
Tree::remove(const char* _str, unsigned _len)
|
||||
{
|
||||
if(_str == NULL || _len == 0)
|
||||
{
|
||||
printf("error in Tree-remove: empty string\n");
|
||||
return false;
|
||||
}
|
||||
this->CopyToTransfer(_str, _len, 1);
|
||||
bool ret = this->remove(&transfer[1]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool //BETTER: if not found, the road are also dirty! find first?
|
||||
Tree::remove(const Bstr* _key)
|
||||
{
|
||||
request = 0;
|
||||
Node* ret;
|
||||
if(this->root == NULL) //tree is empty
|
||||
return false;
|
||||
Node* p = this->root;
|
||||
Node* q;
|
||||
int i, j;
|
||||
Bstr bstr = *_key;
|
||||
while(!p->isLeaf())
|
||||
{
|
||||
j = p->getNum();
|
||||
//for(i = 0; i < j; ++i)
|
||||
//if(bstr < *(p->getKey(i)))
|
||||
//break;
|
||||
i = p->searchKey_less(bstr);
|
||||
|
||||
q = p->getChild(i);
|
||||
this->prepare(q);
|
||||
if(q->getNum() < Node::MIN_CHILD_NUM) //==MIN_KEY_NUM
|
||||
{
|
||||
if(i > 0)
|
||||
this->prepare(p->getChild(i-1));
|
||||
if(i < j)
|
||||
this->prepare(p->getChild(i+1));
|
||||
ret = q->coalesce(p, i);
|
||||
if(ret != NULL)
|
||||
this->TSM->updateHeap(ret, 0, true);//non-sense node
|
||||
this->TSM->updateHeap(q, q->getRank(), true);
|
||||
if(q->isLeaf())
|
||||
{
|
||||
if(q->getPrev() == NULL)
|
||||
this->leaves_head = q;
|
||||
if(q->getNext() == NULL)
|
||||
this->leaves_tail = q;
|
||||
}
|
||||
if(p->getNum() == 0) //root shrinks
|
||||
{
|
||||
//this->leaves_head = q;
|
||||
this->root = q;
|
||||
this->TSM->updateHeap(p, 0, true); //instead of delete p
|
||||
this->height--;
|
||||
}
|
||||
}
|
||||
else
|
||||
p->setDirty();
|
||||
this->TSM->updateHeap(p, p->getRank(), true);
|
||||
p = q;
|
||||
}
|
||||
bool flag = false;
|
||||
//j = p->getNum(); //LeafNode(maybe root)
|
||||
//for(i = 0; i < j; ++i)
|
||||
// if(bstr == *(p->getKey(i)))
|
||||
// {
|
||||
// request -= p->getKey(i)->getLen();
|
||||
// request -= p->getValue(i)->getLen();
|
||||
// p->subKey(i, true); //to release
|
||||
// p->subValue(i, true); //to release
|
||||
// p->subNum();
|
||||
// if(p->getNum() == 0) //root leaf 0 key
|
||||
// {
|
||||
// this->root = NULL;
|
||||
// this->leaves_head = NULL;
|
||||
// this->leaves_tail = NULL;
|
||||
// this->height = 0;
|
||||
// this->TSM->updateHeap(p, 0, true); //instead of delete p
|
||||
// }
|
||||
// p->setDirty();
|
||||
// flag = true;
|
||||
// break;
|
||||
// }
|
||||
i = p->searchKey_equal(bstr);
|
||||
request -= p->getKey(i)->getLen();
|
||||
request -= p->getValue(i)->getLen();
|
||||
p->subKey(i, true); //to release
|
||||
p->subValue(i, true); //to release
|
||||
p->subNum();
|
||||
if(p->getNum() == 0) //root leaf 0 key
|
||||
{
|
||||
this->root = NULL;
|
||||
this->leaves_head = NULL;
|
||||
this->leaves_tail = NULL;
|
||||
this->height = 0;
|
||||
this->TSM->updateHeap(p, 0, true); //instead of delete p
|
||||
}
|
||||
p->setDirty();
|
||||
flag = true;
|
||||
|
||||
this->TSM->request(request);
|
||||
bstr.clear();
|
||||
return flag; //i == j, not found
|
||||
}
|
||||
|
||||
//Hand out the next record of the current result stream.
//Returns NULL (after a message on stderr) when there is no stream or the
//stream has already been read to its end.
const Bstr*
Tree::getRangeValue()
{
    if(this->stream != NULL)
    {
        if(!this->stream->isEnd())
        {
            //NOTICE: this is one record, and the caller must not free the memory!
            //NOTICE: Bstr[] with a single element, used as Bstr*
            return this->stream->read();
        }
        fprintf(stderr, "Tree::getRangeValue(): read till end now!\n");
        return NULL;
    }
    fprintf(stderr, "Tree::getRangeValue(): no results now!\n");
    return NULL;
}
|
||||
|
||||
void
|
||||
Tree::resetStream()
|
||||
{
|
||||
if(this->stream == NULL)
|
||||
{
|
||||
fprintf(stderr, "no results now!\n");
|
||||
return;
|
||||
}
|
||||
this->stream->setEnd();
|
||||
}
|
||||
|
||||
bool //special case: not exist, one-edge-case
|
||||
Tree::range_query(const Bstr* _key1, const Bstr* _key2)
|
||||
{ //the range is: *_key1 <= x < *_key2
|
||||
//if(_key1 == NULL && _key2 == NULL)
|
||||
//return false;
|
||||
//ok to search one-edge, requiring only one be NULL
|
||||
//find and write value
|
||||
int store1, store2;
|
||||
Node *p1, *p2;
|
||||
if(_key1 != NULL)
|
||||
{
|
||||
request = 0;
|
||||
p1 = this->find(_key1, &store1, false);
|
||||
if(p1 == NULL || store1 == -1)
|
||||
return false; //no element
|
||||
this->TSM->request(request);
|
||||
}
|
||||
else
|
||||
{
|
||||
p1 = this->leaves_head;
|
||||
store1 = 0;
|
||||
}
|
||||
if(_key2 != NULL)
|
||||
{ //QUERY: another strategy is to getnext and compare every time to tell end
|
||||
request = 0;
|
||||
p2 = this->find(_key2, &store2, false);
|
||||
if(p2 == NULL)
|
||||
return false;
|
||||
else if(store2 == -1)
|
||||
store2 = p2->getNum();
|
||||
else if(store2 == 0)
|
||||
{
|
||||
p2 = p2->getPrev();
|
||||
if(p2 == NULL)
|
||||
return false; //no element
|
||||
store2 = p2->getNum();
|
||||
}
|
||||
this->TSM->request(request);
|
||||
}
|
||||
else
|
||||
{
|
||||
p2 = this->leaves_tail;
|
||||
store2 = p2->getNum();
|
||||
}
|
||||
|
||||
Node* p = p1;
|
||||
unsigned i, l, r;
|
||||
//get the num of answers first, not need to prepare the node
|
||||
unsigned ansNum = 0;
|
||||
while(true)
|
||||
{
|
||||
//request = 0;
|
||||
//this->prepare(p);
|
||||
if(p == p1)
|
||||
l = store1;
|
||||
else
|
||||
l = 0;
|
||||
if(p == p2)
|
||||
r = store2;
|
||||
else
|
||||
r = p->getNum();
|
||||
ansNum += (r - l);
|
||||
//this->TSM->request(request);
|
||||
if(p != p2)
|
||||
p = p->getNext();
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
if(this->stream != NULL)
|
||||
{
|
||||
delete this->stream;
|
||||
this->stream = NULL;
|
||||
}
|
||||
vector<int> keys;
|
||||
vector<bool> desc;
|
||||
this->stream = new Stream(keys, desc, ansNum, 1, false);
|
||||
|
||||
p = p1;
|
||||
while(1)
|
||||
{
|
||||
request = 0;
|
||||
this->prepare(p);
|
||||
if(p == p1)
|
||||
l = store1;
|
||||
else
|
||||
l = 0;
|
||||
if(p == p2)
|
||||
r = store2;
|
||||
else
|
||||
r = p->getNum();
|
||||
for(i = l; i < r; ++i)
|
||||
{
|
||||
//NOTICE:Bstr* in an array, used as Bstr[]
|
||||
this->stream->write(p->getValue(i));
|
||||
}
|
||||
this->TSM->request(request);
|
||||
if(p != p2)
|
||||
p = p->getNext();
|
||||
else
|
||||
break;
|
||||
}
|
||||
this->stream->setEnd();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Tree::save() //save the whole tree to disk
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
printf("now to save tree!\n");
|
||||
#endif
|
||||
if(TSM->writeTree(this->root))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
Tree::release(Node* _np) const
|
||||
{
|
||||
if(_np == NULL) return;
|
||||
if(_np->isLeaf())
|
||||
{
|
||||
delete _np;
|
||||
return;
|
||||
}
|
||||
int cnt = _np->getNum();
|
||||
for(; cnt >= 0; --cnt)
|
||||
release(_np->getChild(cnt));
|
||||
delete _np;
|
||||
}
|
||||
|
||||
//Destroy the result stream and the storage manager, then free every node
//still reachable from the root.
Tree::~Tree()
{
    delete this->stream;    //maybe NULL
    delete this->TSM;
#ifdef DEBUG_KVSTORE
    printf("already empty the buffer, now to delete all nodes in tree!\n");
#endif
    //recursively delete each Node
    this->release(this->root);
}
|
||||
|
||||
void
|
||||
Tree::print(string s)
|
||||
{
|
||||
#ifdef DEBUG_KVSTORE
|
||||
fputs(Util::showtime().c_str(), Util::debug_kvstore);
|
||||
fputs("Class Tree\n", Util::debug_kvstore);
|
||||
fputs("Message: ", Util::debug_kvstore);
|
||||
fputs(s.c_str(), Util::debug_kvstore);
|
||||
fputs("\n", Util::debug_kvstore);
|
||||
fprintf(Util::debug_kvstore, "Height: %d\n", this->height);
|
||||
if(s == "tree" || s == "TREE")
|
||||
{
|
||||
if(this->root == NULL)
|
||||
{
|
||||
fputs("Null Tree\n", Util::debug_kvstore);
|
||||
return;
|
||||
}
|
||||
Node** ns = new Node*[this->height];
|
||||
int* ni = new int[this->height];
|
||||
Node* np;
|
||||
int i, pos = 0;
|
||||
ns[pos] = this->root;
|
||||
ni[pos] = this->root->getNum();
|
||||
pos++;
|
||||
while(pos > 0)
|
||||
{
|
||||
np = ns[pos-1];
|
||||
i = ni[pos-1];
|
||||
this->prepare(np);
|
||||
if(np->isLeaf() || i < 0) //LeafNode or ready IntlNode
|
||||
{ //child-num ranges: 0~num
|
||||
if(s == "tree")
|
||||
np->print("node");
|
||||
else
|
||||
np->print("NODE"); //print full node-information
|
||||
pos--;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
ns[pos] = np->getChild(i);
|
||||
ni[pos-1]--;
|
||||
ni[pos] = ns[pos]->getNum();
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
delete[] ns;
|
||||
delete[] ni;
|
||||
}
|
||||
else if(s == "LEAVES" || s == "leaves")
|
||||
{
|
||||
Node* np;
|
||||
for(np = this->leaves_head; np != NULL; np = np->getNext())
|
||||
{
|
||||
this->prepare(np);
|
||||
if(s == "leaves")
|
||||
np->print("node");
|
||||
else
|
||||
np->print("NODE");
|
||||
}
|
||||
}
|
||||
else if(s == "check tree")
|
||||
{
|
||||
//check the tree, if satisfy B+ definition
|
||||
//TODO
|
||||
}
|
||||
else;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -1,79 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Tree.h
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-04-26 16:44
|
||||
# Description: struct and interface of the B+ tree
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _KVSTORE_TREE_TREE_H
|
||||
#define _KVSTORE_TREE_TREE_H
|
||||
|
||||
#include "../../Util/Util.h"
|
||||
#include "../../Util/Stream.h"
|
||||
#include "../node/Node.h"
|
||||
#include "../node/IntlNode.h"
|
||||
#include "../node/LeafNode.h"
|
||||
#include "../storage/Storage.h"
|
||||
|
||||
class Tree
|
||||
{
|
||||
private:
|
||||
unsigned int height; //0 indicates an empty tree
|
||||
Node* root;
|
||||
Node* leaves_head; //the head of LeafNode-list
|
||||
Node* leaves_tail; //the tail of LeafNode-list
|
||||
std::string mode; //BETTER(to use enum)
|
||||
Storage* TSM; //Tree-Storage-Manage
|
||||
//BETTER:multiple stream maybe needed:)
|
||||
Stream* stream;
|
||||
|
||||
//always alloc one more byte than length, then user can add a '\0'
|
||||
//to get a real string, instead of new and copy
|
||||
//other operations will be harmful to search, so store value in
|
||||
//transfer temporally, while length adjusted.
|
||||
//TODO: in multi-user case, multiple-search will cause problem,
|
||||
//so lock is a must. Add lock to transfer is better than to add
|
||||
//lock to every key/value. However, modify requires a lock for a
|
||||
//key/value, and multiple search for different keys are ok!!!
|
||||
Bstr transfer[3]; //0:transfer value searched; 1:copy key-data from const char*; 2:copy val-data from const char*
|
||||
unsigned transfer_size[3];
|
||||
std::string storepath;
|
||||
std::string filename; //ok for user to change
|
||||
/* some private functions */
|
||||
std::string getFilePath(); //in UNIX system
|
||||
void CopyToTransfer(const char* _str, unsigned _len, unsigned _index);
|
||||
void release(Node* _np) const;
|
||||
void prepare(Node* _np) const;
|
||||
|
||||
public:
|
||||
Tree(); //always need to initial transfer
|
||||
Tree(const std::string& _storepath, const std::string& _filename, const char* _mode);
|
||||
unsigned int getHeight() const;
|
||||
void setHeight(unsigned _h);
|
||||
Node* getRoot() const;
|
||||
//void setRoot(Node* _root);
|
||||
//insert, search, remove, set
|
||||
bool search(const char* _str1, unsigned _len1, char*& _str2, int& _len2);
|
||||
bool search(const Bstr* _key1, const Bstr*& _value);
|
||||
bool insert(const Bstr* _key, const Bstr* _value);
|
||||
bool insert(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2);
|
||||
bool modify(const Bstr* _key, const Bstr* _value);
|
||||
bool modify(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2);
|
||||
Node* find(const Bstr* _key, int* store, bool ifmodify) const;
|
||||
//Node* find(unsigned _len, const char* _str, int* store) const;
|
||||
bool remove(const Bstr* _key);
|
||||
bool remove(const char* _str, unsigned _len);
|
||||
const Bstr* getRangeValue();
|
||||
void resetStream();
|
||||
bool range_query(const Bstr* _key1, const Bstr* _key2);
|
||||
bool save();
|
||||
~Tree();
|
||||
void print(std::string s); //DEBUG(print the tree)
|
||||
};
|
||||
//NOTICE: need to save tree manually before delete, otherwise will cause problem.
|
||||
//(problem range between two extremes: not-modified, totally-modified)
|
||||
//After saved, it's ok to continue operations on tree!
|
||||
|
||||
#endif
|
||||
|
675
LICENSE
675
LICENSE
|
@ -1,675 +0,0 @@
|
|||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
{one line to give the program's name and a brief idea of what it does.}
|
||||
Copyright (C) {year} {name of author}
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
{project} Copyright (C) {year} {fullname}
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
||||
|
1100
Main/gconsole.cpp
1100
Main/gconsole.cpp
File diff suppressed because it is too large
Load Diff
|
@ -1,53 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: gload.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-24 19:27
|
||||
# Description: firstly written by liyouhuan, modified by zengli
|
||||
TODO: add -h/--help for help message
|
||||
=============================================================================*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//[0]./gload [1]data_folder_path [2]rdf_file_path
|
||||
int
|
||||
main(int argc, char * argv[])
|
||||
{
|
||||
//chdir(dirname(argv[0]));
|
||||
Util util;
|
||||
//system("clock");
|
||||
cout << "gload..." << endl;
|
||||
{
|
||||
cout << "argc: " << argc << "\t";
|
||||
cout << "DB_store:" << argv[1] << "\t";
|
||||
cout << "RDF_data: " << argv[2] << "\t";
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
string _db_path = string(argv[1]);
|
||||
//if(_db_path[0] != '/' && _db_path[0] != '~') //using relative path
|
||||
//{
|
||||
//_db_path = string("../") + _db_path;
|
||||
//}
|
||||
string _rdf = string(argv[2]);
|
||||
//if(_rdf[0] != '/' && _rdf[0] != '~') //using relative path
|
||||
//{
|
||||
//_rdf = string("../") + _rdf;
|
||||
//}
|
||||
Database _db(_db_path);
|
||||
bool flag = _db.build(_rdf);
|
||||
if (flag)
|
||||
{
|
||||
cout << "import RDF file to database done." << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "import RDF file to database failed." << endl;
|
||||
}
|
||||
//system("clock");
|
||||
return 0;
|
||||
}
|
||||
|
256
Main/gquery.cpp
256
Main/gquery.cpp
|
@ -1,256 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: gquery.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-20 12:23
|
||||
# Description: query a database, there are several ways to use this program:
|
||||
1. ./gquery print the help message
|
||||
2. ./gquery --help simplified as -h, equal to 1
|
||||
3. ./gquery db_folder query_path load query from given path for given database
|
||||
4. ./gquery db_folder load the given database and open console
|
||||
=============================================================================*/
|
||||
|
||||
#include "../Database/Database.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//WARN:cannot support soft links!
|
||||
|
||||
// Prints the usage banner of gquery to stdout.
//
// The text lists every invocation form handled by main(), including the
// optional third argument: when it is present, main() also saves the result
// of the query to that path.
void
help()
{
	// The banner is fixed text, so it is written with fputs rather than being
	// interpreted as a printf format string.
	fputs(
		"/*=============================================================================\n"
		"# Filename: gquery.cpp\n"
		"# Author: Bookug Lobert\n"
		"# Mail: 1181955272@qq.com\n"
		"# Last Modified: 2015-10-20 12:23\n"
		"# Description: query a database, there are several ways to use this program:\n"
		"1. ./gquery print the help message\n"
		"2. ./gquery --help simplified as -h, equal to 1\n"
		"3. ./gquery db_folder query_path load query from given path for given database\n"
		"4. ./gquery db_folder load the given database and open console\n"
		"5. ./gquery db_folder query_path answer_path same as 3, and save the result to answer_path\n"
		"=============================================================================*/\n",
		stdout);
}
|
||||
|
||||
int
|
||||
main(int argc, char * argv[])
|
||||
{
|
||||
//chdir(dirname(argv[0]));
|
||||
Util util;
|
||||
if(argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
|
||||
{
|
||||
help();
|
||||
return 0;
|
||||
}
|
||||
cout << "gquery..." << endl;
|
||||
if(argc < 2)
|
||||
{
|
||||
cerr << "error: lack of DB_store to be queried" << endl;
|
||||
return 0;
|
||||
}
|
||||
{
|
||||
cout << "argc: " << argc << "\t";
|
||||
cout << "DB_store:" << argv[1] << "\t";
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
string db_folder = string(argv[1]);
|
||||
//if(db_folder[0] != '/' && db_folder[0] != '~') //using relative path
|
||||
//{
|
||||
//db_folder = string("../") + db_folder;
|
||||
//}
|
||||
Database _db(db_folder);
|
||||
_db.load();
|
||||
cout << "finish loading" << endl;
|
||||
|
||||
// read query from file.
|
||||
if (argc >= 3)
|
||||
{
|
||||
// ifstream fin(argv[2]);
|
||||
// if(!fin)
|
||||
// {
|
||||
// cout << "can not open: " << buf << endl;
|
||||
// return 0;
|
||||
// }
|
||||
//
|
||||
// memset(buf, 0, sizeof(buf));
|
||||
// stringstream _ss;
|
||||
// while(!fin.eof()){
|
||||
// fin.getline(buf, 9999);
|
||||
// _ss << buf << "\n";
|
||||
// }
|
||||
// fin.close();
|
||||
//
|
||||
// string query = _ss.str();
|
||||
|
||||
string query = string(argv[2]);
|
||||
//if(query[0] != '/' && query[0] != '~') //using relative path
|
||||
//{
|
||||
//query = string("../") + query;
|
||||
//}
|
||||
query = Util::getQueryFromFile(query.c_str());
|
||||
if (query.empty())
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
printf("query is:\n%s\n\n", query.c_str());
|
||||
ResultSet _rs;
|
||||
_db.query(query, _rs, stdout);
|
||||
if (argc >= 4)
|
||||
{
|
||||
Util::save_to_file(argv[3], _rs.to_str());
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// read query file path from terminal.
|
||||
// BETTER: sighandler ctrl+C/D/Z
|
||||
string query;
|
||||
//char resolved_path[PATH_MAX+1];
|
||||
#ifdef READLINE_ON
|
||||
char *buf, prompt[] = "gsql>";
|
||||
//const int commands_num = 3;
|
||||
//char commands[][20] = {"help", "quit", "sparql"};
|
||||
printf("Type `help` for information of all commands\n");
|
||||
printf("Type `help command_t` for detail of command_t\n");
|
||||
rl_bind_key('\t', rl_complete);
|
||||
while(true)
|
||||
{
|
||||
buf = readline(prompt);
|
||||
if(buf == NULL)
|
||||
continue;
|
||||
else
|
||||
add_history(buf);
|
||||
if(strncmp(buf, "help", 4) == 0)
|
||||
{
|
||||
if(strcmp(buf, "help") == 0)
|
||||
{
|
||||
//print commands message
|
||||
printf("help - print commands message\n");
|
||||
printf("quit - quit the console normally\n");
|
||||
printf("sparql - load query from the second argument\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
//TODO: help for a given command
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else if(strcmp(buf, "quit") == 0)
|
||||
break;
|
||||
else if(strncmp(buf, "sparql", 6) != 0)
|
||||
{
|
||||
printf("unknown commands\n");
|
||||
continue;
|
||||
}
|
||||
//TODO: sparql + string, not only path
|
||||
string query_file;
|
||||
//BETTER:build a parser for this console
|
||||
bool ifredirect = false;
|
||||
|
||||
char* rp = buf;
|
||||
int pos = strlen(buf) - 1;
|
||||
while(pos > -1)
|
||||
{
|
||||
if(*(rp+pos) == '>')
|
||||
{
|
||||
ifredirect = true;
|
||||
break;
|
||||
}
|
||||
pos--;
|
||||
}
|
||||
rp += pos;
|
||||
|
||||
char* p = buf + strlen(buf) - 1;
|
||||
FILE* fp = stdout; ///default to output on screen
|
||||
if(ifredirect)
|
||||
{
|
||||
char* tp = p;
|
||||
while(*tp == ' ' || *tp == '\t')
|
||||
tp--;
|
||||
*(tp+1) = '\0';
|
||||
tp = rp + 1;
|
||||
while(*tp == ' ' || *tp == '\t')
|
||||
tp++;
|
||||
fp = fopen(tp, "w"); //NOTICE:not judge here!
|
||||
p = rp - 1; //NOTICE: all separated with ' ' or '\t'
|
||||
}
|
||||
while(*p == ' ' || *p == '\t') //set the end of path
|
||||
p--;
|
||||
*(p+1) = '\0';
|
||||
p = buf + 6;
|
||||
while(*p == ' ' || *p == '\t') //acquire the start of path
|
||||
p++;
|
||||
//TODO: support the soft links(or hard links)
|
||||
//there are also readlink and getcwd functions for help
|
||||
//http://linux.die.net/man/2/readlink
|
||||
//NOTICE:getcwd and realpath cannot acquire the real path of file
|
||||
//in the same directory and the program is executing when the
|
||||
//system starts running
|
||||
//NOTICE: use realpath(p, NULL) is ok, but need to free the memory
|
||||
char* q = realpath(p, NULL); //QUERY:still not work for soft links
|
||||
#ifdef DEBUG_PRECISE
|
||||
printf("%s\n", p);
|
||||
#endif
|
||||
if(q == NULL)
|
||||
{
|
||||
printf("invalid path!\n");
|
||||
free(q);
|
||||
free(buf);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
printf("%s\n", q);
|
||||
//query = getQueryFromFile(p);
|
||||
query = Util::getQueryFromFile(q);
|
||||
if(query.empty())
|
||||
{
|
||||
free(q);
|
||||
//free(resolved_path);
|
||||
free(buf);
|
||||
if(ifredirect)
|
||||
fclose(fp);
|
||||
continue;
|
||||
}
|
||||
printf("query is:\n");
|
||||
printf("%s\n\n", query.c_str());
|
||||
ResultSet _rs;
|
||||
_db.query(query, _rs, fp);
|
||||
//test...
|
||||
//string answer_file = query_file+".out";
|
||||
//Util::save_to_file(answer_file.c_str(), _rs.to_str());
|
||||
free(q);
|
||||
//free(resolved_path);
|
||||
free(buf);
|
||||
if(ifredirect)
|
||||
fclose(fp);
|
||||
#ifdef DEBUG_PRECISE
|
||||
printf("after buf freed!\n");
|
||||
#endif
|
||||
}
|
||||
//#else //DEBUG:this not work!
|
||||
// while(true)
|
||||
// {
|
||||
// cout << "please input query file path:" << endl;
|
||||
// string query_file;
|
||||
// cin >> query_file;
|
||||
// //char* q = realpath(query_file.c_str(), NULL);
|
||||
// string query = getQueryFromFile(query_file.c_str());
|
||||
// if(query.empty())
|
||||
// {
|
||||
// //free(resolved_path);
|
||||
// continue;
|
||||
// }
|
||||
// cout << "query is:" << endl;
|
||||
// cout << query << endl << endl;
|
||||
// ResultSet _rs;
|
||||
// _db.query(query, _rs, stdout);
|
||||
// //free(resolved_path);
|
||||
// }
|
||||
#endif // READLINE_ON
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: gserver.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2016-02-26 19:15
|
||||
# Description: first written by hanshuo, modified by zengli
|
||||
=============================================================================*/
|
||||
|
||||
#include "../Server/Server.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
//chdir(dirname(argv[0]));
|
||||
Util util;
|
||||
unsigned short port = Socket::DEFAULT_CONNECT_PORT;
|
||||
|
||||
if (argc > 1)
|
||||
{
|
||||
std::stringstream ss(argv[1]);
|
||||
ss >> port;
|
||||
}
|
||||
|
||||
Server server(port);
|
||||
|
||||
std::cout << "port=" << port << std::endl; //debug
|
||||
|
||||
server.createConnection();
|
||||
server.listen();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
# Directory (trailing slash included) that receives every compiled object.
objdir=objs/

# Objects shared by all four executables.
objfile= \
	$(objdir)Bstr.o $(objdir)Database.o $(objdir)KVstore.o $(objdir)Btree.o \
	$(objdir)CBtreeFunc.o $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o \
	$(objdir)SigEntry.o $(objdir)Signature.o $(objdir)Triple.o $(objdir)util.o $(objdir)VSTree.o \
	$(objdir)IDList.o $(objdir)EntryBuffer.o $(objdir)LRUCache.o $(objdir)VNode.o $(objdir)DBparser.o \
	$(objdir)SparqlParser.o $(objdir)SparqlLexer.o $(objdir)Operation.o $(objdir)Socket.o \
	$(objdir)Server.o $(objdir)Client.o \
	$(objdir)TurtleParser.o $(objdir)RDFParser.o

# Include paths of the bundled ANTLR3 C runtime.
inc=-I./tools/libantlr3c-3.4/ -I./tools/libantlr3c-3.4/include

all: gload gquery gserver gclient

# Each executable links its own entry-point object with the shared objects
# and the static ANTLR runtime. $@ is the target, $^ all prerequisites.
gload: $(objdir)gload.o $(objfile)
	g++ -o $@ $^ lib/libantlr.a

gquery: $(objdir)gquery.o $(objfile)
	g++ -o $@ $^ lib/libantlr.a

gserver: $(objdir)gserver.o $(objfile)
	g++ -o $@ $^ lib/libantlr.a

gclient: $(objdir)gclient.o $(objfile)
	g++ -o $@ $^ lib/libantlr.a
|
||||
|
||||
# Entry-point objects, one per executable. These are compile-only steps, so
# no linker inputs are passed here; the archive lib/libantlr.a is supplied
# by the link rules instead.
$(objdir)gload.o: main/gload.cpp
	g++ -c main/gload.cpp $(inc) -o $(objdir)gload.o

$(objdir)gquery.o: main/gquery.cpp
	g++ -c main/gquery.cpp $(inc) -o $(objdir)gquery.o

$(objdir)gserver.o: main/gserver.cpp
	g++ -c main/gserver.cpp $(inc) -o $(objdir)gserver.o

$(objdir)gclient.o: main/gclient.cpp
	g++ -c main/gclient.cpp $(inc) -o $(objdir)gclient.o
|
||||
|
||||
# Component objects. In every rule the source file is the first prerequisite,
# so $< names the file to compile and $@ the object to produce.

# --- Bstr / Database / KVstore ---
$(objdir)Bstr.o: Bstr/Bstr.cpp Bstr/Bstr.h
	g++ -c $< $(inc) -o $@

$(objdir)Database.o: Database/Database.cpp Database/Database.h $(objdir)IDList.o $(objdir)ResultSet.o $(objdir)SPARQLquery.o \
		$(objdir)BasicQuery.o \
		$(objdir)Triple.o $(objdir)SigEntry.o $(objdir)KVstore.o $(objdir)VSTree.o $(objdir)DBparser.o $(objdir)util.o \
		$(objdir)RDFParser.o
	g++ -c $< $(inc) -o $@

$(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h $(objdir)Btree.o
	g++ -c $< $(inc) -o $@

# The two B-tree sources are built without the ANTLR include paths.
$(objdir)Btree.o: KVstore/Btree.cpp KVstore/Btree.h $(objdir)CBtreeFunc.o
	g++ -c $< -o $@

$(objdir)CBtreeFunc.o: KVstore/CBtreeFunc.cpp KVstore/CBtreeH.h
	g++ -c $< -o $@ -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE

# --- Query ---
$(objdir)IDList.o: Query/IDList.cpp Query/IDList.h
	g++ -c $< $(inc) -o $@

$(objdir)SPARQLquery.o: Query/SPARQLquery.cpp Query/SPARQLquery.h $(objdir)BasicQuery.o
	g++ -c $< $(inc) -o $@

$(objdir)BasicQuery.o: Query/BasicQuery.cpp Query/BasicQuery.h $(objdir)Signature.o
	g++ -c $< $(inc) -o $@

$(objdir)ResultSet.o: Query/ResultSet.cpp Query/ResultSet.h
	g++ -c $< $(inc) -o $@

# --- Signature / Triple / util ---
$(objdir)SigEntry.o: Signature/SigEntry.cpp Signature/SigEntry.h $(objdir)Signature.o
	g++ -c $< $(inc) -o $@

$(objdir)Signature.o: Signature/Signature.cpp Signature/Signature.h
	g++ -c $< $(inc) -o $@

$(objdir)Triple.o: Triple/Triple.cpp Triple/Triple.h
	g++ -c $< $(inc) -o $@

$(objdir)util.o: util/util.cpp util/util.h
	g++ -c $< $(inc) -o $@

# --- VSTree ---
$(objdir)VSTree.o: VSTree/VSTree.cpp VSTree/VSTree.h $(objdir)EntryBuffer.o $(objdir)LRUCache.o $(objdir)VNode.o
	g++ -c $< $(inc) -o $@

$(objdir)EntryBuffer.o: VSTree/EntryBuffer.cpp VSTree/EntryBuffer.h Signature/SigEntry.h
	g++ -c $< $(inc) -o $@

$(objdir)LRUCache.o: VSTree/LRUCache.cpp VSTree/LRUCache.h VSTree/VNode.h
	g++ -c $< $(inc) -o $@

$(objdir)VNode.o: VSTree/VNode.cpp VSTree/VNode.h
	g++ -c $< $(inc) -o $@

# --- Parser (the ANTLR-generated lexer/parser are C sources) ---
$(objdir)DBparser.o: Parser/DBparser.cpp Parser/DBparser.h $(objdir)SparqlParser.o $(objdir)SparqlLexer.o $(objdir)Triple.o
	g++ -c $< $(inc) -o $@

$(objdir)SparqlParser.o: Parser/SparqlParser.c Parser/SparqlParser.h
	gcc -c $< $(inc) -o $@

$(objdir)SparqlLexer.o: Parser/SparqlLexer.c Parser/SparqlLexer.h
	gcc -c $< $(inc) -o $@

$(objdir)TurtleParser.o: Parser/TurtleParser.cpp Parser/TurtleParser.h Parser/Type.h
	gcc -c $< $(inc) -o $@

$(objdir)RDFParser.o: Parser/RDFParser.cpp Parser/RDFParser.h $(objdir)TurtleParser.o $(objdir)Triple.o
	gcc -c $< $(inc) -o $@

# --- Server ---
$(objdir)Operation.o: Server/Operation.cpp Server/Operation.h
	g++ -c $< $(inc) -o $@

$(objdir)Socket.o: Server/Socket.cpp Server/Socket.h
	g++ -c $< $(inc) -o $@

$(objdir)Server.o: Server/Server.cpp Server/Server.h $(objdir)Socket.o $(objdir)Database.o $(objdir)Operation.o
	g++ -c $< $(inc) -o $@

$(objdir)Client.o: Server/Client.cpp Server/Client.h $(objdir)Socket.o
	g++ -c $< $(inc) -o $@
|
||||
|
||||
# Neither target produces a file of its own name.
.PHONY: lib_antlr clean

# Rebuild the static ANTLR3 C runtime from the bundled tarball.
# Steps are chained with && so that a failed cd/tar/configure stops the
# recipe instead of running the next command in the wrong directory.
lib_antlr:
	rm -rf tools/libantlr3c-3.4/
	cd tools && tar -zxvf libantlr3c-3.4.tar.gz
	cd tools/libantlr3c-3.4/ && ./configure -enable-64bit && make
	rm -rf lib/libantlr.a
	ar -crv lib/libantlr.a tools/libantlr3c-3.4/*.o

# Remove the executables and all objects. objdir already ends with '/'.
clean:
	rm -rf gload gquery gserver gclient $(objdir)*.o
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
/*
|
||||
* DBparser.cpp
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
* Created on: 2015-4-11
|
||||
* Author: cjq
|
||||
*/
|
||||
|
||||
#include "DBparser.h"
|
||||
|
@ -13,16 +13,13 @@ DBparser::DBparser()
|
|||
_prefix_map.clear();
|
||||
}
|
||||
|
||||
/* input sparql query string and parse query into SPARQLquery
|
||||
* the returned string is set for log when error happen
|
||||
* */
|
||||
string DBparser::sparqlParser(const string& _sparql, SPARQLquery& _sparql_query)
|
||||
void DBparser::sparqlParser(const std::string& _sparql, SPARQLquery& _sparql_query)
|
||||
{
|
||||
pANTLR3_INPUT_STREAM input;
|
||||
pSparqlLexer lex;
|
||||
pANTLR3_COMMON_TOKEN_STREAM tokens;
|
||||
pSparqlParser parser;
|
||||
input = antlr3StringStreamNew((ANTLR3_UINT8 *)(_sparql.c_str()),ANTLR3_ENC_UTF8,_sparql.length(),(ANTLR3_UINT8 *)"QueryString");
|
||||
input = antlr3StringStreamNew((ANTLR3_UINT8 *)(_sparql.c_str()), ANTLR3_ENC_UTF8, _sparql.length(), (ANTLR3_UINT8 *)"QueryString");
|
||||
//input = antlr3FileStreamNew((pANTLR3_UINT8)filePath,ANTLR3_ENC_8BIT);
|
||||
lex = SparqlLexerNew(input);
|
||||
|
||||
|
@ -32,285 +29,446 @@ string DBparser::sparqlParser(const string& _sparql, SPARQLquery& _sparql_query)
|
|||
|
||||
SparqlParser_workload_return r = parser->workload(parser);
|
||||
pANTLR3_BASE_TREE root = r.tree;
|
||||
//pANTLR3_BASE_TREE treeNode;
|
||||
|
||||
printNode(root);
|
||||
parseNode(root,_sparql_query,0);
|
||||
if (printNode(root) > 0) throw "Some errors are found in the SPARQL query request.";
|
||||
parseTree(root,_sparql_query);
|
||||
|
||||
printquery(_sparql_query);
|
||||
|
||||
genQueryVec(_sparql_query.getPatternGroup(), _sparql_query);
|
||||
|
||||
parser->free(parser);
|
||||
tokens->free(tokens);
|
||||
lex->free(lex);
|
||||
input->close(input);
|
||||
return "";
|
||||
}
|
||||
|
||||
/* file pointer _fp points to rdfFile
|
||||
* that was opened previously in Database::encodeRDF
|
||||
* rdfParser() will be called many times until all triples in the rdfFile is parsed
|
||||
* and after each call, a group of triples will be parsed into the vector;
|
||||
* the returned string is set for log when error happen;
|
||||
* a single line in file responds to a triple and end up with a '.'
|
||||
* tuple in a line separated by '\t'
|
||||
*/
|
||||
string DBparser::rdfParser(ifstream& _fin, Triple* _triple_array, int& _triple_num)
|
||||
/*
 * Dump the syntax tree rooted at `node` to stdout, one node per line in the
 * form "<type>: <text>", preceded by one space per level of `depth`.
 *
 * Returns the number of nodes in the subtree (including `node` itself) whose
 * token type is 0. sparqlParser() rejects the query when this is positive.
 */
int DBparser::printNode(pANTLR3_BASE_TREE node, int depth)
{
	const char* s = (const char*) node->getText(node)->chars;
	ANTLR3_UINT32 treeType = node->getType(node);

	int hasErrorNode = (treeType == 0) ? 1 : 0;

	for (int i = 0; i < depth; i++) printf(" ");
	printf("%d: %s\n", treeType, s);

	for (unsigned int j = 0; j < node->getChildCount(node); j++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);
		hasErrorNode += printNode(childNode, depth + 1);
	}
	return hasErrorNode;
}
|
||||
|
||||
/*
|
||||
* used in readline of FILE, avoiding new memory each time
|
||||
*/
|
||||
char* DBparser::line_buf = new char[100*1000];
|
||||
int DBparser::buf_len = 100*1000;
|
||||
|
||||
int DBparser::parseString(pANTLR3_BASE_TREE node,std::string& str,int depth){
|
||||
const char* s =(const char*) node->getText(node)->chars;
|
||||
//std::cout<<"parseString: "<<s<<std::endl;
|
||||
if (depth==0){
|
||||
str = s;
|
||||
/*
 * Walk the children of `node` and hand each recognised section of the query
 * to its dedicated parser:
 *   144 (prologue)            -> parsePrologue
 *   156 (select clause)       -> parseSelectClause
 *    77 (group graph pattern) -> parseGroupPattern on the query's root group
 * Any other child is descended into recursively.
 */
void DBparser::parseTree(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
	printf("parseTree\n");

	for (unsigned int j = 0; j < node->getChildCount(node); j++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);

		switch (childNode->getType(childNode))
		{
			case 144:	//prologue
				parsePrologue(childNode);
				break;
			case 156:	//select clause
				parseSelectClause(childNode, query);
				break;
			case 77:	//group graph pattern
				parseGroupPattern(childNode, query.getPatternGroup());
				break;
			default:
				parseTree(childNode, query);
				break;
		}
	}
}
|
||||
/*
 * Process a prologue node: every direct child of type 143 (prefix
 * declaration) is passed to parsePrefix(); other children are ignored.
 */
void DBparser::parsePrologue(pANTLR3_BASE_TREE node)
{
	printf("parsePrologue\n");

	unsigned int idx = 0;
	while (idx < node->getChildCount(node))
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);
		idx++;

		//prefix 143
		if (child->getType(child) != 143)
			continue;

		parsePrefix(child);
	}
}
|
||||
/*
 * Read one prefix declaration and record it in _prefix_map.
 * The child of type 136 supplies the prefix key and the child of type 89
 * the URL it stands for. std::map::insert keeps an existing entry, so a
 * prefix that was already declared is not overwritten.
 */
void DBparser::parsePrefix(pANTLR3_BASE_TREE node)
{
	printf("parsePrefix\n");

	std::string key;
	std::string value;

	for (unsigned int j = 0; j < node->getChildCount(node); j++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);

		//prefix key string 136
		if (childNode->getType(childNode) == 136)
		{
			parseString(childNode, key);
		}

		//prefix value URL 89
		if (childNode->getType(childNode) == 89)
		{
			parseString(childNode, value);
		}
	}

	_prefix_map.insert(std::make_pair(key, value));
}
|
||||
|
||||
void DBparser::replacePrefix(string& str){
|
||||
if (str[0]!='<'){
|
||||
void DBparser::replacePrefix(std::string& str)
|
||||
{
|
||||
if (str[0] != '<' && str[0] != '\"' && str[0] != '?')
|
||||
{
|
||||
int sep=str.find(":");
|
||||
std::string prefix=str.substr(0,sep+1);
|
||||
std::cout<<"prefix: "<<prefix<<std::endl;
|
||||
if (_prefix_map.find(prefix)!=_prefix_map.end()){
|
||||
str=_prefix_map[prefix].substr(0,_prefix_map[prefix].length()-1)+str.substr(sep+1,str.length()-sep-1)+">";
|
||||
std::cout<<"str: "<<str<<std::endl;
|
||||
if (sep == -1) return;
|
||||
std::string prefix=str.substr(0, sep+1);
|
||||
std::cout << "prefix: " << prefix << std::endl;
|
||||
if (_prefix_map.find(prefix) != _prefix_map.end())
|
||||
{
|
||||
str=_prefix_map[prefix].substr(0, _prefix_map[prefix].length() - 1) + str.substr(sep + 1 ,str.length() - sep - 1) + ">";
|
||||
std::cout << "str: " << str << std::endl;
|
||||
}
|
||||
else{
|
||||
std::cout<<"prefix not found..."<<std::endl;
|
||||
else
|
||||
{
|
||||
std::cout << "prefix not found..." << std::endl;
|
||||
throw "Some errors are found in the SPARQL query request.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int DBparser::parseTriple(pANTLR3_BASE_TREE node,Triple& triple){
|
||||
//const char* s =(const char*) node->getText(node)->chars;
|
||||
std::string subject="";
|
||||
std::string predicate="";
|
||||
std::string object="";
|
||||
for (unsigned int j=0;j<node->getChildCount(node);j++){
|
||||
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
|
||||
/*
 * Process a select clause: each direct child of type 199 (var) is passed
 * to parseSelectVar(); all other children are skipped.
 */
void DBparser::parseSelectClause(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
	printf("parseSelectClause\n");

	unsigned int idx = 0;
	while (idx < node->getChildCount(node))
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);
		idx++;

		//var 199
		if (child->getType(child) != 199)
			continue;

		parseSelectVar(child, query);
	}
}
|
||||
|
||||
/*
 * Process one selected variable: the text of every child of type 200 is
 * added to the query as a projection.
 */
void DBparser::parseSelectVar(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
	printf("parseSelectVar\n");

	std::string var;
	unsigned int idx = 0;
	while (idx < node->getChildCount(node))
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);
		idx++;

		if (child->getType(child) != 200)
			continue;

		parseString(child, var);
		query.addOneProjection(var);
	}
}
|
||||
|
||||
/*
 * Fill `patterngroup` from a group graph pattern node. Children are
 * dispatched on their token type:
 *   185 (triples same subject) -> parsePattern
 *   124 (optional)             -> parseOptional
 *   195 (union)                -> open a new group union, then parseUnion
 *    67 (filter)               -> parseFilter
 * Children of any other type are ignored.
 */
void DBparser::parseGroupPattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
	printf("parseGroupPattern\n");

	for (unsigned int idx = 0; idx < node->getChildCount(node); idx++)
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

		switch (child->getType(child))
		{
			case 185:	//triples same subject
				parsePattern(child, patterngroup);
				break;

			case 124:	//optional
				parseOptional(child, patterngroup);
				break;

			case 195:	//union
				patterngroup.addOneGroupUnion();
				parseUnion(child, patterngroup);
				break;

			case 67:	//filter
				parseFilter(child, patterngroup);
				break;

			default:
				break;
		}
	}
}
|
||||
|
||||
/*
 * Build one triple pattern from a "triples same subject" node and append it
 * to `patterngroup`.
 *   177 -> subject   (text taken 1 level below the child)
 *   142 -> predicate (text taken 4 levels below the child)
 *   119 -> object    (text taken 1 level below the child)
 * Each term is run through replacePrefix(). A term whose child is missing
 * stays an empty string.
 */
void DBparser::parsePattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
	printf("parsePattern\n");

	std::string subject = "";
	std::string predicate = "";
	std::string object = "";

	for (unsigned int j = 0; j < node->getChildCount(node); j++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);

		//subject 177
		if (childNode->getType(childNode) == 177)
		{
			parseString(childNode, subject, 1);
			replacePrefix(subject);
		}

		//predicate 142
		if (childNode->getType(childNode) == 142)
		{
			parseString(childNode, predicate, 4);
			replacePrefix(predicate);
		}

		//object 119
		if (childNode->getType(childNode) == 119)
		{
			parseString(childNode, object, 1);
			replacePrefix(object);
		}
	}

	patterngroup.addOnePattern(SPARQLquery::Pattern(SPARQLquery::Element(subject), SPARQLquery::Element(predicate), SPARQLquery::Element(object)));
}
|
||||
|
||||
int DBparser::parseBasicQuery(pANTLR3_BASE_TREE node,BasicQuery& basicQuery){
|
||||
//const char* s =(const char*) node->getText(node)->chars;
|
||||
Triple triple;
|
||||
for (unsigned int j=0;j<node->getChildCount(node);j++){
|
||||
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
|
||||
//basicQuery 185
|
||||
std::cout<<"Child type: "<<childNode->getType(childNode)<<endl;
|
||||
if (childNode->getType(childNode)==185){
|
||||
parseTriple(childNode,triple);
|
||||
basicQuery.addTriple(triple);
|
||||
}
|
||||
if (childNode->getType(childNode)==195){
|
||||
//Union part here!!
|
||||
//parseUnion(childNode,U);
|
||||
//basicQuery.addTriple(triple);
|
||||
/*
 * Process an OPTIONAL node: for every child of type 77 (group graph
 * pattern) a new optional group is appended to `patterngroup` and filled
 * by parseGroupPattern().
 */
void DBparser::parseOptional(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
	printf("parseOptional\n");

	for (unsigned int j = 0; j < node->getChildCount(node); j++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);

		//group graph pattern 77
		if (childNode->getType(childNode) == 77)
		{
			patterngroup.addOneOptional();
			parseGroupPattern(childNode, patterngroup.getLastOptional());
		}
	}
}
|
||||
|
||||
int DBparser::parseVar(pANTLR3_BASE_TREE node,SPARQLquery& query){
|
||||
//const char* s =(const char*) node->getText(node)->chars;
|
||||
std::string var="";
|
||||
for (unsigned int j=0;j<node->getChildCount(node);j++){
|
||||
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
|
||||
//var 200
|
||||
if (childNode->getType(childNode)==200){
|
||||
parseString(childNode,var,0);
|
||||
query.addQueryVar(var);
|
||||
/*
 * Process a UNION node. A child of type 77 (group graph pattern) becomes a
 * new union branch of `patterngroup`; a child of type 195 (nested union) is
 * processed recursively into the same `patterngroup`.
 */
void DBparser::parseUnion(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
	printf("parseUnion\n");

	for (unsigned int j = 0; j < node->getChildCount(node); j++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);

		//group graph pattern 77
		if (childNode->getType(childNode) == 77)
		{
			patterngroup.addOneUnion();
			parseGroupPattern(childNode, patterngroup.getLastUnion());
		}

		//union 195
		if (childNode->getType(childNode) == 195)
		{
			parseUnion(childNode, patterngroup);
		}
	}
}
|
||||
|
||||
int DBparser::parseNode(pANTLR3_BASE_TREE node, SPARQLquery& query,int depth){
|
||||
const char* s =(const char*) node->getText(node)->chars;
|
||||
ANTLR3_UINT32 treeType = node->getType(node);
|
||||
/*
 * Process a FILTER node: every child yields one new filter tree in
 * `patterngroup`, populated by parseFilterTree().
 */
void DBparser::parseFilter(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
	printf("parseFilter\n");

	for (unsigned int j = 0; j < node->getChildCount(node); j++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);

		patterngroup.addOneFilterTree();
		parseFilterTree(childNode, patterngroup.getLastFilterTree());
	}
}
|
||||
|
||||
for (unsigned int j=0;j<node->getChildCount(node);j++){
|
||||
pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node,j);
|
||||
int childNodeType = childNode->getType(childNode);
|
||||
switch (childNodeType){
|
||||
//prefix
|
||||
case 199:{
|
||||
parseVar(childNode,query);
|
||||
break;
|
||||
/*
 * Convert a filter expression node into `filter`.
 *
 * The node's own token type selects the operator; unrecognised types leave
 * filter.type untouched. Each child is one operand: the first child fills
 * the "1" slot, every later child the "2" slot. A child of type 190 (unary)
 * is a leaf whose text, one level below it, is stored in arg1/arg2 after
 * prefix expansion. Any other child is a sub-expression parsed recursively
 * into a newly allocated FilterTree stored in parg1/parg2.
 */
void DBparser::parseFilterTree(pANTLR3_BASE_TREE node, SPARQLquery::FilterTree& filter)
{
	printf("parseFilterTree\n");

	switch (node->getType(node))
	{
		case 192: filter.type = SPARQLquery::FilterTree::Not;            break;	//not
		case 125: filter.type = SPARQLquery::FilterTree::Or;             break;	//or
		case 8:   filter.type = SPARQLquery::FilterTree::And;            break;	//and
		case 62:  filter.type = SPARQLquery::FilterTree::Equal;          break;	//equal
		case 116: filter.type = SPARQLquery::FilterTree::NotEqual;       break;	//not equal
		case 100: filter.type = SPARQLquery::FilterTree::Less;           break;	//less
		case 101: filter.type = SPARQLquery::FilterTree::LessOrEqual;    break;	//less equal
		case 72:  filter.type = SPARQLquery::FilterTree::Greater;        break;	//greater
		case 73:  filter.type = SPARQLquery::FilterTree::GreaterOrEqual; break;	//greater equal
		default:  break;
	}

	for (unsigned int j = 0; j < node->getChildCount(node); j++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);
		const bool isFirstOperand = (j == 0);

		//unary 190
		if (childNode->getType(childNode) == 190)
		{
			std::string& operand = isFirstOperand ? filter.arg1 : filter.arg2;
			parseString(childNode, operand, 1);
			replacePrefix(operand);
		}
		else
		{
			// Attach the subtree before descending, as the slots are filled in order.
			SPARQLquery::FilterTree* subtree = new SPARQLquery::FilterTree();
			if (isFirstOperand)
			{
				filter.parg1 = subtree;
			}
			else
			{
				filter.parg2 = subtree;
			}
			parseFilterTree(childNode, *subtree);
		}
	}
}
|
||||
|
||||
/*
 * Copy the text of a tree node into `str`.
 * Starting at `node`, the first child is followed `depth` times and the text
 * of the node reached is stored. If the chain ends early (a NULL node is
 * hit), a const char* error message is thrown and `str` is left unchanged.
 */
void DBparser::parseString(pANTLR3_BASE_TREE node, std::string& str, int depth)
{
	for (; depth > 0 && node != NULL; depth--)
	{
		node = (pANTLR3_BASE_TREE) node->getChild(node, 0);
	}

	if (node == NULL)
	{
		throw "Some errors are found in the SPARQL query request.";
	}

	str = (const char*) node->getText(node)->chars;
}
|
||||
|
||||
|
||||
/*
 * Debug dump of a parsed query to stdout: the projected variables on one
 * tab-separated line, followed by the root pattern group, all framed by
 * two separator lines.
 */
void DBparser::printquery(SPARQLquery& query)
{
	std::vector <std::string> &projections = query.getProjections();

	printf("===========================================================================\n");

	printf("var is :");
	for (std::vector <std::string>::const_iterator it = projections.begin(); it != projections.end(); ++it)
	{
		printf("%s\t", it->c_str());
	}
	printf("\n");

	printgrouppattern(query.getPatternGroup(), 0);

	printf("===========================================================================\n");
}
|
||||
|
||||
/* Print `count` tab characters to stdout (nothing when count <= 0). */
static void printIndentTabs(int count)
{
	for (int j = 0; j < count; j++) printf("\t");
}

/*
 * Debug dump of a pattern group to stdout, indented by `dep` tabs:
 * its triple patterns, then (when present) optional groups, the branches of
 * every union and the filters, enclosed in "{" ... "}". Nested groups are
 * printed one level deeper.
 */
void DBparser::printgrouppattern(SPARQLquery::PatternGroup &pg, int dep)
{
	printIndentTabs(dep); printf("{\n");

	printIndentTabs(dep); printf("pattern:\n");
	for (int i = 0; i < (int)pg.patterns.size(); i++)
	{
		printIndentTabs(dep);
		printf("\t%s\t%s\t%s\n", pg.patterns[i].subject.value.c_str(), pg.patterns[i].predicate.value.c_str(), pg.patterns[i].object.value.c_str());
	}

	if (pg.optionals.size() > 0)
	{
		printIndentTabs(dep); printf("optional:\n");
		for (int i = 0; i < (int)pg.optionals.size(); i++)
			printgrouppattern(pg.optionals[i], dep + 1);
	}

	for (int i = 0; i < (int)pg.unions.size(); i++)
	{
		printIndentTabs(dep); printf("union %d:\n", i + 1);
		for (int k = 0; k < (int)pg.unions[i].size(); k++)
			printgrouppattern(pg.unions[i][k], dep + 1);
	}

	if (pg.filters.size() > 0)
	{
		printIndentTabs(dep); printf("filter:\n");
		for (int i = 0; i < (int)pg.filters.size(); i++)
		{
			// Filters sit one level deeper than the group header.
			printIndentTabs(dep + 1);
			printfilter(pg.filters[i]);
			printf("\n");
		}
	}

	printIndentTabs(dep); printf("}\n");
}
|
||||
|
||||
void DBparser::printNode(pANTLR3_BASE_TREE node, int depth){
|
||||
const char* s =(const char*) node->getText(node)->chars;
|
||||
ANTLR3_UINT32 treeType = node->getType(node);
|
||||
/*
 * Debug print of a filter expression to stdout, fully parenthesised.
 * A Not node prints as "(!<operand1>)"; every other node prints as
 * "(<operand1><op><operand2>)". An operand is the stored string when the
 * matching sub-tree pointer is NULL, otherwise the sub-tree printed
 * recursively.
 */
void DBparser::printfilter(SPARQLquery::FilterTree &ft)
{
	printf("(");

	if (ft.type == SPARQLquery::FilterTree::Not) printf("!");

	if (ft.parg1 == NULL)
	{
		printf("%s", ft.arg1.c_str());
	}
	else
	{
		printfilter(*ft.parg1);
	}

	if (ft.type == SPARQLquery::FilterTree::Or) printf("||");
	if (ft.type == SPARQLquery::FilterTree::And) printf("&&");
	if (ft.type == SPARQLquery::FilterTree::Equal) printf("=");
	if (ft.type == SPARQLquery::FilterTree::NotEqual) printf("!=");
	if (ft.type == SPARQLquery::FilterTree::Less) printf("<");
	if (ft.type == SPARQLquery::FilterTree::LessOrEqual) printf("<=");
	if (ft.type == SPARQLquery::FilterTree::Greater) printf(">");
	if (ft.type == SPARQLquery::FilterTree::GreaterOrEqual) printf(">=");

	// Not is unary, so it has no second operand to print.
	if (ft.type != SPARQLquery::FilterTree::Not)
	{
		if (ft.parg2 == NULL)
		{
			printf("%s", ft.arg2.c_str());
		}
		else
		{
			printfilter(*ft.parg2);
		}
	}

	printf(")");
}
|
||||
|
||||
/*
 * Turn the parsed pattern-group tree into the basic queries of `query`.
 *
 * A group whose hasVar flag is set opens a new basic query and a new query
 * variable list; each of its patterns is added as a Triple, and subjects and
 * objects starting with '?' are registered as query variables (predicates
 * are not checked). Union branches are then visited recursively, followed
 * by the optional groups.
 */
void DBparser::genQueryVec(SPARQLquery::PatternGroup &pg, SPARQLquery& query)
{
	if (pg.hasVar)
	{
		query.addBasicQuery();
		query.addQueryVarVec();

		for (int p = 0; p < pg.patterns.size(); p++)
		{
			string &subjectText   = pg.patterns[p].subject.value;
			string &predicateText = pg.patterns[p].predicate.value;
			string &objectText    = pg.patterns[p].object.value;

			query.addTriple(Triple(subjectText, predicateText, objectText));

			if (subjectText[0] == '?')
			{
				query.addQueryVar(subjectText);
			}
			if (objectText[0] == '?')
			{
				query.addQueryVar(objectText);
			}
		}
	}

	for (int u = 0; u < pg.unions.size(); u++)
	{
		for (int b = 0; b < pg.unions[u].size(); b++)
		{
			genQueryVec(pg.unions[u][b], query);
		}
	}

	for (int o = 0; o < pg.optionals.size(); o++)
	{
		genQueryVec(pg.optionals[o], query);
	}
}
|
||||
|
|
|
@ -1,62 +1,52 @@
|
|||
/*
|
||||
* DBparser.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
* Created on: 2015-4-11
|
||||
* Author: cjq
|
||||
*/
|
||||
|
||||
#ifndef DBPARSER_H_
|
||||
#define DBPARSER_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Util/Triple.h"
|
||||
#include<string>
|
||||
#include<vector>
|
||||
#include<map>
|
||||
#include<fstream>
|
||||
#include<stdio.h>
|
||||
#include "../Query/SPARQLquery.h"
|
||||
#include "SparqlParser.h"
|
||||
#include "SparqlLexer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Declaration of DBparser as shown in this diff view.
// NOTE(review): the lines below interleave two revisions of the class without
// +/- markers: printNode is declared once returning void and once returning int,
// parseString once returning int and once returning void, and sparqlParser
// appears both as a public string-returning method and, after the second
// "private:", as a void-returning one. Every declaration is also followed by
// "|" / "||||" separator residue. Confirm against the real header before
// relying on any single declaration.
class DBparser{
|
||||
private:
|
||||
|
||||
void printNode(pANTLR3_BASE_TREE node,int depth=0);
|
||||
int parseNode(pANTLR3_BASE_TREE node, SPARQLquery& query,int depth=0);
|
||||
int parsePrefix(pANTLR3_BASE_TREE node,std::pair<std::string,std::string>& prefixPair);
|
||||
int parseString(pANTLR3_BASE_TREE node,std::string& str,int depth=0);
|
||||
int parseTriple(pANTLR3_BASE_TREE node,Triple& triple);
|
||||
int parseBasicQuery(pANTLR3_BASE_TREE node,BasicQuery& basicQuery);
|
||||
int parseVar(pANTLR3_BASE_TREE node,SPARQLquery& query);
|
||||
void replacePrefix(string& str);
|
||||
int printNode(pANTLR3_BASE_TREE node,int depth=0);
|
||||
|
||||
void parseTree(pANTLR3_BASE_TREE node, SPARQLquery& query);
|
||||
void parsePrologue(pANTLR3_BASE_TREE node);
|
||||
void parsePrefix(pANTLR3_BASE_TREE node);
|
||||
void replacePrefix(std::string& str);
|
||||
void parseSelectClause(pANTLR3_BASE_TREE node, SPARQLquery& query);
|
||||
void parseSelectVar(pANTLR3_BASE_TREE node, SPARQLquery& query);
|
||||
void parseGroupPattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
|
||||
void parsePattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
|
||||
void parseOptional(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
|
||||
void parseUnion(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
|
||||
void parseFilter(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup);
|
||||
void parseFilterTree(pANTLR3_BASE_TREE node, SPARQLquery::FilterTree& filter);
|
||||
void parseString(pANTLR3_BASE_TREE node, std::string& str, int depth=0);
|
||||
|
||||
void printquery(SPARQLquery& query);
|
||||
void printgrouppattern(SPARQLquery::PatternGroup &pg, int dep = 0);
|
||||
void printfilter(SPARQLquery::FilterTree &ft);
|
||||
|
||||
void genQueryVec(SPARQLquery::PatternGroup &pg, SPARQLquery& query);
|
||||
std::map<std::string,std::string> _prefix_map;
|
||||
|
||||
public:
|
||||
/* the maximum number of triples parsed out by a single call to rdfParser();
|
||||
* when -1, it means parse all triples in the file into the triples set(array)
|
||||
* */
|
||||
static const int TRIPLE_NUM_PER_GROUP = 10*1000*1000;
|
||||
|
||||
DBparser();
|
||||
|
||||
/* takes a sparql query string and parses it into a SPARQLquery;
|
||||
* the returned string is meant for logging when an error happens */
|
||||
string sparqlParser(const string& _sparql, SPARQLquery& _sparql_query);
|
||||
|
||||
/* file stream _fin points to rdfFile
|
||||
* that was opened previously in Database::encodeRDF
|
||||
* rdfParser() will be called many times until all triples in the rdfFile are parsed
|
||||
* and after each call, a group of triples will be parsed into the vector;
|
||||
* the returned string is meant for logging when an error happens;
|
||||
*/
|
||||
string rdfParser(ifstream& _fin, Triple* _triple_array, int& _triple_num);
|
||||
|
||||
private:
|
||||
/*
|
||||
* used when reading lines from a FILE, to avoid allocating new memory each time
|
||||
*/
|
||||
static char* line_buf;
|
||||
static int buf_len;
|
||||
void sparqlParser(const std::string& _sparql, SPARQLquery& _sparql_query);
|
||||
};
|
||||
|
||||
|
||||
#endif /* DBPARSER_H_ */
|
||||
|
|
|
@ -1,661 +0,0 @@
|
|||
/*
|
||||
* QueryParser.cpp
|
||||
*
|
||||
* Created on: 2015-4-11
|
||||
* Author: cjq
|
||||
*/
|
||||
|
||||
#include "QueryParser.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/* Construct a parser whose prefix table is empty. */
QueryParser::QueryParser()
{
	// a default-constructed map is already empty; the explicit clear is kept as in the original
	this->_prefix_map.clear();
}
|
||||
|
||||
void QueryParser::sparqlParser(const string& query, QueryTree& querytree)
|
||||
{
|
||||
//uncompress before use
|
||||
dfa34_Table_uncompress();
|
||||
|
||||
pANTLR3_INPUT_STREAM input;
|
||||
pSparqlLexer lex;
|
||||
pANTLR3_COMMON_TOKEN_STREAM tokens;
|
||||
pSparqlParser parser;
|
||||
input = antlr3StringStreamNew((ANTLR3_UINT8 *)(query.c_str()), ANTLR3_ENC_UTF8, query.length(), (ANTLR3_UINT8 *)"QueryString");
|
||||
//input = antlr3FileStreamNew((pANTLR3_UINT8)filePath,ANTLR3_ENC_8BIT);
|
||||
lex = SparqlLexerNew(input);
|
||||
|
||||
tokens = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT,TOKENSOURCE(lex));
|
||||
parser = SparqlParserNew(tokens);
|
||||
|
||||
SparqlParser_workload_return r = parser->workload(parser);
|
||||
pANTLR3_BASE_TREE root = r.tree;
|
||||
|
||||
if (printNode(root) > 0) throw "Some errors are found in the SPARQL query request.";
|
||||
|
||||
parseTree(root, querytree);
|
||||
|
||||
querytree.print();
|
||||
|
||||
parser->free(parser);
|
||||
tokens->free(tokens);
|
||||
lex->free(lex);
|
||||
input->close(input);
|
||||
}
|
||||
|
||||
/*
 * Print the subtree rooted at `node` to stdout, one node per line as
 * "<type>: <text>", prefixed by `dep` copies of the indent string.
 *
 * Returns the number of nodes in the subtree whose token type is 0; the caller
 * in sparqlParser() treats a non-zero result as a malformed query.
 */
int QueryParser::printNode(pANTLR3_BASE_TREE node, int dep)
{
	const char* nodeText = (const char*) node->getText(node)->chars;
	ANTLR3_UINT32 nodeType = node->getType(node);

	// this node contributes one error if its type is 0
	int errorNodes = (nodeType == 0) ? 1 : 0;

	int level = 0;
	while (level < dep)
	{
		printf(" ");
		level++;
	}
	printf("%d: %s\n",nodeType,nodeText);

	// children are printed one level deeper and their error counts are summed
	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);
		errorNodes += printNode(child, dep + 1);
	}

	return errorNodes;
}
|
||||
|
||||
/*
 * Walk the children of `node` and dispatch on their ANTLR token type:
 *   144 prologue              -> parsePrologue()
 *   155 select / 13 ask       -> record the query form, then recurse
 *   156 select clause         -> parseSelectClause()
 *   77  group graph pattern   -> parseGroupPattern() on the tree's root group
 *   127 order by              -> parseOrderBy()
 *   120 offset / 102 limit    -> read the integer child (type 83) and store it
 *   anything else             -> recurse into the child
 *
 * Changes from the previous version: an OFFSET/LIMIT node without a child is
 * skipped instead of dereferencing NULL, and the parsed number has a defined
 * initial value.
 */
void QueryParser::parseTree(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
	printf("parseTree\n");

	for (unsigned int i = 0; i < node->getChildCount(node); i++)
	{
		pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
		const unsigned int childType = childNode->getType(childNode);

		switch (childType)
		{
			//prologue 144
			case 144:
				parsePrologue(childNode);
				break;

			//select 155
			case 155:
				querytree.setQueryForm(QueryTree::Select_Query);
				parseTree(childNode, querytree);
				break;

			//ask 13
			case 13:
				querytree.setQueryForm(QueryTree::Ask_Query);
				parseTree(childNode, querytree);
				break;

			//select clause 156
			case 156:
				parseSelectClause(childNode, querytree);
				break;

			//group graph pattern 77
			case 77:
				parseGroupPattern(childNode, querytree.getGroupPattern());
				break;

			//order by 127
			case 127:
				parseOrderBy(childNode, querytree);
				break;

			//offset 120 limit 102
			case 120:
			case 102:
			{
				pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);

				//integer 83; a missing or non-integer child is ignored, not dereferenced
				if (gchildNode == NULL || gchildNode->getType(gchildNode) != 83)
					break;

				string str;
				parseString(gchildNode, str, 0);

				// initialised so the value is defined even on library versions where a
				// failed extraction leaves the target untouched
				int num = -1;

				stringstream str2int;
				str2int << str;
				str2int >> num;

				// negative values are never stored
				if (num >= 0)
				{
					if (childType == 120)
						querytree.setOffset(num);
					else
						querytree.setLimit(num);
				}
				break;
			}

			default:
				parseTree(childNode, querytree);
				break;
		}
	}
}
|
||||
|
||||
/*
 * Handle a prologue node: every direct child of token type 143 (prefix
 * declaration) is passed to parsePrefix(); all other children are ignored.
 */
void QueryParser::parsePrologue(pANTLR3_BASE_TREE node)
{
	printf("parsePrologue\n");

	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE declaration = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);

		//prefix 143
		if (declaration->getType(declaration) != 143)
		{
			continue;
		}
		parsePrefix(declaration);
	}
}
|
||||
|
||||
/*
 * Handle one prefix declaration: the text of the child of type 136 (prefix
 * namespace) becomes the key and the text of the child of type 89 (IRI) the
 * value of a new _prefix_map entry. map::insert is used, so an entry that
 * already exists under the same key is left as it is.
 */
void QueryParser::parsePrefix(pANTLR3_BASE_TREE node)
{
	printf("parsePrefix\n");

	string prefixLabel;
	string prefixIri;

	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE part = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);

		switch (part->getType(part))
		{
			//prefix namespace 136
			case 136:
				parseString(part, prefixLabel, 0);
				break;

			//prefix IRI 89
			case 89:
				parseString(part, prefixIri, 0);
				break;

			default:
				break;
		}
	}

	_prefix_map.insert(make_pair(prefixLabel, prefixIri));
}
|
||||
|
||||
/*
 * Expand a prefixed name in place.
 *
 * Strings starting with '<', '"' or '?' are left untouched, as are strings
 * without ':' and strings whose prefix is "_:" (blank node). Otherwise the
 * part up to and including the first ':' is looked up in _prefix_map; the
 * stored value minus its last character, the remainder of `str` and a closing
 * '>' are concatenated into the new value of `str`.
 *
 * Throws a `const char*` message when the prefix is not in _prefix_map.
 */
void QueryParser::replacePrefix(string& str)
{
	const char leading = str[0];
	if (leading == '<' || leading == '\"' || leading == '?')
	{
		return;
	}

	int colonPos=str.find(":");
	if (colonPos == -1)
	{
		return;
	}

	string label=str.substr(0, colonPos + 1);

	//blank node
	if (label == "_:")
	{
		return;
	}

	cout << "prefix: " << label << endl;

	std::map<std::string,std::string>::iterator entry = _prefix_map.find(label);
	if (entry == _prefix_map.end())
	{
		cout << "prefix not found..." << endl;
		throw "Some errors are found in the SPARQL query request.";
	}

	// drop the last character of the stored value, append the local part and re-close with '>'
	const string &base = entry->second;
	str = base.substr(0, base.length() - 1) + str.substr(colonPos + 1 ,str.length() - colonPos - 1) + ">";
	cout << "str: " << str << endl;
}
|
||||
|
||||
/*
 * Handle a select clause. For every direct child:
 *   type 52  (distinct) -> set the projection modifier to Modifier_Distinct
 *   type 199 (var)      -> parseSelectVar()
 *   type 14  (asterisk) -> mark the projection as '*'
 */
void QueryParser::parseSelectClause(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
	printf("parseSelectClause\n");

	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE item = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);

		switch (item->getType(item))
		{
			//distinct 52
			case 52:
				querytree.setProjectionModifier(QueryTree::Modifier_Distinct);
				break;

			//var 199
			case 199:
				parseSelectVar(item, querytree);
				break;

			//asterisk 14
			case 14:
				querytree.setProjectionAsterisk();
				break;

			default:
				break;
		}
	}
}
|
||||
|
||||
/*
 * Handle a projected variable node: the text of each direct child of type 200
 * is added to the projection of `querytree`.
 */
void QueryParser::parseSelectVar(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
	printf("parseSelectVar\n");

	string varName = "";
	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE candidate = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);

		// only children of type 200 carry a variable name
		if (candidate->getType(candidate) != 200)
		{
			continue;
		}

		parseString(candidate, varName, 0);
		querytree.addProjectionVar(varName);
	}
}
|
||||
|
||||
/*
 * Fill `grouppattern` from a group-graph-pattern node. For every direct child:
 *   185 triples same subject -> parsePattern()
 *   124 optional / 108 minus -> parseOptionalOrMinus()
 *   195 union                -> open a new union group, then parseUnion()
 *   67  filter               -> parseFilter()
 *   77  nested group pattern -> parsed into the same `grouppattern` (redundant {})
 */
void QueryParser::parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
	printf("parseGroupPattern\n");

	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE element = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);

		switch (element->getType(element))
		{
			//triples same subject 185
			case 185:
				parsePattern(element, grouppattern);
				break;

			//optional 124 minus 108
			case 124:
			case 108:
				parseOptionalOrMinus(element, grouppattern);
				break;

			//union 195
			case 195:
				grouppattern.addOneGroupUnion();
				parseUnion(element, grouppattern);
				break;

			//filter 67
			case 67:
				parseFilter(element, grouppattern);
				break;

			//group graph pattern 77
			//redundant {}
			case 77:
				parseGroupPattern(element, grouppattern);
				break;

			default:
				break;
		}
	}
}
|
||||
|
||||
/*
 * Parse a "triples same subject" node into one or more patterns.
 *
 * Children are visited in order: type 177 sets the subject, 142 the predicate
 * and 119 the object (each followed by prefix expansion). After every child at
 * an even, non-zero index the current (subject, predicate, object) is appended
 * to `grouppattern`, so several predicate/object pairs share one subject.
 *
 * Throws a `const char*` message when a predicate node has no child (the
 * previous version dereferenced a NULL pointer there); parseString() and
 * replacePrefix() may throw as well.
 */
void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
	printf("parsePattern\n");

	string subject = "";
	string predicate = "";
	string object = "";

	for (unsigned int i = 0; i < node->getChildCount(node); i++)
	{
		pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, i);

		switch (childNode->getType(childNode))
		{
			//subject 177
			case 177:
				parseString(childNode, subject, 1);
				replacePrefix(subject);
				break;

			//predicate 142
			case 142:
			{
				pANTLR3_BASE_TREE gchildNode=(pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
				if (gchildNode == NULL)
					throw "Some errors are found in the SPARQL query request.";

				//var 200: the text sits one level down, otherwise four levels down
				const int depth = (gchildNode->getType(gchildNode) == 200) ? 1 : 4;
				parseString(childNode, predicate, depth);
				replacePrefix(predicate);
				break;
			}

			//object 119
			case 119:
				parseString(childNode, object, 1);
				replacePrefix(object);
				break;

			default:
				break;
		}

		if (i != 0 && i % 2 == 0)		//triples same subject
		{
			grouppattern.addOnePattern(QueryTree::GroupPattern::Pattern(
						QueryTree::GroupPattern::Pattern::Element(subject),
						QueryTree::GroupPattern::Pattern::Element(predicate),
						QueryTree::GroupPattern::Pattern::Element(object)));
		}
	}
}
|
||||
|
||||
/*
 * Handle an OPTIONAL (type 124) or MINUS (type 108) node. For every direct
 * child of type 77 (group graph pattern) a new optional ('o') or minus ('m')
 * entry is appended to `grouppattern`, and the child is parsed into the entry
 * returned by getLastOptionalOrMinus().
 */
void QueryParser::parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
	//optional 124 minus 108
	const bool isOptional = (node->getType(node) == 124);
	const bool isMinus = !isOptional && (node->getType(node) == 108);

	if (isOptional)
	{
		printf("parseOptional\n");
	}
	else if (isMinus)
	{
		printf("parseMinus\n");
	}

	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE body = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);

		//group graph pattern 77
		if (body->getType(body) != 77)
		{
			continue;
		}

		if (isOptional)
		{
			grouppattern.addOneOptionalOrMinus('o');
		}
		else if (isMinus)
		{
			grouppattern.addOneOptionalOrMinus('m');
		}

		parseGroupPattern(body, grouppattern.getLastOptionalOrMinus());
	}
}
|
||||
|
||||
/*
 * Handle a UNION node. Each direct child of type 77 (group graph pattern)
 * becomes a new union branch of `grouppattern`; each direct child of type 195
 * (nested union) is processed recursively against the same `grouppattern`.
 */
void QueryParser::parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
	printf("parseUnion\n");

	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE branch = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);

		switch (branch->getType(branch))
		{
			//group graph pattern 77
			case 77:
				grouppattern.addOneUnion();
				parseGroupPattern(branch, grouppattern.getLastUnion());
				break;

			//union 195
			case 195:
				parseUnion(branch, grouppattern);
				break;

			default:
				break;
		}
	}
}
|
||||
|
||||
/*
 * Handle a FILTER node: every direct child starts a new filter tree in
 * `grouppattern`. A child of type 190 (unary) is replaced by its first child
 * before being handed to parseFilterTree().
 */
void QueryParser::parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern)
{
	printf("parseFilter\n");

	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE expression = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);

		//unary 190
		const bool wrappedInUnary = (expression->getType(expression) == 190);
		if (wrappedInUnary)
		{
			expression = (pANTLR3_BASE_TREE) expression->getChild(expression, 0);
		}

		grouppattern.addOneFilterTree();
		parseFilterTree(expression, grouppattern, grouppattern.getLastFilterTree());
	}
}
|
||||
|
||||
/*
 * Translate one filter expression node into `filter`.
 *
 * First the node's token type is mapped to a FilterTreeNode type; unknown
 * types leave `filter` untouched and return. Four shapes are then handled
 * specially:
 *   NOT (115) whose first child is IN (81): a Not node with a single Builtin_in
 *       child filled by parseVarInExpressionList(node, ..., 1), because "in",
 *       the variable and the expression list sit on the same layer;
 *   IN (81): parseVarInExpressionList(node, filter, 0);
 *   NOT EXISTS (117): a Not node with a single Builtin_exists child;
 *   EXISTS (63): parseExistsGroupPattern() directly on `filter`.
 * Every other node gets one entry in filter.child per tree child: a string
 * argument ('s') for leaves and for unary wrappers, or a recursively parsed
 * subtree ('t').
 */
void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter)
{
	typedef QueryTree::GroupPattern::FilterTreeNode FilterNode;

	printf("parseFilterTree\n");

	const unsigned int nodeType = node->getType(node);

	switch (nodeType)
	{
		case 192:	//! 192
		case 115:	//not 115
		case 117:	//not exists 117
			filter.type = FilterNode::Not_type;
			break;

		case 125:	//or 125
			filter.type = FilterNode::Or_type;
			break;
		case 8:		//and 8
			filter.type = FilterNode::And_type;
			break;
		case 62:	//equal 62
			filter.type = FilterNode::Equal_type;
			break;
		case 116:	//not equal 116
			filter.type = FilterNode::NotEqual_type;
			break;
		case 100:	//less 100
			filter.type = FilterNode::Less_type;
			break;
		case 101:	//less equal 101
			filter.type = FilterNode::LessOrEqual_type;
			break;
		case 72:	//greater 72
			filter.type = FilterNode::Greater_type;
			break;
		case 73:	//greater equal 73
			filter.type = FilterNode::GreaterOrEqual_type;
			break;

		case 150:	//regex 150
			filter.type = FilterNode::Builtin_regex_type;
			break;
		case 96:	//lang 96
			filter.type = FilterNode::Builtin_lang_type;
			break;
		case 97:	//langmatches 97
			filter.type = FilterNode::Builtin_langmatches_type;
			break;
		case 23:	//bound 23
			filter.type = FilterNode::Builtin_bound_type;
			break;
		case 81:	//in 81
			filter.type = FilterNode::Builtin_in_type;
			break;
		case 63:	//exists 63
			filter.type = FilterNode::Builtin_exists_type;
			break;

		default:
			return;
	}

	if (nodeType == 115)
	{
		//in the "NOT IN" case, in, var and expression list is on the same layer.
		pANTLR3_BASE_TREE firstChild = (pANTLR3_BASE_TREE) node->getChild(node, 0);

		//in 81
		if (firstChild->getType(firstChild) == 81)
		{
			filter.child.push_back(FilterNode::FilterTreeChild());
			filter.child[0].type = 't';
			filter.child[0].node.type = FilterNode::Builtin_in_type;
			parseVarInExpressionList(node, filter.child[0].node, 1);
			return;
		}
		// any other NOT falls through to the generic child loop below
	}
	else if (nodeType == 81)
	{
		//in 81
		parseVarInExpressionList(node, filter, 0);
		return;
	}
	else if (nodeType == 117)
	{
		//not exists 117
		filter.child.push_back(FilterNode::FilterTreeChild());
		filter.child[0].type = 't';
		filter.child[0].node.type = FilterNode::Builtin_exists_type;

		parseExistsGroupPattern(node, grouppattern, filter.child[0].node);
		return;
	}
	else if (nodeType == 63)
	{
		//exists 63
		parseExistsGroupPattern(node, grouppattern, filter);
		return;
	}

	for (unsigned int i = 0; i < node->getChildCount(node); i++)
	{
		pANTLR3_BASE_TREE operand = (pANTLR3_BASE_TREE) node->getChild(node, i);

		//unary 190: step into the wrapper when what it wraps has children of its own
		//(an earlier variant tested for regex 150, lang 96, langmatches 97, bound 23, exists 63)
		if (operand->getType(operand) == 190)
		{
			pANTLR3_BASE_TREE wrapped = (pANTLR3_BASE_TREE) operand->getChild(operand, 0);
			if (wrapped->getChildCount(wrapped) != 0)
			{
				operand = wrapped;
			}
		}

		filter.child.push_back(FilterNode::FilterTreeChild());

		//unary 190 (still a wrapper): its text is one level down
		const bool stillUnary = (operand->getType(operand) == 190);

		if (stillUnary || operand->getChildCount(operand) == 0)
		{
			filter.child[i].type = 's';
			parseString(operand, filter.child[i].arg, stillUnary ? 1 : 0);
			replacePrefix(filter.child[i].arg);
		}
		else
		{
			filter.child[i].type = 't';
			parseFilterTree(operand, grouppattern, filter.child[i].node);
		}
	}
}
|
||||
|
||||
/*
 * Fill `filter` with the operands of an IN expression.
 *
 * Children of `node` are visited starting at index `begin` (0 for "x IN (...)",
 * 1 for "x NOT IN (...)" where child 0 is the "in" token). A child of type 190
 * (unary) contributes one string argument; a child of type 65 (expression
 * list) contributes one string argument per list element. Every argument goes
 * through replacePrefix().
 *
 * Each argument is written to the element just appended to filter.child. The
 * previous version computed the slot as child[i - begin] / child[i + j - begin],
 * which only matches the appended element when no sibling is skipped, the list
 * comes last and filter.child starts out empty; otherwise it indexed out of
 * bounds or overwrote an earlier slot.
 */
void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTreeNode& filter, unsigned int begin)
{
	printf("parseVarInExpressionList\n");

	for (unsigned int i = begin; i < node->getChildCount(node); i++)
	{
		pANTLR3_BASE_TREE childNode=(pANTLR3_BASE_TREE) node->getChild(node, i);

		//unary 190
		if (childNode->getType(childNode) == 190)
		{
			filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());

			filter.child.back().type = 's';
			parseString(childNode, filter.child.back().arg, 1);
			replacePrefix(filter.child.back().arg);
		}

		//expression list 65
		if (childNode->getType(childNode) == 65)
		{
			for (unsigned int j = 0; j < childNode->getChildCount(childNode); j++)
			{
				pANTLR3_BASE_TREE gchildNode=(pANTLR3_BASE_TREE) childNode->getChild(childNode, j);

				filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());

				filter.child.back().type = 's';
				parseString(gchildNode, filter.child.back().arg, 1);
				replacePrefix(filter.child.back().arg);
			}
		}
	}
}
|
||||
|
||||
/*
 * Handle the group pattern of an EXISTS / NOT EXISTS filter.
 *
 * Only the first child of `node` is examined. When it is a group graph
 * pattern (type 77), a new exists-group-pattern is added to `grouppattern`,
 * filter.exists_grouppattern_id is set to the last index of the last list in
 * grouppattern.filter_exists_grouppatterns, and the child is parsed into the
 * group returned by getLastExistsGroupPattern(). Otherwise nothing happens.
 */
void QueryParser::parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter)
{
	printf("parseExistsGroupPattern\n");

	pANTLR3_BASE_TREE body = (pANTLR3_BASE_TREE) node->getChild(node, 0);

	//group graph pattern 77
	if (body->getType(body) != 77)
	{
		return;
	}

	grouppattern.addOneExistsGroupPattern();

	// index of the list just extended, then the index of its newest entry
	const int lastList = (int)grouppattern.filter_exists_grouppatterns.size() - 1;
	filter.exists_grouppattern_id = (int)grouppattern.filter_exists_grouppatterns[lastList].size() - 1;

	parseGroupPattern(body, grouppattern.getLastExistsGroupPattern());
}
|
||||
|
||||
/*
 * Handle an ORDER BY node. Every direct child of type 128 (order-by condition)
 * yields one querytree.addOrder(var, descending) call, where
 *   type 200 (var) / 190 (unary) grandchildren supply the variable text,
 *   type 12 (ascending) clears and type 49 (descending) sets the flag.
 * The flag defaults to false (ascending).
 */
void QueryParser::parseOrderBy(pANTLR3_BASE_TREE node, QueryTree& querytree)
{
	printf("parseOrderBy\n");

	for (unsigned int condIdx = 0; condIdx < node->getChildCount(node); condIdx++)
	{
		pANTLR3_BASE_TREE condition = (pANTLR3_BASE_TREE) node->getChild(node, condIdx);

		//order by condition
		if (condition->getType(condition) != 128)
		{
			continue;
		}

		string orderVar;
		bool descending = false;

		for (unsigned int partIdx = 0; partIdx < condition->getChildCount(condition); partIdx++)
		{
			pANTLR3_BASE_TREE part = (pANTLR3_BASE_TREE) condition->getChild(condition, partIdx);

			switch (part->getType(part))
			{
				//var 200
				case 200:
					parseString(part, orderVar, 0);
					break;

				//unary 190
				case 190:
					parseString(part, orderVar, 1);
					break;

				//asend 12
				case 12:
					descending = false;
					break;

				//desend 49
				case 49:
					descending = true;
					break;

				default:
					break;
			}
		}

		querytree.addOrder(orderVar, descending);
	}
}
|
||||
|
||||
/*
 * Extract the text represented by `node` into `str`.
 *
 * dep == 0: `str` becomes the text of `node` itself.
 * dep >= 1: descend (dep - 1) times through first children, then scan the
 *           children of the node reached. A child of type 98 (custom language)
 *           has its text appended to `str`; any other child replaces `str`
 *           with its text, and a type-170 (single-quoted) literal has its
 *           quotes rewritten to double quotes.
 *
 * Throws a `const char*` message when the descent reaches NULL or a node
 * without children.
 */
void QueryParser::parseString(pANTLR3_BASE_TREE node, string& str, int dep)
{
	if (dep == 0)
	{
		str = (const char*) node->getText(node)->chars;
		return;
	}

	// walk down the chain of first children
	for (; dep > 1 && node != NULL; dep--)
	{
		node = (pANTLR3_BASE_TREE) node->getChild(node, 0);
	}

	if (node == NULL || node->getChildCount(node) == 0)
	{
		throw "Some errors are found in the SPARQL query request.";
	}

	for (unsigned int childIdx = 0; childIdx < node->getChildCount(node); childIdx++)
	{
		pANTLR3_BASE_TREE piece = (pANTLR3_BASE_TREE) node->getChild(node, childIdx);
		const unsigned int pieceType = piece->getType(piece);

		//custom language 98
		if (pieceType == 98)
		{
			string languageTag;
			languageTag = (const char*) piece->getText(piece)->chars;
			str += languageTag;
			continue;
		}

		//var 200	string literal 170(single quotation marks) 171(double quotation marks)
		//IRI 89	PNAME_LN 135
		//every type other than 98 is accepted here
		str = (const char*) piece->getText(piece)->chars;

		if (pieceType == 170)
		{
			str = "\"" + str.substr(1, str.length() - 2) + "\"";
		}
	}
}
|
|
@ -1,46 +0,0 @@
|
|||
/*
|
||||
* QueryParser.h
|
||||
*
|
||||
* Created on: 2015-4-11
|
||||
* Author: cjq
|
||||
*/
|
||||
|
||||
#ifndef QUERYPARSER_H_
|
||||
#define QUERYPARSER_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Query/QueryTree.h"
|
||||
#include "SparqlParser.h"
|
||||
#include "SparqlLexer.h"
|
||||
|
||||
class QueryParser{
|
||||
private:
|
||||
std::map<std::string,std::string> _prefix_map;
|
||||
|
||||
int printNode(pANTLR3_BASE_TREE node,int dep = 0);
|
||||
|
||||
void parseTree(pANTLR3_BASE_TREE node, QueryTree& querytree);
|
||||
void parsePrologue(pANTLR3_BASE_TREE node);
|
||||
void parsePrefix(pANTLR3_BASE_TREE node);
|
||||
void replacePrefix(std::string& str);
|
||||
void parseSelectClause(pANTLR3_BASE_TREE node, QueryTree& querytree);
|
||||
void parseSelectVar(pANTLR3_BASE_TREE node, QueryTree& querytree);
|
||||
void parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
|
||||
void parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
|
||||
void parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
|
||||
void parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
|
||||
void parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern);
|
||||
void parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter);
|
||||
void parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTreeNode& filter, unsigned int begin);
|
||||
void parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern& grouppattern, QueryTree::GroupPattern::FilterTreeNode& filter);
|
||||
void parseOrderBy(pANTLR3_BASE_TREE node, QueryTree& querytree);
|
||||
void parseString(pANTLR3_BASE_TREE node, std::string& str, int dep);
|
||||
|
||||
|
||||
public:
|
||||
QueryParser();
|
||||
|
||||
void sparqlParser(const std::string& query, QueryTree& querytree);
|
||||
};
|
||||
|
||||
#endif /* QUERYPARSER_H_ */
|
|
@ -1,28 +1,30 @@
|
|||
#ifndef gstore_parser_RDFParser
|
||||
#define gstore_parser_RDFParser
|
||||
#include "TurtleParser.h"
|
||||
#include "../Triple/Triple.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
|
||||
using namespace std;
|
||||
|
||||
class RDFParser
|
||||
{
|
||||
private:
|
||||
stringstream _sin;
|
||||
TurtleParser _TurtleParser;
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Util/Triple.h"
|
||||
#include "TurtleParser.h"
|
||||
public:
|
||||
static const int TRIPLE_NUM_PER_GROUP = 10 * 1000 * 1000;
|
||||
|
||||
using namespace std;
|
||||
|
||||
class RDFParser
|
||||
{
|
||||
private:
|
||||
stringstream _sin;
|
||||
TurtleParser _TurtleParser;
|
||||
|
||||
public:
|
||||
static const int TRIPLE_NUM_PER_GROUP = 10 * 1000 * 1000;
|
||||
|
||||
//for parseString
|
||||
RDFParser():_TurtleParser(_sin) {}
|
||||
|
||||
//for parseFile
|
||||
RDFParser(ifstream& _fin):_TurtleParser(_fin) {}
|
||||
|
||||
string parseFile(TripleWithObjType* _triple_array, int& _triple_num);
|
||||
string parseString(string _str, TripleWithObjType* _triple_array, int& _triple_num);
|
||||
};
|
||||
//for parseString
|
||||
RDFParser():_TurtleParser(_sin){}
|
||||
|
||||
//for parseFile
|
||||
RDFParser(ifstream& _fin):_TurtleParser(_fin){}
|
||||
|
||||
string parseFile(TripleWithObjType* _triple_array, int& _triple_num);
|
||||
string parseString(string _str, TripleWithObjType* _triple_array, int& _triple_num);
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "TurtleParser.h"
|
||||
#include <sstream>
|
||||
//---------------------------------------------------------------------------
|
||||
// RDF-3X
|
||||
// (c) 2008 Thomas Neumann. Web site: http://www.mpi-inf.mpg.de/~neumann/rdf3x
|
||||
|
|
|
@ -11,7 +11,10 @@
|
|||
// San Francisco, California, 94105, USA.
|
||||
//---------------------------------------------------------------------------
|
||||
#include "Type.h"
|
||||
#include "../Util/Util.h"
|
||||
#include <istream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
//---------------------------------------------------------------------------
|
||||
/// Parse a turtle file
|
||||
class TurtleParser
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,154 +1,71 @@
|
|||
/*=============================================================================
|
||||
# Filename: BasicQuery.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-31 19:18
|
||||
# Description: originally written by liyouhuan, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* basicQuery.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef _QUERY_BASICQUERY_H
|
||||
#define _QUERY_BASICQUERY_H
|
||||
#ifndef BASICQUERY_H_
|
||||
#define BASICQUERY_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Util/Triple.h"
|
||||
#include<iostream>
|
||||
#include<string.h>
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<map>
|
||||
#include<set>
|
||||
#include<vector>
|
||||
#include<algorithm>
|
||||
#include "../Triple/Triple.h"
|
||||
#include "../Signature/Signature.h"
|
||||
#include "../KVstore/KVstore.h"
|
||||
#include "IDList.h"
|
||||
#include<sstream>
|
||||
|
||||
//NOTICE: the query graph must be linked
|
||||
//var_id == -1: constant(string), entity or literal
|
||||
//All constants should be dealt with before joining tables!
|
||||
//A var in query can be subject or object, and both. Once in subject,
|
||||
//it cannot contain literal, while in object it may contain entity,
|
||||
//literal, or both
|
||||
//The vars not in join process are also encoded, so not -1
|
||||
//
|
||||
//a subject cannot be literal, but an object can be entity or literal
|
||||
//not supported: ?v1 and this is a predicate
|
||||
//pre_id == -1: the query graph is not valid and the result should be empty
|
||||
//
|
||||
//DEFINE:
|
||||
//literal variable - no edge out(only occur in objects)
|
||||
//(after we retrieve all candidates from vstree, only entities are considered; the vars
|
||||
//which are only present in objects may contain literals, so we must mark this!)
|
||||
//free literal variable - a literal variable and all its neighbor id != -1
|
||||
//(i.e. no constant neighbor which will restrict this variable, otherwise, we can acquire
|
||||
//this var's can_list by querying in kvstore according to the constant and pre)
|
||||
using namespace std;
|
||||
|
||||
//It is ok for var in select to be free var, but this can not be used as start point.
|
||||
//(we assume candidates of the start point is all ok and then search deeply)
|
||||
//However, we can always find a start point because the vars cannot all occur only in objects!
|
||||
//(otherwise, no edge in query graph)
|
||||
//
|
||||
//What is more, some graphs will be regarded as not-connected, such as:
|
||||
//A-c0-B, c0 is a constant, we should do a A x B here!
|
||||
//two-part-matching, ABC and c1c2, each node connects with this two constants.
|
||||
//(edge maybe different)
|
||||
|
||||
//+---------------------------------------------------------------------------+
|
||||
//|elements in BasicQuery(all are strings) |
|
||||
//|| |
|
||||
//|+---constants(all need to be dealed before join) |
|
||||
//|| | |
|
||||
//|| +---literals(quoted in "") |
|
||||
//|| | graph_var_num, the num of vars to join |
|
||||
//|| +---entities(included in <>, prefix is allowed) | |
|
||||
//|| | |
|
||||
//|+---variables(all begin with '?') | |
|
||||
//| | | |
|
||||
//| +---selected vars(the former select_var_num ones) <<<<<<<<<<<<<<<<<<<+ |
|
||||
//| | | |
|
||||
//| +---not selected vars | |
|
||||
//| | | |
|
||||
//| +---degree > 1 <<<<<<<<<<<as bridge<<<<<<<<<<<<<<<<<<<<<<<<<<+ |
|
||||
//| | | |
|
||||
//| | +---exist in subjects(cannot be literal) |
|
||||
//| | | |
|
||||
//| | +---just in objects(all edges in, may include literals) |
|
||||
//| | | |
|
||||
//| | +---free(all neighbors are vars) |
|
||||
//| | | |
|
||||
//| | +---not-free(exist constant neighbors) |
|
||||
//| | |
|
||||
//| +---degree == 1(handled after join) |
|
||||
//| | |
|
||||
//| +---subject |
|
||||
//| | |
|
||||
//| +---object |
|
||||
//+---------------------------------------------------------------------------+
|
||||
|
||||
//Information record for one predicate variable of a basic query
//(BasicQuery keeps these in its vector<PreVar> pre_var member).
//All members are public; the constructors only set name and selected,
//triples always starts out empty.
class PreVar
|
||||
{
|
||||
public:
|
||||
//name of the predicate variable
string name;
|
||||
//NOTE(review): presumably the ids of the triples in which this variable
//occurs - confirm against the code that fills it
vector<int> triples;
|
||||
//NOTE(review): presumably true when the variable is in the select clause - confirm
bool selected;
|
||||
//default: unnamed variable, not selected
PreVar()
|
||||
{
|
||||
this->selected = false;
|
||||
}
|
||||
//named variable, not selected
PreVar(string _name)
|
||||
{
|
||||
this->name = _name;
|
||||
this->selected = false;
|
||||
}
|
||||
//named variable whose selected flag is given by _flag
PreVar(string _name, bool _flag)
|
||||
{
|
||||
this->name = _name;
|
||||
this->selected = _flag;
|
||||
}
|
||||
};
|
||||
|
||||
class BasicQuery
|
||||
{
|
||||
class BasicQuery{
|
||||
private:
|
||||
vector<string> option_vs;
|
||||
vector<Triple> triple_vt;
|
||||
// mapping from variables' name to their assigned id
|
||||
/* mapping from variables' name to their assigned id. */
|
||||
map<std::string, int> var_str2id;
|
||||
// record each tuple's(subject, predicate, object) number of occurrences in this BasicQuery
|
||||
/* record each tuple's(subject, predicate, object) number of occurrences in this BasicQuery. */
|
||||
map<std::string, int> tuple2freq;
|
||||
map<std::string, int> var_not_in_select;
|
||||
|
||||
// id < select_var_num means in select
|
||||
int select_var_num;
|
||||
|
||||
// var_num is different from that in SPARQLquery
|
||||
// because there are some variable not in select
|
||||
int graph_var_num;
|
||||
int total_var_num;
|
||||
int retrieve_var_num;
|
||||
/* var_num is different from that in SPARQLquery
|
||||
* because there are some variable not in select */
|
||||
int graph_var_num;
|
||||
string* var_name;
|
||||
IDList* candidate_list;
|
||||
vector<int*> result_list;
|
||||
int* var_degree;
|
||||
int* var_degree;
|
||||
|
||||
//whether has added the variable's literal candidate
|
||||
//bool* is_literal_candidate_added;
|
||||
bool* ready;
|
||||
//if need to be retrieved by vstree or generate when join(first is graph var)
|
||||
bool* need_retrieve;
|
||||
/* whether has added the variable's literal candidate */
|
||||
bool* is_literal_candidate_added;
|
||||
|
||||
char encode_method;
|
||||
|
||||
// edge_id[var_id][i] : the line id of the i-th edge of the var
|
||||
/* edge_id[var_id][i] : the line id of the i-th edge of the var */
|
||||
int** edge_id;
|
||||
|
||||
// edge_nei_id[var_id][i] : the neighbor id of the i-th edge of the var
|
||||
/* edge_nei_id[var_id][i] : the neighbor id of the i-th edge of the var */
|
||||
int** edge_nei_id;
|
||||
|
||||
// edge_pre_id[var_id][i] : the preID of the i-th edge of the var
|
||||
/* edge_pre_id[var_id][i] : the preID of the i-th edge of the var */
|
||||
int** edge_pre_id;
|
||||
|
||||
// denote the type of edge, assigned with
|
||||
// BasicQuery::IN or BasicQuery::OUT
|
||||
// edge_type[var_id][i]
|
||||
/* denote the type of edge, assigned with
|
||||
* BasicQuery::IN or BasicQuery::OUT
|
||||
* edge_type[var_id][i] */
|
||||
char** edge_type;
|
||||
|
||||
EntityBitSet* var_sig;
|
||||
|
||||
// BETTER:edge sig is of little importance
|
||||
// edge_sig[sub_id][obj_id]
|
||||
/* edge_sig[sub_id][obj_id] */
|
||||
EdgeBitSet** edge_sig;
|
||||
|
||||
void addInVarNotInSelect();
|
||||
|
@ -157,127 +74,112 @@ private:
|
|||
void initial();
|
||||
void null_initial();
|
||||
|
||||
void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
|
||||
void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
|
||||
|
||||
//infos for predicate variables
|
||||
vector<PreVar> pre_var;
|
||||
|
||||
public:
|
||||
static const char EDGE_IN = 'i';
|
||||
static const char EDGE_OUT= 'o';
|
||||
static const int MAX_VAR_NUM = 10;
|
||||
static const int MAX_PRE_VAR_NUM = 10;
|
||||
static const char NOT_JUST_SELECT = 'a';
|
||||
static const char SELECT_VAR = 's';
|
||||
|
||||
// _query is a SPARQL query string
|
||||
/* _query is a SPARQL query string */
|
||||
BasicQuery(const string _query="");
|
||||
~BasicQuery();
|
||||
void clear();
|
||||
|
||||
//get the number of variables which are in join
|
||||
/* get the number of variables */
|
||||
int getVarNum();
|
||||
|
||||
//get the number of selected variables
|
||||
int getSelectVarNum();
|
||||
|
||||
//get the total number of variables
|
||||
int getTotalVarNum();
|
||||
|
||||
//get the retrieved number of variables
|
||||
int getRetrievedVarNum();
|
||||
|
||||
// get the name of _var in the query graph
|
||||
/* get the name of _var in the query graph */
|
||||
std::string getVarName(int _var);
|
||||
|
||||
// get triples number, also sentences number
|
||||
/* get triples number, also sentences number */
|
||||
int getTripleNum();
|
||||
|
||||
int getIDByVarName(const string& _name);
|
||||
|
||||
std::string to_str();
|
||||
|
||||
|
||||
// get the ID of the i-th triple
|
||||
/* get the ID of the i-th triple */
|
||||
const Triple& getTriple(int _i_th_triple);
|
||||
|
||||
// get the ID of the i-th edge of _var
|
||||
/* get the ID of the i-th edge of _var */
|
||||
int getEdgeID(int _var, int _i_th_edge);
|
||||
|
||||
// get the ID of var, where the i-th edge of _var points to
|
||||
/* get the ID of the var that the i-th edge of _var points to */
|
||||
int getEdgeNeighborID(int _var, int _i_th_edge);
|
||||
|
||||
// get the preID of the i-th edge of _var
|
||||
/* get the preID of the i-th edge of _var */
|
||||
int getEdgePreID(int _var, int _i_th_edge);
|
||||
|
||||
// get the type of the i-th edge of _var
|
||||
/* get the type of the i-th edge of _var */
|
||||
char getEdgeType(int _var, int _i_th_edge);
|
||||
|
||||
//get the degree of _var in the query graph
|
||||
/* get the degree of _var in the query graph */
|
||||
int getVarDegree(int _var);
|
||||
|
||||
//get the index of edge between two var ids
|
||||
int getEdgeIndex(int _id0, int _id);
|
||||
|
||||
/* */
|
||||
const EntityBitSet& getVarBitSet(int _i)const;
|
||||
|
||||
// get the candidate list of _var in the query graph
|
||||
/* get the candidate list of _var in the query graph */
|
||||
IDList& getCandidateList(int _var);
|
||||
|
||||
int getCandidateSize(int _var);
|
||||
|
||||
// get the result list of _var in the query graph
|
||||
/* get the result list of _var in the query graph */
|
||||
vector<int*>& getResultList();
|
||||
vector<int*>* getResultListPointer();
|
||||
|
||||
// get the entity signature of _var in the query graph
|
||||
/* get the entity signature of _var in the query graph */
|
||||
const EntityBitSet& getEntitySignature(int _var);
|
||||
|
||||
// check whether the i-th edge of _var is IN edge
|
||||
/* check whether the i-th edge of _var is IN edge */
|
||||
bool isInEdge(int _var, int _i_th_edge)const;
|
||||
|
||||
// check whether the i-th edge of _var is OUT edge
|
||||
/* check whether the i-th edge of _var is OUT edge */
|
||||
bool isOutEdge(int _var, int _i_th_edge)const;
|
||||
|
||||
bool isOneDegreeNotJoinVar(std::string& _not_select_var);
|
||||
bool isOneDegreeNotSelectVar(std::string& _not_select_var);
|
||||
|
||||
// check whether _var may include some literal results
|
||||
/* check whether _var may include some literal results */
|
||||
bool isLiteralVariable(int _var);
|
||||
// check whether _var is literal variable and do not have any entity neighbors
|
||||
/* check whether _var is literal variable and do not have any entity neighbors */
|
||||
bool isFreeLiteralVariable(int _var);
|
||||
|
||||
// check whether has added _var's literal candidates
|
||||
//bool isAddedLiteralCandidate(int _var);
|
||||
/* check whether has added _var's literal candidates */
|
||||
bool isAddedLiteralCandidate(int _var);
|
||||
|
||||
// set _var's literal candidates has been added
|
||||
//void setAddedLiteralCandidate(int _var);
|
||||
/* set _var's literal candidates has been added */
|
||||
void setAddedLiteralCandidate(int _var);
|
||||
|
||||
//check if need to be retrieved
|
||||
bool if_need_retrieve(int _var);
|
||||
bool isSatelliteInJoin(int _var);
|
||||
|
||||
//if the candidates of var(in join) is all ready
|
||||
bool isReady(int _var) const;
|
||||
void setReady(int _var);
|
||||
private:
|
||||
void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
|
||||
void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
|
||||
|
||||
// encode relative signature data of the query graph
|
||||
bool encodeBasicQuery(KVstore* _p_kvstore, const std::vector<std::string>& _query_var);
|
||||
struct ResultCmp;
|
||||
struct ResultEqual;
|
||||
|
||||
unsigned getPreVarNum() const;
|
||||
const PreVar& getPreVarByID(unsigned) const;
|
||||
//int getIDByPreVarName(const std::string& _name) const;
|
||||
int getPreVarID(const string& _name) const;
|
||||
public:
|
||||
|
||||
/* encode relative signature data of the query graph */
|
||||
void encodeBasicQuery(KVstore* _p_kvstore, const std::vector<std::string>& _query_var);
|
||||
|
||||
/* add triple */
|
||||
void addTriple(const Triple& _triple);
|
||||
|
||||
/* print whole Basic query */
|
||||
void print(ostream& _out_stream);
|
||||
|
||||
int getVarID_MinCandidateList();
|
||||
int getVarID_MaxCandidateList();
|
||||
int getVarID_FirstProcessWhenJoin();
|
||||
|
||||
// deprecated.
|
||||
// static int cmp_result(const void* _a, const void* _b);
|
||||
bool dupRemoval_invalidRemoval();
|
||||
|
||||
std::string candidate_str();
|
||||
std::string result_str();
|
||||
std::string triple_str();
|
||||
};
|
||||
|
||||
#endif //_QUERY_BASICQUERY_H
|
||||
|
||||
#endif /* BASICQUERY_H_ */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,146 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: GeneralEvaluation.h
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-03-02 20:33
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _QUERY_GENERALEVALUATION_H
|
||||
#define _QUERY_GENERALEVALUATION_H
|
||||
|
||||
//#include "../Database/Database.h"
|
||||
#include "SPARQLquery.h"
|
||||
#include "../VSTree/VSTree.h"
|
||||
#include "../Database/Join.h"
|
||||
#include "../Database/Strategy.h"
|
||||
#include "../KVstore/KVstore.h"
|
||||
#include "../Query/ResultSet.h"
|
||||
#include "../Util/Util.h"
|
||||
#include "../Parser/QueryParser.h"
|
||||
#include "QueryTree.h"
|
||||
#include "Varset.h"
|
||||
#include "RegexExpression.h"
|
||||
#include "ResultFilter.h"
|
||||
|
||||
//Evaluation driver for a SPARQL query: owns the parser, the parsed query tree,
//the SPARQLquery and a ResultFilter, and works on a VSTree, a KVstore and a
//ResultSet supplied by the caller at construction.
//The nested TempResult / TempResultSet classes hold intermediate rows and
//declare the join/optional/union/minus/distinct/filter operations on them.
//NOTE(review): only declarations are visible here; the behaviour of the
//member functions must be confirmed in the corresponding .cpp file.
class GeneralEvaluation
|
||||
{
|
||||
private:
|
||||
QueryParser query_parser;
|
||||
QueryTree query_tree;
|
||||
SPARQLquery sparql_query;
|
||||
std::vector <Varset> sparql_query_varset;
|
||||
//not owned as far as this header shows: plain pointers copied from the constructor arguments
VSTree *vstree;
|
||||
KVstore *kvstore;
|
||||
//reference bound in the constructor; the referenced ResultSet must outlive this object
ResultSet &result_set;
|
||||
ResultFilter result_filter;
|
||||
bool handle(SPARQLquery&);
|
||||
|
||||
public:
|
||||
//stores the two pointers and binds result_set; the other members are default-constructed
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, ResultSet &_result_set):
|
||||
vstree(_vstree), kvstore(_kvstore), result_set(_result_set){}
|
||||
|
||||
std::vector<std::vector<std::string> > getSPARQLQueryVarset();
|
||||
|
||||
void doQuery(const std::string &_query);
|
||||
bool parseQuery(const std::string &_query);
|
||||
|
||||
void getBasicQuery(QueryTree::GroupPattern &grouppattern);
|
||||
|
||||
//forward declaration: the class is defined further below but is already
//needed by the doFilter/matchFilterTree signatures of TempResult
class FilterExistsGroupPatternResultSetRecord;
|
||||
|
||||
//one intermediate result table: a Varset plus a vector of int* rows
//NOTE(review): presumably each int* row has one entry per variable in var - confirm
class TempResult
|
||||
{
|
||||
public:
|
||||
Varset var;
|
||||
std::vector<int*> res;
|
||||
|
||||
void release();
|
||||
|
||||
static int compareFunc(int *a, std::vector<int> &p, int *b, std::vector<int> &q);
|
||||
void sort(int l, int r, std::vector<int> &p);
|
||||
int findLeftBounder(std::vector<int> &p, int *b, std::vector<int> &q);
|
||||
int findRightBounder(std::vector<int> &p, int *b, std::vector<int> &q);
|
||||
|
||||
void doJoin(TempResult &x, TempResult &r);
|
||||
void doOptional(std::vector<bool> &binding, TempResult &x, TempResult &rn, TempResult &ra, bool add_no_binding);
|
||||
void doUnion(TempResult &x, TempResult &rt, TempResult &rx);
|
||||
void doMinus(TempResult &x, TempResult &r);
|
||||
void doDistinct(TempResult &r);
|
||||
|
||||
void mapFilterTree2Varset(QueryTree::GroupPattern::FilterTreeNode& filter, Varset &v);
|
||||
void doFilter(QueryTree::GroupPattern::FilterTreeNode &filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, TempResult &r, KVstore *kvstore);
|
||||
void getFilterString(int* x, QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild &child, string &str, KVstore *kvstore);
|
||||
bool matchFilterTree(int* x, QueryTree::GroupPattern::FilterTreeNode& filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, KVstore *kvstore);
|
||||
|
||||
void print();
|
||||
};
|
||||
|
||||
//a collection of TempResult tables with set-level versions of the same operations
class TempResultSet
|
||||
{
|
||||
public:
|
||||
std::vector<TempResult> results;
|
||||
|
||||
void release();
|
||||
|
||||
int findCompatibleResult(Varset &_varset);
|
||||
|
||||
void doJoin(TempResultSet &x, TempResultSet &r);
|
||||
void doOptional(TempResultSet &x, TempResultSet &r);
|
||||
void doUnion(TempResultSet &x, TempResultSet &r);
|
||||
void doMinus(TempResultSet &x, TempResultSet &r);
|
||||
void doDistinct(Varset &projection, TempResultSet &r);
|
||||
|
||||
void doFilter(QueryTree::GroupPattern::FilterTreeNode& filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, TempResultSet &r, KVstore *kvstore);
|
||||
|
||||
void print();
|
||||
};
|
||||
|
||||
//one step of the evaluation plan: a type tag plus an untyped payload pointer
//NOTE(review): the meaning of each type value and what p points to for it
//are not visible in this header - confirm in the implementation
class EvaluationUnit
|
||||
{
|
||||
private:
|
||||
char type;
|
||||
void * p;
|
||||
public:
|
||||
EvaluationUnit(char _type, void *_p = NULL):type(_type), p(_p){}
|
||||
char getType()
|
||||
{ return type; }
|
||||
void * getPointer()
|
||||
{ return p; }
|
||||
};
|
||||
|
||||
std::vector<EvaluationUnit> semantic_evaluation_plan;
|
||||
|
||||
void generateEvaluationPlan(QueryTree::GroupPattern &grouppattern);
|
||||
void dfsJoinableResultGraph(int x, vector < pair<char, int> > &node_info, vector < vector<int> > &edge, QueryTree::GroupPattern &grouppattern);
|
||||
|
||||
std::stack<TempResultSet*> semantic_evaluation_result_stack;
|
||||
|
||||
//record type for FILTER EXISTS group patterns; the single instance
//filter_exists_grouppattern_resultset_record is declared together with the class
class FilterExistsGroupPatternResultSetRecord
|
||||
{
|
||||
public:
|
||||
std::vector<TempResultSet*> resultset;
|
||||
std::vector< std::vector<Varset> > common;
|
||||
std::vector< std::vector< std::pair< std::vector<int>, std::vector<int> > > > common2resultset;
|
||||
} filter_exists_grouppattern_resultset_record;
|
||||
|
||||
int countFilterExistsGroupPattern(QueryTree::GroupPattern::FilterTreeNode& filter);
|
||||
void doEvaluationPlan();
|
||||
|
||||
//one entry of expansion_evaluation_stack: a group pattern, its SPARQLquery and a result pointer
class ExpansionEvaluationStackUnit
|
||||
{
|
||||
public:
|
||||
QueryTree::GroupPattern grouppattern;
|
||||
SPARQLquery sparql_query;
|
||||
TempResultSet* result;
|
||||
};
|
||||
std::vector <ExpansionEvaluationStackUnit> expansion_evaluation_stack;
|
||||
|
||||
bool expanseFirstOuterUnionGroupPattern(QueryTree::GroupPattern &grouppattern, std::deque<QueryTree::GroupPattern> &queue);
|
||||
void queryRewriteEncodeRetrieveJoin(int dep, ResultFilter &result_filter);
|
||||
|
||||
void getFinalResult(ResultSet& result_str);
|
||||
};
|
||||
|
||||
#endif // _QUERY_GENERALEVALUATION_H
|
||||
|
338
Query/IDList.cpp
338
Query/IDList.cpp
|
@ -1,13 +1,14 @@
|
|||
/*=============================================================================
|
||||
# Filename: IDList.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-05-08 12:44
|
||||
# Description: originally written by liyouhuan, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* IDList.cpp
|
||||
*
|
||||
* Created on: 2014-7-2
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#include "IDList.h"
|
||||
|
||||
#include<sstream>
|
||||
#include "../util/util.h"
|
||||
#include <algorithm>
|
||||
using namespace std;
|
||||
|
||||
IDList::IDList()
|
||||
|
@ -15,9 +16,10 @@ IDList::IDList()
|
|||
this->id_list.clear();
|
||||
}
|
||||
|
||||
//return the _i-th id of the list; if _i is out of range, return -1
|
||||
int
|
||||
IDList::getID(int _i)const
|
||||
/* return the _i-th id of the list
|
||||
* if _i exceeds, return -1;
|
||||
* */
|
||||
int IDList::getID(int _i)const
|
||||
{
|
||||
if(this->size() > _i)
|
||||
{
|
||||
|
@ -26,32 +28,23 @@ IDList::getID(int _i)const
|
|||
return -1;
|
||||
}
|
||||
|
||||
bool
|
||||
IDList::addID(int _id)
|
||||
bool IDList::addID(int _id)
|
||||
{
|
||||
//a check for duplicate case will be more reliable
|
||||
/* a check for duplicate case will be more reliable */
|
||||
this->id_list.push_back(_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::size()const
|
||||
int IDList::size()const
|
||||
{
|
||||
return this->id_list.size();
|
||||
}
|
||||
|
||||
bool
|
||||
IDList::empty()const
|
||||
{
|
||||
return this->id_list.size() == 0;
|
||||
}
|
||||
|
||||
bool
|
||||
IDList::isExistID(int _id)const
|
||||
bool IDList::isExistID(int _id)const
|
||||
{
|
||||
// naive implementation of searching(linear search).
|
||||
// you can use binary search when the id list is sorted, if necessary.
|
||||
for(unsigned i = 0; i < this->id_list.size(); i++)
|
||||
for (int i=0;i<this->id_list.size();i++)
|
||||
{
|
||||
if (this->id_list[i] == _id)
|
||||
{
|
||||
|
@ -62,15 +55,13 @@ IDList::isExistID(int _id)const
|
|||
return false;
|
||||
}
|
||||
|
||||
const vector<int>*
|
||||
IDList::getList()const
|
||||
const std::vector<int>* IDList::getList()const
|
||||
{
|
||||
return &(this->id_list);
|
||||
}
|
||||
|
||||
|
||||
int&
|
||||
IDList::operator[](const int& _i)
|
||||
int& IDList::operator[](const int& _i)
|
||||
{
|
||||
if(this->size() > _i)
|
||||
{
|
||||
|
@ -79,157 +70,69 @@ IDList::operator[](const int& _i)
|
|||
return id_list[0];
|
||||
}
|
||||
|
||||
string
|
||||
IDList::to_str()
|
||||
std::string IDList::to_str()
|
||||
{
|
||||
std::stringstream _ss;
|
||||
_ss << "size=" << this->id_list.size() << "";
|
||||
for(unsigned i = 0; i < this->id_list.size(); i ++)
|
||||
for(int i = 0; i < this->id_list.size(); i ++)
|
||||
{
|
||||
_ss << "\t[" << this->id_list[i] << "]";
|
||||
}
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
int
|
||||
IDList::sort()
|
||||
int IDList::sort()
|
||||
{
|
||||
std::sort(id_list.begin(),id_list.end());
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
IDList::clear()
|
||||
void IDList::clear()
|
||||
{
|
||||
this->id_list.clear();
|
||||
}
|
||||
|
||||
void
|
||||
IDList::copy(const vector<int>& _new_idlist)
|
||||
int IDList::intersectList(const int* _id_list, int _list_len)
|
||||
{
|
||||
this->id_list = _new_idlist;
|
||||
}
|
||||
|
||||
void
|
||||
IDList::copy(const IDList* _new_idlist)
|
||||
{
|
||||
this->id_list = *(_new_idlist->getList());
|
||||
}
|
||||
|
||||
int
|
||||
IDList::intersectList(const int* _id_list, int _list_len)
|
||||
{
|
||||
if(_id_list == NULL || _list_len == 0)
|
||||
int id_i = 0;
|
||||
int index_move_forward = 0;
|
||||
std::vector<int>::iterator it = this->id_list.begin();
|
||||
while(it != (this->id_list).end())
|
||||
{
|
||||
int remove_number = this->id_list.size();
|
||||
this->id_list.clear();
|
||||
return remove_number;
|
||||
}
|
||||
|
||||
//when size is almost the same, intersect O(n)
|
||||
//when one size is small ratio, search in the larger one O(mlogn)
|
||||
//
|
||||
//n>0 m=nk(0<k<1)
|
||||
//compare n(k+1) and n*k*log(n): they are equal at k0 = log_{n/2}(2), which requires n>2
|
||||
//k<=k0 binary search; k>k0 intersect
|
||||
int method = -1; //0: intersect 1: search in vector 2: search in int*
|
||||
int n = this->id_list.size();
|
||||
double k = 0;
|
||||
if(n < _list_len)
|
||||
{
|
||||
k = (double)n / (double)_list_len;
|
||||
n = _list_len;
|
||||
method = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
k = (double)_list_len / (double)n;
|
||||
method = 1;
|
||||
}
|
||||
if(n <= 2)
|
||||
method = 0;
|
||||
else
|
||||
{
|
||||
double limit = Util::logarithm(n/2, 2);
|
||||
if(k > limit)
|
||||
method = 0;
|
||||
}
|
||||
|
||||
int remove_number = 0;
|
||||
switch(method)
|
||||
{
|
||||
case 0:
|
||||
{ //this bracket is needed if vars are defined in case
|
||||
int id_i = 0;
|
||||
int index_move_forward = 0;
|
||||
vector<int>::iterator it = this->id_list.begin();
|
||||
while(it != (this->id_list).end())
|
||||
int can_id = *it;
|
||||
while( (id_i < _list_len) && (_id_list[id_i] < can_id) )
|
||||
{
|
||||
int can_id = *it;
|
||||
while((id_i < _list_len) && (_id_list[id_i] < can_id))
|
||||
{
|
||||
id_i ++;
|
||||
}
|
||||
|
||||
if(id_i == _list_len)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if(can_id == _id_list[id_i])
|
||||
{
|
||||
(this->id_list)[index_move_forward] = can_id;
|
||||
index_move_forward ++;
|
||||
id_i ++;
|
||||
}
|
||||
|
||||
it ++;
|
||||
id_i ++;
|
||||
}
|
||||
remove_number = this->id_list.size() - index_move_forward;
|
||||
vector<int>::iterator new_end = this->id_list.begin() + index_move_forward;
|
||||
(this->id_list).erase(new_end, this->id_list.end());
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
vector<int> new_id_list;
|
||||
for(int i = 0; i < _list_len; ++i)
|
||||
|
||||
if(id_i == _list_len){
|
||||
break;
|
||||
}
|
||||
|
||||
if(can_id == _id_list[id_i])
|
||||
{
|
||||
if(Util::bsearch_vec_uporder(_id_list[i], this->getList()) != -1)
|
||||
new_id_list.push_back(_id_list[i]);
|
||||
(this->id_list)[index_move_forward] = can_id;
|
||||
index_move_forward ++;
|
||||
id_i ++;
|
||||
}
|
||||
this->id_list = new_id_list;
|
||||
remove_number = n - this->id_list.size();
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
vector<int> new_id_list;
|
||||
int m = this->id_list.size(), i;
|
||||
for(i = 0; i < m; ++i)
|
||||
{
|
||||
if(Util::bsearch_int_uporder(this->id_list[i], _id_list, _list_len) != -1)
|
||||
new_id_list.push_back(this->id_list[i]);
|
||||
}
|
||||
this->id_list = new_id_list;
|
||||
remove_number = m - this->id_list.size();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
cerr << "no such method in IDList::intersectList()" << endl;
|
||||
break;
|
||||
|
||||
it ++;
|
||||
}
|
||||
|
||||
int remove_number = this->id_list.size() - index_move_forward;
|
||||
std::vector<int>::iterator new_end =
|
||||
this->id_list.begin() + index_move_forward;
|
||||
|
||||
(this->id_list).erase(new_end, this->id_list.end());
|
||||
|
||||
return remove_number;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::intersectList(const IDList& _id_list)
|
||||
int IDList::intersectList(const IDList& _id_list)
|
||||
{
|
||||
// copy _id_list to the temp array first.
|
||||
int temp_list_len = _id_list.size();
|
||||
int* temp_list = new int[temp_list_len];
|
||||
//BETTER:not to copy, just achieve here
|
||||
for (int i = 0; i < temp_list_len; i ++)
|
||||
{
|
||||
temp_list[i] = _id_list.getID(i);
|
||||
|
@ -237,27 +140,12 @@ IDList::intersectList(const IDList& _id_list)
|
|||
|
||||
int remove_number = this->intersectList(temp_list, temp_list_len);
|
||||
delete []temp_list;
|
||||
|
||||
return remove_number;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
|
||||
int IDList::unionList(const int* _id_list, int _list_len)
|
||||
{
|
||||
if(_id_list == NULL || _list_len == 0)
|
||||
return 0;
|
||||
|
||||
if(only_literal)
|
||||
{
|
||||
//NOTICE:this means that the original is no literals and we need to add from a list(containing entities/literals)
|
||||
int k = 0;
|
||||
//NOTICE:literal id > entity id; the list is ordered
|
||||
for(; k < _list_len; ++k)
|
||||
if(Util::is_literal_ele(_id_list[k]))
|
||||
break;
|
||||
for(; k < _list_len; ++k)
|
||||
this->addID(_id_list[k]);
|
||||
return _list_len - k;
|
||||
}
|
||||
// O(n)
|
||||
int origin_size = (this->id_list).size();
|
||||
int* temp_list = new int[origin_size + _list_len];
|
||||
|
@ -344,129 +232,27 @@ IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
|
|||
*/
|
||||
}
|
||||
|
||||
int
|
||||
IDList::unionList(const IDList& _id_list, bool only_literal)
|
||||
int IDList::unionList(const IDList& _id_list)
|
||||
{
|
||||
// copy _id_list to the temp array first.
|
||||
int temp_list_len = _id_list.size();
|
||||
int* temp_list = new int[temp_list_len];
|
||||
//BETTER:not to copy, just achieve here
|
||||
for(int i = 0; i < temp_list_len; i ++)
|
||||
for (int i = 0; i < temp_list_len; i ++)
|
||||
{
|
||||
temp_list[i] = _id_list.getID(i);
|
||||
}
|
||||
int ret = this->unionList(temp_list, temp_list_len, only_literal);
|
||||
delete[] temp_list;
|
||||
return ret;
|
||||
|
||||
return this->unionList(temp_list, temp_list_len);
|
||||
}
|
||||
|
||||
IDList*
|
||||
IDList::intersect(const IDList& _id_list, const int* _list, int _len)
|
||||
int IDList::erase(int i)
|
||||
{
|
||||
IDList* p = new IDList;
|
||||
if(_list == NULL || _len == 0) //just copy _id_list
|
||||
{
|
||||
int size = _id_list.size();
|
||||
for(int i = 0; i < size; ++i)
|
||||
p->addID(_id_list.getID(i));
|
||||
return p;
|
||||
}
|
||||
|
||||
//when size is almost the same, intersect O(n)
|
||||
//when one size is small ratio, search in the larger one O(mlogn)
|
||||
//
|
||||
//n>0 m=nk(0<k<1)
|
||||
//compare n(k+1) and n*k*log(n): they are equal at k0 = log_{n/2}(2), which requires n>2
|
||||
//k<=k0 binary search; k>k0 intersect
|
||||
int method = -1; //0: intersect 1: search in vector 2: search in int*
|
||||
int n = _id_list.size();
|
||||
double k = 0;
|
||||
if(n < _len)
|
||||
{
|
||||
k = (double)n / (double)_len;
|
||||
n = _len;
|
||||
method = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
k = (double)_len / (double)n;
|
||||
method = 1;
|
||||
}
|
||||
if(n <= 2)
|
||||
method = 0;
|
||||
else
|
||||
{
|
||||
double limit = Util::logarithm(n/2, 2);
|
||||
if(k > limit)
|
||||
method = 0;
|
||||
}
|
||||
|
||||
int remove_number = 0;
|
||||
switch(method)
|
||||
{
|
||||
case 0:
|
||||
{ //this bracket is needed if vars are defined in case
|
||||
int id_i = 0;
|
||||
int num = _id_list.size();
|
||||
for(int i = 0; i < num; ++i)
|
||||
{
|
||||
int can_id = _id_list.getID(i);
|
||||
while((id_i < _len) && (_list[id_i] < can_id))
|
||||
{
|
||||
id_i ++;
|
||||
}
|
||||
|
||||
if(id_i == _len)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if(can_id == _list[id_i])
|
||||
{
|
||||
p->addID(can_id);
|
||||
id_i ++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
for(int i = 0; i < _len; ++i)
|
||||
{
|
||||
if(Util::bsearch_vec_uporder(_list[i], _id_list.getList()) != -1)
|
||||
p->addID(_list[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
int m = _id_list.size(), i;
|
||||
for(i = 0; i < m; ++i)
|
||||
{
|
||||
int t = _id_list.getID(i);
|
||||
if(Util::bsearch_int_uporder(t, _list, _len) != -1)
|
||||
p->addID(t);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
cerr << "no such method in IDList::intersectList()" << endl;
|
||||
break;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::erase(int i)
|
||||
{
|
||||
id_list.erase(id_list.begin()+i, id_list.end());
|
||||
id_list.erase(id_list.begin()+i,id_list.end());
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
IDList::bsearch_uporder(int _key)
|
||||
{
|
||||
return Util::bsearch_vec_uporder(_key, this->getList());
|
||||
}
|
||||
|
||||
int IDList::bsearch_uporder(int _key)
|
||||
{
|
||||
return util::bsearch_vec_uporder(_key, this->id_list);
|
||||
}
|
||||
|
|
|
@ -1,46 +1,40 @@
|
|||
/*=============================================================================
|
||||
# Filename: IDList.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-23 15:03
|
||||
# Description: originally written by liyouhuan, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* CandidateList.h
|
||||
*
|
||||
* Created on: 2014-7-2
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
#include<iostream>
|
||||
#include<vector>
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#ifndef IDLIST_H_
|
||||
#define IDLIST_H_
|
||||
|
||||
#ifndef _QUERY_IDLIST_H
|
||||
#define _QUERY_IDLIST_H
|
||||
|
||||
class IDList
|
||||
{
|
||||
class IDList{
|
||||
public:
|
||||
IDList();
|
||||
int getID(int _i)const;
|
||||
bool addID(int _id);
|
||||
|
||||
//check whether _id exists in this IDList.
|
||||
bool isExistID(int _id) const;
|
||||
int size() const;
|
||||
bool empty() const;
|
||||
/* check whether _id exists in this IDList. */
|
||||
bool isExistID(int _id)const;
|
||||
int size()const;
|
||||
const std::vector<int>* getList()const;
|
||||
int& operator[] (const int & _i);
|
||||
std::string to_str();
|
||||
int sort();
|
||||
void clear();
|
||||
void copy(const std::vector<int>& _new_idlist);
|
||||
void copy(const IDList* _new_idlist);
|
||||
|
||||
// intersect/union _id_list into this IDList; note that both lists must be sorted in ascending order before using these two functions.
|
||||
/* intersect/union _id_list into this IDList; note that both lists must be sorted in ascending order before using these two functions. */
|
||||
int intersectList(const int* _id_list, int _list_len);
|
||||
int intersectList(const IDList&);
|
||||
int unionList(const int* _id_list, int _list_len, bool only_literal=false);
|
||||
int unionList(const IDList&, bool only_literal=false);
|
||||
int unionList(const int* _id_list, int _list_len);
|
||||
int unionList(const IDList&);
|
||||
int bsearch_uporder(int _key);
|
||||
static IDList* intersect(const IDList&, const int*, int);
|
||||
private:
|
||||
std::vector<int> id_list;
|
||||
int erase(int i);
|
||||
};
|
||||
|
||||
#endif //_QUERY_IDLIST_H
|
||||
|
||||
#endif /* IDLIST_H_ */
|
||||
|
|
|
@ -1,489 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: QueryTree.cpp
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-03-02 20:35
|
||||
# Description: implement functions in QueryTree.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "QueryTree.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//Collect into varset every variable referenced by this filter node and,
//recursively, by the filter nodes nested below it.
//varset: output set, extended through Varset::addVar(); it is never cleared here.
void QueryTree::GroupPattern::FilterTreeNode::getVarset(Varset &varset)
|
||||
{
|
||||
for (int i = 0; i < (int)this->child.size(); i++)
|
||||
{
|
||||
//child of type 's' carries a string argument; only arguments starting with '?' are variables
if (this->child[i].type == 's' && this->child[i].arg[0] == '?')
|
||||
varset.addVar(this->child[i].arg);
|
||||
//child of type 't' carries a nested FilterTreeNode: descend into it
if (this->child[i].type == 't')
|
||||
this->child[i].node.getVarset(varset);
|
||||
}
|
||||
}
|
||||
|
||||
// Prints this filter expression to stdout.
//   exist_grouppatterns : group patterns referenced by "exists" nodes,
//                         indexed by exists_grouppattern_id
//   dep                 : indentation depth forwarded to GroupPattern::print
// "in" and "exists" have their own layout; every other node is printed as
// [prefix] "(" operand0 [operator] operand1 [", " operand2] ")".
void QueryTree::GroupPattern::FilterTreeNode::print(vector<GroupPattern> &exist_grouppatterns, int dep)
{
	const int child_num = (int)this->child.size();

	// Prefix operator / builtin name printed before the parenthesised operands.
	switch (this->type)
	{
		case Not_type:                 printf("!");           break;
		case Builtin_regex_type:       printf("regex");       break;
		case Builtin_lang_type:        printf("lang");        break;
		case Builtin_langmatches_type: printf("langmatches"); break;
		case Builtin_bound_type:       printf("bound");       break;
		default: break;
	}

	// x in (a , b , c): child 0 is the tested term, the rest is the list.
	if (this->type == Builtin_in_type)
	{
		if (this->child[0].type == 's')
			printf("%s", this->child[0].arg.c_str());

		printf(" in (");
		for (int idx = 1; idx < child_num; idx++)
		{
			if (idx > 1)
				printf(" , ");
			if (this->child[idx].type == 's')
				printf("%s", this->child[idx].arg.c_str());
		}
		printf(")");

		return;
	}

	// exists { ... }: the group pattern is stored outside the tree.
	if (this->type == Builtin_exists_type)
	{
		printf("exists");
		exist_grouppatterns[this->exists_grouppattern_id].print(dep);

		return;
	}

	printf("(");

	// first operand
	if (child_num >= 1)
	{
		FilterTreeChild &lhs = this->child[0];
		if (lhs.type == 's') printf("%s", lhs.arg.c_str());
		if (lhs.type == 't') lhs.node.print(exist_grouppatterns, dep);
	}

	// infix operator, or the argument separator of a two-argument builtin
	switch (this->type)
	{
		case Or_type:                  printf(" || "); break;
		case And_type:                 printf(" && "); break;
		case Equal_type:               printf(" = ");  break;
		case NotEqual_type:            printf(" != "); break;
		case Less_type:                printf(" < ");  break;
		case LessOrEqual_type:         printf(" <= "); break;
		case Greater_type:             printf(" > ");  break;
		case GreaterOrEqual_type:      printf(" >= "); break;
		case Builtin_regex_type:
		case Builtin_langmatches_type: printf(", ");   break;
		default: break;
	}

	// second operand
	if (child_num >= 2)
	{
		FilterTreeChild &rhs = this->child[1];
		if (rhs.type == 's') printf("%s", rhs.arg.c_str());
		if (rhs.type == 't') rhs.node.print(exist_grouppatterns, dep);
	}

	// optional third argument, printed only for regex (its flags string)
	if (child_num >= 3 && this->type == Builtin_regex_type && this->child[2].type == 's')
		printf(", %s", this->child[2].arg.c_str());

	printf(")");
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Appends a copy of `_pattern` to this group pattern's triple-pattern list.
void QueryTree::GroupPattern::addOnePattern(Pattern _pattern)
{
	patterns.push_back(_pattern);
}
|
||||
|
||||
void QueryTree::GroupPattern::addOneGroupUnion()
|
||||
{
|
||||
this->unions.push_back(GroupPatternUnions((int)this->patterns.size() - 1));
|
||||
}
|
||||
|
||||
void QueryTree::GroupPattern::addOneUnion()
|
||||
{
|
||||
int n = (int)this->unions.size();
|
||||
this->unions[n - 1].grouppattern_vec.push_back(GroupPattern());
|
||||
}
|
||||
|
||||
// Returns the last branch of the most recently opened union group.
// Precondition: that group exists and holds at least one branch.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastUnion()
{
	return this->unions.back().grouppattern_vec.back();
}
|
||||
|
||||
void QueryTree::GroupPattern::addOneOptionalOrMinus(char _type)
|
||||
{
|
||||
this->optionals.push_back(OptionalOrMinusGroupPattern((int)this->patterns.size() - 1, (int)this->unions.size() - 1, _type));
|
||||
}
|
||||
|
||||
// Returns the group pattern of the most recently added OPTIONAL/MINUS entry.
// Precondition: addOneOptionalOrMinus() has been called at least once.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastOptionalOrMinus()
{
	return this->optionals.back().grouppattern;
}
|
||||
|
||||
void QueryTree::GroupPattern::addOneFilterTree()
|
||||
{
|
||||
this->filters.push_back(FilterTreeRoot());
|
||||
this->filter_exists_grouppatterns.push_back(vector<GroupPattern>());
|
||||
}
|
||||
|
||||
// Returns the root node of the most recently added filter tree.
// Precondition: addOneFilterTree() has been called at least once.
QueryTree::GroupPattern::FilterTreeNode& QueryTree::GroupPattern::getLastFilterTree()
{
	return this->filters.back().root;
}
|
||||
|
||||
void QueryTree::GroupPattern::addOneExistsGroupPattern()
|
||||
{
|
||||
int n = (int)this->filter_exists_grouppatterns.size();
|
||||
this->filter_exists_grouppatterns[n - 1].push_back(GroupPattern());
|
||||
}
|
||||
|
||||
// Returns the last "exists" group pattern of the most recently added filter.
// Precondition: that filter exists and owns at least one "exists" pattern.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastExistsGroupPattern()
{
	return this->filter_exists_grouppatterns.back().back();
}
|
||||
|
||||
void QueryTree::GroupPattern::getVarset()
|
||||
{
|
||||
for (int i = 0; i < (int)this->patterns.size(); i++)
|
||||
{
|
||||
if (this->patterns[i].subject.value[0] == '?')
|
||||
this->patterns[i].varset.addVar(this->patterns[i].subject.value);
|
||||
if (this->patterns[i].object.value[0] == '?')
|
||||
this->patterns[i].varset.addVar(this->patterns[i].object.value);
|
||||
this->grouppattern_resultset_minimal_varset = this->grouppattern_resultset_minimal_varset + this->patterns[i].varset;
|
||||
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->patterns[i].varset;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)this->unions.size(); i++)
|
||||
{
|
||||
Varset minimal_varset;
|
||||
for (int j = 0; j < (int)this->unions[i].grouppattern_vec.size(); j++)
|
||||
{
|
||||
this->unions[i].grouppattern_vec[j].getVarset();
|
||||
if (j == 0) minimal_varset = minimal_varset + this->unions[i].grouppattern_vec[j].grouppattern_resultset_minimal_varset;
|
||||
else minimal_varset = minimal_varset * this->unions[i].grouppattern_vec[j].grouppattern_resultset_minimal_varset;
|
||||
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->unions[i].grouppattern_vec[j].grouppattern_resultset_maximal_varset;
|
||||
}
|
||||
this->grouppattern_resultset_minimal_varset = this->grouppattern_resultset_minimal_varset + minimal_varset;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)this->optionals.size(); i++)
|
||||
{
|
||||
this->optionals[i].grouppattern.getVarset();
|
||||
if (this->optionals[i].type == 'o')
|
||||
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->optionals[i].grouppattern.grouppattern_resultset_maximal_varset;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)this->filters.size(); i++)
|
||||
{
|
||||
this->filters[i].root.getVarset(this->filters[i].varset);
|
||||
}
|
||||
|
||||
for(int i = 0; i < (int)this->filter_exists_grouppatterns.size(); i++)
|
||||
for (int j = 0; j < (int)this->filter_exists_grouppatterns[i].size(); j++)
|
||||
{
|
||||
this->filter_exists_grouppatterns[i][j].getVarset();
|
||||
}
|
||||
}
|
||||
|
||||
bool QueryTree::GroupPattern::checkOnlyUnionOptionalFilterNoExists()
|
||||
{
|
||||
for (int i = 0; i < (int)this->unions.size(); i++)
|
||||
{
|
||||
for (int j = 0; j < (int)this->unions[i].grouppattern_vec.size(); j++)
|
||||
if (!this->unions[i].grouppattern_vec[j].checkOnlyUnionOptionalFilterNoExists())
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)this->optionals.size(); i++)
|
||||
{
|
||||
if (this->optionals[i].type != 'o')
|
||||
return false;
|
||||
if (!this->optionals[i].grouppattern.checkOnlyUnionOptionalFilterNoExists())
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)this->filter_exists_grouppatterns.size(); i++)
|
||||
if ((int)this->filter_exists_grouppatterns[i].size() != 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Walks the elements of this group pattern in their original order (triple
// patterns, union groups and OPTIONAL sub-patterns are interleaved using the
// recorded lastpattern / lastunions indices) and clears `check_condition` as
// soon as one of these holds:
//   - an OPTIONAL sub-pattern reports banned variables that already occurred;
//   - a triple pattern uses a variable that is currently banned;
//   - a filter uses a variable that did not occur in this group pattern.
// `occur` holds the variables seen so far, `ban` the variables that later
// triple patterns must not use.
// Returns the pair (occur varset, varset banned by this group pattern); when
// `check_condition` is already false on entry, two empty varsets are returned.
pair<Varset, Varset> QueryTree::GroupPattern::checkOptionalGroupPatternVarsAndSafeFilter(Varset occur , Varset ban, bool &check_condition)
{
	if (!check_condition)
		return make_pair(Varset(), Varset());

	const int pattern_num = (int)this->patterns.size();
	const int union_num = (int)this->unions.size();
	const int optional_num = (int)this->optionals.size();

	Varset this_ban;

	// cursors pointing at the next unprocessed element of each kind
	int next_pattern = 0, next_union = 0, next_optional = 0;

	while (check_condition && (next_pattern < pattern_num || next_union < union_num || next_optional < optional_num))
	{
		// an element is "due" when everything recorded as preceding it is done
		const bool optional_due = next_optional < optional_num
			&& this->optionals[next_optional].lastpattern == next_pattern - 1
			&& this->optionals[next_optional].lastunions == next_union - 1;
		const bool union_due = !optional_due
			&& next_union < union_num
			&& this->unions[next_union].lastpattern == next_pattern - 1;

		if (optional_due)
		{
			GroupPattern &sub_pattern = this->optionals[next_optional].grouppattern;

			// the sub-pattern starts with an empty occur set but inherits the ban set
			pair<Varset, Varset> sub_result = sub_pattern.checkOptionalGroupPatternVarsAndSafeFilter(Varset(), ban, check_condition);

			if (occur.hasCommonVar(sub_result.second))
				check_condition = false;

			// variables introduced only by the sub-pattern (computed before occur grows)
			Varset out = sub_pattern.grouppattern_resultset_maximal_varset - occur;

			occur = occur + sub_result.first;
			this_ban = this_ban + sub_result.second;
			this_ban = this_ban + out;
			ban = ban + this_ban;

			next_optional++;
		}
		else if (union_due)
		{
			vector<GroupPattern> &branches = this->unions[next_union].grouppattern_vec;
			Varset branches_occur, branches_ban;

			for (int i = 0; i < (int)branches.size(); i++)
			{
				pair<Varset, Varset> branch_result = branches[i].checkOptionalGroupPatternVarsAndSafeFilter(occur, ban, check_condition);

				// the first branch seeds the occur set, later ones are combined with '*'
				if (i == 0)
					branches_occur = branches_occur + branch_result.first;
				else
					branches_occur = branches_occur * branch_result.first;

				branches_ban = branches_ban + branch_result.second;
			}

			occur = occur + branches_occur;
			this_ban = this_ban + branches_ban;
			ban = ban + this_ban;

			next_union++;
		}
		else
		{
			// plain triple pattern
			Varset &triple_vars = this->patterns[next_pattern].varset;

			if (triple_vars.hasCommonVar(ban))
				check_condition = false;

			occur = occur + triple_vars;

			next_pattern++;
		}
	}

	// every filter may only use variables that occurred above
	for (int i = 0; i < (int)this->filters.size(); i++)
	{
		if (!this->filters[i].varset.belongTo(occur))
		{
			check_condition = false;
			break;
		}
	}

	return make_pair(occur, this_ban);
}
|
||||
|
||||
void QueryTree::GroupPattern::initPatternBlockid()
|
||||
{
|
||||
for (int i = 0; i < (int)this->patterns.size(); i++)
|
||||
this->pattern_blockid.push_back(i);
|
||||
}
|
||||
|
||||
int QueryTree::GroupPattern::getRootPatternBlockid(int x)
|
||||
{
|
||||
if (this->pattern_blockid[x] == x) return x;
|
||||
this->pattern_blockid[x] = getRootPatternBlockid(this->pattern_blockid[x]);
|
||||
return this->pattern_blockid[x];
|
||||
}
|
||||
|
||||
void QueryTree::GroupPattern::mergePatternBlockid(int x, int y)
|
||||
{
|
||||
int px = getRootPatternBlockid(x);
|
||||
int py = getRootPatternBlockid(y);
|
||||
this->pattern_blockid[px] = py;
|
||||
}
|
||||
|
||||
void QueryTree::GroupPattern::print(int dep)
|
||||
{
|
||||
for (int t = 0; t < dep; t++) printf("\t"); printf("{\n");
|
||||
|
||||
int lastpattern = -1, lastunions = -1, lastoptional = -1;
|
||||
while (lastpattern + 1 < (int)this->patterns.size() || lastunions + 1 < (int)this->unions.size() || lastoptional + 1 < (int)this->optionals.size())
|
||||
{
|
||||
if (lastoptional + 1 < (int)this->optionals.size() && this->optionals[lastoptional + 1].lastpattern == lastpattern && this->optionals[lastoptional + 1].lastunions == lastunions)
|
||||
//optional
|
||||
{
|
||||
for (int t = 0; t <= dep; t++) printf("\t");
|
||||
if (this->optionals[lastoptional + 1].type == 'o') printf("OPTIONAL\n");
|
||||
if (this->optionals[lastoptional + 1].type == 'm') printf("MINUS\n");
|
||||
|
||||
this->optionals[lastoptional + 1].grouppattern.print(dep + 1);
|
||||
lastoptional++;
|
||||
}
|
||||
else if (lastunions + 1 < (int)this->unions.size() && this->unions[lastunions + 1].lastpattern == lastpattern)
|
||||
//union
|
||||
{
|
||||
for (int i = 0; i < (int)this->unions[lastunions + 1].grouppattern_vec.size(); i++)
|
||||
{
|
||||
if (i != 0)
|
||||
{
|
||||
for (int t = 0; t <= dep; t++) printf("\t"); printf("UNION\n");
|
||||
}
|
||||
this->unions[lastunions + 1].grouppattern_vec[i].print(dep + 1);
|
||||
}
|
||||
lastunions++;
|
||||
}
|
||||
else
|
||||
//triple pattern
|
||||
{
|
||||
for (int t = 0; t <= dep; t++) printf("\t");
|
||||
printf("%s\t%s\t%s.\n", this->patterns[lastpattern + 1].subject.value.c_str(), this->patterns[lastpattern + 1].predicate.value.c_str(), this->patterns[lastpattern + 1].object.value.c_str());
|
||||
lastpattern++;
|
||||
}
|
||||
}
|
||||
//filter
|
||||
for (int i = 0; i < (int)this->filters.size(); i++)
|
||||
{
|
||||
for (int t = 0; t <= dep; t++) printf("\t"); printf("FILTER\t");
|
||||
this->filters[i].root.print(this->filter_exists_grouppatterns[i], dep + 1);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
for (int t = 0; t < dep; t++) printf("\t"); printf("}\n");
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Stores the form of the query (Select_Query or Ask_Query).
void QueryTree::setQueryForm(QueryForm _queryform)
{
	query_form = _queryform;
}
|
||||
|
||||
// Returns the stored form of the query (Select_Query or Ask_Query).
QueryTree::QueryForm QueryTree::getQueryForm()
{
	return query_form;
}
|
||||
|
||||
// Stores the projection modifier (e.g. Modifier_Distinct).
void QueryTree::setProjectionModifier(ProjectionModifier _projection_modifier)
{
	this->projection_modifier = _projection_modifier;
}
|
||||
|
||||
// Returns the stored projection modifier.
QueryTree::ProjectionModifier QueryTree::getProjectionModifier()
{
	return projection_modifier;
}
|
||||
|
||||
// Adds the variable name `_projection` to the projection varset.
void QueryTree::addProjectionVar(string _projection)
{
	Varset &projected = this->projection;
	projected.addVar(_projection);
}
|
||||
|
||||
int QueryTree::getProjectionNum()
|
||||
{
|
||||
return (int)this->projection.varset.size();
|
||||
}
|
||||
|
||||
// Gives mutable access to the projection varset.
Varset& QueryTree::getProjection()
{
	return projection;
}
|
||||
|
||||
void QueryTree::setProjectionAsterisk()
|
||||
{
|
||||
this->projection_asterisk = true;
|
||||
}
|
||||
|
||||
bool QueryTree::checkProjectionAsterisk()
|
||||
{
|
||||
return this->projection_asterisk;
|
||||
}
|
||||
|
||||
// Appends an ordering key on variable `_var`; `_descending` selects DESC
// instead of ASC.
void QueryTree::addOrder(string &_var, bool _descending)
{
	Order key(_var, _descending);
	order.push_back(key);
}
|
||||
|
||||
// Gives mutable access to the list of ordering keys, in insertion order.
vector<QueryTree::Order>& QueryTree::getOrder()
{
	return order;
}
|
||||
|
||||
void QueryTree::setOffset(int _offset)
|
||||
{
|
||||
this->offset = _offset;
|
||||
}
|
||||
|
||||
int QueryTree::getOffset()
|
||||
{
|
||||
return this->offset;
|
||||
}
|
||||
|
||||
void QueryTree::setLimit(int _limit)
|
||||
{
|
||||
this->limit = _limit;
|
||||
}
|
||||
|
||||
int QueryTree::getLimit()
|
||||
{
|
||||
return this->limit;
|
||||
}
|
||||
|
||||
// Gives mutable access to the top-level group pattern of the query.
QueryTree::GroupPattern& QueryTree::getGroupPattern()
{
	return grouppattern;
}
|
||||
|
||||
bool QueryTree::checkWellDesigned()
|
||||
{
|
||||
if (!this->getGroupPattern().checkOnlyUnionOptionalFilterNoExists())
|
||||
return false;
|
||||
|
||||
bool check_condition = true;
|
||||
this->getGroupPattern().checkOptionalGroupPatternVarsAndSafeFilter(Varset(), Varset(), check_condition);
|
||||
return check_condition;
|
||||
}
|
||||
|
||||
void QueryTree::print()
|
||||
{
|
||||
for (int j = 0; j < 80; j++) printf("="); printf("\n");
|
||||
|
||||
if (this->getQueryForm() == QueryTree::Select_Query)
|
||||
{
|
||||
printf("select");
|
||||
if (this->getProjectionModifier() == QueryTree::Modifier_Distinct)
|
||||
printf(" distinct");
|
||||
printf("\n");
|
||||
|
||||
printf("var is : \t");
|
||||
vector <string> &varvec = this->getProjection().varset;
|
||||
for (int i = 0; i < (int)varvec.size(); i++)
|
||||
printf("%s\t", varvec[i].c_str());
|
||||
if (this->checkProjectionAsterisk())
|
||||
printf("*");
|
||||
printf("\n");
|
||||
}
|
||||
else printf("ask\n");
|
||||
|
||||
this->getGroupPattern().print(0);
|
||||
|
||||
if ((int)this->getOrder().size() > 0)
|
||||
{
|
||||
printf("order by : \t");
|
||||
|
||||
vector<QueryTree::Order>&order = this->getOrder();
|
||||
for (int i = 0; i < (int)order.size(); i++)
|
||||
{
|
||||
if (!order[i].descending) printf("ASC(");
|
||||
else printf("DESC(");
|
||||
printf("%s) ", order[i].var.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
if (this->getOffset() != 0)
|
||||
printf("offset : %d\n", this->getOffset());
|
||||
if (this->getLimit() != -1)
|
||||
printf("limit : %d\n", this->getLimit());
|
||||
|
||||
for (int j = 0; j < 80; j++) printf("="); printf("\n");
|
||||
}
|
|
@ -1,200 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: QueryTree.h
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-03-02 20:35
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _QUERY_QUERYTREE_H
|
||||
#define _QUERY_QUERYTREE_H
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "Varset.h"
|
||||
|
||||
class QueryTree
|
||||
{
|
||||
public:
|
||||
QueryTree():
|
||||
query_form(Select_Query), projection_modifier(Modifier_None), projection_asterisk(false), offset(0), limit(-1){}
|
||||
|
||||
enum QueryForm {Select_Query, Ask_Query};
|
||||
enum ProjectionModifier {Modifier_None, Modifier_Distinct, Modifier_Reduced, Modifier_Count, Modifier_Duplicates};
|
||||
|
||||
class GroupPattern
|
||||
{
|
||||
public:
|
||||
class Pattern
|
||||
{
|
||||
public:
|
||||
class Element
|
||||
{
|
||||
public:
|
||||
/*
|
||||
enum Type { Variable, Literal, IRI };
|
||||
enum SubType { None, CustomLanguage, CustomType };
|
||||
Type type;
|
||||
SubType subType;
|
||||
std::string subTypeValue;
|
||||
*/
|
||||
std::string value;
|
||||
Element(const std::string& _value):
|
||||
value(_value){}
|
||||
};
|
||||
Element subject, predicate, object;
|
||||
Varset varset;
|
||||
Pattern(const Element _subject, const Element _predicate,const Element _object):subject(_subject), predicate(_predicate), object(_object){}
|
||||
};
|
||||
|
||||
|
||||
class GroupPatternUnions;
|
||||
class OptionalOrMinusGroupPattern;
|
||||
class FilterTreeNode;
|
||||
class FilterTreeRoot;
|
||||
|
||||
std::vector<Pattern> patterns;
|
||||
std::vector<GroupPatternUnions> unions;
|
||||
std::vector<OptionalOrMinusGroupPattern> optionals;
|
||||
|
||||
std::vector<FilterTreeRoot> filters;
|
||||
std::vector<std::vector<GroupPattern> > filter_exists_grouppatterns;
|
||||
|
||||
Varset grouppattern_resultset_minimal_varset, grouppattern_resultset_maximal_varset;
|
||||
|
||||
std::vector<int> pattern_blockid;
|
||||
|
||||
void addOnePattern(Pattern _pattern);
|
||||
|
||||
void addOneGroupUnion();
|
||||
void addOneUnion();
|
||||
GroupPattern& getLastUnion();
|
||||
|
||||
void addOneOptionalOrMinus(char _type);
|
||||
GroupPattern& getLastOptionalOrMinus();
|
||||
|
||||
void addOneFilterTree();
|
||||
FilterTreeNode& getLastFilterTree();
|
||||
void addOneExistsGroupPattern();
|
||||
GroupPattern& getLastExistsGroupPattern();
|
||||
|
||||
void getVarset();
|
||||
|
||||
bool checkOnlyUnionOptionalFilterNoExists();
|
||||
std::pair<Varset, Varset> checkOptionalGroupPatternVarsAndSafeFilter(Varset occur , Varset ban, bool &check_condition);
|
||||
|
||||
void initPatternBlockid();
|
||||
int getRootPatternBlockid(int x);
|
||||
void mergePatternBlockid(int x, int y);
|
||||
|
||||
void print(int dep);
|
||||
};
|
||||
|
||||
class GroupPattern::GroupPatternUnions
|
||||
{
|
||||
public:
|
||||
std::vector<GroupPattern> grouppattern_vec;
|
||||
int lastpattern;
|
||||
GroupPatternUnions(int _lastpattern):
|
||||
lastpattern(_lastpattern){}
|
||||
};
|
||||
|
||||
class GroupPattern::OptionalOrMinusGroupPattern
|
||||
{
|
||||
public:
|
||||
GroupPattern grouppattern;
|
||||
int lastpattern, lastunions;
|
||||
char type;
|
||||
OptionalOrMinusGroupPattern(int _lastpattern, int _lastunions, char _type):
|
||||
grouppattern(GroupPattern()), lastpattern(_lastpattern), lastunions(_lastunions), type(_type){}
|
||||
};
|
||||
|
||||
class GroupPattern::FilterTreeNode
|
||||
{
|
||||
public:
|
||||
enum FilterType
|
||||
{
|
||||
None_type, Or_type, And_type, Equal_type, NotEqual_type, Less_type, LessOrEqual_type, Greater_type, GreaterOrEqual_type,
|
||||
Plus_type, Minus_type, Mul_type, Div_type, Not_type, UnaryPlus_type, UnaryMinus_type, Literal_type, Variable_type, IRI_type,
|
||||
Function_type, ArgumentList_type,Builtin_str_type, Builtin_lang_type, Builtin_langmatches_type, Builtin_datatype_type, Builtin_bound_type,
|
||||
Builtin_sameterm_type,Builtin_isiri_type, Builtin_isblank_type, Builtin_isliteral_type, Builtin_regex_type, Builtin_in_type, Builtin_exists_type
|
||||
};
|
||||
FilterType type;
|
||||
|
||||
class FilterTreeChild;
|
||||
|
||||
std::vector<FilterTreeChild> child;
|
||||
int exists_grouppattern_id;
|
||||
|
||||
FilterTreeNode():
|
||||
type(None_type), exists_grouppattern_id(-1){}
|
||||
|
||||
void getVarset(Varset &varset);
|
||||
|
||||
void print(std::vector<GroupPattern> &exist_grouppatterns, int dep);
|
||||
};
|
||||
|
||||
class GroupPattern::FilterTreeNode::FilterTreeChild
|
||||
{
|
||||
public:
|
||||
FilterTreeChild():
|
||||
type(' '), pos(-1){}
|
||||
|
||||
char type;
|
||||
FilterTreeNode node;
|
||||
std::string arg;
|
||||
int pos;
|
||||
};
|
||||
|
||||
class GroupPattern::FilterTreeRoot
|
||||
{
|
||||
public:
|
||||
FilterTreeNode root;
|
||||
Varset varset;
|
||||
};
|
||||
|
||||
class Order
|
||||
{
|
||||
public:
|
||||
std::string var;
|
||||
bool descending;
|
||||
Order(std::string &_var, bool _descending):
|
||||
var(_var), descending(_descending){}
|
||||
};
|
||||
|
||||
|
||||
private:
|
||||
QueryForm query_form;
|
||||
ProjectionModifier projection_modifier;
|
||||
Varset projection;
|
||||
bool projection_asterisk;
|
||||
std::vector<Order> order;
|
||||
int offset, limit;
|
||||
|
||||
GroupPattern grouppattern;
|
||||
|
||||
public:
|
||||
void setQueryForm(QueryForm _queryform);
|
||||
QueryForm getQueryForm();
|
||||
void setProjectionModifier(ProjectionModifier _projection_modifier);
|
||||
ProjectionModifier getProjectionModifier();
|
||||
void addProjectionVar(std::string _projection);
|
||||
int getProjectionNum();
|
||||
Varset& getProjection();
|
||||
void setProjectionAsterisk();
|
||||
bool checkProjectionAsterisk();
|
||||
void addOrder(std::string &_var, bool _descending);
|
||||
std::vector<Order>& getOrder();
|
||||
void setOffset(int _offset);
|
||||
int getOffset();
|
||||
void setLimit(int _limit);
|
||||
int getLimit();
|
||||
|
||||
GroupPattern& getGroupPattern();
|
||||
|
||||
bool checkWellDesigned();
|
||||
|
||||
void print();
|
||||
};
|
||||
|
||||
#endif // _QUERY_QUERYTREE_H
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: RegexExpression.cpp
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-03-02 20:40
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
|
||||
// Thin wrapper around a POSIX regex_t: compile() builds the pattern, match()
// tests a text against it, and the destructor releases it.
//
// The `compiled` flag tracks whether oRegex currently holds a successfully
// compiled pattern. regfree()/regexec() are only defined for such an object,
// so the flag guards the destructor (object never compiled, or last compile
// failed), match() before any successful compile, and re-compilation (the
// previous pattern is released first instead of being leaked).
//
// NOTE(review): copying an instance still copies the raw regex_t, so two
// copies would both release it. Instances are assumed not to be copied;
// confirm against callers.
class RegexExpression
{
	private:
		regex_t oRegex;
		bool compiled;

		// Releases the compiled pattern, if any.
		void release()
		{
			if (compiled)
			{
				regfree(&oRegex);
				compiled = false;
			}
		}

	public:
		RegexExpression():
			compiled(false)
		{}

		~RegexExpression()
		{
			release();
		}

		// Compiles pRegexStr. Each 'i' in `flag` enables case-insensitive
		// matching; all other flag characters are ignored.
		// Returns true on success. On failure the object holds no pattern
		// and match() returns false.
		inline bool compile(std::string &pRegexStr, std::string &flag)
		{
			release();

			int flags = 0;
			for (int i = 0; i < (int)flag.length(); i++)
				if (flag[i] == 'i') flags |= REG_ICASE;

			int nErrCode = regcomp(&oRegex, pRegexStr.c_str(), flags);
			compiled = (nErrCode == 0);
			return compiled;
		}

		// Returns true when pText matches the compiled pattern, and false
		// when it does not or when no pattern has been compiled.
		inline bool match(std::string &pText)
		{
			if (!compiled)
				return false;

			int nErrCode = regexec(&oRegex, pText.c_str(), 0, NULL, 0);
			return (nErrCode == 0);
		}
};
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: ResultFilter.cpp
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-05-03 15:36
|
||||
# Description: implement functions in ResultFilter.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "ResultFilter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Registers variable `var` with a zero-filled counter table of MAX_SIZE
// slots. A variable that is already registered keeps its current counters.
void ResultFilter::addVar(string var)
{
	if (this->hash_table.count(var) != 0)
		return;

	typedef pair<string, vector<int> > Entry;
	const vector<int> zero_counters(this->MAX_SIZE, 0);
	this->hash_table.insert(Entry(var, zero_counters));
}
|
||||
|
||||
// Returns a pointer to the counter table of variable `var`, or NULL when the
// variable has not been registered. The pointer refers to storage owned by
// this ResultFilter.
vector<int>* ResultFilter::findVar(string var)
{
	map<string, vector<int> >::iterator found = this->hash_table.find(var);
	if (found == this->hash_table.end())
		return NULL;

	return &found->second;
}
|
||||
|
||||
// For every basic query of `query`, adds `value` to the counter slot of each
// id that appears in its result rows, one counter table per variable name.
// Variables not seen before are registered first.
void ResultFilter::change(SPARQLquery& query, int value)
{
	for (int i = 0; i < query.getBasicQueryNum(); i++)
	{
		BasicQuery& basicquery = query.getBasicQuery(i);
		vector<int*>& rows = basicquery.getResultList();
		const int row_num = rows.size();
		const int var_num = basicquery.getVarNum();

		// counter table of each variable, in column order
		// (std::map entries stay in place when further keys are inserted)
		vector<vector<int>*> refer;
		for (int j = 0; j < var_num; j++)
		{
			this->addVar(basicquery.getVarName(j));
			refer.push_back(this->findVar(basicquery.getVarName(j)));
		}

		for (int j = 0; j < row_num; j++)
		{
			int* row = rows[j];
			for (int k = 0; k < var_num; k++)
			{
				vector<int>& counters = *refer[k];
				counters[this->hash(row[k])] += value;
			}
		}
	}
}
|
||||
|
||||
// Shrinks the candidate list of every registered variable in `query`: an id
// is kept only when its counter slot is positive. Variables that were never
// registered are left untouched. Sizes before and after are logged to stdout.
// Several ids can share a counter slot, so a kept id is not necessarily one
// that was counted.
void ResultFilter::candFilter(SPARQLquery& query)
{
	for (int i = 0; i < query.getBasicQueryNum(); i++)
	{
		BasicQuery& basicquery = query.getBasicQuery(i);

		for (int j = 0; j < basicquery.getVarNum(); j++)
		{
			vector<int>* counters = this->findVar(basicquery.getVarName(j));
			if (counters == NULL)
				continue;

			IDList& idlist = basicquery.getCandidateList(j);
			IDList new_idlist;

			printf("candFilter on %s\n", basicquery.getVarName(j).c_str());
			printf("before candFilter, size = %d\n", idlist.size());

			for (int k = 0; k < idlist.size(); k++)
			{
				const int id = idlist.getID(k);
				if ((*counters)[hash(id)] > 0)
					new_idlist.addID(id);
			}
			idlist = new_idlist;

			printf("after candFilter, size = %d\n", idlist.size());
		}
	}
}
|
|
@ -1,35 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: ResultFilter.h
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-05-03 15:36
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _QUERY_RESULTFILTER_H
|
||||
#define _QUERY_RESULTFILTER_H
|
||||
|
||||
#include "SPARQLquery.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
class ResultFilter
|
||||
{
|
||||
private:
|
||||
static const int MAX_SIZE = 1048576;
|
||||
inline int hash(int x)
|
||||
{
|
||||
return ((x & (MAX_SIZE - 1)) * 17) & (MAX_SIZE - 1);
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<int> > hash_table;
|
||||
|
||||
public:
|
||||
void addVar(std::string var);
|
||||
std::vector<int>* findVar(std::string var);
|
||||
void change(SPARQLquery& query, int value);
|
||||
void candFilter(SPARQLquery& query);
|
||||
};
|
||||
|
||||
|
||||
#endif // _QUERY_RESULTFILTER_H
|
||||
|
|
@ -1,41 +1,29 @@
|
|||
/*=============================================================================
|
||||
# Filename: ResultSet.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-24 22:01
|
||||
# Description: implement functions in ResultSet.h
|
||||
=============================================================================*/
|
||||
/*
|
||||
* ResultSet.cpp
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#include "ResultSet.h"
|
||||
|
||||
using namespace std;
|
||||
#include <sstream>
|
||||
|
||||
ResultSet::ResultSet()
|
||||
{
|
||||
this->select_var_num = 0;
|
||||
this->var_name = NULL;
|
||||
this->ansNum = 0;
|
||||
#ifndef STREAM_ON
|
||||
this->answer = NULL;
|
||||
#else
|
||||
this->stream = NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
ResultSet::~ResultSet()
|
||||
{
|
||||
delete[] this->var_name;
|
||||
#ifndef STREAM_ON
|
||||
for(int i = 0; i < this->ansNum; i ++)
|
||||
{
|
||||
delete[] this->answer[i];
|
||||
}
|
||||
delete[] this->answer;
|
||||
#else
|
||||
delete this->stream; //maybe NULL
|
||||
#endif
|
||||
}
|
||||
|
||||
ResultSet::ResultSet(int _v_num, const string* _v_names)
|
||||
{
|
||||
this->select_var_num = _v_num;
|
||||
|
@ -44,217 +32,57 @@ ResultSet::ResultSet(int _v_num, const string* _v_names)
|
|||
{
|
||||
this->var_name[i] = _v_names[i];
|
||||
}
|
||||
#ifdef STREAM_ON
|
||||
this->stream = NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
ResultSet::setVar(const vector<string> & _var_names)
|
||||
/* convert to binary string */
|
||||
Bstr* ResultSet::to_bstr()
|
||||
{
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* convert to usual string */
|
||||
string ResultSet::to_str()
|
||||
{
|
||||
if(this->ansNum == 0)
|
||||
{
|
||||
return "[empty result]";
|
||||
}
|
||||
|
||||
|
||||
std::stringstream _buf;
|
||||
|
||||
//debug
|
||||
// _buf << "There has answer: " << this->ansNum << endl;
|
||||
// _buf << this->var_name[0];
|
||||
// for(int i = 1; i < this->select_var_num; i ++)
|
||||
// {
|
||||
// _buf << "\t" << this->var_name[i];
|
||||
// }
|
||||
// _buf << "\n";
|
||||
|
||||
for(int i = 0; i < this->ansNum; i ++)
|
||||
{
|
||||
_buf << this->answer[i][0];
|
||||
for(int j = 1; j < this->select_var_num; j ++)
|
||||
{
|
||||
//_buf << "\t" << this->answer[i][j];
|
||||
_buf << " " << this->answer[i][j];
|
||||
}
|
||||
_buf << "\n";
|
||||
}
|
||||
|
||||
return _buf.str();
|
||||
}
|
||||
|
||||
void ResultSet::setVar(const std::vector<string> & _var_names)
|
||||
{
|
||||
this->select_var_num = _var_names.size();
|
||||
this->var_name = new string[this->select_var_num];
|
||||
for(int i = 0; i < this->select_var_num; i++)
|
||||
for(int i = 0; i < this->select_var_num; i ++)
|
||||
{
|
||||
this->var_name[i] = _var_names[i];
|
||||
}
|
||||
}
|
||||
|
||||
//convert to usual string
|
||||
// Render the result set as text.
// Output is an answer-count line, a tab-separated line of variable names,
// then the answer rows. Without STREAM_ON the rows come from the in-memory
// `answer` table, restricted by output_offset / output_limit; with STREAM_ON
// they are whatever readAllFromStream() produces.
// With no answers the fixed text "[empty result]\n" is returned.
string
ResultSet::to_str()
{
    if (this->ansNum == 0)
        return "[empty result]\n";

    stringstream _out;

    // header: always emitted (the DEBUG_PRECISE guard around it is disabled)
    _out << "There has answer: " << this->ansNum << endl;
    _out << this->var_name[0];
    int _name = 1;
    while (_name < this->select_var_num)
    {
        _out << "\t" << this->var_name[_name];
        ++_name;
    }
    _out << "\n";

#ifdef STREAM_ON
    printf("using stream to produce to_str()!\n");
    _out << this->readAllFromStream();
#else
    for (int _row = 0; _row < this->ansNum; ++_row)
    {
        // an output_limit of -1 disables the upper bound
        if (this->output_limit != -1 && _row == this->output_offset + this->output_limit)
            break;
#ifdef DEBUG_PRECISE
        printf("to_str: well!\n");	//just for debug!
#endif //DEBUG_PRECISE
        // rows before the offset are skipped
        if (_row < this->output_offset)
            continue;

        _out << this->answer[_row][0];
        for (int _col = 1; _col < this->select_var_num; ++_col)
        {
            // fields may contain ' ' but never '\t', so tab is the separator
            _out << "\t" << this->answer[_row][_col];
        }
        _out << "\n";
    }
#ifdef DEBUG_PRECISE
    printf("to_str: ends!\n");		//just for debug!
#endif //DEBUG_PRECISE
#endif //STREAM_ON

    return _out.str();
}
|
||||
|
||||
void
|
||||
ResultSet::output(FILE* _fp)
|
||||
{
|
||||
#ifdef STREAM_ON
|
||||
fprintf(_fp, "%s", this->var_name[0].c_str());
|
||||
for(int i = 1; i < this->select_var_num; i++)
|
||||
{
|
||||
fprintf(_fp, "\t%s", this->var_name[i].c_str());
|
||||
}
|
||||
fprintf(_fp, "\n");
|
||||
|
||||
|
||||
if(this->ansNum == 0)
|
||||
{
|
||||
fprintf(_fp, "[empty result]\n");
|
||||
return;
|
||||
}
|
||||
const Bstr* bp;
|
||||
for(int i = 0; i < this->ansNum; i++)
|
||||
{
|
||||
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
|
||||
break;
|
||||
bp = this->stream->read();
|
||||
if (i >= this->output_offset)
|
||||
{
|
||||
fprintf(_fp, "%s", bp[0].getStr());
|
||||
//fprintf(_fp, "%s", bp->getStr());
|
||||
for(int j = 1; j < this->select_var_num; j++)
|
||||
{
|
||||
fprintf(_fp, "\t%s", bp[j].getStr());
|
||||
//bp = this->stream.read();
|
||||
//fprintf(_fp, "\t%s", bp->getStr());
|
||||
}
|
||||
fprintf(_fp, "\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
ResultSet::openStream(std::vector<int> &_keys, std::vector<bool> &_desc, int _output_offset, int _output_limit)
|
||||
{
|
||||
#ifdef STREAM_ON
|
||||
#ifdef DEBUG_STREAM
|
||||
vector<int> debug_keys;
|
||||
vector<bool> debug_desc;
|
||||
for(int i = 0; i < this->select_var_num; ++i)
|
||||
{
|
||||
debug_keys.push_back(i);
|
||||
debug_desc.push_back(false);
|
||||
}
|
||||
#endif
|
||||
if(this->stream != NULL)
|
||||
{
|
||||
delete this->stream;
|
||||
this->stream = NULL;
|
||||
}
|
||||
#ifdef DEBUG_STREAM
|
||||
if(this->ansNum > 0)
|
||||
this->stream = new Stream(debug_keys, debug_desc, this->ansNum, this->select_var_num, true);
|
||||
#else
|
||||
if(this->ansNum > 0)
|
||||
this->stream = new Stream(_keys, _desc, this->ansNum, this->select_var_num, _keys.size() > 0);
|
||||
#endif //DEBUG_STREAM
|
||||
#endif //STREAM_ON
|
||||
this->output_offset = _output_offset;
|
||||
this->output_limit = _output_limit;
|
||||
}
|
||||
|
||||
void
|
||||
ResultSet::resetStream()
|
||||
{
|
||||
#ifdef STREAM_ON
|
||||
//this->stream.reset();
|
||||
if(this->stream != NULL)
|
||||
this->stream->setEnd();
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
ResultSet::writeToStream(string& _s)
|
||||
{
|
||||
#ifdef STREAM_ON
|
||||
if(this->stream != NULL)
|
||||
this->stream->write(_s.c_str(), _s.length());
|
||||
#endif
|
||||
}
|
||||
|
||||
//QUERY: how to manage when large?
|
||||
// Read the records of the private stream into one string.
// Each printed record becomes a tab-separated line. Records before
// output_offset are read but not printed, and reading stops at
// output_offset + output_limit (no upper bound when output_limit is -1).
// Returns "" when there is no stream or when STREAM_ON is not defined.
string
ResultSet::readAllFromStream()
{
    stringstream _text;
#ifdef STREAM_ON
    if(this->stream == NULL)
    {
        return "";
    }

    this->resetStream();

    for(int _row = 0; _row < this->ansNum; ++_row)
    {
        if (this->output_limit != -1 && _row == this->output_offset + this->output_limit)
        {
            break;
        }

        // the record is consumed even when it lies before the offset
        const Bstr* _record = this->stream->read();
        if (_row < this->output_offset)
        {
            continue;
        }

        // one Bstr per selected column
        _text << _record[0].getStr();
        int _col = 1;
        while (_col < this->select_var_num)
        {
            _text << "\t" << _record[_col].getStr();
            ++_col;
        }
        _text << "\n";
    }
#endif
    return _text.str();
}
|
||||
|
||||
// Fetch the next record from the private stream.
// Returns NULL (after a message on stderr) when there is no stream or the
// stream reports isEnd(); always returns NULL when STREAM_ON is not defined.
// NOTICE: the returned record must not be freed by the caller.
const Bstr*
ResultSet::getOneRecord()
{
#ifndef STREAM_ON
    return NULL;
#else
    if(this->stream == NULL)
    {
        fprintf(stderr, "ResultSet::getOneRecord(): no results now!\n");
        return NULL;
    }

    const bool _exhausted = this->stream->isEnd();
    if(_exhausted)
    {
        fprintf(stderr, "ResultSet::getOneRecord(): read till end now!\n");
        return NULL;
    }

    //NOTICE:Bstr[] but only one element, used as Bstr*
    const Bstr* _record = this->stream->read();
    return _record;
#endif
}
|
||||
|
||||
|
|
|
@ -1,53 +1,39 @@
|
|||
/*=============================================================================
|
||||
# Filename: ResultSet.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-24 21:57
|
||||
# Description: originally written by liyouhuan, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* ResultSet.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef _QUERY_RESULTSET_H
|
||||
#define _QUERY_RESULTSET_H
|
||||
#ifndef RESULTSET_H_
|
||||
#define RESULTSET_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Util/Bstr.h"
|
||||
#include "../Util/Stream.h"
|
||||
#include<string>
|
||||
#include<vector>
|
||||
#include "../Bstr/Bstr.h"
|
||||
using namespace std;
|
||||
|
||||
class ResultSet
|
||||
{
|
||||
private:
|
||||
#ifdef STREAM_ON
|
||||
Stream* stream;
|
||||
#endif
|
||||
class ResultSet{
|
||||
public:
|
||||
int select_var_num;
|
||||
int ansNum;
|
||||
std::string* var_name;
|
||||
int output_offset, output_limit;
|
||||
string* var_name;
|
||||
string** answer;
|
||||
|
||||
#ifndef STREAM_ON
|
||||
std::string** answer;
|
||||
#endif
|
||||
|
||||
ResultSet();
|
||||
~ResultSet();
|
||||
ResultSet(int _v_num, const std::string* _v_names);
|
||||
ResultSet(int _v_num, const string* _v_names);
|
||||
|
||||
//convert to binary string
|
||||
//Bstr* to_bstr();
|
||||
/* convert to binary string */
|
||||
Bstr* to_bstr();
|
||||
|
||||
//convert to usual string
|
||||
std::string to_str();
|
||||
void output(FILE* _fp); //output all results using Stream
|
||||
void setVar(const std::vector<std::string> & _var_names);
|
||||
/* convert to usual string */
|
||||
string to_str();
|
||||
|
||||
//operations on private stream from caller
|
||||
void openStream(std::vector<int> &_keys, std::vector<bool> &_desc, int _output_offset, int _output_limit);
|
||||
void resetStream();
|
||||
void writeToStream(std::string& _s);
|
||||
std::string readAllFromStream();
|
||||
const Bstr* getOneRecord();
|
||||
/* */
|
||||
void setVar(const std::vector<string> & _var_names);
|
||||
};
|
||||
|
||||
#endif //_QUERY_RESULTSET_H
|
||||
|
||||
#endif /* RESULTSET_H_ */
|
||||
|
|
|
@ -1,21 +1,14 @@
|
|||
/*=============================================================================
|
||||
# Filename: SPARQLquery.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-31 19:15
|
||||
# Description: implement functions in SPARQLquery.h
|
||||
=============================================================================*/
|
||||
/*
|
||||
* SPARQLquery.cpp
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#include "SPARQLquery.h"
|
||||
#include "../Parser/DBparser.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Construct from a query string.
// The argument is accepted but not used: no member is set here.
SPARQLquery::SPARQLquery(const string& _query)
{
    // intentionally empty
}
|
||||
|
||||
SPARQLquery::SPARQLquery()
|
||||
{
|
||||
|
@ -24,167 +17,201 @@ SPARQLquery::SPARQLquery()
|
|||
|
||||
SPARQLquery::~SPARQLquery()
|
||||
{
|
||||
for(unsigned i = 0; i < this->query_union.size(); i ++)
|
||||
for(int i = 0; i < this->query_vec.size(); i ++)
|
||||
{
|
||||
delete this->query_union[i];
|
||||
delete this->query_vec[i];
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SPARQLquery::addQueryVar(const string& _var)
|
||||
void SPARQLquery::addQueryVar(const string& _var)
|
||||
{
|
||||
query_var.push_back(_var);
|
||||
int n = (int)this->query_var_vec.size();
|
||||
std::vector<std::string>::iterator i = find(this->query_var_vec[n - 1].begin(), this->query_var_vec[n - 1].end(), _var);
|
||||
if (i == this->query_var_vec[n - 1].end()) this->query_var_vec[n - 1].push_back(_var);
|
||||
}
|
||||
|
||||
const int
|
||||
SPARQLquery::getQueryVarNum()
|
||||
void SPARQLquery::addQueryVarVec()
|
||||
{
|
||||
return query_var.size();
|
||||
this->query_var_vec.push_back(vector <string> ());
|
||||
}
|
||||
|
||||
const vector<string>&
|
||||
SPARQLquery::getQueryVar()const
|
||||
vector<string>& SPARQLquery::getQueryVarVec(int _var_vec_id)
|
||||
{
|
||||
return this->query_var;
|
||||
return this->query_var_vec[_var_vec_id];
|
||||
}
|
||||
|
||||
const string&
|
||||
SPARQLquery::getQueryVar(int _id)
|
||||
{
|
||||
return this->query_var.at(_id);
|
||||
void SPARQLquery::addTriple(const Triple& _triple){
|
||||
int last_i = query_vec.size()-1;
|
||||
query_vec[last_i]->addTriple(_triple);
|
||||
}
|
||||
|
||||
void
|
||||
SPARQLquery::addTriple(const Triple& _triple){
|
||||
int last_i = query_union.size()-1;
|
||||
query_union[last_i]->addTriple(_triple);
|
||||
const int SPARQLquery::getBasicQueryNum()
|
||||
{
|
||||
return this->query_vec.size();
|
||||
}
|
||||
|
||||
const int
|
||||
SPARQLquery::getBasicQueryNum()
|
||||
void SPARQLquery::encodeQuery(KVstore* _p_kv_store)
|
||||
{
|
||||
return this->query_union.size();
|
||||
}
|
||||
|
||||
void
|
||||
SPARQLquery::encodeQuery(KVstore* _p_kv_store)
|
||||
{
|
||||
for(unsigned i = 0; i < this->query_union.size(); i ++)
|
||||
for(int i = 0; i < this->query_vec.size(); i ++)
|
||||
{
|
||||
(this->query_union[i])->encodeBasicQuery(_p_kv_store, this->query_var);
|
||||
(this->query_vec[i])->encodeBasicQuery(_p_kv_store, this->query_var_vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SPARQLquery::encodeQuery(KVstore* _p_kv_store, vector< vector<string> > sparql_query_varset)
|
||||
BasicQuery& SPARQLquery::getBasicQuery(int basic_query_id)
|
||||
{
|
||||
for(unsigned i = 0; i < this->query_union.size(); i ++)
|
||||
return *(query_vec[basic_query_id]);
|
||||
}
|
||||
|
||||
void SPARQLquery::addBasicQuery()
|
||||
{
|
||||
query_vec.push_back(new BasicQuery(""));
|
||||
}
|
||||
|
||||
void SPARQLquery::addBasicQuery(BasicQuery* _basic_q)
|
||||
{
|
||||
this->query_vec.push_back(_basic_q);
|
||||
}
|
||||
|
||||
vector<BasicQuery*>& SPARQLquery::getBasicQueryVec()
|
||||
{
|
||||
return this->query_vec;
|
||||
}
|
||||
|
||||
void SPARQLquery::print(ostream& _out_stream)
|
||||
{
|
||||
int n = getBasicQueryNum();
|
||||
std::cout << "Block " << n << " in total." << std::endl;
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
(this->query_union[i])->encodeBasicQuery(_p_kv_store, sparql_query_varset[i]);
|
||||
}
|
||||
}
|
||||
|
||||
BasicQuery&
|
||||
SPARQLquery::getBasicQuery(int basic_query_id)
|
||||
{
|
||||
return *(query_union[basic_query_id]);
|
||||
}
|
||||
|
||||
void
|
||||
SPARQLquery::addBasicQuery(){
|
||||
query_union.push_back(new BasicQuery(""));
|
||||
}
|
||||
|
||||
void
|
||||
SPARQLquery::addBasicQuery(BasicQuery* _basic_q)
|
||||
{
|
||||
this->query_union.push_back(_basic_q);
|
||||
}
|
||||
|
||||
vector<BasicQuery*>&
|
||||
SPARQLquery::getBasicQueryVec()
|
||||
{
|
||||
return this->query_union;
|
||||
}
|
||||
|
||||
void
|
||||
SPARQLquery::print(ostream& _out_stream){
|
||||
int k=getQueryVarNum();
|
||||
cout<<"QueryVar "<<k<<":"<<endl;
|
||||
for (int i=0;i<k;i++){
|
||||
cout<<getQueryVar(i)<<endl;
|
||||
}
|
||||
k=getBasicQueryNum();
|
||||
cout<<"Block "<<k<<" in total."<<endl;
|
||||
for (int i=0;i<k;i++){
|
||||
cout<<"Block "<<i<<endl;
|
||||
std::cout<<"Block "<< i << std::endl;
|
||||
int m = this->query_var_vec[i].size();
|
||||
std::cout << "QueryVar "<< m <<":"<< std::endl;
|
||||
for (int j = 0; j < m; j++)
|
||||
std::cout << this->query_var_vec[i][j] << " ";
|
||||
std::cout << std::endl;
|
||||
getBasicQuery(i).print(_out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
string
|
||||
SPARQLquery::triple_str()
|
||||
std::string SPARQLquery::triple_str()
|
||||
{
|
||||
stringstream _ss;
|
||||
std::stringstream _ss;
|
||||
|
||||
_ss << "varNum:" << this->query_var.size() << endl;
|
||||
for(unsigned i = 0; i < this->query_var.size(); i ++)
|
||||
for(int i = 0; i < this->query_vec.size(); i ++)
|
||||
{
|
||||
_ss << this->query_var[i] << "\t";
|
||||
}
|
||||
_ss << endl;
|
||||
|
||||
for(unsigned i = 0; i < this->query_union.size(); i ++)
|
||||
{
|
||||
_ss << "bq" << i << " :" << this->query_union[i]->triple_str() << endl;
|
||||
_ss << "varVec" << i << "varNum:" << this->query_var_vec[i].size() << endl;
|
||||
for (int j = 0; j < this->query_var_vec[i].size(); j++)
|
||||
_ss << this->query_var_vec[i][j] << "\t";
|
||||
_ss << endl;
|
||||
_ss << "bq" << i << " :" << this->query_vec[i]->triple_str() << endl;
|
||||
}
|
||||
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
string
|
||||
SPARQLquery::candidate_str()
|
||||
std::string SPARQLquery::candidate_str()
|
||||
{
|
||||
stringstream _ss;
|
||||
std::stringstream _ss;
|
||||
|
||||
for(unsigned i = 0; i < this->query_union.size(); i ++)
|
||||
for(int i = 0; i < this->query_vec.size(); i ++)
|
||||
{
|
||||
_ss << "bq" << i << " :" << this->query_union[i]->candidate_str() << endl;
|
||||
_ss << "bq" << i << " :" << this->query_vec[i]->candidate_str() << endl;
|
||||
}
|
||||
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
string
|
||||
SPARQLquery::result_str()
|
||||
std::string SPARQLquery::result_str()
|
||||
{
|
||||
stringstream _ss;
|
||||
std::stringstream _ss;
|
||||
|
||||
for(unsigned i = 0; i < this->query_union.size(); i ++)
|
||||
for(int i = 0; i < this->query_vec.size(); i ++)
|
||||
{
|
||||
_ss << "bq" << i << " :" << this->query_union[i]->result_str() << endl;
|
||||
_ss << "bq" << i << " :" << this->query_vec[i]->result_str() << endl;
|
||||
}
|
||||
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
string
|
||||
SPARQLquery::to_str()
|
||||
std::string SPARQLquery::to_str()
|
||||
{
|
||||
stringstream _ss;
|
||||
std::stringstream _ss;
|
||||
|
||||
_ss << "varNum:" << this->query_var.size() << endl;
|
||||
for(unsigned i = 0; i < this->query_var.size(); i ++)
|
||||
for(int i = 0; i < this->query_vec.size(); i ++)
|
||||
{
|
||||
_ss << this->query_var[i] << "\t";
|
||||
}
|
||||
_ss << endl;
|
||||
|
||||
for(unsigned i = 0; i < this->query_union.size(); i ++)
|
||||
{
|
||||
_ss << "bq" << i << " :\n" << this->query_union[i]->to_str() << endl;
|
||||
_ss << "varVec" << i << "varNum:" << this->query_var_vec[i].size() << endl;
|
||||
for (int j = 0; j < this->query_var_vec[i].size(); j++)
|
||||
_ss << this->query_var_vec[i][j] << "\t";
|
||||
_ss << endl;
|
||||
_ss << "bq" << i << " :\n" << this->query_vec[i]->to_str() << endl;
|
||||
}
|
||||
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
void SPARQLquery::addOneProjection(std::string _projection)
|
||||
{
|
||||
this->projections.push_back(_projection);
|
||||
}
|
||||
|
||||
int SPARQLquery::getProjectionsNum()
|
||||
{
|
||||
return this->projections.size();
|
||||
}
|
||||
|
||||
std::vector<std::string>& SPARQLquery::getProjections()
|
||||
{
|
||||
return this->projections;
|
||||
}
|
||||
|
||||
// Mutable access to the pattern group member of this query.
SPARQLquery::PatternGroup& SPARQLquery::getPatternGroup()
{
    SPARQLquery::PatternGroup& _group = this->patterngroup;
    return _group;
}
|
||||
|
||||
// Append a pattern to this group.
// hasVar is switched on when the subject or the object text starts with '?';
// it is never switched off here.
void SPARQLquery::PatternGroup::addOnePattern(Pattern _pattern)
{
    if (_pattern.subject.value[0] == '?')
    {
        this->hasVar = true;
    }
    else if (_pattern.object.value[0] == '?')
    {
        this->hasVar = true;
    }

    this->patterns.push_back(_pattern);
}
|
||||
|
||||
void SPARQLquery::PatternGroup::addOneFilterTree()
|
||||
{
|
||||
this->filters.push_back(FilterTree());
|
||||
}
|
||||
|
||||
// Reference to the most recently added filter tree.
// There is no emptiness check: at least one filter must have been added.
SPARQLquery::FilterTree& SPARQLquery::PatternGroup::getLastFilterTree()
{
    const int _last = (int)(this->filters.size()) - 1;
    return this->filters[_last];
}
|
||||
|
||||
void SPARQLquery::PatternGroup::addOneOptional()
|
||||
{
|
||||
this->optionals.push_back(PatternGroup());
|
||||
}
|
||||
|
||||
// Reference to the most recently added optional group.
// There is no emptiness check: at least one optional must have been added.
SPARQLquery::PatternGroup& SPARQLquery::PatternGroup::getLastOptional()
{
    const int _last = (int)(this->optionals.size()) - 1;
    return this->optionals[_last];
}
|
||||
|
||||
void SPARQLquery::PatternGroup::addOneGroupUnion()
|
||||
{
|
||||
this->unions.push_back(std::vector<PatternGroup>());
|
||||
}
|
||||
|
||||
void SPARQLquery::PatternGroup::addOneUnion()
|
||||
{
|
||||
int n = this->unions.size();
|
||||
this->unions[n - 1].push_back(PatternGroup());
|
||||
}
|
||||
|
||||
// Reference to the last branch of the most recently started union list.
// There is no emptiness check on either level.
SPARQLquery::PatternGroup& SPARQLquery::PatternGroup::getLastUnion()
{
    const int _lists = this->unions.size();
    std::vector<PatternGroup>& _current = this->unions[_lists - 1];
    const int _branches = _current.size();
    return _current[_branches - 1];
}
|
||||
|
|
|
@ -1,29 +1,31 @@
|
|||
/*=============================================================================
|
||||
# Filename: SPARQLquery.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-31 19:13
|
||||
# Description: originally written by liyouhuan, modified by chenjiaqi and zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* SPARQLquery.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef _QUERY_SPARQLQUERY_H
|
||||
#define _QUERY_SPARQLQUERY_H
|
||||
#ifndef SPARQLQUERY_H_
|
||||
#define SPARQLQUERY_H_
|
||||
|
||||
#include "BasicQuery.h"
|
||||
|
||||
class SPARQLquery
|
||||
{
|
||||
|
||||
class SPARQLquery{
|
||||
private:
|
||||
vector<BasicQuery*> query_union;
|
||||
vector<string> query_var;
|
||||
vector<BasicQuery*> query_vec;
|
||||
vector < vector<string> >query_var_vec;
|
||||
public:
|
||||
SPARQLquery(const string& _query);
|
||||
|
||||
SPARQLquery();
|
||||
~SPARQLquery();
|
||||
|
||||
void addQueryVar(const string& _var);
|
||||
|
||||
void addQueryVarVec();
|
||||
|
||||
vector<string>& getQueryVarVec(int _var_vec_id);
|
||||
|
||||
void addTriple(const Triple& _triple);
|
||||
|
||||
void addBasicQuery(BasicQuery* _basic_q);
|
||||
|
@ -34,23 +36,133 @@ public:
|
|||
|
||||
BasicQuery& getBasicQuery(int _basic_query_id);
|
||||
|
||||
const int getQueryVarNum();
|
||||
|
||||
const vector<string>& getQueryVar()const;
|
||||
|
||||
const string& getQueryVar(int _id);
|
||||
|
||||
void encodeQuery(KVstore* _p_kv_store);
|
||||
void encodeQuery(KVstore* _p_kv_store, vector< vector<string> > sparql_query_varset);
|
||||
|
||||
vector<BasicQuery*>& getBasicQueryVec();
|
||||
|
||||
|
||||
void print(ostream& _out_stream);
|
||||
std::string triple_str();
|
||||
std::string candidate_str();
|
||||
std::string result_str();
|
||||
std::string to_str();
|
||||
|
||||
|
||||
|
||||
public:
|
||||
// One term of a Pattern, stored as plain text in `value`.
struct Element
{
    /*
     * Typing information sketched for later use; not compiled:
    enum Type { Variable, Literal, IRI };
    enum SubType { None, CustomLanguage, CustomType };
    Type type;
    SubType subType;
    std::string subTypeValue;
    */

    std::string value;

    // Non-explicit on purpose of compatibility: a std::string converts to an Element.
    Element(const std::string& _value)
        : value(_value)
    {
    }
};
|
||||
|
||||
struct Pattern
|
||||
{
|
||||
Element subject,predicate,object;
|
||||
Pattern(const Element &_subject, const Element &_predicate,const Element &_object):subject(_subject), predicate(_predicate), object(_object){}
|
||||
};
|
||||
|
||||
// Node of a binary expression tree.
//
// `type` says what the node is; `parg1` / `parg2` are optional child nodes
// owned by this node (they are deleted in the destructor) and `arg1` / `arg2`
// are textual arguments.
//
// Nodes are stored by value in std::vector<FilterTree>, so they get copied
// whenever the vector grows. Copying therefore clones the children: a
// member-wise copy would leave two nodes owning the same children and both
// destructors would delete them.
struct FilterTree
{
    enum FilterTree_Type
    {
        Or, And, Equal, NotEqual, Less, LessOrEqual, Greater, GreaterOrEqual, Plus, Minus, Mul, Div,
        Not, UnaryPlus, UnaryMinus, Literal, Variable, IRI, Function, ArgumentList,
        Builtin_str, Builtin_lang, Builtin_langmatches, Builtin_datatype, Builtin_bound, Builtin_sameterm,
        Builtin_isiri, Builtin_isblank, Builtin_isliteral, Builtin_regex, Builtin_in
    };

    FilterTree_Type type;
    FilterTree* parg1,*parg2;
    std::string arg1, arg2;
    //std::string arg1Type, arg2Type;

    // `type` is given a value only so that copying a fresh node never reads an
    // indeterminate enum; callers are still expected to assign the real type.
    explicit FilterTree():type(Or), parg1(NULL), parg2(NULL){}

    // Deep copy: the children of _other are cloned recursively.
    FilterTree(const FilterTree& _other)
        :type(_other.type), parg1(NULL), parg2(NULL), arg1(_other.arg1), arg2(_other.arg2)
    {
        if (_other.parg1 != NULL) parg1 = new FilterTree(*_other.parg1);
        try
        {
            if (_other.parg2 != NULL) parg2 = new FilterTree(*_other.parg2);
        }
        catch (...)
        {
            // the destructor does not run for a half-built object
            delete parg1;
            throw;
        }
    }

    // Copy-and-swap: build the deep copy first, then trade contents with it so
    // that the previous children are released by the temporary.
    FilterTree& operator=(const FilterTree& _other)
    {
        if (this != &_other)
        {
            FilterTree _tmp(_other);
            FilterTree* _old1 = parg1;
            FilterTree* _old2 = parg2;
            type = _tmp.type;
            parg1 = _tmp.parg1;
            parg2 = _tmp.parg2;
            _tmp.parg1 = _old1;
            _tmp.parg2 = _old2;
            arg1.swap(_tmp.arg1);
            arg2.swap(_tmp.arg2);
        }
        return *this;
    }

    ~FilterTree()
    {
        // deleting NULL is a no-op
        delete parg1;
        delete parg2;
    }
};
|
||||
|
||||
class PatternGroup
|
||||
{
|
||||
public:
|
||||
std::vector<Pattern> patterns;
|
||||
std::vector<FilterTree> filters;
|
||||
std::vector<PatternGroup> optionals;
|
||||
std::vector<std::vector<PatternGroup> > unions;
|
||||
bool hasVar;
|
||||
|
||||
public:
|
||||
PatternGroup():hasVar(false){}
|
||||
void addOnePattern(Pattern _pattern);
|
||||
void addOneFilterTree();
|
||||
FilterTree& getLastFilterTree();
|
||||
void addOneOptional();
|
||||
PatternGroup& getLastOptional();
|
||||
void addOneGroupUnion();
|
||||
void addOneUnion();
|
||||
PatternGroup& getLastUnion();
|
||||
};
|
||||
|
||||
// Plain holder for a list of names (`var`) and a table of int rows (`res`).
class TempResult
{
public:
    std::vector<std::string> var;
    std::vector< std::vector<int> > res;

    // Both containers start out empty.
    TempResult()
    {
    }

    // Member-wise copy of both containers.
    TempResult(const TempResult &t)
        : var(t.var),
          res(t.res)
    {
    }
};
|
||||
|
||||
// Pairs a one-character type code with an untyped pointer.
// The pointer is only stored and handed back; this class never frees it.
class EvaPlanEle
{
private:
    char type;
    void * p;

public:
    // _p defaults to NULL when only a type code is given.
    EvaPlanEle(char _type, void *_p = NULL)
        : type(_type),
          p(_p)
    {
    }

    char getType()
    {
        return this->type;
    }

    void * getPointer()
    {
        return this->p;
    }
};
|
||||
std::vector<EvaPlanEle> evaPlan;
|
||||
std::stack<TempResult *> evaStack;
|
||||
|
||||
/*
|
||||
enum ProjectionModifier { Modifier_None, Modifier_Distinct, Modifier_Reduced, Modifier_Count, Modifier_Duplicates };
|
||||
struct Order
|
||||
{
|
||||
unsigned id;
|
||||
bool descending;
|
||||
};
|
||||
*/
|
||||
|
||||
private:
|
||||
std::vector<std::string> projections;
|
||||
PatternGroup patterngroup;
|
||||
|
||||
/*
|
||||
ProjectionModifier projectionModifier;
|
||||
std::vector<Order> order;
|
||||
unsigned limit;
|
||||
*/
|
||||
|
||||
public:
|
||||
void addOneProjection(std::string _projection);
|
||||
int getProjectionsNum();
|
||||
std::vector<std::string>& getProjections();
|
||||
PatternGroup& getPatternGroup();
|
||||
};
|
||||
|
||||
#endif //_QUERY_SPARQLQUERY_H
|
||||
|
||||
#endif /* SPARQLQUERY_H_ */
|
||||
|
|
109
Query/Varset.cpp
109
Query/Varset.cpp
|
@ -1,109 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Varset.cpp
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-03-02 20:35
|
||||
# Description: implement functions in Varset.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "Varset.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Build a set that holds exactly the given variable name.
Varset::Varset(string & _var)
{
    this->addVar(_var);
}
|
||||
|
||||
// Build a set from a list of names; each name is added through addVar(), so
// repeated names are stored only once and first-seen order is kept.
Varset::Varset(vector<string> & _varset)
{
    const int _total = (int)_varset.size();
    int _idx = 0;
    while (_idx < _total)
    {
        this->addVar(_varset[_idx]);
        ++_idx;
    }
}
|
||||
|
||||
// True when _var is already stored in this set (linear search).
bool Varset::findVar(string& _var)
{
    // searching an empty vector yields end(), so no separate emptiness test is needed
    vector<string>::iterator _pos = find(this->varset.begin(), this->varset.end(), _var);
    if (_pos == this->varset.end())
        return false;
    return true;
}
|
||||
|
||||
// Append _var unless it is already stored.
void Varset::addVar(string& _var)
{
    if (this->findVar(_var))
        return;
    this->varset.push_back(_var);
}
|
||||
|
||||
// Union: the names of this set followed by the names of x not already present.
Varset Varset::operator + (Varset& x)
{
    Varset _merged;

    const int _mine = (int)this->varset.size();
    for (int _k = 0; _k < _mine; ++_k)
    {
        _merged.addVar(this->varset[_k]);
    }

    const int _theirs = (int)x.varset.size();
    for (int _k = 0; _k < _theirs; ++_k)
    {
        _merged.addVar(x.varset[_k]);
    }

    return _merged;
}
|
||||
|
||||
// Intersection: the names of this set that also occur in x, in this set's order.
Varset Varset::operator * (Varset& x)
{
    Varset _common;
    const int _total = (int)this->varset.size();
    for (int _k = 0; _k < _total; ++_k)
    {
        string& _name = this->varset[_k];
        if (!x.findVar(_name))
            continue;
        _common.addVar(_name);
    }
    return _common;
}
|
||||
|
||||
// Difference: the names of this set that do not occur in x, in this set's order.
Varset Varset::operator - (Varset& x)
{
    Varset _rest;
    const int _total = (int)this->varset.size();
    for (int _k = 0; _k < _total; ++_k)
    {
        string& _name = this->varset[_k];
        if (x.findVar(_name))
            continue;
        _rest.addVar(_name);
    }
    return _rest;
}
|
||||
|
||||
// Equality regardless of order: same number of names, and every name of this
// set occurs in x.
bool Varset::operator ==(Varset &x)
{
    const int _mine = (int)this->varset.size();
    const int _theirs = (int)x.varset.size();
    if (_mine != _theirs)
        return false;

    int _k = 0;
    while (_k < (int)this->varset.size())
    {
        if (!x.findVar(this->varset[_k]))
            return false;
        ++_k;
    }
    return true;
}
|
||||
|
||||
// True when at least one name of this set also occurs in x.
bool Varset::hasCommonVar(Varset &x)
{
    bool _shared = false;
    for (int _k = 0; !_shared && _k < (int)this->varset.size(); ++_k)
    {
        _shared = x.findVar(this->varset[_k]);
    }
    return _shared;
}
|
||||
|
||||
// True when every name of this set occurs in x (an empty set belongs to any x).
bool Varset::belongTo(Varset &x)
{
    int _k = 0;
    while (_k < (int)this->varset.size())
    {
        if (!x.findVar(this->varset[_k]))
            return false;
        ++_k;
    }
    return true;
}
|
||||
|
||||
// For each name of this set, give its position in x, or -1 when x lacks it.
// The scan of x is never cut short, so when x.varset holds a name more than
// once the highest matching position is reported.
vector <int> Varset::mapTo(Varset& x)
{
    const int _mine = (int)this->varset.size();
    const int _theirs = (int)x.varset.size();

    // every slot starts as "not found"
    vector<int> _positions(this->varset.size(), -1);

    for (int _i = 0; _i < _mine; ++_i)
    {
        for (int _j = 0; _j < _theirs; ++_j)
        {
            if (this->varset[_i] == x.varset[_j])
            {
                _positions[_i] = _j;
            }
        }
    }
    return _positions;
}
|
||||
|
||||
void Varset::print()
|
||||
{
|
||||
printf("Varset: ");
|
||||
for (int i = 0; i < (int)this->varset.size(); i++)
|
||||
{
|
||||
printf("%s ", this->varset[i].c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Varset.h
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-03-02 20:35
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _QUERY_VARSET_H
|
||||
#define _QUERY_VARSET_H
|
||||
|
||||
#include "../Util/Util.h"
|
||||
|
||||
// An ordered collection of variable names held in a vector.
// Names added through addVar() or the constructors are stored at most once;
// the vector itself is public.
class Varset
{
public:
    std::vector <std::string> varset;

    // empty set
    Varset() {}
    // set holding the single name _var
    Varset(std::string & _var);
    // set holding the names of _varset
    Varset(std::vector<std::string> & _varset);

    // membership test / insertion of one name
    bool findVar(std::string& _var);
    void addVar(std::string& _var);

    // set operations producing a new Varset
    Varset operator + (Varset& x);
    Varset operator * (Varset& x);
    Varset operator - (Varset& x);

    // comparisons against another set
    bool operator ==(Varset &x);
    bool hasCommonVar(Varset &x);
    bool belongTo(Varset &x);

    // one int per name of this set, computed against x
    std::vector <int> mapTo(Varset& x);

    // debugging aid
    void print();
};
|
||||
|
||||
#endif // _QUERY_VARSET_H
|
||||
|
68
README.md
68
README.md
|
@ -1,68 +0,0 @@
|
|||
# Gstore System
|
||||
|
||||
Gstore System (also called gStore) is an open-source graph database engine for managing large graph-structured data, and it targets Linux operating systems. The whole project is written in C++, with the help of some libraries such as readline, antlr, and so on. Only source tarballs are provided currently, which means you have to compile the source code if you want to use our system.
|
||||
|
||||
**The formal help document is in [Handbook](docs/latex/gStore_help.pdf).**
|
||||
|
||||
**You can write your information in [survey](http://59.108.48.38/survey) if you like.**
|
||||
|
||||
## Getting Started
|
||||
|
||||
This system is really user-friendly and you can pick it up in several minutes. Remember to check that the platform on which you want to run this system meets the [System Requirements](docs/DEMAND.md). Once everything is verified, please get this project's source code. There are several ways to do this:
|
||||
|
||||
- download the zip from this repository and extract it
|
||||
|
||||
- fork this repository in your github account
|
||||
|
||||
- type `git clone git@github.com:Caesar11/gStore.git` in your terminal or use git GUI to acquire it
|
||||
|
||||
Then you need to compile the project: just type `make` in the gStore root directory, and all executables will be built. To run gStore, type `bin/gload database_name dataset_path` to build a database with a name of your choice. You can then use the `bin/gquery database_name` command to query an existing database. What is more, `bin/gconsole` is a convenient tool that provides all the operations you need to use gStore. Note that all commands should be typed in the root directory of gStore, and your database name should not end with ".db".
|
||||
|
||||
- - -
|
||||
|
||||
## Advanced Help
|
||||
|
||||
If you want to understand the details of the gStore system, or you want to try some advanced operations (for example, using the API or the server/client), please see the chapters below.
|
||||
|
||||
- [Basic Introduction](docs/INTRO.md): introduce the theory and features of gStore
|
||||
|
||||
- [Install Guide](docs/INSTALL.md): instructions on how to install this system
|
||||
|
||||
- [How To Use](docs/USAGE.md): detailed information about using the gStore system
|
||||
|
||||
- [API Explanation](docs/API.md): guide you to develop applications based on our API
|
||||
|
||||
- [Project Structure](docs/STRUCT.md): show the whole structure and sequence of this project
|
||||
|
||||
- [Related Essays](docs/ESSAY.md): contain essays and publications related to gStore
|
||||
|
||||
- [Update Logs](docs/CHANGELOG.md): keep the logs of the system updates
|
||||
|
||||
- [Test Results](docs/TEST.md): present the test results of a series of experiments
|
||||
|
||||
- - -
|
||||
|
||||
## Other Business
|
||||
|
||||
We have written a series of short essays addressing recurring challenges in using gStore to realize applications, which are placed in [Recipe Book](docs/TIPS.md).
|
||||
|
||||
You are welcome to report any advice or errors in the GitHub Issues section of this repository if you do not need a timely reply. However, if you want us to deal with your reports urgently, please email your suggestions to <chenjiaqi93@163.com> and your bug reports to <zengli-bookug@pku.edu.cn>. A full list of our whole team is in [Mailing List](docs/MAIL.md).
|
||||
|
||||
There are some restrictions when you use the current gStore project; you can see them in [Limit Description](docs/LIMIT.md).
|
||||
|
||||
Sometimes you may find some strange phenomena (which are not necessarily errors), or something hard to understand or solve (for example, you do not know what to do next). In that case, do not hesitate to visit the [Frequently Asked Questions](docs/FAQ.md) page.
|
||||
|
||||
Graph database engines are a new area and we are still trying to go further. The things we plan to do next are in the [Future Plan](docs/PLAN.md) chapter, and we hope more and more people will support or even join us. You can support us in many ways:
|
||||
|
||||
- watch/star our project
|
||||
|
||||
- fork this repository and submit pull requests to us
|
||||
|
||||
- download and use this system, report bugs or suggestions
|
||||
|
||||
- ...
|
||||
|
||||
People who inspire us or contribute to this project will be listed in the [Thanks List](docs/THANK.md) chapter.
|
||||
|
||||
This whole document is divided into different pieces, and each of them is stored in a markdown file. You can see/download the combined markdown file in [help_markdown](docs/gStore_help.md), and for the html file, please go to [help_html](docs/gStore_help.html). What is more, we also provide the help file in pdf format, and you can visit it in [help_pdf](docs/latex/gStore_help.pdf).
|
||||
|
|
@ -1,31 +1,29 @@
|
|||
/*=============================================================================
|
||||
# Filename: Client.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2016-02-23 13:39
|
||||
# Description: implement functions in Client.h
|
||||
=============================================================================*/
|
||||
/*
|
||||
* Client.cpp
|
||||
*
|
||||
* Created on: 2014-10-19
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include"Client.h"
|
||||
|
||||
using namespace std;
|
||||
#include<iostream>
|
||||
|
||||
Client::Client()
|
||||
{
|
||||
this->ip = Socket::DEFAULT_SERVER_IP;
|
||||
this->port = Socket::DEFAULT_CONNECT_PORT;
|
||||
this->ip = Socket::DEFAULT_SERVER_IP;
|
||||
this->port = Socket::DEFAULT_CONNECT_PORT;
|
||||
}
|
||||
|
||||
Client::Client(string _ip, unsigned short _port)
|
||||
Client::Client(std::string _ip, unsigned short _port)
|
||||
{
|
||||
this->ip = _ip;
|
||||
this->port = _port;
|
||||
this->ip = _ip;
|
||||
this->port = _port;
|
||||
}
|
||||
|
||||
Client::Client(unsigned short _port)
|
||||
{
|
||||
this->ip = Socket::DEFAULT_SERVER_IP;
|
||||
this->port = _port;
|
||||
this->ip = Socket::DEFAULT_SERVER_IP;
|
||||
this->port = _port;
|
||||
}
|
||||
|
||||
Client::~Client()
|
||||
|
@ -34,295 +32,96 @@ Client::~Client()
|
|||
|
||||
bool Client::connectServer()
|
||||
{
|
||||
bool flag = this->socket.create();
|
||||
if (!flag)
|
||||
{
|
||||
cerr << "cannot create socket. @Client::connectServer" << endl;
|
||||
return false;
|
||||
}
|
||||
bool flag = this->socket.create();
|
||||
if (!flag)
|
||||
{
|
||||
std::cerr << "cannot create socket. @Client::connectServer" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
flag = this->socket.connect(this->ip, this->port);
|
||||
flag = this->socket.connect(this->ip, this->port);
|
||||
|
||||
if (!flag)
|
||||
{
|
||||
cerr << "cannot connect to server. @Client::connectServer" << endl;
|
||||
return false;
|
||||
}
|
||||
if (!flag)
|
||||
{
|
||||
std::cerr << "cannot connect to server. @Client::connectServer" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Client::disconnectServer()
|
||||
{
|
||||
bool flag = this->socket.close();
|
||||
bool flag = this->socket.close();
|
||||
|
||||
return flag;
|
||||
return flag;
|
||||
}
|
||||
|
||||
bool Client::send(const string& _msg)
|
||||
bool Client::send(const std::string& _msg)
|
||||
{
|
||||
bool flag = this->socket.send(_msg);
|
||||
bool flag = this->socket.send(_msg);
|
||||
|
||||
return flag;
|
||||
return flag;
|
||||
}
|
||||
|
||||
bool Client::receiveResponse(string& _recv_msg)
|
||||
bool Client::receiveResponse(std::string& _recv_msg)
|
||||
{
|
||||
bool flag = this->socket.recv(_recv_msg);
|
||||
bool flag = this->socket.recv(_recv_msg);
|
||||
|
||||
return flag;
|
||||
return flag;
|
||||
}
|
||||
|
||||
void Client::run()
|
||||
{
|
||||
string cmd;
|
||||
while (true)
|
||||
{
|
||||
std::string cmd;
|
||||
|
||||
#ifdef READLINE_ON
|
||||
char *buf, prompt[] = "gsql>";
|
||||
//printf("Type `help` for information of all commands\n");
|
||||
//printf("Type `help command_t` for detail of command_t\n");
|
||||
rl_bind_key('\t', rl_complete);
|
||||
//QUERY: should add ';'?
|
||||
while(true)
|
||||
{
|
||||
//BETTER:write in multi lines as in below comments
|
||||
buf = readline(prompt);
|
||||
if(buf == NULL)
|
||||
continue;
|
||||
else
|
||||
add_history(buf);
|
||||
if(strncmp(buf, "help", 4) == 0)
|
||||
{
|
||||
if(strcmp(buf, "help") == 0)
|
||||
{
|
||||
//print commands message
|
||||
printf("help - print commands message\n");
|
||||
printf("quit - quit the console normally\n");
|
||||
printf("import - build a database for a given dataset\n");
|
||||
printf("load - load an existen database\n");
|
||||
printf("unload - unload an existen database\n");
|
||||
printf("sparql - load query from the second argument\n");
|
||||
printf("show - show the current database's name\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
//TODO: help for a given command
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else if(strcmp(buf, "quit") == 0)
|
||||
break;
|
||||
else if(strncmp(buf, "import", 6) != 0 && strncmp(buf, "load", 4) != 0 && strncmp(buf, "unload", 6) != 0 && strncmp(buf, "sparql", 6) != 0 && strncmp(buf, "show", 4) != 0)
|
||||
{
|
||||
printf("unknown commands\n");
|
||||
continue;
|
||||
}
|
||||
while (true)
|
||||
{
|
||||
std::cout << "->";
|
||||
std::string line;
|
||||
std::getline(std::cin, line);
|
||||
int line_len = line.size();
|
||||
if (line_len >0 && line[line_len-1] == ';')
|
||||
{
|
||||
line.resize(line_len - 1);
|
||||
cmd += line;
|
||||
break;
|
||||
}
|
||||
cmd += line + "\n";
|
||||
}
|
||||
|
||||
string query_file;
|
||||
string query;
|
||||
FILE* fp = stdout; ///default to output on screen
|
||||
bool ifredirect = false;
|
||||
//BETTER:build a parser for this console
|
||||
//spaces/tabs can be before commands
|
||||
// std::cout << "input end" << std::endl;
|
||||
|
||||
if(strncmp(buf, "sparql", 6) == 0)
|
||||
{
|
||||
//NOTICE: if using query string, '>' is ok to exist!
|
||||
char* rp = buf;
|
||||
int pos = strlen(buf) - 1;
|
||||
while(pos > -1)
|
||||
{
|
||||
if(*(rp+pos) == '"')
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if(*(rp+pos) == '>')
|
||||
{
|
||||
ifredirect = true;
|
||||
break;
|
||||
}
|
||||
pos--;
|
||||
}
|
||||
rp += pos;
|
||||
//DEBUG:redirect sometimes not work for path query
|
||||
if (cmd == "exit")
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
char* p = buf + strlen(buf) - 1;
|
||||
if(ifredirect)
|
||||
{
|
||||
printf("redirected!\n");
|
||||
char* tp = p;
|
||||
while(*tp == ' ' || *tp == '\t')
|
||||
tp--;
|
||||
*(tp+1) = '\0';
|
||||
tp = rp + 1;
|
||||
while(*tp == ' ' || *tp == '\t')
|
||||
tp++;
|
||||
printf("redirect: %s\n", tp);
|
||||
fp = fopen(tp, "w"); //NOTICE:not judge here!
|
||||
p = rp - 1; //NOTICE: all separated with ' ' or '\t'
|
||||
}
|
||||
while(*p == ' ' || *p == '\t' || *p == '"') //set the end of path
|
||||
p--;
|
||||
*(p+1) = '\0';
|
||||
p = buf + 6;
|
||||
while(*p == ' ' || *p == '\t') //acquire the start of path
|
||||
p++;
|
||||
bool flag = this->connectServer();
|
||||
if (!flag)
|
||||
{
|
||||
std::cerr << "connect server error. @Client::run" << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
bool isPath = true;
|
||||
if(*p == '"')
|
||||
{
|
||||
isPath = false;
|
||||
p++;
|
||||
}
|
||||
flag = this->send(cmd);
|
||||
if (!flag)
|
||||
{
|
||||
std::cerr << "sent message error. @Client::run" << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
char* q;
|
||||
if(isPath)
|
||||
{
|
||||
//TODO: support the soft links(or hard links)
|
||||
//there are also readlink and getcwd functions for help
|
||||
//http://linux.die.net/man/2/readlink
|
||||
//NOTICE:getcwd and realpath cannot acquire the real path of file
|
||||
//in the same directory and the program is executing when the
|
||||
//system starts running
|
||||
//NOTICE: use realpath(p, NULL) is ok, but need to free the memory
|
||||
q = realpath(p, NULL); //QUERY:still not work for soft links
|
||||
#ifdef DEBUG_PRECISE
|
||||
printf("%s\n", p);
|
||||
#endif
|
||||
if(q == NULL)
|
||||
{
|
||||
printf("invalid path!\n");
|
||||
free(q);
|
||||
free(buf);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
printf("%s\n", q);
|
||||
//query = getQueryFromFile(p);
|
||||
query = Util::getQueryFromFile(q);
|
||||
}
|
||||
else
|
||||
{
|
||||
//BETTER:check query in ""
|
||||
query = string(p);
|
||||
}
|
||||
std::string recv_msg;
|
||||
flag = this->receiveResponse(recv_msg);
|
||||
std::cout << recv_msg << std::endl;
|
||||
|
||||
if(query.empty())
|
||||
{
|
||||
if(isPath)
|
||||
free(q);
|
||||
//free(resolved_path);
|
||||
free(buf);
|
||||
if(ifredirect)
|
||||
fclose(fp);
|
||||
continue;
|
||||
}
|
||||
|
||||
printf("query is:\n%s\n\n", query.c_str());
|
||||
if(isPath)
|
||||
free(q);
|
||||
cmd = string("query ") + query;
|
||||
}
|
||||
else if(strncmp(buf, "show", 4) == 0)
|
||||
{
|
||||
cmd = string("show databases");
|
||||
}
|
||||
else
|
||||
{
|
||||
cmd = string(buf);
|
||||
}
|
||||
//DEBUG!
|
||||
printf("%s\n", cmd.c_str());
|
||||
|
||||
free(buf);
|
||||
//free(resolved_path);
|
||||
#ifdef DEBUG_PRECISE
|
||||
printf("after buf freed!\n");
|
||||
#endif
|
||||
|
||||
//interacte with server
|
||||
bool flag = this->connectServer();
|
||||
if(!flag)
|
||||
{
|
||||
cerr << "connect server error. @Client::run" << endl;
|
||||
if(ifredirect)
|
||||
fclose(fp);
|
||||
continue;
|
||||
}
|
||||
|
||||
flag = this->send(cmd);
|
||||
if(!flag)
|
||||
{
|
||||
cerr << "sent message error. @Client::run" << endl;
|
||||
if(ifredirect)
|
||||
fclose(fp);
|
||||
continue;
|
||||
}
|
||||
|
||||
string recv_msg;
|
||||
flag = this->receiveResponse(recv_msg);
|
||||
fprintf(fp, "%s\n", recv_msg.c_str());
|
||||
|
||||
this->disconnectServer();
|
||||
if(!flag)
|
||||
{
|
||||
cerr << "disconnect server error. @Client::run" << endl;
|
||||
if(ifredirect)
|
||||
fclose(fp);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
//#else
|
||||
//while (true)
|
||||
//{
|
||||
// while(true)
|
||||
// {
|
||||
// //BETTER:readline and parser
|
||||
// cout << "->";
|
||||
// string line;
|
||||
// getline(cin, line);
|
||||
// int line_len = line.size();
|
||||
// if (line_len >0 && line[line_len-1] == ';')
|
||||
// {
|
||||
// line.resize(line_len - 1);
|
||||
// cmd += line;
|
||||
// break;
|
||||
// }
|
||||
// cmd += line + "\n";
|
||||
// }
|
||||
//
|
||||
// //cout << "input end" << endl;
|
||||
//
|
||||
// if(cmd == "quit")
|
||||
// {
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// bool flag = this->connectServer();
|
||||
// if (!flag)
|
||||
// {
|
||||
// cerr << "connect server error. @Client::run" << endl;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// flag = this->send(cmd);
|
||||
// if (!flag)
|
||||
// {
|
||||
// cerr << "sent message error. @Client::run" << endl;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// string recv_msg;
|
||||
// flag = this->receiveResponse(recv_msg);
|
||||
// cout << recv_msg << endl;
|
||||
//
|
||||
// this->disconnectServer();
|
||||
// if (!flag)
|
||||
// {
|
||||
// cerr << "disconnect server error. @Client::run" << endl;
|
||||
// continue;
|
||||
// }
|
||||
//}
|
||||
#endif
|
||||
this->disconnectServer();
|
||||
if (!flag)
|
||||
{
|
||||
std::cerr << "disconnect server error. @Client::run" << std::endl;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,16 +1,15 @@
|
|||
/*=============================================================================
|
||||
# Filename: Client.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2016-02-23 13:40
|
||||
# Description: originally written by hanshuo, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* Client.h
|
||||
*
|
||||
* Created on: 2014-10-18
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#ifndef _SERVER_CLIENT_H
|
||||
#define _SERVER_CLIENT_H
|
||||
#ifndef CLIENT_H_
|
||||
#define CLIENT_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "Socket.h"
|
||||
#include"Socket.h"
|
||||
#include<string>
|
||||
|
||||
class Client
|
||||
{
|
||||
|
@ -34,5 +33,6 @@ private:
|
|||
int random_key;
|
||||
};
|
||||
|
||||
#endif // _SERVER_CLIENT_H
|
||||
|
||||
|
||||
#endif /* CLIENT_H_ */
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "Operation.h"
|
||||
#include"Operation.h"
|
||||
#include<iostream>
|
||||
|
||||
Operation::Operation()
|
||||
{
|
||||
|
@ -33,13 +34,13 @@ Operation::~Operation()
|
|||
|
||||
Bstr Operation::encrypt()
|
||||
{
|
||||
//TODO
|
||||
// to be implemented...
|
||||
return Bstr(NULL, 0);
|
||||
}
|
||||
|
||||
Bstr Operation::deencrypt()
|
||||
{
|
||||
//TODO
|
||||
// to be implemented...
|
||||
return Bstr(NULL, 0);
|
||||
}
|
||||
|
||||
|
@ -50,7 +51,7 @@ CommandType Operation::getCommand()
|
|||
|
||||
std::string Operation::getParameter(int _idx)
|
||||
{
|
||||
if ((unsigned)_idx < this->parameters.size())
|
||||
if (_idx < this->parameters.size())
|
||||
{
|
||||
return this->parameters[_idx];
|
||||
}
|
||||
|
|
|
@ -8,12 +8,11 @@
|
|||
#ifndef OPERATION_H_
|
||||
#define OPERATION_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include"../Util/Bstr.h"
|
||||
#include<string>
|
||||
#include<vector>
|
||||
#include"../Bstr/Bstr.h"
|
||||
|
||||
//NOTICE:CMD_DROP is used to remove the database, and CMD_CREATE is not useful because
|
||||
//we always need to import a dataset to create a gstore db
|
||||
enum CommandType {CMD_CONNECT, CMD_EXIT, CMD_LOAD, CMD_UNLOAD, CMD_CREATE, CMD_DROP,
|
||||
enum CommandType {CMD_CONNECT, CMD_EXIT, CMD_LOAD, CMD_UNLOAD, CMD_CREATE_DB, CMD_DELETE_DB,
|
||||
CMD_IMPORT, CMD_QUERY, CMD_SHOW, CMD_INSERT, CMD_OTHER}; // extend the operation command type here.
|
||||
|
||||
class Operation
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
/*=============================================================================
|
||||
# Filename: Server.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-25 13:47
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
/*
|
||||
* Server.cpp
|
||||
*
|
||||
* Created on: 2014-10-14
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "Server.h"
|
||||
|
||||
using namespace std;
|
||||
#include"Server.h"
|
||||
#include"../Database/Database.h"
|
||||
#include<iostream>
|
||||
#include<sstream>
|
||||
|
||||
Server::Server()
|
||||
{
|
||||
|
@ -31,8 +31,7 @@ Server::~Server()
|
|||
delete this->database;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::createConnection()
|
||||
bool Server::createConnection()
|
||||
{
|
||||
bool flag;
|
||||
|
||||
|
@ -60,24 +59,21 @@ Server::createConnection()
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::deleteConnection()
|
||||
bool Server::deleteConnection()
|
||||
{
|
||||
bool flag = this->socket.close();
|
||||
|
||||
return flag;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::response(Socket _socket, std::string& _msg)
|
||||
bool Server::response(Socket _socket, std::string& _msg)
|
||||
{
|
||||
bool flag = _socket.send(_msg);
|
||||
|
||||
return flag;
|
||||
}
|
||||
|
||||
void
|
||||
Server::listen()
|
||||
void Server::listen()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
|
@ -134,12 +130,6 @@ Server::listen()
|
|||
this->importRDF(db_name, "", rdf_path, ret_msg);
|
||||
break;
|
||||
}
|
||||
case CMD_DROP:
|
||||
{
|
||||
string db_name = operation.getParameter(0);
|
||||
this->dropDatabase(db_name, "", ret_msg);
|
||||
break;
|
||||
}
|
||||
case CMD_QUERY:
|
||||
{
|
||||
string query = operation.getParameter(0);
|
||||
|
@ -149,9 +139,9 @@ Server::listen()
|
|||
case CMD_SHOW:
|
||||
{
|
||||
string para = operation.getParameter(0);
|
||||
if (para == "databases" || para == "all")
|
||||
if (para == "databases")
|
||||
{
|
||||
this->showDatabases(para, "", ret_msg);
|
||||
this->showDatabases("", ret_msg);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -175,11 +165,10 @@ Server::listen()
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Server::parser(std::string _raw_cmd, Operation& _ret_oprt)
|
||||
bool Server::parser(std::string _raw_cmd, Operation& _ret_oprt)
|
||||
{
|
||||
int cmd_start_pos = 0;
|
||||
int raw_len = (int)_raw_cmd.size();
|
||||
int raw_len = _raw_cmd.size();
|
||||
|
||||
for (int i=0;i<raw_len;i++)
|
||||
if (_raw_cmd[i] == '\n')
|
||||
|
@ -293,33 +282,20 @@ Server::parser(std::string _raw_cmd, Operation& _ret_oprt)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::createDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
|
||||
bool Server::createDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
|
||||
{
|
||||
// to be implemented...
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::dropDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
|
||||
bool Server::deleteDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
|
||||
{
|
||||
//TODO
|
||||
if (this->database == NULL || this->database->getName() != _db_name)
|
||||
{
|
||||
_ret_msg = "database:" + _db_name + " is not loaded.";
|
||||
return false;
|
||||
}
|
||||
|
||||
delete this->database;
|
||||
this->database = NULL;
|
||||
_ret_msg = "unload database done.";
|
||||
|
||||
return true;
|
||||
// to be implemented...
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::loadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
|
||||
bool Server::loadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
|
||||
{
|
||||
this->database = new Database(_db_name);
|
||||
|
||||
|
@ -332,15 +308,12 @@ Server::loadDatabase(std::string _db_name, std::string _ac_name, std::string& _r
|
|||
else
|
||||
{
|
||||
_ret_msg = "load database failed.";
|
||||
delete this->database;
|
||||
this->database = NULL;
|
||||
}
|
||||
|
||||
return flag;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::unloadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
|
||||
bool Server::unloadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg)
|
||||
{
|
||||
if (this->database == NULL || this->database->getName() != _db_name)
|
||||
{
|
||||
|
@ -355,12 +328,11 @@ Server::unloadDatabase(std::string _db_name, std::string _ac_name, std::string&
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
|
||||
bool Server::importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
|
||||
{
|
||||
//if (this->database != NULL && this->database->getName() != _db_name)
|
||||
if (this->database != NULL)
|
||||
if (this->database != NULL && this->database->getName() != _db_name)
|
||||
{
|
||||
this->database->unload();
|
||||
delete this->database;
|
||||
}
|
||||
|
||||
|
@ -379,8 +351,7 @@ Server::importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_p
|
|||
return flag;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
|
||||
bool Server::insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
|
||||
{
|
||||
if (this->database != NULL)
|
||||
{
|
||||
|
@ -403,10 +374,9 @@ Server::insertTriple(std::string _db_name, std::string _ac_name, std::string _rd
|
|||
return flag;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::query(const std::string _query, std::string& _ret_msg)
|
||||
bool Server::query(const std::string _query, std::string& _ret_msg)
|
||||
{
|
||||
if(this->database == NULL)
|
||||
if (this->database == NULL)
|
||||
{
|
||||
_ret_msg = "database has not been loaded.";
|
||||
return false;
|
||||
|
@ -414,10 +384,8 @@ Server::query(const std::string _query, std::string& _ret_msg)
|
|||
|
||||
ResultSet res_set;
|
||||
bool flag = this->database->query(_query, res_set);
|
||||
if(flag)
|
||||
if (flag)
|
||||
{
|
||||
//_ret_msg = "results are too large!";
|
||||
//BETTER: divide and transfer if too large to be placed in memory, using Stream
|
||||
_ret_msg = res_set.to_str();
|
||||
}
|
||||
else
|
||||
|
@ -428,15 +396,9 @@ Server::query(const std::string _query, std::string& _ret_msg)
|
|||
return flag;
|
||||
}
|
||||
|
||||
bool
|
||||
Server::showDatabases(string _para, string _ac_name, string& _ret_msg)
|
||||
bool Server::showDatabases(std::string _ac_name, std::string& _ret_msg)
|
||||
{
|
||||
if(_para == "all")
|
||||
{
|
||||
_ret_msg = Util::getItemsFromDir(Util::db_home);
|
||||
return true;
|
||||
}
|
||||
if(this->database != NULL)
|
||||
if (this->database != NULL)
|
||||
{
|
||||
_ret_msg = "\n" + this->database->getName() + "\n";
|
||||
}
|
||||
|
|
|
@ -1,18 +1,16 @@
|
|||
/*=============================================================================
|
||||
# Filename: Server.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-25 13:49
|
||||
# Description: originally written by hanshuo, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* Server.h
|
||||
*
|
||||
* Created on: 2014-10-14
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#ifndef _SERVER_SERVER_H
|
||||
#define _SERVER_SERVER_H
|
||||
#ifndef SERVER_H_
|
||||
#define SERVER_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
#include "Socket.h"
|
||||
#include "Operation.h"
|
||||
#include"Socket.h"
|
||||
#include"Operation.h"
|
||||
#include"../Database/Database.h"
|
||||
|
||||
/*
|
||||
* the Server is only at a original and simple version.
|
||||
|
@ -43,10 +41,10 @@ public:
|
|||
bool response(Socket _socket, std::string& _msg);
|
||||
bool parser(std::string _raw_cmd, Operation& _ret_oprt);
|
||||
bool createDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
|
||||
bool dropDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
|
||||
bool deleteDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
|
||||
bool loadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
|
||||
bool unloadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
|
||||
bool showDatabases(std::string _para, std::string _ac_name, std::string& _ret_msg);
|
||||
bool showDatabases(std::string _ac_name, std::string& _ret_msg);
|
||||
bool importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg);
|
||||
bool insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg);
|
||||
bool query(const std::string _query, std::string& _ret_msg);
|
||||
|
@ -60,5 +58,6 @@ private:
|
|||
Database* database;
|
||||
};
|
||||
|
||||
#endif // _SERVER_SERVER_H
|
||||
|
||||
|
||||
#endif /* SERVER_H_ */
|
||||
|
|
|
@ -4,8 +4,15 @@
|
|||
* Created on: 2014-10-14
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "Socket.h"
|
||||
#include"Socket.h"
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/time.h>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
const std::string Socket::DEFAULT_SERVER_IP = "127.0.0.1";
|
||||
|
||||
|
@ -236,4 +243,3 @@ bool Socket::isValid()const
|
|||
{
|
||||
return (this->sock != -1);
|
||||
}
|
||||
|
||||
|
|
|
@ -5,10 +5,16 @@
|
|||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#ifndef _SERVER_SOCKET_H
|
||||
#define _SERVER_SOCKET_H
|
||||
#ifndef SOCKET_H_
|
||||
#define SOCKET_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include<sys/types.h>
|
||||
#include<sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netdb.h>
|
||||
#include <unistd.h>
|
||||
#include <string>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
class Socket
|
||||
{
|
||||
|
@ -38,5 +44,4 @@ private:
|
|||
sockaddr_in addr;
|
||||
};
|
||||
|
||||
#endif // _SERVER_SOCKET_H
|
||||
|
||||
#endif /* SOCKET_H_ */
|
||||
|
|
|
@ -1,15 +1,14 @@
|
|||
/*=============================================================================
|
||||
# Filename: SigEntry.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-04-11 13:49
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
/*
|
||||
* SIGEntry.cpp
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
* Modified on: 2014-6-29
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "SigEntry.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
SigEntry::SigEntry()
|
||||
{
|
||||
(this->sig).entityBitSet.reset();
|
||||
|
@ -34,26 +33,22 @@ SigEntry::SigEntry(const EntitySig& _sig, int _entity_id)
|
|||
this->entity_id = _entity_id;
|
||||
}
|
||||
|
||||
const EntitySig&
|
||||
SigEntry::getEntitySig() const
|
||||
const EntitySig& SigEntry::getEntitySig()const
|
||||
{
|
||||
return this->sig;
|
||||
}
|
||||
|
||||
int
|
||||
SigEntry::getEntityId() const
|
||||
int SigEntry::getEntityId()const
|
||||
{
|
||||
return this->entity_id;
|
||||
}
|
||||
|
||||
int
|
||||
SigEntry::getSigCount() const
|
||||
int SigEntry::getSigCount()const
|
||||
{
|
||||
return (int)this->sig.entityBitSet.count();
|
||||
}
|
||||
|
||||
SigEntry&
|
||||
SigEntry::operator=(const SigEntry _sig_entry)
|
||||
SigEntry& SigEntry::operator=(const SigEntry _sig_entry)
|
||||
{
|
||||
this->entity_id = _sig_entry.getEntityId();
|
||||
this->sig.entityBitSet.reset();
|
||||
|
@ -61,56 +56,34 @@ SigEntry::operator=(const SigEntry _sig_entry)
|
|||
return *this;
|
||||
}
|
||||
|
||||
SigEntry&
|
||||
SigEntry::operator|=(const SigEntry _sig_entry)
|
||||
SigEntry& SigEntry::operator|=(const SigEntry _sig_entry)
|
||||
{
|
||||
const EntitySig& sig = (_sig_entry.getEntitySig());
|
||||
(this->sig).entityBitSet |= sig.entityBitSet;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool
|
||||
SigEntry::cover(const SigEntry& _sig_entry) const
|
||||
bool SigEntry::cover(const SigEntry& _sig_entry)const
|
||||
{
|
||||
//EQUAL:this & that == that
|
||||
return (this->sig.entityBitSet | _sig_entry.getEntitySig().entityBitSet)
|
||||
== (this->sig.entityBitSet);
|
||||
}
|
||||
|
||||
bool
|
||||
SigEntry::cover(const EntitySig& _sig) const
|
||||
bool SigEntry::cover(const EntitySig& _sig)const
|
||||
{
|
||||
return (this->sig.entityBitSet | _sig.entityBitSet) == (this->sig.entityBitSet);
|
||||
}
|
||||
|
||||
int
|
||||
SigEntry::xOR(const SigEntry& _sig_entry) const
|
||||
int SigEntry::xEpsilen(const SigEntry& _sig_entry)const
|
||||
{
|
||||
EntityBitSet entityBitSet;
|
||||
entityBitSet.reset();
|
||||
entityBitSet |= this->sig.entityBitSet;
|
||||
//NOTICE: compute the xor distince now
|
||||
//a^b = (a & ~b) | (~a & b)
|
||||
EntityBitSet another;
|
||||
another.reset();
|
||||
another |= _sig_entry.getEntitySig().entityBitSet;
|
||||
return ((entityBitSet & another.flip()) | (entityBitSet.flip() & another)).count();
|
||||
entityBitSet.flip();
|
||||
return (entityBitSet & _sig_entry.getEntitySig().entityBitSet).count();
|
||||
}
|
||||
|
||||
//how many 1s in _sig_entry are contained ->flip-> not contained these 1s, as distince
|
||||
//0s in _sig_entry is nonsense
|
||||
int
|
||||
SigEntry::xEpsilen(const SigEntry& _sig_entry) const
|
||||
{
|
||||
EntityBitSet entityBitSet;
|
||||
entityBitSet.reset();
|
||||
entityBitSet |= this->sig.entityBitSet;
|
||||
entityBitSet.flip();
|
||||
return (entityBitSet & _sig_entry.getEntitySig().entityBitSet).count();
|
||||
}
|
||||
|
||||
string
|
||||
SigEntry::to_str() const
|
||||
std::string SigEntry::to_str()const
|
||||
{
|
||||
std::stringstream _ss;
|
||||
|
||||
|
@ -120,3 +93,4 @@ SigEntry::to_str() const
|
|||
return _ss.str();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,39 +1,38 @@
|
|||
/*=============================================================================
|
||||
# Filename: SigEntry.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-04-11 13:48
|
||||
# Description: written by liyouhuan and hanshuo
|
||||
=============================================================================*/
|
||||
/*
|
||||
* SigEntry.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
* Modified on: 2014-6-29
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#ifndef _SIGNATURE_SIGENTRY_H
|
||||
#define _SIGNATURE_SIGENTRY_H
|
||||
#ifndef SIGENTRY_H_
|
||||
#define SIGENTRY_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include<iostream>
|
||||
#include "Signature.h"
|
||||
#include<sstream>
|
||||
using namespace std;
|
||||
|
||||
class SigEntry
|
||||
{
|
||||
class SigEntry{
|
||||
private:
|
||||
EntitySig sig;
|
||||
//-1 if not in leaf node
|
||||
int entity_id;
|
||||
public:
|
||||
SigEntry();
|
||||
SigEntry(int _entity_id, EntityBitSet& _bitset);
|
||||
SigEntry(const SigEntry& _sig_entry);
|
||||
SigEntry(const EntitySig& sig, int _entity_id);
|
||||
const EntitySig& getEntitySig() const;
|
||||
int getEntityId() const;
|
||||
int getSigCount() const;
|
||||
const EntitySig& getEntitySig()const;
|
||||
int getEntityId()const;
|
||||
int getSigCount()const;
|
||||
SigEntry& operator=(const SigEntry _sig_entry);
|
||||
SigEntry& operator|=(const SigEntry _sig_entry);
|
||||
bool cover(const SigEntry& _sig_entry) const;
|
||||
bool cover(const EntitySig& _sig) const;
|
||||
int xEpsilen(const SigEntry& _sig_entry) const;
|
||||
int xOR(const SigEntry& _sig_entry) const;
|
||||
std::string to_str() const;
|
||||
bool cover(const SigEntry& _sig_entry)const;
|
||||
bool cover(const EntitySig& _sig)const;
|
||||
int xEpsilen(const SigEntry& _sig_entry)const;
|
||||
std::string to_str()const;
|
||||
};
|
||||
|
||||
#endif // _SIGNATURE_SIGENTRY_H
|
||||
|
||||
#endif /* SIGENTRY_H_ */
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
/*=============================================================================
|
||||
# Filename: Signature.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-04-11 13:18
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
/*
|
||||
* Signature.cpp
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
* Implemented on: 2014-6-29
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "Signature.h"
|
||||
#include "../Query/BasicQuery.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
std::string
|
||||
Signature::BitSet2str(const EntityBitSet& _bitset)
|
||||
std::string Signature::BitSet2str(const EntityBitSet& _bitset)
|
||||
{
|
||||
std::stringstream _ss;
|
||||
bool any = false;
|
||||
for(unsigned i = 0; i < _bitset.size(); i ++)
|
||||
for(int i = 0; i < _bitset.size(); i ++)
|
||||
{
|
||||
if(_bitset.test(i))
|
||||
{
|
||||
|
@ -31,52 +31,50 @@ Signature::BitSet2str(const EntityBitSet& _bitset)
|
|||
return _ss.str();
|
||||
}
|
||||
|
||||
void
|
||||
Signature::encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type)
|
||||
/* for Signature */
|
||||
void Signature::encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type)
|
||||
{
|
||||
if (Signature::PREDICATE_ENCODE_METHOD == 0)
|
||||
{
|
||||
//WARN:change if need to use again, because the encoding method has changed now!
|
||||
int pos = ( (_pre_id+10) % Signature::EDGE_SIG_LENGTH ) + Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
int seed_preid = _pre_id;
|
||||
|
||||
if(_type == Util::EDGE_OUT)
|
||||
if(_type == BasicQuery::EDGE_OUT)
|
||||
{
|
||||
seed_num += Signature::EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
seed_preid += 101;
|
||||
}
|
||||
|
||||
//int primeSize = 5;
|
||||
//int prime1[]={5003,5009,5011,5021,5023};
|
||||
//int prime2[]={49943,49957,49991,49993,49999};
|
||||
/*
|
||||
int primeSize = 5;
|
||||
int prime1[]={5003,5009,5011,5021,5023};
|
||||
int prime2[]={49943,49957,49991,49993,49999};
|
||||
*/
|
||||
|
||||
//NOTICE: more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
|
||||
// how to hash the predicate id to signature(bitset) better?
|
||||
// more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree.
|
||||
// when the data set is big enough, cutting down the size of candidate list should come up to our primary consideration.
|
||||
// in this case we should not encode too many ones in entities' signature.
|
||||
// also, when the data set is small, hash conflicts can hardly happen.
|
||||
// therefore, I think using 2 primes(set up two ones in bitset) is enough.
|
||||
// --by hanshuo.
|
||||
int primeSize = 2;
|
||||
int prime1[] = {5003, 5011};
|
||||
int prime2[] = {49957, 49993};
|
||||
int prime1[]={5003,5011};
|
||||
int prime2[]={49957,49993};
|
||||
|
||||
//for(int i = 0; i < primeSize; i++)
|
||||
//{
|
||||
//int seed = _pre_id * prime1[i] % prime2[i];
|
||||
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
//_entity_bs.set(pos);
|
||||
//}
|
||||
int seed = _pre_id * 5003 % 49957;
|
||||
int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
_entity_bs.set(pos);
|
||||
for (int i=0;i<primeSize;i++)
|
||||
{
|
||||
int seed = seed_preid * prime1[i] % prime2[i];
|
||||
int pos = (seed % Signature::EDGE_SIG_LENGTH ) + Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
|
||||
void Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
|
||||
{
|
||||
if (Signature::PREDICATE_ENCODE_METHOD == 0)
|
||||
{
|
||||
|
@ -85,81 +83,233 @@ Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs)
|
|||
}
|
||||
else
|
||||
{
|
||||
int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
//int primeSize = 5;
|
||||
//int prime1[]={5003,5009,5011,5021,5023};
|
||||
//int prime2[]={49943,49957,49991,49993,49999};
|
||||
/*
|
||||
int primeSize = 5;
|
||||
int prime1[]={5003,5009,5011,5021,5023};
|
||||
int prime2[]={49943,49957,49991,49993,49999};
|
||||
*/
|
||||
|
||||
int primeSize = 2;
|
||||
int prime1[] = {5003,5011};
|
||||
int prime2[] = {49957,49993};
|
||||
int prime1[]={5003,5011};
|
||||
int prime2[]={49957,49993};
|
||||
|
||||
//for (int i = 0; i < primeSize; i++)
|
||||
//{
|
||||
//int seed = _pre_id * prime1[i] % prime2[i];
|
||||
//int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
//_edge_bs.set(pos);
|
||||
//}
|
||||
int seed = _pre_id * 5003 % 49957;
|
||||
int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num;
|
||||
_edge_bs.set(pos);
|
||||
for (int i=0;i<primeSize;i++)
|
||||
{
|
||||
int seed = _pre_id * prime1[i] % prime2[i];
|
||||
int pos = seed % Signature::EDGE_SIG_LENGTH;
|
||||
_edge_bs.set(pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//NOTICE: no need to encode itself because only variable in query need to be filtered!
|
||||
//So only consider all neighbors!
|
||||
void
|
||||
Signature::encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs)
|
||||
void Signature::encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs) //_str is subject or object or literal
|
||||
{
|
||||
//_str is subject or object or literal
|
||||
if(strlen(_str) >0 && _str[0] == '?')
|
||||
return;
|
||||
|
||||
int length = (int)strlen(_str);
|
||||
unsigned int hashKey = 0;
|
||||
unsigned int pos = 0;
|
||||
char *str2 = (char*)calloc(length + 1, sizeof(char));
|
||||
char *str2 = new char[length+1];
|
||||
strcpy(str2, _str);
|
||||
char *str = str2;
|
||||
|
||||
unsigned base = Signature::STR_SIG_BASE * (Signature::HASH_NUM - 1);
|
||||
for(int i = Signature::HASH_NUM - 1; i >= 0; --i)
|
||||
{
|
||||
HashFunction hf = Util::hash[i];
|
||||
if(hf == NULL)
|
||||
break;
|
||||
hashKey = hf(str);
|
||||
str=str2;
|
||||
pos = base + hashKey % Signature::STR_SIG_BASE;
|
||||
base -= Signature::STR_SIG_BASE;
|
||||
if(_str[0] == '"')
|
||||
{
|
||||
pos += Signature::STR_SIG_LENGTH2;
|
||||
}
|
||||
else if(_str[0] != '<')
|
||||
{
|
||||
#ifdef DEBUG_VSTREE
|
||||
cerr << "error in encodeStr2Entity(): neighbor is neither a literal or entity!" << endl;
|
||||
#endif
|
||||
}
|
||||
_entity_bs.set(pos);
|
||||
}
|
||||
//BETTER: use multiple threads for different hash functions
|
||||
// the same consideration as encodePredicate2Entity.
|
||||
// I think we should not set too many ones in entities' signature.
|
||||
hashKey = Signature::simpleHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
str=str2;
|
||||
hashKey = Signature::RSHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
str=str2;
|
||||
hashKey = Signature::JSHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
str=str2;
|
||||
hashKey = Signature::PJWHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
/*
|
||||
str=str2;
|
||||
hashKey = Signature::ELFHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
str=str2;
|
||||
hashKey = Signature::SDBMHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
hashKey = Signature::DJBHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
str=str2;
|
||||
hashKey = Signature::APHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
str=str2;
|
||||
hashKey = Signature::BKDRHash(str);
|
||||
pos = hashKey % Signature::STR_SIG_LENGTH;
|
||||
_entity_bs.set(pos);
|
||||
*/
|
||||
|
||||
#ifdef DEBUG_VSTREE
|
||||
//std::stringstream _ss;
|
||||
//_ss << "encodeStr2Entity:" << str2 << endl;
|
||||
//Util::logging(_ss.str());
|
||||
#endif
|
||||
free(str2);
|
||||
/*
|
||||
//debug
|
||||
{
|
||||
std::stringstream _ss;
|
||||
_ss << "encodeStr2Entity:" << str2 << endl;
|
||||
Database::log(_ss.str());
|
||||
}
|
||||
*/
|
||||
|
||||
delete []str2;
|
||||
}
|
||||
|
||||
void
|
||||
Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs)
|
||||
void Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs)
|
||||
{
|
||||
//TODO
|
||||
//to be implement
|
||||
}
|
||||
|
||||
// Placeholder for a general string hash. It is not implemented yet, so every
// input currently yields 0.
unsigned int Signature::hash(const char* _str)
{
	(void)_str; // unused until a real implementation exists
	return 0u;
}
|
||||
|
||||
/* some string hash functions */
|
||||
// BKDR string hash: multiply-and-add over every character of the
// NUL-terminated string, using 131 as the multiplier.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::BKDRHash(const char *_str)
{
	const unsigned int multiplier = 131; // 31 131 1313 13131 131313 etc..
	unsigned int acc = 0;

	for (const char *cur = _str; *cur != '\0'; ++cur)
	{
		acc = acc * multiplier + *cur;
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
// Simple polynomial hash (multiplier 31) over the bytes of a NUL-terminated
// string; bytes are read as unsigned char.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::simpleHash(const char *_str)
{
	unsigned int acc = 0;
	const unsigned char *cur = (const unsigned char *)_str;

	while (*cur != 0)
	{
		acc = acc * 31 + *cur;
		++cur;
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
// RS string hash: multiply-and-add where the multiplier itself is
// re-multiplied by a constant after every character.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::RSHash(const char *_str)
{
	const unsigned int step = 378551;
	unsigned int factor = 63689;
	unsigned int acc = 0;

	for (const char *cur = _str; *cur != '\0'; ++cur)
	{
		acc = acc * factor + *cur;
		factor *= step;
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
// JS string hash: XORs the accumulator with a mix of its own shifts and the
// current character, starting from the seed 1315423911.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::JSHash(const char *_str)
{
	unsigned int acc = 1315423911;

	for (const char *cur = _str; *cur != '\0'; ++cur)
	{
		acc ^= (acc << 5) + *cur + (acc >> 2);
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
// PJW string hash: shifts each character into the accumulator and, whenever
// the top eighth of the bits becomes non-zero, folds those bits back into
// the lower part and clears them.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::PJWHash(const char *_str)
{
	const unsigned int total_bits = (unsigned int)(sizeof(unsigned int) * 8);
	const unsigned int fold_shift = (unsigned int)((total_bits * 3) / 4);
	const unsigned int step_shift = (unsigned int)(total_bits / 8);
	// mask selecting the highest `step_shift` bits of an unsigned int
	const unsigned int top_mask = (unsigned int)(0xFFFFFFFF) << (total_bits - step_shift);

	unsigned int acc = 0;
	for (const char *cur = _str; *cur != '\0'; ++cur)
	{
		acc = (acc << step_shift) + *cur;
		const unsigned int overflow = acc & top_mask;
		if (overflow != 0)
		{
			acc = (acc ^ (overflow >> fold_shift)) & (~top_mask);
		}
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
// ELF string hash: shifts each character in by four bits; when the top
// nibble becomes non-zero it is folded back (>> 24) and then cleared.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::ELFHash(const char *_str)
{
	unsigned int acc = 0;

	for (const char *cur = _str; *cur != '\0'; ++cur)
	{
		acc = (acc << 4) + *cur;
		const unsigned int top = acc & 0xF0000000u;
		if (top != 0)
		{
			acc ^= (top >> 24);
			acc &= ~top;
		}
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
// SDBM string hash: acc = c + (acc << 6) + (acc << 16) - acc for each
// character c of the NUL-terminated string.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::SDBMHash(const char *_str)
{
	unsigned int acc = 0;

	for (const char *cur = _str; *cur != '\0'; ++cur)
	{
		acc = *cur + (acc << 6) + (acc << 16) - acc;
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
// DJB string hash: starts from 5381 and adds (acc << 5) plus the current
// character to the accumulator at every step.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::DJBHash(const char *_str)
{
	unsigned int acc = 5381;

	for (const char *cur = _str; *cur != '\0'; ++cur)
	{
		acc += (acc << 5) + *cur;
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
// AP string hash: alternates between two mixing formulas, one for characters
// at even positions and one for characters at odd positions.
// Returns the accumulator with its top bit cleared (31-bit result).
unsigned int Signature::APHash(const char *_str)
{
	unsigned int acc = 0;
	bool even_position = true; // position 0 counts as even

	for (const char *cur = _str; *cur != '\0'; ++cur)
	{
		if (even_position)
		{
			acc ^= ((acc << 7) ^ *cur ^ (acc >> 3));
		}
		else
		{
			acc ^= (~((acc << 11) ^ *cur ^ (acc >> 5)));
		}
		even_position = !even_position;
	}

	return acc & 0x7FFFFFFF;
}
|
||||
|
||||
/* for ENTITYsig */
|
||||
EntitySig::EntitySig()
|
||||
{
|
||||
this->entityBitSet.reset();
|
||||
|
@ -183,50 +333,44 @@ EntitySig::EntitySig(const EntityBitSet& _bitset)
|
|||
this->entityBitSet |= _bitset;
|
||||
}
|
||||
|
||||
EntitySig&
|
||||
EntitySig::operator|=(const EntitySig& _sig)
|
||||
EntitySig& EntitySig::operator|=(const EntitySig& _sig)
|
||||
{
|
||||
this->entityBitSet |= _sig.entityBitSet;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool
|
||||
EntitySig::operator==(const EntitySig& _sig)const
|
||||
bool EntitySig::operator==(const EntitySig& _sig)const
|
||||
{
|
||||
return (this->entityBitSet == _sig.entityBitSet);
|
||||
}
|
||||
|
||||
bool
|
||||
EntitySig::operator!=(const EntitySig& _sig)const
|
||||
bool EntitySig::operator!=(const EntitySig& _sig)const
|
||||
{
|
||||
return (this->entityBitSet != _sig.entityBitSet);
|
||||
}
|
||||
|
||||
EntitySig&
|
||||
EntitySig::operator=(const EntitySig& _sig)
|
||||
EntitySig& EntitySig::operator=(const EntitySig& _sig)
|
||||
{
|
||||
this->entityBitSet.reset();
|
||||
this->entityBitSet |= _sig.getBitset();
|
||||
return *this;
|
||||
}
|
||||
|
||||
const EntityBitSet&
|
||||
EntitySig::getBitset()const
|
||||
const EntityBitSet & EntitySig::getBitset()const
|
||||
{
|
||||
return this->entityBitSet;
|
||||
}
|
||||
|
||||
/* for EDGEsig */
|
||||
// Default constructor: starts with every bit of the edge signature cleared.
EdgeSig::EdgeSig()
{
	edgeBitSet.reset();
}
|
||||
|
||||
// Builds an edge signature holding the same bits as the signature pointed to
// by _p_sig. The pointer is dereferenced unconditionally.
EdgeSig::EdgeSig(const EdgeSig* _p_sig)
{
	// clearing and then OR-ing leaves exactly the source bits set
	edgeBitSet.reset();
	edgeBitSet |= _p_sig->edgeBitSet;
}
|
||||
|
||||
EdgeSig::EdgeSig(const EdgeSig& _sig)
|
||||
{
|
||||
this->edgeBitSet.reset();
|
||||
|
@ -238,21 +382,8 @@ EdgeSig::EdgeSig(const EdgeBitSet& _bitset)
|
|||
this->edgeBitSet.reset();
|
||||
this->edgeBitSet |= _bitset;
|
||||
}
|
||||
|
||||
EdgeSig&
|
||||
EdgeSig::operator|=(const EdgeSig& _sig)
|
||||
EdgeSig& EdgeSig::operator|=(const EdgeSig& _sig)
|
||||
{
|
||||
this->edgeBitSet |= _sig.edgeBitSet;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Returns the textual form of this entity signature, as produced by
// Signature::BitSet2str for the stored bitset.
string
EntitySig::to_str() const
{
	return Signature::BitSet2str(this->entityBitSet);
}
|
||||
|
||||
|
|
|
@ -1,86 +1,57 @@
|
|||
/*=============================================================================
|
||||
# Filename: Signature.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-04-11 12:50
|
||||
# Description: written by liyouhuan and hanshuo
|
||||
=============================================================================*/
|
||||
/*
|
||||
* Signature.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
* Modified on: 2014-6-29
|
||||
* add some private hash functions,
|
||||
* fix some ill-formed function names.
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#ifndef SIGNATURE_H_
|
||||
#define SIGNATURE_H_
|
||||
|
||||
#ifndef _SIGNATURE_SIGNATURE_H
|
||||
#define _SIGNATURE_SIGNATURE_H
|
||||
#include<iostream>
|
||||
#include<string.h>
|
||||
#include<bitset>
|
||||
#include<sstream>
|
||||
using namespace std;
|
||||
|
||||
#include "../Util/Util.h"
|
||||
|
||||
class Signature
|
||||
{
|
||||
class Signature{
|
||||
public:
|
||||
//static HashFunction hash[HashNum];
|
||||
//must make sure: ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH
|
||||
//const static int ENTITY_SIG_LENGTH = 400;
|
||||
static const int STR_SIG_BASE = 100;
|
||||
//NOTICE: we can also use id here, but string is recommended due to special structure
|
||||
//(maybe needed later, for example, wildcards)
|
||||
//The hash function is costly, so use only a few of them (see HASH_NUM)
|
||||
static const int HASH_NUM = 3; //no more than Util::HashNum
|
||||
//NOTICE:if using str id, we can also divide like EDGE_SIG
|
||||
//here we divide as entity neighbors and literal neighbors: ENTITY, LITERAL
|
||||
static const int STR_SIG_LENGTH = 2 * STR_SIG_BASE * HASH_NUM; //250
|
||||
static const int STR_SIG_LENGTH2 = STR_SIG_BASE * HASH_NUM;
|
||||
|
||||
//QUERY:I think that str filter is more important in VSTree than predicate, because
|
||||
//a predicate may correspond to a lot of entities and predicate num is usually small
|
||||
static const int EDGE_SIG_INTERVAL_NUM_HALF = 5; //in edge or out edge
|
||||
static const int EDGE_SIG_INTERVAL_NUM = 2 * EDGE_SIG_INTERVAL_NUM_HALF;
|
||||
static const int EDGE_SIG_INTERVAL_BASE = 20;
|
||||
static const int EDGE_SIG_LENGTH = EDGE_SIG_INTERVAL_NUM * EDGE_SIG_INTERVAL_BASE; //150
|
||||
static const int EDGE_SIG_LENGTH2 = EDGE_SIG_INTERVAL_NUM_HALF * EDGE_SIG_INTERVAL_BASE; //150
|
||||
/* must make sure:
|
||||
* ENTITY_SIG_LENGTH = EDGE_SIG_LENGTH + STR_SIG_LENGTH */
|
||||
const static int ENTITY_SIG_LENGTH = 400;
|
||||
const static int EDGE_SIG_LENGTH = 150;
|
||||
const static int STR_SIG_LENGTH = 250;
|
||||
|
||||
static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH;
|
||||
//static const int ENTITY_SIG_LENGTH = STR_SIG_LENGTH + EDGE_SIG_LENGTH + NEIGHBOR_SIG_LENGTH;
|
||||
|
||||
typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet;
|
||||
typedef std::bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
|
||||
typedef bitset<Signature::EDGE_SIG_LENGTH> EdgeBitSet;
|
||||
typedef bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
|
||||
|
||||
static std::string BitSet2str(const EntityBitSet& _bitset);
|
||||
|
||||
//NOTICE: there are two predicate encoding method now, see the encoding functions @Signature.cpp for details
|
||||
/* there are two predicate encoding method now, see the encoding functions @Signature.cpp for details. */
|
||||
const static int PREDICATE_ENCODE_METHOD = 1;
|
||||
static void encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type);
|
||||
static void encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs);
|
||||
static void encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs); //_str is subject or object(literal)
|
||||
static void encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs);
|
||||
//Signature()
|
||||
//{
|
||||
//NOTICE:not exceed the HashNum
|
||||
//this->hash = new HashFunction[HashNum];
|
||||
//this->hash[0] = Util::simpleHash;
|
||||
//this->hash[1] = Util::APHash;
|
||||
//this->hash[2] = Util::BKDRHash;
|
||||
//this->hash[3] = Util::DJBHash;
|
||||
//this->hash[4] = Util::ELFHash;
|
||||
//this->hash[5] = Util::DEKHash;
|
||||
//this->hash[6] = Util::BPHash;
|
||||
//this->hash[7] = Util::FNVHash;
|
||||
//this->hash[8] = Util::HFLPHash;
|
||||
//this->hash[9] = Util::HFHash;
|
||||
//this->hash[10] = Util::JSHash;
|
||||
//this->hash[11] = Util::PJWHash;
|
||||
//this->hash[12] = Util::RSHash;
|
||||
//this->hash[13] = Util::SDBMHash;
|
||||
//this->hash[14] = Util::StrHash;
|
||||
//this->hash[15] = Util::TianlHash;
|
||||
//}
|
||||
//~Signature()
|
||||
//{
|
||||
//delete[] this->hash;
|
||||
//}
|
||||
unsigned int hash(const char* _str);
|
||||
private:
|
||||
static unsigned int BKDRHash(const char *_str);
|
||||
static unsigned int simpleHash(const char *_str);
|
||||
static unsigned int RSHash(const char *_str);
|
||||
static unsigned int JSHash(const char *_str);
|
||||
static unsigned int PJWHash(const char *_str);
|
||||
static unsigned int ELFHash(const char *_str);
|
||||
static unsigned int SDBMHash(const char *_str);
|
||||
static unsigned int DJBHash(const char *_str);
|
||||
static unsigned int APHash(const char *_str);
|
||||
};
|
||||
|
||||
//WARN:also defined in Signature, must be same!!!
|
||||
//NOTICE:EdgeBitSet is only used in Query, not for VSTree
|
||||
typedef std::bitset<Signature::EDGE_SIG_LENGTH2> EdgeBitSet;
|
||||
typedef std::bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
|
||||
typedef bitset<Signature::EDGE_SIG_LENGTH> EdgeBitSet;
|
||||
typedef bitset<Signature::ENTITY_SIG_LENGTH> EntityBitSet;
|
||||
|
||||
class EntitySig : Signature{
|
||||
public:
|
||||
|
@ -95,7 +66,6 @@ public:
|
|||
EntitySig& operator=(const EntitySig& _sig);
|
||||
const EntityBitSet& getBitset()const;
|
||||
void encode(const char * _str, int _pre_id);
|
||||
std::string to_str() const;
|
||||
};
|
||||
|
||||
class EdgeSig : Signature{
|
||||
|
@ -108,5 +78,4 @@ public:
|
|||
EdgeSig& operator|=(const EdgeSig& _sig);
|
||||
};
|
||||
|
||||
#endif // _SIGNATURE_SIGNATURE_H
|
||||
|
||||
#endif /* SIGNATURE_H_ */
|
||||
|
|
|
@ -5,11 +5,10 @@
|
|||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef _UTIL_TRIPLE_H
|
||||
#define _UTIL_TRIPLE_H
|
||||
|
||||
#include "Util.h"
|
||||
|
||||
#ifndef TRIPLE_H_
|
||||
#define TRIPLE_H_
|
||||
#include<iostream>
|
||||
#include<string.h>
|
||||
using namespace std;
|
||||
|
||||
class Triple{
|
||||
|
@ -62,5 +61,4 @@ public:
|
|||
const string toString()const;
|
||||
};
|
||||
|
||||
#endif //_UTIL_TRIPLE_H
|
||||
|
||||
#endif /* TRIPLE_H_ */
|
|
@ -1,70 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: BloomFilter.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2016-03-04 17:49
|
||||
# Description: implement functions in BloomFilter.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "BloomFilter.h"
|
||||
|
||||
// Default constructor. The real sizing logic is still to be written, but
// every member is given a defined value so that the object never holds
// indeterminate pointers or sizes.
BloomFilter::BloomFilter()
	: length(0), filter(NULL), hfnum(0), rate(0.0), hfptr(NULL)
{
	//TODO: choose length/hfnum/rate, then allocate the bit space
}
|
||||
|
||||
// Constructor taking the number of all keys (_num). The sizing logic is
// still to be written; until then _num is not used, but every member is
// given a defined value so that the object never holds indeterminate
// pointers or sizes.
BloomFilter::BloomFilter(unsigned _num)
	: length(0), filter(NULL), hfnum(0), rate(0.0), hfptr(NULL)
{
	(void)_num;
	//TODO:fix _num to mod 8 == 0
}
|
||||
|
||||
void
|
||||
BloomFilter::init()
|
||||
{
|
||||
this->filter = (char *)calloc(this->length/8, sizeof(char));
|
||||
//TODO:assign hash functions for hfptr
|
||||
//assign the rate of false positive, and then compute the length and hfnum according to key num
|
||||
}
|
||||
|
||||
// Destructor: currently a stub.
// NOTE(review): the buffer that init() obtains with calloc is not released here.
BloomFilter::~BloomFilter()
{
	//TODO
}
|
||||
|
||||
//NOTICE:there are two ways to change int to string, one digit to one character or just change int* to char*
|
||||
//The latter is more efficient because the former consumes space and time:O(32) >= O(lgn)
|
||||
void
|
||||
BloomFilter::addRecord(int _record)
|
||||
{
|
||||
//TODO
|
||||
}
|
||||
|
||||
void
|
||||
BloomFilter::addRecord(const char* _record, unsigned _len)
|
||||
{
|
||||
//TODO
|
||||
}
|
||||
|
||||
bool
|
||||
BloomFilter::checkRecord(int _record) const
|
||||
{
|
||||
//TODO
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
BloomFilter::checkRecord(const char* _record, unsigned _len) const
|
||||
{
|
||||
//TODO
|
||||
return false;
|
||||
}
|
||||
|
||||
//if( GETBIT(vector, Util::HFLPHash(ch,strlen(ch))%MAX) )
|
||||
//{
|
||||
//flag++;
|
||||
//}
|
||||
//else
|
||||
//{
|
||||
//SETBIT(vector,Util::HFLPHash(ch,strlen(ch))%MAX );
|
||||
//}
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: BloomFilter.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-20 13:18
|
||||
# Description: http://blog.csdn.net/hguisu/article/details/7866173
|
||||
TODO: this strategy can be used in Join and KVstore-search/modify/remove, or at the topmost level!
|
||||
Is it really better? Appropriate when the number of keys is small but lookups are very frequent!
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _UTIL_BLOOMFILTER_H
|
||||
#define _UTIL_BLOOMFILTER_H
|
||||
|
||||
#include "Util.h"
|
||||
|
||||
// Bit n of the byte array ch, counting from the most significant bit of
// ch[0]. Arguments and the whole expansion are parenthesized so that
// expression arguments (e.g. SETBIT(buf, a + b)) and surrounding operators
// (e.g. !GETBIT(buf, n)) behave as expected.
#define SETBIT(ch, n) ((ch)[(n)/8] |= 1 << (7-(n)%8))
#define GETBIT(ch, n) (((ch)[(n)/8] & (1<<(7-(n)%8))) >> (7-(n)%8))
|
||||
|
||||
class BloomFilter
|
||||
{
|
||||
public:
|
||||
BloomFilter();
|
||||
BloomFilter(unsigned _num); //num of all keys
|
||||
void addRecord(int _record);
|
||||
//NOTICE:we hope a Bstr-like struct here, for the length maybe very large
|
||||
void addRecord(const char* _record, unsigned _len);
|
||||
bool checkRecord(int _record) const;
|
||||
bool checkRecord(const char* _record, unsigned _len) const;
|
||||
~BloomFilter();
|
||||
private:
|
||||
unsigned length; //length of total bits, mod 8 == 0
|
||||
char *filter; //the bit space
|
||||
unsigned hfnum; //num of hash functions
|
||||
double rate; //false positive
|
||||
HashFunction* hfptr; //hash functions pointer array
|
||||
|
||||
void init();
|
||||
};
|
||||
|
||||
#endif //_UTIL_BLOOMFILTER_H
|
||||
|
200
Util/Bstr.cpp
200
Util/Bstr.cpp
|
@ -1,200 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Bstr.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-16 13:18
|
||||
# Description: achieve functions in Bstr.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "Bstr.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//default construct function
|
||||
// Default constructor: an empty Bstr that owns no buffer.
Bstr::Bstr()
	: str(NULL), length(0)
{
}
|
||||
|
||||
// Builds a Bstr holding a private copy of the first _len bytes of _str.
// No terminator is appended.
// If the allocation fails (or malloc(0) yields NULL) the object is left
// empty (str == NULL, length == 0) instead of copying into a null pointer.
Bstr::Bstr(const char* _str, unsigned _len)
{
	//WARN: if need a string .please add '\0' in your own!
	this->length = _len;
	//this->str = _str;	//not valid:const char* -> char*
	this->str = (char*)malloc(_len);
	if(this->str == NULL)
	{
		// nothing to copy into: keep pointer and length consistent
		this->length = 0;
		return;
	}
	memcpy(this->str, _str, sizeof(char) * _len);
	//this->str[_len]='\0';
}
|
||||
|
||||
//Bstr::Bstr(char* _str, unsigned _len)
|
||||
//{
|
||||
// this->length = _len;
|
||||
// this->str = _str;
|
||||
//}
|
||||
|
||||
//copy construct function
|
||||
// Copy constructor: makes a deep copy of the source bytes.
// A shallow pointer copy is unsafe here because ~Bstr() calls release(),
// which frees str; two objects sharing one buffer would free it twice.
// An empty source (NULL buffer or zero length) or a failed allocation yields
// an empty Bstr.
Bstr::Bstr(const Bstr& _bstr)
{
	this->length = 0;
	this->str = NULL;
	if(_bstr.str == NULL || _bstr.length == 0)
	{
		return;
	}

	this->str = (char*)malloc(_bstr.length);
	if(this->str == NULL)
	{
		return;
	}
	memcpy(this->str, _bstr.str, _bstr.length);
	this->length = _bstr.length;
}
|
||||
|
||||
//assign function for class
|
||||
//Bstr& Bstr::operate =(const Bstr& _bstr)
|
||||
//{
|
||||
// if(*this == _bstr)
|
||||
// return *this; //a=a
|
||||
// //WARN:not copy memory. if need to copy, delete original first!
|
||||
// this->length = _bstr.length;
|
||||
// this->str = _bstr.str;
|
||||
// return *this;
|
||||
//}
|
||||
|
||||
bool
|
||||
Bstr::operator > (const Bstr& _bstr)
|
||||
{
|
||||
int res = Util::compare(this->str, this->length, _bstr.str, _bstr.length);
|
||||
if(res == 1)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
Bstr::operator < (const Bstr& _bstr)
|
||||
{
|
||||
int res = Util::compare(this->str, this->length, _bstr.str, _bstr.length);
|
||||
if(res == -1)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
Bstr::operator == (const Bstr& _bstr)
|
||||
{
|
||||
int res = Util::compare(this->str, this->length, _bstr.str, _bstr.length);
|
||||
if(res == 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
Bstr::operator <= (const Bstr& _bstr)
|
||||
{
|
||||
int res = Util::compare(this->str, this->length, _bstr.str, _bstr.length);
|
||||
if(res <= 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
Bstr::operator >= (const Bstr& _bstr)
|
||||
{
|
||||
int res = Util::compare(this->str, this->length, _bstr.str, _bstr.length);
|
||||
if(res >= 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
Bstr::operator != (const Bstr& _bstr)
|
||||
{
|
||||
int res = Util::compare(this->str, this->length, _bstr.str, _bstr.length);
|
||||
if(res != 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned
|
||||
Bstr::getLen() const
|
||||
{
|
||||
return length;
|
||||
}
|
||||
|
||||
void
|
||||
Bstr::setLen(unsigned _len)
|
||||
{
|
||||
this->length = _len;
|
||||
}
|
||||
|
||||
char*
|
||||
Bstr::getStr() const
|
||||
{
|
||||
return str;
|
||||
}
|
||||
|
||||
void
|
||||
Bstr::setStr(char* _str)
|
||||
{
|
||||
this->str = _str;
|
||||
}
|
||||
|
||||
void
|
||||
Bstr::copy(const Bstr* _bp)
|
||||
{
|
||||
this->length = _bp->getLen();
|
||||
this->str = (char*)malloc(this->length);
|
||||
memcpy(this->str, _bp->getStr(), this->length);
|
||||
}
|
||||
|
||||
void
|
||||
Bstr::copy(const char* _str, unsigned _len)
|
||||
{
|
||||
this->length = _len;
|
||||
this->str = (char*)malloc(this->length);
|
||||
memcpy(this->str, _str, this->length);
|
||||
}
|
||||
|
||||
void
|
||||
Bstr::clear()
|
||||
{
|
||||
this->str = NULL;
|
||||
this->length = 0;
|
||||
}
|
||||
|
||||
void
|
||||
Bstr::release()
|
||||
{
|
||||
free(this->str); //ok to be null, do nothing
|
||||
clear();
|
||||
}
|
||||
|
||||
// Destructor: frees the owned buffer through release(), which also resets
// the pointer to NULL.
Bstr::~Bstr()
{
	this->release();
}
|
||||
|
||||
void
|
||||
Bstr::print(string s) const
|
||||
{
|
||||
//TODO: add a new debug file in Util(maybe a total?)
|
||||
//#ifdef DEBUG
|
||||
// Util::showtime();
|
||||
// fputs("Class Bstr\n", Util::logsfp);
|
||||
// fputs("Message: ", Util::logsfp);
|
||||
// fputs(s.c_str(), Util::logsfp);
|
||||
// fputs("\n", Util::logsfp);
|
||||
// if(s == "BSTR")
|
||||
// { //total information, providing accurate debugging
|
||||
// fprintf(Util::logsfp, "length: %u\t the string is:\n", this->length);
|
||||
// unsigned i;
|
||||
// for(i = 0; i < this->length; ++i)
|
||||
// fputc(this->str[i], Util::logsfp);
|
||||
// fputs("\n", Util::logsfp);
|
||||
// }
|
||||
// else if(s == "bstr")
|
||||
// { //only length information, needed when string is very long
|
||||
// fprintf(Util::logsfp, "length: %u\n", this->length);
|
||||
// }
|
||||
// else;
|
||||
//#endif
|
||||
}
|
||||
|
53
Util/Bstr.h
53
Util/Bstr.h
|
@ -1,53 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Bstr.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-16 13:01
|
||||
# Description:
|
||||
1. firstly written by liyouhuan, modified by zengli
|
||||
2. class declaration for Bstr (used to store arbitrary strings)
|
||||
=============================================================================*/
|
||||
|
||||
|
||||
#ifndef _UTIL_BSTR_H
|
||||
#define _UTIL_BSTR_H
|
||||
|
||||
#include "Util.h"
|
||||
|
||||
// Length-prefixed byte string: a raw buffer pointer plus an explicit length,
// so the content does not have to be NUL-terminated.
class Bstr
{
private:
	//pointers consume 8 byte in 64-bit system
	char* str;
	unsigned length;

public:
	Bstr();
	//if copy memory, then use const char*, but slow
	//else, can not use const char* -> char*
	Bstr(const char* _str, unsigned _len);
	//Bstr(char* _str, unsigned _len);
	Bstr(const Bstr& _bstr);
	//Bstr& operate = (const Bstr& _bstr);

	//comparisons
	bool operator > (const Bstr& _bstr);
	bool operator < (const Bstr& _bstr);
	bool operator == (const Bstr& _bstr);
	bool operator <= (const Bstr& _bstr);
	bool operator >= (const Bstr& _bstr);
	bool operator != (const Bstr& _bstr);

	//accessors
	unsigned getLen() const;
	void setLen(unsigned _len);
	char* getStr() const;
	//reuse a TBstr
	void setStr(char* _str);

	//release memory
	void release();
	//set str/length to 0
	void clear();
	void copy(const Bstr* _bp);
	void copy(const char* _str, unsigned _len);
	//bool read(FILE* _fp);
	//int write(FILE* _fp);
	~Bstr();
	//DEBUG
	void print(std::string s) const;
};
|
||||
|
||||
#endif // _UTIL_BSTR_H
|
||||
|
532
Util/Stream.cpp
532
Util/Stream.cpp
|
@ -1,532 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Stream.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-20 14:10
|
||||
# Description: achieve functions in Stream.h
|
||||
=============================================================================*/
|
||||
|
||||
#include "Stream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
ResultCmp mycmp;
|
||||
|
||||
//DEBUG: error when using STL::sort() to sort the Bstr[] units with mycmp, null pointer(Bstr*)
|
||||
//reported sometimes(for example, watdiv_30.db and watdiv_200.db, query/C3.sql).
|
||||
//Notice that sort() uses quick-sorting method when size is large, which usually
|
||||
//performs faster than merge-sorting used by STL::stable_sort() which can ensures the order between same
|
||||
//value(only in the sorted column) units.
|
||||
//The error is marked by DEBUG1 and DEBUG2, and I just use STL::stable_sort() here, because I cannot find
|
||||
//the reason of the null pointer error if using STL::sort()
|
||||
|
||||
void
|
||||
Stream::init()
|
||||
{
|
||||
this->inMem = true;
|
||||
this->mode = -1;
|
||||
this->ansMem = NULL;
|
||||
this->ansDisk = NULL;
|
||||
this->rownum = this->colnum = 0;
|
||||
this->needSort = false;
|
||||
this->xpos = this->ypos = 0; //the 0-th pos is not used now
|
||||
this->record = NULL;
|
||||
this->record_size = NULL;
|
||||
this->space = 0;
|
||||
this->tempfp = NULL;
|
||||
}
|
||||
|
||||
// Default constructor: only resets the members through init().
Stream::Stream()
{
	init();
}
|
||||
|
||||
// Opens a stream for _rownum x _colnum result cells.
//   _keys / _desc : forwarded to ResultCmp to configure the global comparator mycmp
//   _flag         : whether the result has to be sorted
// One transfer buffer of Util::TRANSFER_SIZE bytes is allocated per column.
// The result is kept in memory when Util::memoryLeft() is at least the
// estimated size; otherwise, if no sorting is needed, a temporary ".dat" file
// under Util::tmp_path is opened for it.
Stream::Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag)
{
	this->init();
#ifdef DEBUG_STREAM
	printf("Stream:now to open stream\n");
#endif

	this->rownum = _rownum;
	this->colnum = _colnum;
	this->needSort = _flag;
	//this->cmp = ResultCmp(this->rownum, _keys);
	mycmp = ResultCmp(this->rownum, _keys, _desc);

	this->record = new Bstr[this->colnum];
	this->record_size = new unsigned[this->colnum];
	for(unsigned i = 0; i < this->colnum; ++i)
	{
		this->record[i].setStr((char*)malloc(Util::TRANSFER_SIZE));
		this->record_size[i] = Util::TRANSFER_SIZE;
	}

	this->mode = 0;	//wait for writing records

	// Estimated size in units of Util::GB (presumably ~100 bytes per cell --
	// TODO confirm). The product is computed in 64 bits: in 32-bit unsigned
	// arithmetic _rownum * _colnum * 100 wraps around for large results and
	// would wrongly report that everything fits in memory.
	const long long size = static_cast<long long>(
			static_cast<unsigned long long>(_rownum) * _colnum * 100 / Util::GB);
	if(Util::memoryLeft() < size)
	{
		this->inMem = false;
		fprintf(stderr, "Stream: memory is not enough!\n");
	}
	else
	{
		fprintf(stderr, "Stream: memory is enough!\n");
	}
#ifdef DEBUG_STREAM
	fprintf(stderr, "Stream:after memory check!\n");
#endif

#ifdef DEBUG_STREAM
	fprintf(stderr, "Stream::Stream() - basic information\n");
	fprintf(stderr, "rownum: %u\tcolnum: %u\n", this->rownum, this->colnum);
	if(this->needSort)
	{
		fprintf(stderr, "the result needs to be sorted, the keys are listed below:\n");
		for(vector<int>::iterator it = _keys.begin(); it != _keys.end(); ++it)
			fprintf(stderr, "%d\t", *it);
		fprintf(stderr, "\n");
	}
	else
	{
		fprintf(stderr, "the result needs not to be sorted!\n");
	}
	//WARN: this is just for debugging!
	//this->inMem = false;
#endif

	if(this->inMem)
	{
		this->ansMem = new Bstr*[this->rownum];
		for(unsigned i = 0; i < this->rownum; ++i)
		{
			this->ansMem[i] = new Bstr[this->colnum];
		}
		return;
	}

	//below are for disk
	if(!this->needSort)	// on disk and NO sort needed: open the result file now
	{
		string file_name = Util::tmp_path + Util::int2string(Util::get_cur_time());
		file_name += ".dat";
#ifdef DEBUG_STREAM
		fprintf(stderr, "%s\n", file_name.c_str());
#endif
		//FILE* fp = NULL;
		if((this->ansDisk = fopen(file_name.c_str(), "w+b")) == NULL)
		{
			fprintf(stderr, "Stream::Stream(): open error!\n");
			return;
		}
		this->result = file_name;
	}
	//return true;
}
|
||||
|
||||
bool operator < (const Element& _a, const Element& _b)
|
||||
{
|
||||
return mycmp(_a.val, _b.val);
|
||||
}
|
||||
|
||||
bool operator > (const Element& _a, const Element& _b)
|
||||
{
|
||||
return !mycmp(_a.val, _b.val);
|
||||
}
|
||||
|
||||
bool
|
||||
Stream::copyToRecord(const char* _str, unsigned _len, unsigned _idx)
|
||||
{
|
||||
if(_idx >= this->colnum)
|
||||
{
|
||||
fprintf(stderr, "Stream::copyToRecord: index out of range!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned length = _len;
|
||||
if(length + 1 > this->record_size[_idx])
|
||||
{
|
||||
this->record[_idx].release();
|
||||
this->record[_idx].setStr((char*)malloc((length + 1) * sizeof(char)));
|
||||
this->record_size[_idx] = length + 1; //one more byte: convenient to add \0
|
||||
}
|
||||
|
||||
memcpy(this->record[_idx].getStr(), _str, length);
|
||||
this->record[_idx].getStr()[length] = '\0'; //set for string() in KVstore
|
||||
this->record[_idx].setLen(length);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Stream::outputCache()
|
||||
{
|
||||
//DEBUG1
|
||||
//sort and output to file
|
||||
stable_sort(this->tempst.begin(), this->tempst.end(), mycmp);
|
||||
unsigned size = this->tempst.size();
|
||||
for(unsigned i = 0; i < size; ++i)
|
||||
{
|
||||
Bstr* p = this->tempst[i];
|
||||
for(unsigned j = 0; j < this->colnum; ++j)
|
||||
{
|
||||
unsigned len = p[j].getLen();
|
||||
char* str = p[j].getStr();
|
||||
fwrite(&len, sizeof(unsigned), 1, this->tempfp);
|
||||
fwrite(str, sizeof(char), len, this->tempfp);
|
||||
}
|
||||
delete[] p;
|
||||
}
|
||||
this->tempst.clear();
|
||||
|
||||
//reset and add to heap, waiting for merge sort
|
||||
fseek(this->tempfp, 0, SEEK_SET);
|
||||
Bstr* bp = new Bstr[this->colnum];
|
||||
|
||||
for(unsigned i = 0; i < this->colnum; ++i)
|
||||
{
|
||||
unsigned len;
|
||||
fread(&len, sizeof(unsigned), 1, this->tempfp);
|
||||
char* p = (char*)malloc(len * sizeof(char));
|
||||
fread(p, sizeof(char), len, this->tempfp);
|
||||
bp[i].setLen(len);
|
||||
bp[i].setStr(p);
|
||||
}
|
||||
this->sortHeap.push_back(Element(this->tempfp, bp));
|
||||
this->tempfp = NULL;
|
||||
this->space = 0;
|
||||
}
|
||||
|
||||
bool
|
||||
Stream::write(const char* _str, unsigned _len)
|
||||
{
|
||||
#ifdef DEBUG_PRECISE
|
||||
fprintf(stderr, "Stream::write(): the current column is %u\n", this->ypos);
|
||||
#endif
|
||||
this->copyToRecord(_str, _len, this->ypos);
|
||||
this->ypos++;
|
||||
if(this->ypos == this->colnum)
|
||||
{
|
||||
this->ypos = 0;
|
||||
#ifdef DEBUG_PRECISE
|
||||
fprintf(stderr, "Stream::write(): now a record is ready, the current row is %u\n", this->xpos);
|
||||
#endif
|
||||
return this->write(this->record);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Stream::write(const Bstr* _bp)
|
||||
{
|
||||
if(this->xpos >= this->rownum)
|
||||
{
|
||||
fprintf(stderr, "you should set the end now!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(this->inMem)
|
||||
{
|
||||
//Bstr** p = (Bstr**)this->ans;
|
||||
for(unsigned i = 0; i < this->colnum; ++i)
|
||||
{
|
||||
//this->ansMem[this->xpos][i].release();
|
||||
this->ansMem[this->xpos][i].copy(_bp + i);
|
||||
}
|
||||
this->xpos++;
|
||||
return true;
|
||||
}
|
||||
|
||||
//below are for disk
|
||||
if(needSort) //NOTICE:in disk and need sort
|
||||
{
|
||||
if(this->tempfp == NULL)
|
||||
{
|
||||
string name = Util::tmp_path + "stream_" + Util::int2string(this->files.size());
|
||||
//NOTICE:name derived from time maybe same
|
||||
//name = Util::tmp_path + Util::int2string(Util::get_cur_time());
|
||||
name += ".dat";
|
||||
#ifdef DEBUG_STREAM
|
||||
fprintf(stderr, "%s\n", name.c_str());
|
||||
#endif
|
||||
if((this->tempfp = fopen(name.c_str(), "w+b")) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Stream::write(): open error!\n");
|
||||
return false;
|
||||
}
|
||||
this->files.push_back(name);
|
||||
}
|
||||
|
||||
Bstr* p = new Bstr[this->colnum];
|
||||
for(unsigned i = 0; i < this->colnum; ++i)
|
||||
{
|
||||
//p[i].release();
|
||||
p[i].copy(_bp + i);
|
||||
this->space += _bp->getLen();
|
||||
}
|
||||
this->space += sizeof(unsigned) * this->colnum;
|
||||
this->space += sizeof(char*) * this->colnum;
|
||||
this->tempst.push_back(p);
|
||||
this->xpos++;
|
||||
|
||||
if(this->space > Stream::BASE_MEMORY_LIMIT)
|
||||
{
|
||||
this->outputCache();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//FILE* fp = (FILE*)(this->ans);
|
||||
for(unsigned i = 0; i < this->colnum; ++i)
|
||||
{
|
||||
unsigned len = _bp[i].getLen();
|
||||
const char* str = _bp[i].getStr();
|
||||
fwrite(&len, sizeof(unsigned), 1, this->ansDisk);
|
||||
fwrite(str, sizeof(char), len, this->ansDisk);
|
||||
}
|
||||
this->xpos++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return the next record, or NULL when there is none left.
// The returned array (colnum fields) is the stream's internal transfer
// record: it must not be freed by the caller and is overwritten by the next
// read(). After the last row has been returned the stream enters mode 2.
const Bstr*
Stream::read()
{
    // xpos >= rownum also covers a stream with no rows, for which mode never
    // reaches 2 and the table/file must not be touched
    if(this->isEnd() || this->xpos >= this->rownum)
    {
        fprintf(stderr, "read to end now!\n");
        return NULL;
    }

    if(this->inMem)
    {
        //Bstr** bp = (Bstr**)(this->ans);
        Bstr* ip = this->ansMem[this->xpos];
        for(unsigned i = 0; i < this->colnum; ++i)
        {
            this->copyToRecord(ip[i].getStr(), ip[i].getLen(), i);
        }
    }
    else
    {
        //below are for disk, both needSort and not
        if(this->ansDisk == NULL)
        {
            fprintf(stderr, "Stream::read(): no result file!\n");
            return NULL;
        }
        for(unsigned i = 0; i < this->colnum; ++i)
        {
            //BETTER:alloca and reuse the space in Bstr?
            // len starts at 0 so a short read cannot leave it uninitialized
            unsigned len = 0;
            if(fread(&len, sizeof(unsigned), 1, this->ansDisk) != 1)
            {
                fprintf(stderr, "Stream::read(): read error!\n");
                len = 0;
            }
            char* s = (char*)calloc(len + 1, sizeof(char));
            if(s == NULL)
            {
                fprintf(stderr, "Stream::read(): memory is not enough!\n");
                return NULL;
            }
            fread(s, sizeof(char), len, this->ansDisk);
            this->copyToRecord(s, len, i);
            // copyToRecord() made its own copy; the staging buffer used to leak
            free(s);
        }
    }
    this->xpos++;
    if(this->xpos == this->rownum)
        this->mode = 2;
    return this->record;
}
|
||||
|
||||
bool
|
||||
Stream::isEnd()
|
||||
{
|
||||
return this->mode == 2;
|
||||
}
|
||||
|
||||
//do multi-list merge sort using heap
|
||||
void
|
||||
Stream::mergeSort()
|
||||
{
|
||||
string file_name = Util::tmp_path + Util::int2string(Util::get_cur_time());
|
||||
file_name += ".dat";
|
||||
#ifdef DEBUG_STREAM
|
||||
fprintf(stderr, "%s\n", file_name.c_str());
|
||||
#endif
|
||||
//FILE* fp = NULL;
|
||||
if((this->ansDisk = fopen(file_name.c_str(), "w+b")) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Stream::mergeSort: open error!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned valid = this->sortHeap.size();
|
||||
vector<Element>::iterator begin = this->sortHeap.begin();
|
||||
make_heap(begin, begin + valid, greater<Element>());
|
||||
while(valid > 0)
|
||||
{
|
||||
#ifdef DEBUG_STREAM
|
||||
fprintf(stderr, "valid: %u\n", valid);
|
||||
#endif
|
||||
//write contents of the first element to result file
|
||||
Bstr* bp = this->sortHeap[0].val;
|
||||
for(unsigned i = 0; i < this->colnum; ++i)
|
||||
{
|
||||
unsigned len = bp[i].getLen();
|
||||
char* s = bp[i].getStr();
|
||||
#ifdef DEBUG_STREAM
|
||||
fprintf(stderr, "top %u: %u\n", i, len);
|
||||
for(unsigned j = 0; j < len; ++j)
|
||||
fprintf(stderr, "%c", s[j]);
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
fwrite(&len, sizeof(unsigned), 1, this->ansDisk);
|
||||
fwrite(s, sizeof(char), len, this->ansDisk);
|
||||
bp[i].release();
|
||||
}
|
||||
#ifdef DEBUG_STREAM
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
|
||||
//pop, read and adjust
|
||||
pop_heap(begin, begin + valid, greater<Element>());
|
||||
bp = this->sortHeap[valid-1].val;
|
||||
bool tillEnd = false;
|
||||
for(unsigned i = 0; i < this->colnum; ++i)
|
||||
{
|
||||
unsigned len;
|
||||
char* s;
|
||||
FILE* tp = this->sortHeap[valid-1].fp;
|
||||
fread(&len, sizeof(unsigned), 1, tp);
|
||||
if(feof(tp))
|
||||
{
|
||||
this->sortHeap[valid-1].release();
|
||||
valid--;
|
||||
tillEnd = true;
|
||||
#ifdef DEBUG_STREAM
|
||||
fprintf(stderr, "now a stream file reaches its end!\n");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
s = (char*)malloc(sizeof(char) * len);
|
||||
fread(s, sizeof(char), len, tp);
|
||||
bp[i].setLen(len);
|
||||
bp[i].setStr(s);
|
||||
}
|
||||
if(!tillEnd)
|
||||
push_heap(begin, begin + valid, greater<Element>());
|
||||
}
|
||||
|
||||
//fseek(fp, 0, SEEK_SET);
|
||||
//this->ans = fp;
|
||||
this->result = file_name;
|
||||
}
|
||||
|
||||
void
|
||||
Stream::setEnd()
|
||||
{
|
||||
if(this->mode == 1)
|
||||
{
|
||||
fprintf(stderr, "Stream::setEnd(): already in read mode!\n");
|
||||
this->xpos = 0;
|
||||
//FILE* fp = (FILE*)(this->ans);
|
||||
if(!this->inMem)
|
||||
fseek(this->ansDisk, 0, SEEK_SET);
|
||||
return;
|
||||
}
|
||||
|
||||
this->mode = 1; //wait for reading records
|
||||
this->xpos = 0;
|
||||
|
||||
#ifdef DEBUG_STREAM
|
||||
fprintf(stderr, "Stream::setEnd(): now is in read mode!\n");
|
||||
#endif
|
||||
|
||||
if(this->inMem)
|
||||
{
|
||||
//Bstr** p = (Bstr**)(this->ans);
|
||||
if(this->needSort)
|
||||
{
|
||||
//DEBUG2
|
||||
stable_sort(this->ansMem, this->ansMem + this->rownum, mycmp);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
//below are for disk
|
||||
if(this->needSort)
|
||||
{
|
||||
if(this->tempfp != NULL)
|
||||
{
|
||||
this->outputCache();
|
||||
}
|
||||
if(this->files.size() > 1)
|
||||
{
|
||||
#ifdef DEBUG_STREAM
|
||||
fprintf(stderr, "Stream::setEnd(): merge sort is needed here!\n");
|
||||
#endif
|
||||
//do multi-list merge sort using heap
|
||||
this->mergeSort();
|
||||
}
|
||||
else if(this->files.size() > 0) //==1
|
||||
{
|
||||
this->sortHeap[0].release();
|
||||
this->ansDisk = fopen(this->files[0].c_str(), "r+b");
|
||||
this->result = this->files[0];
|
||||
}
|
||||
}
|
||||
//FILE* fp = (FILE*)(this->ans);
|
||||
fseek(this->ansDisk, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
// Release everything the stream owns: the transfer record, the in-memory
// table, or (disk mode) all open files and the temporary files on disk.
Stream::~Stream()
{
    delete[] this->record;
    delete[] this->record_size;
#ifdef DEBUG_STREAM
    fprintf(stderr, "Stream::~Stream(): record deleted!\n");
#endif

    if(this->inMem)
    {
        //Bstr** bp = (Bstr**)(this->ans);
        for(unsigned i = 0; i < this->rownum; ++i)
        {
            delete[] this->ansMem[i];
            //bp[i] = NULL;
        }
        delete[] this->ansMem;
#ifdef DEBUG_STREAM
        fprintf(stderr, "Stream::~Stream(): in memory, now table deleted!\n");
#endif
        return;
    }

    //below are for disk, both needSort and not

    // leftovers of an unfinished sort (stream destroyed before setEnd(), or
    // the merge could not open its output): cached rows, the run being
    // written, and heap elements whose run file is still open
    for(vector<Bstr*>::iterator it = this->tempst.begin(); it != this->tempst.end(); ++it)
        delete[] *it;
    this->tempst.clear();
    if(this->tempfp != NULL)
    {
        fclose(this->tempfp);
        this->tempfp = NULL;
    }
    for(vector<Element>::iterator it = this->sortHeap.begin(); it != this->sortHeap.end(); ++it)
    {
        if(it->fp != NULL)
            it->release();
    }

    //FILE* fp = (FILE*)(this->ans);
    // the result file is absent when it could not be opened or was never created
    if(this->ansDisk != NULL)
    {
        fclose(this->ansDisk);
        this->ansDisk = NULL;
    }

    //remove files and result
    if(!this->result.empty())
        remove(this->result.c_str());
    for(vector<string>::iterator it = this->files.begin(); it != this->files.end(); ++it)
        remove((*it).c_str());
#ifdef DEBUG_STREAM
    fprintf(stderr, "Stream::~Stream(): in disk, now all files removed!\n");
#endif

    //#ifdef DEBUG_PRECISE
    //printf("file is closed in Stream!\n");
    //#endif
}
|
||||
|
158
Util/Stream.h
158
Util/Stream.h
|
@ -1,158 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Stream.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-20 13:18
|
||||
# Description:
|
||||
1. stream buffer for medium results, store/write one record at a time
|
||||
2. dynamically switch the storage method (memory/disk) according to the memory usage of the system
|
||||
3. each Stream instance is associated with one result-like object, and one file
|
||||
4. functions using this class should implement writeToStream... to operate on different records
|
||||
5. the records should be viewed as tables to deal with
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _UTIL_STREAM_H
|
||||
#define _UTIL_STREAM_H
|
||||
|
||||
#include "Util.h"
|
||||
#include "Bstr.h"
|
||||
|
||||
//BETTER: use vector instead of table with fixed size, no need to indicate the rownum first(colnum required)
|
||||
//It is really a question to use Bstr[] or string[] to store a record
|
||||
|
||||
//struct Stream::ResultEqual
|
||||
//{
|
||||
// int result_len;
|
||||
// ResultEqual(int _l):result_len(_l){}
|
||||
// bool operator() (Bstr* const& a, Bstr* const& b)
|
||||
// {
|
||||
// for (int i = 0; i < result_len; ++i)
|
||||
// {
|
||||
// if (a[i] != b[i])
|
||||
// return false;
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
//};
|
||||
|
||||
struct ResultCmp
|
||||
{
|
||||
int result_len;
|
||||
std::vector<int> keys;
|
||||
std::vector<bool> desc;
|
||||
//ResultCmp(int _l):result_len(_l){}
|
||||
ResultCmp()
|
||||
{
|
||||
this->result_len = 0;
|
||||
}
|
||||
ResultCmp(int _l, std::vector<int>& _keys, std::vector<bool> &_desc)
|
||||
{
|
||||
this->result_len = _l;
|
||||
this->keys = std::vector<int>(_keys);
|
||||
this->desc = std::vector<bool>(_desc);
|
||||
}
|
||||
bool operator() (Bstr* const& a, Bstr* const& b)
|
||||
{
|
||||
//for(int i = 0; i < result_len; ++i)
|
||||
//{
|
||||
//if (a[i] != b[i])
|
||||
//return (a[i] < b[i]);
|
||||
//}
|
||||
unsigned size = this->keys.size();
|
||||
for(unsigned i = 0; i < size; ++i)
|
||||
{
|
||||
int t = this->keys[i];
|
||||
if(a[t] != b[t])
|
||||
{
|
||||
if (!this->desc[i])
|
||||
return (a[t] < b[t]);
|
||||
else
|
||||
return (a[t] > b[t]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
//static ResultCmp mycmp;
|
||||
|
||||
typedef struct StreamElement
|
||||
{
|
||||
FILE* fp;
|
||||
Bstr* val;
|
||||
StreamElement(FILE* _fp, Bstr* _val)
|
||||
{
|
||||
this->fp = _fp;
|
||||
this->val = _val;
|
||||
}
|
||||
void release()
|
||||
{
|
||||
delete[] this->val;
|
||||
this->val = NULL;
|
||||
fclose(this->fp);
|
||||
this->fp = NULL;
|
||||
}
|
||||
}Element;
|
||||
|
||||
//static bool operator < (const Element& _a, const Element& _b);
|
||||
|
||||
//BETTER:use mmap part by part to get output
|
||||
|
||||
//NOTICE:new and delete the Stream when you use it to store a series of result
|
||||
//duplicates should not be considered here, because sort based on int-int is faster
|
||||
//(so easy to remove duplicates)
|
||||
//However, for 'order by', the string comparision is a must, which should be done here!
|
||||
//(maybe in memory , maybe internal-external)
|
||||
class Stream
|
||||
{
|
||||
private:
|
||||
//multi-way merge sort is used here to do the internal-external sort
|
||||
std::vector<Element> sortHeap;
|
||||
std::vector<std::string> files;
|
||||
FILE* tempfp;
|
||||
std::vector<Bstr*> tempst;
|
||||
unsigned space; //space used in disk for one file
|
||||
|
||||
//struct ResultCmp cmp;
|
||||
|
||||
//void* ans; //FILE* if in disk, Bstr** if in memory
|
||||
Bstr** ansMem;
|
||||
FILE* ansDisk;
|
||||
std::string result; //needed if stored in disk, to be removed later
|
||||
unsigned rownum, colnum;
|
||||
bool needSort;
|
||||
//std::vector<int> keys;
|
||||
int mode; //-1:invalid;0:only write;1:only read;2:read end
|
||||
bool inMem;
|
||||
//below are for record position
|
||||
unsigned xpos, ypos;
|
||||
Bstr* record; //one record for read, array of Bstrs
|
||||
unsigned* record_size;
|
||||
|
||||
void init();
|
||||
bool copyToRecord(const char* _str, unsigned _len, unsigned _idx);
|
||||
void outputCache();
|
||||
void mergeSort();
|
||||
|
||||
public:
|
||||
//NOTICE:max num of opened files is 1024 in Linux by default, but this is enough for a result
|
||||
//as large as 1T
|
||||
static const unsigned BASE_MEMORY_LIMIT = 1 << 30;
|
||||
|
||||
Stream();
|
||||
Stream(std::vector<int>& _keys, std::vector<bool>& _desc, unsigned _rownum, unsigned _colnum, bool _flag);
|
||||
|
||||
//read/write should be based on the unit of record
|
||||
|
||||
//NOTICE:this function copy/save the data, caller free the memory if needed
|
||||
bool write(const Bstr* _bp);
|
||||
bool write(const char* _str, unsigned _len);
|
||||
//NOTICE:the memory should not be freed by user, and the latter will flush the former!
|
||||
const Bstr* read();
|
||||
void setEnd();
|
||||
bool isEnd();
|
||||
~Stream();
|
||||
};
|
||||
|
||||
#endif //_UTIL_STREAM_H
|
||||
|
1199
Util/Util.cpp
1199
Util/Util.cpp
File diff suppressed because it is too large
Load Diff
228
Util/Util.h
228
Util/Util.h
|
@ -1,228 +0,0 @@
|
|||
/*=============================================================================
|
||||
# Filename: Util.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-16 10:43
|
||||
# Description:
|
||||
1. firstly written by liyouhuan, modified by zengli
|
||||
2. common macros, functions, classes, etc
|
||||
# Notice: we only talk about sub-graph isomorphism in the essay, however, in
|
||||
# this system, the homomorphism is supported.(which means that multiple variables
|
||||
in the sparql query can point to the same node in data graph)
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _UTIL_UTIL_H
|
||||
#define _UTIL_UTIL_H
|
||||
|
||||
/* basic macros and types are defined here, including common headers */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <ctype.h>
|
||||
#include <time.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <regex.h>
|
||||
#include <locale.h>
|
||||
#include <assert.h>
|
||||
#include <libgen.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netdb.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
//NOTICE:below are restricted to C++, C files should not include(maybe nested) this header!
|
||||
#include <bitset>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <stack>
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <iterator>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
||||
//NOTICE:below are libraries need to link
|
||||
#include <math.h>
|
||||
#include <readline/readline.h>
|
||||
#include <readline/history.h>
|
||||
|
||||
#define STREAM_ON 1
|
||||
#define READLINE_ON 1
|
||||
#define MULTI_INDEX 1
|
||||
//#define SO2P 1
|
||||
|
||||
//indicate that in debug mode
|
||||
//#define DEBUG_STREAM
|
||||
//#define DEBUG_PRECISE 1 all information
|
||||
//#define DEBUG_KVSTORE 1 //in KVstore
|
||||
//#define DEBUG_VSTREE 1 //in Database
|
||||
//#define DEBUG_DATABASE 1 //in Database
|
||||
#define DEBUG_JOIN
|
||||
|
||||
#ifdef DEBUG_PRECISE
|
||||
#ifndef DEBUG
|
||||
#define DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_KVSTORE
|
||||
#ifndef DEBUG
|
||||
#define DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_VSTREE
|
||||
#ifndef DEBUG
|
||||
#define DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_DATABASE
|
||||
#ifndef DEBUG
|
||||
#define DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_JOIN
|
||||
#ifndef DEBUG
|
||||
#define DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef DEBUG
|
||||
//#define DEBUG
|
||||
#endif
|
||||
|
||||
#define xfree(x) free(x); x = NULL;
|
||||
|
||||
//NOTICE:include Util.h and below in each main function
|
||||
//(the beginning position)
|
||||
//#ifdef DEBUG
|
||||
// Util util;
|
||||
//#endif
|
||||
|
||||
typedef unsigned(*HashFunction)(const char*);
|
||||
//NOTICE: there are not many hash functions for int, so we represent an int by a 4-byte string instead
|
||||
//(not totally change int to string, which is costly)
|
||||
//http://www.cppblog.com/aurain/archive/2010/07/06/119463.html
|
||||
//http://blog.csdn.net/mycomputerxiaomei/article/details/7641221
|
||||
//http://kb.cnblogs.com/page/189480/
|
||||
|
||||
/******** all static&universal constants and functions ********/
|
||||
class Util
|
||||
{
|
||||
public:
|
||||
static int triple_num;
|
||||
static int pre_num;
|
||||
static int entity_num;
|
||||
static int literal_num;
|
||||
|
||||
static const unsigned MB = 1048576;
|
||||
static const unsigned GB = 1073741824;
|
||||
static const int TRIPLE_NUM_MAX = 1000*1000*1000;
|
||||
static const char EDGE_IN = 'i';
|
||||
static const char EDGE_OUT= 'o';
|
||||
//In order to differentiate the sub-part and literal-part of object
|
||||
//let subid begin with 0, while literalid begins with LITERAL_FIRST_ID
|
||||
//used in Database and Join
|
||||
static const int LITERAL_FIRST_ID = 1000*1000*1000;
|
||||
//initial transfer buffer size in Tree/ and Stream/
|
||||
static const unsigned TRANSFER_SIZE = 1 << 20; //1M
|
||||
|
||||
static std::string db_home;
|
||||
static std::string tmp_path;
|
||||
// this are for debugging
|
||||
//to build logs-system, each class: print() in time
|
||||
static std::string debug_path;
|
||||
static FILE* debug_kvstore;
|
||||
static FILE* debug_database;
|
||||
static FILE* debug_vstree;
|
||||
|
||||
static int memUsedPercentage();
|
||||
static int memoryLeft();
|
||||
static int compare(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2); //QUERY(how to use default args)
|
||||
static int string2int(std::string s);
|
||||
static std::string int2string(long n);
|
||||
//string2str: s.c_str()
|
||||
//str2string: string(str)
|
||||
static std::string showtime();
|
||||
static int cmp_int(const void* _i1, const void* _i2);
|
||||
static void sort(int*& _id_list, int _list_len);
|
||||
static int bsearch_int_uporder(int _key, const int* _array,int _array_num);
|
||||
static bool bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len);
|
||||
static int bsearch_vec_uporder(int _key, const std::vector<int>* _vec);
|
||||
static std::string result_id_str(std::vector<int*>& _v, int _var_num);
|
||||
static bool dir_exist(const std::string _dir);
|
||||
static bool create_dir(const std:: string _dir);
|
||||
static long get_cur_time();
|
||||
static bool save_to_file(const char*, const std::string _content);
|
||||
|
||||
static bool is_literal_ele(int);
|
||||
static int removeDuplicate(int*, int);
|
||||
static std::string getQueryFromFile(const char* _file_path);
|
||||
static std::string getSystemOutput(std::string cmd);
|
||||
static std::string getExactPath(const char* path);
|
||||
static std::string getItemsFromDir(std::string path);
|
||||
static void logging(std::string _str);
|
||||
|
||||
// Below are some useful hash functions for string
|
||||
static unsigned simpleHash(const char *_str);
|
||||
static unsigned APHash(const char *_str);
|
||||
static unsigned BKDRHash(const char *_str);
|
||||
static unsigned DJBHash(const char *_str);
|
||||
static unsigned ELFHash(const char *_str);
|
||||
static unsigned DEKHash(const char* _str);
|
||||
static unsigned BPHash(const char* _str);
|
||||
static unsigned FNVHash(const char* _str);
|
||||
static unsigned HFLPHash(const char* _str);
|
||||
static unsigned HFHash(const char* _str);
|
||||
static unsigned JSHash(const char *_str);
|
||||
static unsigned PJWHash(const char *_str);
|
||||
static unsigned RSHash(const char *_str);
|
||||
static unsigned SDBMHash(const char *_str);
|
||||
static unsigned StrHash(const char* _str);
|
||||
static unsigned TianlHash(const char* _str);
|
||||
|
||||
static const unsigned HashNum = 16;
|
||||
static HashFunction hash[];
|
||||
|
||||
static double logarithm(double _a, double _b);
|
||||
static void intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2);
|
||||
|
||||
static char* l_trim(char * szOutput, const char *szInput);
|
||||
static char* r_trim(char *szOutput, const char *szInput);
|
||||
static char* a_trim(char * szOutput, const char * szInput);
|
||||
|
||||
//NOTICE: this function must be called at the beginning of executing!
|
||||
Util();
|
||||
~Util();
|
||||
static std::string profile;
|
||||
static bool configure(); //read init.conf and set the parameters for this system
|
||||
static bool config_setting();
|
||||
static bool config_advanced();
|
||||
static bool config_debug();
|
||||
static bool gStore_mode;
|
||||
|
||||
};
|
||||
|
||||
#endif //_UTIL_UTIL_H
|
||||
|
|
@ -6,8 +6,8 @@
|
|||
*/
|
||||
|
||||
#include"EntryBuffer.h"
|
||||
|
||||
using namespace std;
|
||||
#include"../Signature/SigEntry.h"
|
||||
#include"../Database/Database.h"
|
||||
|
||||
int EntryBuffer::DEFAULT_CAPACITY = 2*1000*1000;
|
||||
|
||||
|
|
|
@ -8,10 +8,8 @@
|
|||
#ifndef ENTRYBUFFER_H_
|
||||
#define ENTRYBUFFER_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Signature/SigEntry.h"
|
||||
|
||||
//class SigEntry;
|
||||
class SigEntry;
|
||||
#include<stdio.h>
|
||||
|
||||
/* EntryBuffer is used to loading entries from hard disk when building VSTree. */
|
||||
class EntryBuffer
|
||||
|
|
|
@ -5,10 +5,11 @@
|
|||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "LRUCache.h"
|
||||
#include "VNode.h"
|
||||
|
||||
using namespace std;
|
||||
#include"LRUCache.h"
|
||||
#include"VNode.h"
|
||||
#include"../Database/Database.h"
|
||||
#include<stdio.h>
|
||||
#include<algorithm>
|
||||
|
||||
int LRUCache::DEFAULT_CAPACITY = 1*1000*1000;
|
||||
|
||||
|
@ -23,12 +24,6 @@ LRUCache::LRUCache(int _capacity)
|
|||
this->prev = new int[this->capacity + 2];
|
||||
this->keys = new int[this->capacity + 2];
|
||||
this->values = new VNode*[this->capacity + 2];
|
||||
|
||||
for(int i = 0; i < this->capacity + 2; ++i)
|
||||
{
|
||||
this->values[i] = NULL;
|
||||
}
|
||||
|
||||
this->next[LRUCache::START_INDEX] = LRUCache::END_INDEX;
|
||||
this->next[LRUCache::END_INDEX] = LRUCache::NULL_INDEX;
|
||||
this->prev[LRUCache::START_INDEX] = LRUCache::NULL_INDEX;
|
||||
|
@ -42,10 +37,6 @@ LRUCache::~LRUCache()
|
|||
delete []this->next;
|
||||
delete []this->prev;
|
||||
delete []this->keys;
|
||||
for(int i = 0; i < this->size; ++i)
|
||||
{
|
||||
delete this->values[i];
|
||||
}
|
||||
delete []this->values;
|
||||
}
|
||||
|
||||
|
@ -95,7 +86,7 @@ bool LRUCache::loadCache(string _filePath)
|
|||
{
|
||||
stringstream _ss;
|
||||
_ss << "error file line: " << _tmp_cycle_count << " " << nodePtr->getFileLine() << " " << nodePtr->getChildNum() << endl;
|
||||
Util::logging(_ss.str());
|
||||
Database::log(_ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -273,7 +264,7 @@ void LRUCache:: freeElem(int _pos)
|
|||
}
|
||||
|
||||
/* set the memory of the _pos element in cache */
|
||||
void LRUCache::setElem(int _pos, int _key, VNode* _value)
|
||||
void LRUCache:: setElem(int _pos, int _key, VNode* _value)
|
||||
{
|
||||
this->key2pos[_key] = _pos;
|
||||
this->keys[_pos] = _key;
|
||||
|
@ -286,8 +277,8 @@ void LRUCache::setElem(int _pos, int _key, VNode* _value)
|
|||
this->prev[nextPos] = _pos;
|
||||
this->next[_pos] = LRUCache::END_INDEX;
|
||||
this->prev[_pos] = prevPos;
|
||||
//NOTICE: this cannot be placed in loadCache() because this may be called by other functions
|
||||
this->size++;
|
||||
|
||||
this->size ++;
|
||||
}
|
||||
|
||||
/* just write the values[_pos] to the hard disk, the VNode in memory will not be free. */
|
||||
|
@ -315,7 +306,7 @@ bool LRUCache::writeOut(int _pos, int _fileLine)
|
|||
int line = _fileLine == -1 ? nodePtr->getFileLine() : _fileLine;
|
||||
size_t vNodeSize = sizeof(VNode);
|
||||
int flag = 0;
|
||||
long long seekPos = (long long)line * vNodeSize;
|
||||
int seekPos = (long long)line * vNodeSize;
|
||||
|
||||
flag = fseek(filePtr, seekPos, SEEK_SET);
|
||||
|
||||
|
@ -364,7 +355,7 @@ bool LRUCache::readIn(int _pos, int _fileLine)
|
|||
return false;
|
||||
}
|
||||
|
||||
//bool is_node_read = (fread((char *)nodePtr,vNodeSize,1,filePtr) == 1);
|
||||
bool is_node_read = (fread((char *)nodePtr,vNodeSize,1,filePtr) == 1);
|
||||
fclose(filePtr);
|
||||
|
||||
if (nodePtr == NULL || nodePtr->getFileLine() != _fileLine)
|
||||
|
@ -403,7 +394,7 @@ bool LRUCache::flush()
|
|||
{
|
||||
stringstream _ss;
|
||||
_ss << "line error at !!!" << line << " " << nodePtr->getFileLine() << endl;
|
||||
Util::logging(_ss.str());
|
||||
Database::log(_ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,16 +1,15 @@
|
|||
/*=============================================================================
|
||||
# Filename: LRUCache.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-04-11 14:05
|
||||
# Description: written by hanshuo
|
||||
=============================================================================*/
|
||||
/*
|
||||
* LRUCache.h
|
||||
*
|
||||
* Created on: 2014-6-30
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#ifndef LRUCACHE_H_
|
||||
#define LRUCACHE_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
|
||||
#include<map>
|
||||
#include<string>
|
||||
class VNode;
|
||||
|
||||
// before using the cache, you must loadCache or createCache.
|
||||
|
|
|
@ -1,20 +1,21 @@
|
|||
/*=============================================================================
|
||||
# Filename: VNode.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-04-11 14:08
|
||||
# Description: by liyouhuan and hanshuo
|
||||
=============================================================================*/
|
||||
/*
|
||||
* VNode.cpp
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
* Implement on: 2014-7-2
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "VNode.h"
|
||||
|
||||
using namespace std;
|
||||
#include"VNode.h"
|
||||
#include<iostream>
|
||||
|
||||
VNode::VNode()
|
||||
{
|
||||
this->is_leaf = false;
|
||||
this->is_root = false;
|
||||
this->child_num = 0;
|
||||
// the following three lines are unnecessary.
|
||||
this->self_file_line = -1;
|
||||
this->father_file_line = -1;
|
||||
for(int i = 0; i < VNode::MAX_CHILD_NUM; i ++)
|
||||
|
@ -115,8 +116,8 @@ void VNode::setChildEntry(int _i, const SigEntry _entry)
|
|||
// {
|
||||
// if (this->getFileLine() == 0 && this->getChildFileLine(_i) == 153)
|
||||
// {
|
||||
// Util::logging("set node 0's child node 153's entry:");
|
||||
// Util::logging(Signature::BitSet2str(this->child_entries[_i].getEntitySig().entityBitSet));
|
||||
// Database::log("set node 0's child node 153's entry:");
|
||||
// Database::log(Signature::BitSet2str(this->child_entries[_i].getEntitySig().entityBitSet));
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
|
|
@ -1,25 +1,22 @@
|
|||
/*=============================================================================
|
||||
# Filename: VNode.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: zengli-bookug@pku.edu.cn
|
||||
# Last Modified: 2016-04-11 14:05
|
||||
# Description: written by liyouhuan
|
||||
=============================================================================*/
|
||||
/*
|
||||
* VNode.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef _VSTREE_VNODE_H
|
||||
#define _VSTREE_VNODE_H
|
||||
#ifndef VNODE_H_
|
||||
#define VNODE_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include <vector>
|
||||
#include "../Signature/SigEntry.h"
|
||||
#include "LRUCache.h"
|
||||
#include"LRUCache.h"
|
||||
#include<sstream>
|
||||
|
||||
class VNode
|
||||
{
|
||||
class VNode{
|
||||
public:
|
||||
static const int MAX_CHILD_NUM = 200;
|
||||
//static const int MAX_CHILD_NUM = 151;
|
||||
static const int MIN_CHILD_NUM = 100;
|
||||
//static const int MIN_CHILD_NUM = 60;
|
||||
static const int MAX_CHILD_NUM = 151;
|
||||
static const int MIN_CHILD_NUM = 60;
|
||||
|
||||
//debug
|
||||
// static const int MAX_CHILD_NUM = 50;
|
||||
|
@ -69,10 +66,9 @@ private:
|
|||
int self_file_line;
|
||||
int father_file_line;
|
||||
SigEntry entry;
|
||||
//BETTER:is this necessary? too much memory?
|
||||
SigEntry child_entries[VNode::MAX_CHILD_NUM];
|
||||
int child_file_lines[VNode::MAX_CHILD_NUM];
|
||||
};
|
||||
|
||||
#endif // _VSTREE_VNODE_H
|
||||
|
||||
#endif /* VNODE_H_ */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,57 +1,55 @@
|
|||
/*=============================================================================
|
||||
# Filename: VSTree.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-11-01 13:02
|
||||
# Description: originally written by liyouhuan, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* VSTREE.h
|
||||
*
|
||||
* Created on: 2014-6-20
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#ifndef _VSTREE_VSTREE_H
|
||||
#define _VSTREE_VSTREE_H
|
||||
#ifndef VSTREE_H_
|
||||
#define VSTREE_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Query/SPARQLquery.h"
|
||||
#include "VNode.h"
|
||||
#include "LRUCache.h"
|
||||
#include "EntryBuffer.h"
|
||||
#include<string>
|
||||
#include<map>
|
||||
#include"VNode.h"
|
||||
#include"LRUCache.h"
|
||||
#include"EntryBuffer.h"
|
||||
#include"../Query/SPARQLquery.h"
|
||||
|
||||
class VSTree
|
||||
{
|
||||
class VSTree{
|
||||
friend class VNode;
|
||||
public:
|
||||
VSTree(std::string _store_path);
|
||||
~VSTree();
|
||||
int getHeight()const;
|
||||
//build the VSTree from the _entity_signature_file.
|
||||
/* build the VSTree from the _entity_signature_file. */
|
||||
bool buildTree(std::string _entity_signature_file);
|
||||
bool deleteTree();
|
||||
|
||||
//Incrementally update bitset of _entity_id conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
|
||||
//Entry of _entity_id must exists
|
||||
/* Incrementally update bitset of _entity_id
|
||||
* conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
|
||||
* Entry of _entity_id must exists */
|
||||
bool updateEntry(int _entity_id, const EntityBitSet& _bitset);
|
||||
|
||||
//Replace the Entry(_enitty_id)'s EntityBitSet with _bitset Entry of _entity_id must exists
|
||||
/* Replace the Entry(_enitty_id)'s EntityBitSet with _bitset
|
||||
* Entry of _entity_id must exists */
|
||||
bool replaceEntry(int _entity_id, const EntityBitSet& _bitset);
|
||||
|
||||
//insert an new Entry, whose entity doesn't exist before
|
||||
/* insert an new Entry, whose entity doesn't exist before */
|
||||
bool insertEntry(const SigEntry& _entry);
|
||||
|
||||
//remove an existed Entry(_entity_id) from VSTree
|
||||
/* remove an existed Entry(_entity_id) from VSTree */
|
||||
bool removeEntry(int _entity_id);
|
||||
|
||||
//save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path.
|
||||
/* save the tree information to tree_info_file_path, and flush the tree nodes in memory to tree_node_file_path. */
|
||||
bool saveTree();
|
||||
//load tree from tree_info_file_path and tree_node_file_path files.
|
||||
/* load tree from tree_info_file_path and tree_node_file_path files. */
|
||||
bool loadTree();
|
||||
//get the tree's root node pointer.
|
||||
/* get the tree's root node pointer. */
|
||||
VNode* getRoot();
|
||||
//get the node pointer by its file line.
|
||||
/* get the node pointer by its file line. */
|
||||
VNode* getNode(int _line);
|
||||
//retrieve candidate result set by the var_sig in the _query.
|
||||
/* retrieve candidate result set by the var_sig in the _query. */
|
||||
void retrieve(SPARQLquery& _query);
|
||||
//retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list.
|
||||
void retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list);
|
||||
|
||||
private:
|
||||
int root_file_line;
|
||||
int node_num;
|
||||
|
@ -64,29 +62,32 @@ private:
|
|||
static std::string tree_node_file_path;
|
||||
static std::string tree_info_file_path;
|
||||
|
||||
//choose the best leaf node to insert the _entry, return the choosed leaf node's pointer.
|
||||
/* choose the best leaf node to insert the _entry, return the choosed leaf node's pointer. */
|
||||
VNode* chooseNode(VNode* _p_node, const SigEntry& _entry);
|
||||
//split the _p_full_node to two new node when it is full.
|
||||
//the parameter _insert_entry and _p_insert_node are the entry/node
|
||||
//need to be insert to the _p_full_node.
|
||||
/* split the _p_full_node to two new node when it is full.
|
||||
* the parameter _insert_entry and _p_insert_node are the entry/node
|
||||
* need to be insert to the _p_full_node.
|
||||
*/
|
||||
void split(VNode* _p_full_node, const SigEntry& _insert_entry, VNode* _p_insert_node);
|
||||
//create a new node when one node need splitting.
|
||||
/* create a new node when one node need splitting. */
|
||||
VNode* createNode();
|
||||
//swap two nodes' file line, their related nodes(father and children nodes) will also be updated.
|
||||
/* swap two nodes' file line, their related nodes(father and children nodes) will also be updated. */
|
||||
void swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b);
|
||||
//save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc.
|
||||
/* save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc. */
|
||||
bool saveTreeInfo();
|
||||
//load VSTree's information from tree_info_file_path.
|
||||
/* load VSTree's information from tree_info_file_path. */
|
||||
bool loadTreeInfo();
|
||||
//traverse the tree_node_file_path file, load the mapping from entity id to file line.
|
||||
/* traverse the tree_node_file_path file, load the mapping from entity id to file line. */
|
||||
bool loadEntityID2FileLineMap();
|
||||
//update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node.
|
||||
/* update the entityID2FileLineMap with the _p_node's child entries, the _p_node should be leaf node. */
|
||||
void updateEntityID2FileLineMap(VNode* _p_node);
|
||||
//get the leaf node pointer by the given _entityID
|
||||
/* get the leaf node pointer by the given _entityID */
|
||||
VNode* getLeafNodeByEntityID(int _entityID);
|
||||
/* retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list. */
|
||||
void retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list);
|
||||
|
||||
std::string to_str();
|
||||
};
|
||||
|
||||
#endif // _VSTREE_VSTREE_H
|
||||
|
||||
#endif /* VSTREE_H_ */
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
*.a
|
||||
*.o
|
||||
*.class
|
||||
*.jar
|
||||
|
|
@ -1,14 +1,13 @@
|
|||
/*=============================================================================
|
||||
# Filename: CppAPIExample.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2016-02-21 21:32
|
||||
# Description: originally written by hanshuo, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* CppAPIExample.cpp
|
||||
*
|
||||
* Created on: 2014-11-4
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "GstoreConnector.h"
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include"GstoreConnector.h"
|
||||
#include<string>
|
||||
#include<iostream>
|
||||
|
||||
// before run this example, you must start up the GStore server at first (use command ./gserver).
|
||||
int main(int argc, char * argv[])
|
||||
|
@ -18,30 +17,31 @@ int main(int argc, char * argv[])
|
|||
|
||||
// build a new database by a RDF file.
|
||||
// note that the relative path is related to gserver.
|
||||
gc.build("LUBM10.db", "data/LUBM_10.n3");
|
||||
gc.build("db_LUBM10", "example/rdf_triple/LUBM_10_GStore.n3");
|
||||
|
||||
// then you can execute SPARQL query on this database.
|
||||
std::string sparql = "select ?x where \
|
||||
{ \
|
||||
?x <rdf:type> <ub:UndergraduateStudent>. \
|
||||
?y <ub:name> <Course1>. \
|
||||
?x <ub:takesCourse> ?y. \
|
||||
?z <ub:teacherOf> ?y. \
|
||||
?z <ub:name> <FullProfessor1>. \
|
||||
?z <ub:worksFor> ?w. \
|
||||
?w <ub:name> <Department0>. \
|
||||
?x rdf:type <ub:UndergraduateStudent>. \
|
||||
?y ub:name <Course1>. \
|
||||
?x ub:takesCourse ?y. \
|
||||
?z ub:teacherOf ?y. \
|
||||
?z ub:name <FullProfessor1>. \
|
||||
?z ub:worksFor ?w. \
|
||||
?w ub:name <Department0>. \
|
||||
}";
|
||||
std::string answer = gc.query(sparql);
|
||||
std::cout << answer << std::endl;
|
||||
|
||||
// unload this database.
|
||||
gc.unload("LUBM10.db");
|
||||
gc.unload("db_LUBM10");
|
||||
|
||||
// also, you can load some exist database directly and then query.
|
||||
gc.load("LUBM10.db");
|
||||
gc.load("db_LUBM10");
|
||||
answer = gc.query(sparql);
|
||||
std::cout << answer << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -1,27 +1,19 @@
|
|||
/*=============================================================================
|
||||
# Filename: GstoreConnector.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2016-02-21 21:24
|
||||
# Description: achieve functions in GstoreConnector.h
|
||||
=============================================================================*/
|
||||
/*
|
||||
* GstoreConnector.cpp
|
||||
*
|
||||
* Created on: 2014-11-1
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#include "GstoreConnector.h"
|
||||
#include <iostream>
|
||||
#include"GstoreConnector.h"
|
||||
#include<iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
string GstoreConnector::defaultServerIP = "127.0.0.1";
|
||||
std::string GstoreConnector::defaultServerIP = "127.0.0.1";
|
||||
unsigned short GstoreConnector::defaultServerPort = 3305;
|
||||
|
||||
GstoreConnector::GstoreConnector()
|
||||
{
|
||||
this->serverIP = GstoreConnector::defaultServerIP;
|
||||
}
|
||||
|
||||
GstoreConnector::GstoreConnector(string _ip)
|
||||
{
|
||||
this->serverIP = _ip;
|
||||
this->serverPort = GstoreConnector::defaultServerPort;
|
||||
}
|
||||
|
||||
|
@ -31,44 +23,40 @@ GstoreConnector::GstoreConnector(unsigned short _port)
|
|||
this->serverPort = _port;
|
||||
}
|
||||
|
||||
GstoreConnector::GstoreConnector(string _ip, unsigned short _port)
|
||||
GstoreConnector::GstoreConnector(std::string _ip, unsigned short _port)
|
||||
{
|
||||
if(_ip == "localhost")
|
||||
this->serverIP = "127.0.0.1";
|
||||
else
|
||||
this->serverIP = _ip;
|
||||
this->serverIP = _ip;
|
||||
this->serverPort = _port;
|
||||
}
|
||||
|
||||
GstoreConnector::~GstoreConnector()
|
||||
{
|
||||
this->disconnect();
|
||||
|
||||
}
|
||||
|
||||
bool
|
||||
GstoreConnector::load(string _db_name)
|
||||
bool GstoreConnector::load(std::string _db_name)
|
||||
{
|
||||
bool connect_return = this->connect();
|
||||
if (!connect_return)
|
||||
{
|
||||
cerr << "connect to server error. @GstoreConnector::load" << endl;
|
||||
std::cerr << "connect to server error. @GstoreConnector::load" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string cmd = "load " + _db_name;
|
||||
std::string cmd = "load " + _db_name;
|
||||
bool send_return = this->socket.send(cmd);
|
||||
if (!send_return)
|
||||
{
|
||||
cerr << "send load command error. @GstoreConnector.load" << endl;
|
||||
std::cerr << "send load command error. @GstoreConnector.load" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string recv_msg;
|
||||
std::string recv_msg;
|
||||
this->socket.recv(recv_msg);
|
||||
|
||||
this->disconnect();
|
||||
|
||||
cout << recv_msg << endl; //debug
|
||||
std::cout << recv_msg << std::endl; //debug
|
||||
if (recv_msg == "load database done.")
|
||||
{
|
||||
return true;
|
||||
|
@ -77,30 +65,29 @@ GstoreConnector::load(string _db_name)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GstoreConnector::unload(string _db_name)
|
||||
bool GstoreConnector::unload(std::string _db_name)
|
||||
{
|
||||
bool connect_return = this->connect();
|
||||
if (!connect_return)
|
||||
{
|
||||
cerr << "connect to server error. @GstoreConnector::unload" << endl;
|
||||
std::cerr << "connect to server error. @GstoreConnector::unload" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string cmd = "unload " + _db_name;
|
||||
std::string cmd = "unload " + _db_name;
|
||||
bool send_return = this->socket.send(cmd);
|
||||
if (!send_return)
|
||||
{
|
||||
cerr << "send unload command error. @GstoreConnector::unload" << endl;
|
||||
std::cerr << "send unload command error. @GstoreConnector::unload" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string recv_msg;
|
||||
std::string recv_msg;
|
||||
this->socket.recv(recv_msg);
|
||||
|
||||
this->disconnect();
|
||||
|
||||
cout << recv_msg << endl; //debug
|
||||
std::cout << recv_msg << std::endl; //debug
|
||||
if (recv_msg == "unload database done.")
|
||||
{
|
||||
return true;
|
||||
|
@ -109,30 +96,29 @@ GstoreConnector::unload(string _db_name)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GstoreConnector::build(string _db_name, string _rdf_file_path)
|
||||
bool GstoreConnector::build(std::string _db_name, std::string _rdf_file_path)
|
||||
{
|
||||
bool connect_return = this->connect();
|
||||
if (!connect_return)
|
||||
{
|
||||
cerr << "connect to server error. @GstoreConnector::build" << endl;
|
||||
std::cerr << "connect to server error. @GstoreConnector::build" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string cmd = "import " + _db_name + " " + _rdf_file_path;
|
||||
std::string cmd = "import " + _db_name + " " + _rdf_file_path;
|
||||
bool send_return = this->socket.send(cmd);
|
||||
if (!send_return)
|
||||
{
|
||||
cerr << "send import command error. @GstoreConnector::build" << endl;
|
||||
std::cerr << "send import command error. @GstoreConnector::build" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string recv_msg;
|
||||
std::string recv_msg;
|
||||
this->socket.recv(recv_msg);
|
||||
|
||||
this->disconnect();
|
||||
|
||||
cerr << recv_msg << endl; //debug
|
||||
std::cerr << recv_msg << std::endl; //debug
|
||||
if (recv_msg == "import RDF file to database done.")
|
||||
{
|
||||
return true;
|
||||
|
@ -141,56 +127,24 @@ GstoreConnector::build(string _db_name, string _rdf_file_path)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GstoreConnector::drop(string _db_name)
|
||||
std::string GstoreConnector::query(std::string _sparql)
|
||||
{
|
||||
bool connect_return = this->connect();
|
||||
if (!connect_return)
|
||||
{
|
||||
cerr << "connect to server error. @GstoreConnector::unload" << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string cmd = "drop " + _db_name;
|
||||
bool send_return = this->socket.send(cmd);
|
||||
if (!send_return)
|
||||
{
|
||||
cerr << "send unload command error. @GstoreConnector::unload" << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string recv_msg;
|
||||
this->socket.recv(recv_msg);
|
||||
|
||||
this->disconnect();
|
||||
|
||||
cout << recv_msg << endl; //debug
|
||||
//if (recv_msg == "unload database done.")
|
||||
//{
|
||||
//return true;
|
||||
//}
|
||||
return true;
|
||||
}
|
||||
|
||||
string
|
||||
GstoreConnector::query(string _sparql)
|
||||
{
|
||||
bool connect_return = this->connect();
|
||||
if (!connect_return)
|
||||
{
|
||||
cerr << "connect to server error. @GstoreConnector::query" << endl;
|
||||
std::cerr << "connect to server error. @GstoreConnector::query" << std::endl;
|
||||
return "connect to server error.";
|
||||
}
|
||||
|
||||
string cmd = "query " + _sparql;
|
||||
std::string cmd = "query " + _sparql;
|
||||
bool send_return = this->socket.send(cmd);
|
||||
if (!send_return)
|
||||
{
|
||||
cerr << "send query command error. @GstoreConnector::query";
|
||||
std::cerr << "send query command error. @GstoreConnector::query";
|
||||
return "send query command error.";
|
||||
}
|
||||
|
||||
string recv_msg;
|
||||
std::string recv_msg;
|
||||
this->socket.recv(recv_msg);
|
||||
|
||||
this->disconnect();
|
||||
|
@ -198,46 +152,12 @@ GstoreConnector::query(string _sparql)
|
|||
return recv_msg;
|
||||
}
|
||||
|
||||
string
|
||||
GstoreConnector::show(bool _type)
|
||||
{
|
||||
bool connect_return = this->connect();
|
||||
if (!connect_return)
|
||||
{
|
||||
cerr << "connect to server error. @GstoreConnector::show" << endl;
|
||||
return "connect to server error.";
|
||||
}
|
||||
|
||||
string cmd;
|
||||
if(_type)
|
||||
{
|
||||
cmd = "show all";
|
||||
}
|
||||
else
|
||||
{
|
||||
cmd = "show databases";
|
||||
}
|
||||
bool send_return = this->socket.send(cmd);
|
||||
if (!send_return)
|
||||
{
|
||||
cerr << "send show command error. @GstoreConnector::show";
|
||||
return "send query command error.";
|
||||
}
|
||||
|
||||
string recv_msg;
|
||||
this->socket.recv(recv_msg);
|
||||
this->disconnect();
|
||||
|
||||
return recv_msg;
|
||||
}
|
||||
|
||||
bool
|
||||
GstoreConnector::connect()
|
||||
bool GstoreConnector::connect()
|
||||
{
|
||||
bool flag = this->socket.create();
|
||||
if (!flag)
|
||||
{
|
||||
cerr << "cannot create socket. @GstoreConnector::connect" << endl;
|
||||
std::cerr << "cannot create socket. @GstoreConnector::connect" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -245,18 +165,16 @@ GstoreConnector::connect()
|
|||
|
||||
if (!flag)
|
||||
{
|
||||
cerr << "cannot connect to server. @GstoreConnector::connect" << endl;
|
||||
std::cerr << "cannot connect to server. @GstoreConnector::connect" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GstoreConnector::disconnect()
|
||||
bool GstoreConnector::disconnect()
|
||||
{
|
||||
bool flag = this->socket.close();
|
||||
|
||||
return flag;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,32 +1,28 @@
|
|||
/*=============================================================================
|
||||
# Filename: GstoreConnector.h
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2016-02-21 21:22
|
||||
# Description: originally written by hanshuo, modified by zengli
|
||||
=============================================================================*/
|
||||
/*
|
||||
* GstoreConnector.h
|
||||
*
|
||||
* Created on: 2014-11-1
|
||||
* Author: hanshuo
|
||||
*/
|
||||
|
||||
#ifndef _GSTORECONNECTOR_H
|
||||
#define _GSTORECONNECTOR_H
|
||||
#ifndef GSTORECONNECTOR_H_
|
||||
#define GSTORECONNECTOR_H_
|
||||
|
||||
#include "../../../Server/Socket.h"
|
||||
#include <cstring>
|
||||
#include"../../../Server/Socket.h"
|
||||
#include<cstring>
|
||||
|
||||
class GstoreConnector
|
||||
{
|
||||
public:
|
||||
GstoreConnector();
|
||||
GstoreConnector(std::string _ip);
|
||||
GstoreConnector(unsigned short _port);
|
||||
GstoreConnector(std::string _ip, unsigned short _port);
|
||||
GstoreConnector(unsigned short _port);
|
||||
~GstoreConnector();
|
||||
|
||||
bool load(std::string _db_name);
|
||||
bool unload(std::string _db_name);
|
||||
bool build(std::string _db_name, std::string _rdf_file_path);
|
||||
bool drop(std::string _db_name);
|
||||
std::string query(std::string _sparql);
|
||||
std::string show(bool _type=false); //show current or all databases
|
||||
|
||||
private:
|
||||
static std::string defaultServerIP;
|
||||
|
@ -42,5 +38,4 @@ private:
|
|||
bool disconnect();
|
||||
};
|
||||
|
||||
#endif // _GSTORECONNECTOR_H
|
||||
|
||||
#endif /* GSTORECONNECTOR_H_ */
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
lib_dir=../lib/
|
||||
socket_obj_dir=../../../.objs/
|
||||
socket_obj_dir=../../../objs/
|
||||
|
||||
all: $(lib_dir)libgstoreconnector.a
|
||||
|
||||
$(lib_dir)libgstoreconnector.a: GstoreConnector.o $(socket_obj_dir)Socket.o
|
||||
libgstoreconnector.a: GstoreConnector.o $(socket_obj_dir)Socket.o
|
||||
ar -crv $(lib_dir)libgstoreconnector.a GstoreConnector.o $(socket_obj_dir)Socket.o
|
||||
|
||||
GstoreConnector.o: GstoreConnector.cpp GstoreConnector.h $(socket_obj_dir)Socket.o
|
||||
|
|
|
@ -17,29 +17,28 @@ public class JavaAPIExample
|
|||
|
||||
// build a new database by a RDF file.
|
||||
// note that the relative path is related to gserver.
|
||||
gc.build("LUBM10.db", "data/LUBM_10.n3");
|
||||
gc.build("db_LUBM10", "example/rdf_triple/LUBM_10_GStore.n3");
|
||||
|
||||
// then you can execute SPARQL query on this database.
|
||||
String sparql = "select ?x where "
|
||||
+ "{"
|
||||
+ "?x <rdf:type> <ub:UndergraduateStudent>. "
|
||||
+ "?y <ub:name> <Course1>. "
|
||||
+ "?x <ub:takesCourse> ?y. "
|
||||
+ "?z <ub:teacherOf> ?y. "
|
||||
+ "?z <ub:name> <FullProfessor1>. "
|
||||
+ "?z <ub:worksFor> ?w. "
|
||||
+ "?w <ub:name> <Department0>. "
|
||||
+ "?x rdf:type <ub:UndergraduateStudent>. "
|
||||
+ "?y ub:name <Course1>. "
|
||||
+ "?x ub:takesCourse ?y. "
|
||||
+ "?z ub:teacherOf ?y. "
|
||||
+ "?z ub:name <FullProfessor1>. "
|
||||
+ "?z ub:worksFor ?w. "
|
||||
+ "?w ub:name <Department0>. "
|
||||
+ "}";
|
||||
String answer = gc.query(sparql);
|
||||
System.out.println(answer);
|
||||
|
||||
// unload this database.
|
||||
gc.unload("LUBM10.db");
|
||||
gc.unload("db_LUBM10");
|
||||
|
||||
// also, you can load some exist database directly and then query.
|
||||
gc.load("LUBM10.db");
|
||||
gc.load("db_LUBM10");
|
||||
answer = gc.query(sparql);
|
||||
System.out.println(answer);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
JavaAPIExample.class:
|
||||
javac -cp ../lib/GstoreJavaAPI.jar JavaAPIExample.java
|
||||
|
||||
.PHONY: clean run
|
||||
|
||||
run: JavaAPIExample.class
|
||||
java -cp ../lib/GstoreJavaAPI.jar:. JavaAPIExample
|
||||
|
||||
clean:
|
||||
rm -f JavaAPIExample.class
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue