// gStore/Database/Join.h
/*=============================================================================
# Filename: Join.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-12-13 16:05
# Description: design join strategies and select/cost modules
=============================================================================*/
#ifndef _JOIN_JOIN_H
#define _JOIN_JOIN_H
#include "../Query/IDList.h"
#include "../Query/BasicQuery.h"
#include "../Query/SPARQLquery.h"
#include "../KVstore/KVstore.h"
#include "../Util/Util.h"
// Container aliases shared by the join code.
// A record is a vector of unsigned values; a table is a linked list of records.
// Names are std::-qualified so the aliases do not depend on a using-directive
// leaking in from another header.
typedef std::vector<unsigned> RecordType;
typedef std::vector<unsigned>::iterator RecordIterator;

typedef std::list<RecordType> TableType;
typedef std::list<RecordType>::iterator TableIterator;
typedef std::list<RecordType>::reverse_iterator TableReverseIterator;
//previous int-based versions, kept for reference
//typedef list< vector<int> > TableType;
//typedef list< vector<int> >::iterator TableIterator;
//typedef list< vector<int> >::reverse_iterator TableReverseIterator;

//typedef vector< vector<int*> > IdLists;
//typedef vector< vector<int> > IdListsLen;

// Bundles an integer id with an unsigned array and its length.
// The struct only stores the pointer it is given: it declares no destructor
// and never frees or copies the buffer, so the caller must keep the buffer
// alive for as long as the Satellite is used.
// NOTE(review): presumably `id` is a query-variable id and `idlist` its
// candidate list -- confirm against the users of Join::satellites.
struct Satellite
{
    int id;
    unsigned* idlist;
    unsigned idlist_len;

    // _id:         identifier stored in `id`
    // _idlist:     pointer stored as-is in `idlist` (not copied)
    // _idlist_len: number of elements reachable through `_idlist`
    Satellite(int _id, unsigned* _idlist, unsigned _idlist_len)
        : id(_id), idlist(_idlist), idlist_len(_idlist_len)
    {
    }
};

//Database new Join and pass something like kvstore
class Join
{
private:
int start_id;
int var_num;
BasicQuery* basic_query;
KVstore* kvstore;
2017-03-24 20:10:43 +08:00
TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
2016-03-12 01:52:18 +08:00
//used by score_node for parameters
static const unsigned PARAM_DEGREE = 1;
2017-01-16 14:12:57 +08:00
static const unsigned PARAM_SIZE = 1000000;
static const unsigned PARAM_PRE = 10000;
2016-03-12 01:52:18 +08:00
static const unsigned PARAM_DENSE = 1;
2017-03-26 21:10:37 +08:00
static const unsigned JUDGE_LIMIT = 2;
2017-03-26 21:10:37 +08:00
//NOTICE+DEBUG: please use constexpr below instead of the phase above(constexpr is supported in C++11)
//http://www.cnblogs.com/wanyuanchun/p/4041080.html
//constexpr static const double JUDGE_LIMIT = 0.5;
2017-03-24 20:10:43 +08:00
static const unsigned LIMIT_CANDIDATE_LIST_SIZE = 1000;
2016-05-16 03:16:22 +08:00
//BETTER?:predefine size to avoid copy cost
2016-03-12 01:52:18 +08:00
TableType current_table;
TableIterator new_start; //keep to end() as default
//list<bool> table_row_new;
//keep the mapping for disordered ids in vector<int> table
int* id2pos;
int id_pos; //the num of id put into id2pos currently
int* pos2id;
bool* dealed_triple;
2016-03-12 01:52:18 +08:00
stack<int> mystack;
2017-03-24 20:10:43 +08:00
vector<unsigned*>* result_list;
2016-05-16 03:16:22 +08:00
vector<Satellite> satellites;
2017-03-24 20:10:43 +08:00
unsigned* record;
unsigned record_len;
2016-05-16 03:16:22 +08:00
2016-03-12 01:52:18 +08:00
void init(BasicQuery* _basic_query);
void clear();
void add_id_pos_mapping(int _id);
2016-05-16 03:16:22 +08:00
void reset_id_pos_mapping();
2016-03-12 01:52:18 +08:00
//judge which method should be used according to
//the size of candidates and structure of quering graph
2017-03-24 20:10:43 +08:00
int judge(unsigned _smallest, unsigned _biggest);
2016-03-12 01:52:18 +08:00
//select the start point and search order
void select();
//score the cost to link two tables and the efficience
//of filtering
//int score(List1, List2);
//score the node according to degree and size
2017-01-16 14:12:57 +08:00
double score_node(int var);
2016-03-12 01:52:18 +08:00
2016-06-15 16:35:50 +08:00
void toStartJoin();
2016-05-16 03:16:22 +08:00
bool filter_before_join();
bool constant_edge_filter(int _var_i);
2016-03-12 01:52:18 +08:00
void preid_filter(int _var_i);
2016-05-16 03:16:22 +08:00
bool only_pre_filter_after_join();
2016-03-12 01:52:18 +08:00
void add_literal_candidate();
2016-05-16 03:16:22 +08:00
bool pre_var_handler();
//bool filterBySatellites(int _var, int _ele);
2017-01-16 14:12:57 +08:00
bool preFilter(int _var);
bool allFilterByPres();
2016-05-16 03:16:22 +08:00
void cartesian(int pos, int end);
2016-03-12 01:52:18 +08:00
//functions for help
//copy/add to the end of current_table and set true
2017-03-24 20:10:43 +08:00
void add_new_to_results(TableIterator it, unsigned id);
2016-03-12 01:52:18 +08:00
//void set_results_old(list<bool>::iterator it);
2016-05-16 03:16:22 +08:00
int choose_next_node(int id);
2016-03-12 01:52:18 +08:00
bool is_literal_var(int id);
2017-03-24 20:10:43 +08:00
//bool is_literal_ele(int _id);
2016-05-16 03:16:22 +08:00
void copyToResult();
//BETTER?:change these params to members in class
2017-03-24 20:10:43 +08:00
//void acquire_all_id_lists(IdLists& _id_lists, IdListsLen& _id_lists_len, IDList& _can_list, vector<int>& _edges, int _id, unsigned _can_list_size);
void update_answer_list(IDList*& valid_ans_list, IDList& _can_list, unsigned* id_list, unsigned id_list_len, bool _is_literal);
bool join_two(vector< vector<int> >& _edges, IDList& _can_list, unsigned _can_list_size, int _id, bool _is_literal);
2016-03-12 01:52:18 +08:00
2016-05-16 03:16:22 +08:00
bool multi_join();
//NOTICE:this is only used to join a BasicQuery
bool join();
2016-03-12 01:52:18 +08:00
public:
Join();
2017-03-24 20:10:43 +08:00
Join(KVstore* _kvstore, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal);
2016-05-16 03:16:22 +08:00
//these functions can be called by Database
bool join_sparql(SPARQLquery& _sparql_query);
2016-05-16 03:16:22 +08:00
bool join_basic(BasicQuery* _basic_query);
2016-03-12 01:52:18 +08:00
~Join();
};
#endif //_JOIN_JOIN_H