/*=============================================================================
# Filename: BasicQuery.h
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2015-10-31 19:18
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/

#ifndef _QUERY_BASICQUERY_H
|
|
#define _QUERY_BASICQUERY_H

#include <map>
#include <ostream>
#include <set>
#include <string>
#include <vector>

#include "../Util/Util.h"
#include "../Util/Triple.h"
#include "../Signature/Signature.h"
#include "../KVstore/KVstore.h"
#include "IDList.h"

//NOTICE: the query graph must be connected
//var_id == -1: constant(string), entity or literal
//All constants should be dealt with before joining tables!
//A var in a query can be a subject, an object, or both. Once it occurs as a
//subject it cannot contain literals, while as an object it may contain
//entities, literals, or both
//The vars not in the join process are also encoded, so they are not -1
//
//a subject cannot be a literal, but an object can be an entity or a literal
//not supported: ?v1 and this is a predicate
//pre_id == -1: the query graph is not valid and the result should be empty
//
//DEFINE:
//literal variable - no edge out(only occurs in objects)
//(after we retrieve all candidates from vstree, only entities are considered; the vars
//which are only present in objects may contain literals, so we must mark this!)
//free literal variable - a literal variable and all its neighbor id != -1
//(i.e. no constant neighbor which will restrict this variable; otherwise, we can acquire
//this var's can_list by querying in kvstore according to the constant and pre)

//It is ok for a var in select to be a free var, but it cannot be used as the start point.
//(we assume the candidates of the start point are all ok and then search deeply)
//However, we can always find a start point because not all vars occur only in objects!
//(otherwise, no edge in query graph)
//
//What is more, some graphs will be regarded as not-connected, such as:
//A-c0-B, c0 is a constant, we should do a A x B here!
//two-part-matching, ABC and c1c2, each node connects with these two constants.
//(edges may be different)

//+---------------------------------------------------------------------------+
//|elements in BasicQuery(all are strings)                                    |
//||                                                                          |
//|+---constants(all need to be dealt with before join)                       |
//||   |                                                                      |
//||   +---literals(quoted in "")                                             |
//||   |                               graph_var_num, the num of vars to join |
//||   +---entities(included in <>, prefix is allowed)               |        |
//||                                                                 |        |
//|+---variables(all begin with '?')                                 |        |
//|    |                                                             |        |
//|    +---selected vars(the former select_var_num ones) <<<<<<<<<<<<+        |
//|    |                                                             |        |
//|    +---not selected vars                                         |        |
//|        |                                                         |        |
//|        +---degree > 1 <<<<<<<<<<<as bridge<<<<<<<<<<<<<<<<<<<<<<<+        |
//|        |   |                                                              |
//|        |   +---exist in subjects(cannot be literal)                       |
//|        |   |                                                              |
//|        |   +---just in objects(all edges in, may include literals)        |
//|        |       |                                                          |
//|        |       +---free(all neighbors are vars)                           |
//|        |       |                                                          |
//|        |       +---not-free(exist constant neighbors)                     |
//|        |                                                                  |
//|        +---degree == 1(dealt with after join)                             |
//|            |                                                              |
//|            +---subject                                                    |
//|            |                                                              |
//|            +---object                                                     |
//+---------------------------------------------------------------------------+

// Describes one predicate variable of a query (BasicQuery keeps a vector of these
// as its "infos for predicate variables").
class PreVar
{
public:
	// name of the variable
	std::string name;
	// integer ids of the triples associated with this variable
	// NOTE(review): presumably indices into the owning query's triple list -- confirm
	std::vector<int> triples;
	// whether this variable is marked as selected
	bool selected;

	// Creates an unnamed, not-selected variable.
	PreVar() : selected(false)
	{
	}

	// Creates a not-selected variable called _name.
	// Deliberately not `explicit`, so existing implicit conversions from a
	// string keep compiling.
	PreVar(const std::string& _name) : name(_name), selected(false)
	{
	}

	// Creates a variable called _name whose selected flag is _flag.
	PreVar(const std::string& _name, bool _flag) : name(_name), selected(_flag)
	{
	}
};

class BasicQuery
|
|
{
|
|
private:
|
|
vector<Triple> triple_vt;
|
|
// mapping from variables' name to their assigned id
|
|
map<std::string, int> var_str2id;
|
|
// record each tuple's(subject, predicate, object) number of occurrences in this BasicQuery
|
|
map<std::string, int> tuple2freq;
|
|
set<std::string> selected_var_set;
|
|
//NOTICE:this is not used now!
|
|
map<std::string, int> var_not_in_select;
|
|
|
|
// id < select_var_num means in select
|
|
int select_var_num;
|
|
// var_num is different from that in SPARQLquery
|
|
// because there are some variable not in select
|
|
int graph_var_num;
|
|
int total_var_num;
|
|
int retrieve_var_num;
|
|
string* var_name;
|
|
IDList* candidate_list;
|
|
vector<int*> result_list;
|
|
int* var_degree;
|
|
|
|
//whether has added the variable's literal candidate
|
|
//bool* is_literal_candidate_added;
|
|
bool* ready;
|
|
//if need to be retrieved by vstree or generate when join(first is graph var)
|
|
bool* need_retrieve;
|
|
|
|
char encode_method;
|
|
|
|
//save the result of encodeBasicQuery
|
|
bool encode_result;
|
|
|
|
// edge_id[var_id][i] : the line id of the i-th edge of the var
|
|
int** edge_id;
|
|
|
|
// edge_id[var_id][i] : the neighbor id of the i-th edge of the var
|
|
int** edge_nei_id;
|
|
|
|
// edge_pre_id[var_id][i] : the preID of the i-th edge of the var
|
|
int** edge_pre_id;
|
|
|
|
// denote the type of edge, assigned with
|
|
// BasicQuery::IN or BasicQuery::OUT
|
|
// edge_type[var_id][i]
|
|
char** edge_type;
|
|
|
|
EntityBitSet* var_sig;
|
|
|
|
// BETTER:edge sig is of little importance
|
|
// edge_sig[sub_id][obj_id]
|
|
EdgeBitSet** edge_sig;
|
|
|
|
void addInVarNotInSelect();
|
|
void findVarNotInSelect();
|
|
void buildTuple2Freq();
|
|
void initial();
|
|
void null_initial();
|
|
|
|
void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
|
|
void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
|
|
|
|
//infos for predicate variables
|
|
vector<PreVar> pre_var;
|
|
int selected_pre_var_num;
|
|
|
|
//to manage the selected vars order(considering selected pre var)
|
|
//NOTICE:the order is always same as query_var in encodeBasicQuery, but not always same as select-clause
|
|
//positive key id for normal var, -1-id for pre var
|
|
map<int, int> selected_var_position;
|
|
|
|
public:
|
|
static const int MAX_VAR_NUM = 10;
|
|
static const int MAX_PRE_VAR_NUM = 10;
|
|
static const char NOT_JUST_SELECT = 'a';
|
|
static const char SELECT_VAR = 's';
|
|
|
|
// _query is a SPARQL query string
|
|
BasicQuery(const string _query="");
|
|
~BasicQuery();
|
|
void clear();
|
|
|
|
//get the number of variables which are in join
|
|
int getVarNum();
|
|
|
|
//get selected number of variadbles
|
|
int getSelectVarNum();
|
|
|
|
//get the total number of variables
|
|
int getTotalVarNum();
|
|
|
|
//get the retrieved number of variables
|
|
int getRetrievedVarNum();
|
|
|
|
// get the name of _var in the query graph
|
|
std::string getVarName(int _var);
|
|
|
|
int getIDByVarName(const string& _name);
|
|
|
|
// get triples number, also sentences number
|
|
int getTripleNum();
|
|
|
|
//check if a normal var is in select
|
|
bool isVarSelected(const std::string& _name) const;
|
|
|
|
std::string to_str();
|
|
|
|
// get the ID of the i-th triple
|
|
const Triple& getTriple(int _i_th_triple);
|
|
|
|
// get the ID of the i-th edge of _var
|
|
int getEdgeID(int _var, int _i_th_edge);
|
|
|
|
// get the ID of var, where the i-th edge of _var points to
|
|
int getEdgeNeighborID(int _var, int _i_th_edge);
|
|
|
|
// get the preID of the i-th edge of _var
|
|
int getEdgePreID(int _var, int _i_th_edge);
|
|
|
|
// get the type of the i-th edge of _var
|
|
char getEdgeType(int _var, int _i_th_edge);
|
|
|
|
//get the degree of _var in the query graph
|
|
int getVarDegree(int _var);
|
|
|
|
//get the index of edge between two var ids
|
|
vector<int> getEdgeIndex(int _id0, int _id);
|
|
|
|
const EntityBitSet& getVarBitSet(int _i)const;
|
|
|
|
// get the candidate list of _var in the query graph
|
|
IDList& getCandidateList(int _var);
|
|
|
|
int getCandidateSize(int _var);
|
|
|
|
// get the result list of _var in the query graph
|
|
vector<int*>& getResultList();
|
|
vector<int*>* getResultListPointer();
|
|
|
|
// get the entity signature of _var in the query graph
|
|
const EntityBitSet& getEntitySignature(int _var);
|
|
|
|
// check whether the i-th edge of _var is IN edge
|
|
bool isInEdge(int _var, int _i_th_edge)const;
|
|
|
|
// check whether the i-th edge of _var is OUT edge
|
|
bool isOutEdge(int _var, int _i_th_edge)const;
|
|
|
|
bool isOneDegreeVar(string& _no_sense_var);
|
|
bool isOneDegreeNotJoinVar(std::string& _not_select_var);
|
|
|
|
// check whether _var may include some literal results
|
|
bool isLiteralVariable(int _var);
|
|
// check whether _var is literal variable and do not have any entity neighbors
|
|
bool isFreeLiteralVariable(int _var);
|
|
|
|
// check whether has added _var's literal candidates
|
|
//bool isAddedLiteralCandidate(int _var);
|
|
|
|
// set _var's literal candidates has been added
|
|
//void setAddedLiteralCandidate(int _var);
|
|
|
|
//check if need to be retrieved
|
|
bool if_need_retrieve(int _var);
|
|
bool isSatelliteInJoin(int _var);
|
|
|
|
//if the candidates of var(in join) is all ready
|
|
bool isReady(int _var) const;
|
|
void setReady(int _var);
|
|
|
|
// encode relative signature data of the query graph
|
|
bool encodeBasicQuery(KVstore* _p_kvstore, const std::vector<std::string>& _query_var);
|
|
bool getEncodeBasicQueryResult() const;
|
|
|
|
unsigned getPreVarNum() const;
|
|
const PreVar& getPreVarByID(unsigned) const;
|
|
int getPreVarID(const string& _name) const;
|
|
unsigned getSelectedPreVarNum() const;
|
|
bool isPreVarSelected(unsigned _pid) const;
|
|
|
|
int getSelectedVarPosition(int _var);
|
|
int getSelectedPreVarPosition(int _var);
|
|
int getSelectedVarPosition(std::string _var);
|
|
int getSelectedPreVarPosition(std::string _var);
|
|
|
|
void addTriple(const Triple& _triple);
|
|
void print(ostream& _out_stream);
|
|
|
|
int getVarID_MinCandidateList();
|
|
int getVarID_MaxCandidateList();
|
|
int getVarID_FirstProcessWhenJoin();
|
|
|
|
std::string candidate_str();
|
|
std::string result_str();
|
|
std::string triple_str();
|
|
};
|
|
|
|
#endif //_QUERY_BASICQUERY_H
|
|
|