2016-03-12 01:52:18 +08:00
|
|
|
/*=============================================================================
|
|
|
|
# Filename: BasicQuery.h
|
|
|
|
# Author: Bookug Lobert
|
|
|
|
# Mail: 1181955272@qq.com
|
|
|
|
# Last Modified: 2015-10-31 19:18
|
|
|
|
# Description: originally written by liyouhuan, modified by zengli
|
|
|
|
=============================================================================*/
|
|
|
|
|
|
|
|
#ifndef _QUERY_BASICQUERY_H
|
|
|
|
#define _QUERY_BASICQUERY_H
|
|
|
|
|
|
|
|
#include "../Util/Util.h"
|
2016-04-01 20:58:15 +08:00
|
|
|
#include "../Util/Triple.h"
|
2016-03-12 01:52:18 +08:00
|
|
|
#include "../Signature/Signature.h"
|
|
|
|
#include "../KVstore/KVstore.h"
|
|
|
|
#include "IDList.h"
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
//NOTICE: the query graph must be linked
|
|
|
|
//var_id == -1: constant(string), entity or literal, or vars not in join process
|
|
|
|
//All constants should be dealt with before joining tables!
|
|
|
|
//A var in query can be subject or object, and both. Once in subject,
|
|
|
|
//it cannot contain literal, while in object it may contain entity,
|
|
|
|
//literal, or both
|
|
|
|
//
|
|
|
|
//a subject cannot be literal, but an object can be entity or literal
|
|
|
|
//not supported: ?v1 and this is a predicate
|
|
|
|
//pre_id == -1: the query graph is not valid and the result should be empty
|
|
|
|
//
|
|
|
|
//DEFINE:
|
|
|
|
//literal variable - no edge out(only occur in objects)
|
|
|
|
//(after we retrieve all candidates from vstree, only entities are considered, the vars
|
|
|
|
//which only present in objects are possible to contain literals, so we must mark this!)
|
|
|
|
//free literal variable - a literal variable and all its neighbor id != -1
|
|
|
|
//(i.e. no constant neighbor which will restrict this variable, otherwise, we can acquire
|
|
|
|
//this var's can_list by querying in kvstore according to the constant and pre)
|
|
|
|
|
|
|
|
//TODO: free var's neighbor id != -1, how about vars not in join? (degree == 1), do not
|
|
|
|
//need to add? or already added in literal_edge_filter, just as constants?
|
|
|
|
//it is ok for var in select to be free var, but this can not be used as start point.
|
|
|
|
//(we assume start point is all ok and then search deeply)
|
|
|
|
//TODO:fix the graph below!!!
|
|
|
|
//However, we can always find a start point because not all vars are all in objects!
|
|
|
|
//(otherwise, no edge in query graph)
|
|
|
|
//What is more, some graphs will be regarded as not-connected, such as:
|
|
|
|
//A-c0-B, c0 is a constant, we should do a A x B here!
|
|
|
|
//two-part-matching, ABC and c1c2, each node connects with this two constants.
|
|
|
|
//(edge maybe different)
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------+
|
|
|
|
//|elements in BasicQuery(all are strings) |
|
|
|
|
//|| |
|
|
|
|
//|+---constants(all need to be dealed before join) |
|
|
|
|
//|| | |
|
|
|
|
//|| +---literals(quoted in "") |
|
|
|
|
//|| | graph_var_num, the num of vars to join |
|
|
|
|
//|| +---entities(included in <>, prefix is allowed) | |
|
|
|
|
//|| | |
|
|
|
|
//|+---variables(all begin with '?') | |
|
|
|
|
//| | | |
|
|
|
|
//| +---selected vars(the former select_var_num ones) <<<<<<<<<<<<<<<<<<<+ |
|
|
|
|
//| | | |
|
|
|
|
//| +---not selected vars | |
|
|
|
|
//| | | |
|
|
|
|
//| +---degree > 1 <<<<<<<<<<<as bridge<<<<<<<<<<<<<<<<<<<<<<<<<<+ |
|
|
|
|
//| | | |
|
|
|
|
//| | +---exist in subjects(cannot be literal) |
|
|
|
|
//| | | |
|
|
|
|
//| | +---just in objects(all edges in, may include literals) |
|
|
|
|
//| | | |
|
|
|
|
//| | +---free(all neighbors are vars) |
|
|
|
|
//| | | |
|
|
|
|
//| | +---not-free(exist constant neighbors) |
|
|
|
|
//| | |
|
|
|
|
//| +---degree == 1(dealed after join) |
|
|
|
|
//| | |
|
|
|
|
//| +---subject |
|
|
|
|
//| | |
|
|
|
|
//| +---object |
|
|
|
|
//+---------------------------------------------------------------------------+
|
|
|
|
|
2016-03-12 01:52:18 +08:00
|
|
|
class BasicQuery
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
vector<string> option_vs;
|
|
|
|
vector<Triple> triple_vt;
|
2016-04-01 20:58:15 +08:00
|
|
|
// mapping from variables' name to their assigned id
|
2016-03-12 01:52:18 +08:00
|
|
|
map<std::string, int> var_str2id;
|
2016-04-01 20:58:15 +08:00
|
|
|
// record each tuple's(subject, predicate, object) number of occurrences in this BasicQuery
|
2016-03-12 01:52:18 +08:00
|
|
|
map<std::string, int> tuple2freq;
|
|
|
|
map<std::string, int> var_not_in_select;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// id < select_var_num means in select
|
2016-03-12 01:52:18 +08:00
|
|
|
int select_var_num;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// var_num is different from that in SPARQLquery
|
|
|
|
// because there are some variable not in select
|
|
|
|
int graph_var_num;
|
2016-03-12 01:52:18 +08:00
|
|
|
string* var_name;
|
|
|
|
IDList* candidate_list;
|
|
|
|
vector<int*> result_list;
|
2016-04-01 20:58:15 +08:00
|
|
|
int* var_degree;
|
2016-03-12 01:52:18 +08:00
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// whether has added the variable's literal candidate
|
2016-03-12 01:52:18 +08:00
|
|
|
bool* is_literal_candidate_added;
|
|
|
|
|
|
|
|
char encode_method;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// edge_id[var_id][i] : the line id of the i-th edge of the var
|
2016-03-12 01:52:18 +08:00
|
|
|
int** edge_id;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// edge_id[var_id][i] : the neighbor id of the i-th edge of the var
|
2016-03-12 01:52:18 +08:00
|
|
|
int** edge_nei_id;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// edge_pre_id[var_id][i] : the preID of the i-th edge of the var
|
2016-03-12 01:52:18 +08:00
|
|
|
int** edge_pre_id;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// denote the type of edge, assigned with
|
|
|
|
// BasicQuery::IN or BasicQuery::OUT
|
|
|
|
// edge_type[var_id][i]
|
2016-03-12 01:52:18 +08:00
|
|
|
char** edge_type;
|
|
|
|
|
|
|
|
EntityBitSet* var_sig;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// edge_sig[sub_id][obj_id]
|
2016-03-12 01:52:18 +08:00
|
|
|
EdgeBitSet** edge_sig;
|
|
|
|
|
|
|
|
void addInVarNotInSelect();
|
|
|
|
void findVarNotInSelect();
|
|
|
|
void buildTuple2Freq();
|
|
|
|
void initial();
|
|
|
|
void null_initial();
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
void updateSubSig(int _sub_id, int _pre_id, int _obj_id, std::string _obj, int _line_id);
|
|
|
|
void updateObjSig(int _obj_id, int _pre_id, int _sub_id, std::string _sub, int _line_id);
|
|
|
|
|
2016-03-12 01:52:18 +08:00
|
|
|
public:
|
|
|
|
static const char EDGE_IN = 'i';
|
|
|
|
static const char EDGE_OUT= 'o';
|
|
|
|
static const int MAX_VAR_NUM = 10;
|
|
|
|
static const char NOT_JUST_SELECT = 'a';
|
|
|
|
static const char SELECT_VAR = 's';
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// _query is a SPARQL query string
|
2016-03-12 01:52:18 +08:00
|
|
|
BasicQuery(const string _query="");
|
|
|
|
~BasicQuery();
|
|
|
|
void clear();
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
//get the number of variables which are in join
|
2016-03-12 01:52:18 +08:00
|
|
|
int getVarNum();
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
//get selected number of variadbles
|
|
|
|
int getSelectVarNum();
|
|
|
|
|
|
|
|
// get the name of _var in the query graph
|
2016-03-12 01:52:18 +08:00
|
|
|
std::string getVarName(int _var);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get triples number, also sentences number
|
2016-03-12 01:52:18 +08:00
|
|
|
int getTripleNum();
|
|
|
|
|
|
|
|
std::string to_str();
|
|
|
|
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get the ID of the i-th triple
|
2016-03-12 01:52:18 +08:00
|
|
|
const Triple& getTriple(int _i_th_triple);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get the ID of the i-th edge of _var
|
2016-03-12 01:52:18 +08:00
|
|
|
int getEdgeID(int _var, int _i_th_edge);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get the ID of the i-th edge of _var
|
2016-03-12 01:52:18 +08:00
|
|
|
int getEdgeNeighborID(int _var, int _i_th_edge);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get the preID of the i-th edge of _var
|
2016-03-12 01:52:18 +08:00
|
|
|
int getEdgePreID(int _var, int _i_th_edge);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get the type of the i-th edge of _var
|
2016-03-12 01:52:18 +08:00
|
|
|
char getEdgeType(int _var, int _i_th_edge);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
//get the degree of _var in the query graph
|
2016-03-12 01:52:18 +08:00
|
|
|
int getVarDegree(int _var);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
//get the index of edge between two var ids
|
|
|
|
int getEdgeIndex(int _id0, int _id);
|
|
|
|
|
2016-03-12 01:52:18 +08:00
|
|
|
const EntityBitSet& getVarBitSet(int _i)const;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get the candidate list of _var in the query graph
|
2016-03-12 01:52:18 +08:00
|
|
|
IDList& getCandidateList(int _var);
|
|
|
|
|
|
|
|
int getCandidateSize(int _var);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get the result list of _var in the query graph
|
2016-03-12 01:52:18 +08:00
|
|
|
vector<int*>& getResultList();
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// get the entity signature of _var in the query graph
|
2016-03-12 01:52:18 +08:00
|
|
|
const EntityBitSet& getEntitySignature(int _var);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// check whether the i-th edge of _var is IN edge
|
2016-03-12 01:52:18 +08:00
|
|
|
bool isInEdge(int _var, int _i_th_edge)const;
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// check whether the i-th edge of _var is OUT edge
|
2016-03-12 01:52:18 +08:00
|
|
|
bool isOutEdge(int _var, int _i_th_edge)const;
|
|
|
|
|
|
|
|
bool isOneDegreeNotSelectVar(std::string& _not_select_var);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// check whether _var may include some literal results
|
2016-03-12 01:52:18 +08:00
|
|
|
bool isLiteralVariable(int _var);
|
2016-04-01 20:58:15 +08:00
|
|
|
// check whether _var is literal variable and do not have any entity neighbors
|
2016-03-12 01:52:18 +08:00
|
|
|
bool isFreeLiteralVariable(int _var);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// check whether has added _var's literal candidates
|
2016-03-12 01:52:18 +08:00
|
|
|
bool isAddedLiteralCandidate(int _var);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// set _var's literal candidates has been added
|
2016-03-12 01:52:18 +08:00
|
|
|
void setAddedLiteralCandidate(int _var);
|
|
|
|
|
2016-04-01 20:58:15 +08:00
|
|
|
// encode relative signature data of the query graph
|
2016-03-12 01:52:18 +08:00
|
|
|
void encodeBasicQuery(KVstore* _p_kvstore, const std::vector<std::string>& _query_var);
|
|
|
|
|
|
|
|
void addTriple(const Triple& _triple);
|
|
|
|
void print(ostream& _out_stream);
|
|
|
|
|
|
|
|
int getVarID_MinCandidateList();
|
|
|
|
int getVarID_MaxCandidateList();
|
|
|
|
int getVarID_FirstProcessWhenJoin();
|
|
|
|
|
|
|
|
std::string candidate_str();
|
|
|
|
std::string result_str();
|
|
|
|
std::string triple_str();
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif //_QUERY_BASICQUERY_H
|
|
|
|
|