Fix a bug that occurs when handling OPTIONAL_GROUP_PATTERN; optimize the performance of StringIndex.

This commit is contained in:
magicpanda0618 2017-07-27 15:58:51 +08:00
parent 7b2792bbd4
commit aac9638541
6 changed files with 112 additions and 124 deletions

View File

@ -47,7 +47,7 @@ Strategy::~Strategy()
//however, this can be dealt with through several basic queries and linking
bool
Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
Strategy::handle(SPARQLquery& _query)
{
#ifdef MULTI_INDEX
Util::logging("IN GeneralEvaluation::handle");
@ -117,7 +117,7 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
//BETTER: use function pointer array in C++ class
case 0:
//default:filter by vstree and then verified by join
this->handler0(*iter, result_list, _result_filter);
this->handler0(*iter, result_list);
break;
case 1:
this->handler1(*iter, result_list);
@ -160,7 +160,7 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
}
void
Strategy::handler0(BasicQuery* _bq, vector<unsigned*>& _result_list, ResultFilter* _result_filter)
Strategy::handler0(BasicQuery* _bq, vector<unsigned*>& _result_list)
{
//long before_filter = Util::get_cur_time();
cout << "this BasicQuery use query strategy 0" << endl;
@ -205,10 +205,6 @@ Strategy::handler0(BasicQuery* _bq, vector<unsigned*>& _result_list, ResultFilte
long tv_retrieve = Util::get_cur_time();
cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;
//between retrieve and join
if (_result_filter != NULL)
_result_filter->candFilterWithResultHashTable(*_bq);
Join *join = new Join(kvstore, pre2num, this->limitID_predicate, this->limitID_literal,this->limitID_entity);
join->join_basic(_bq);
delete join;

View File

@ -17,7 +17,6 @@
#include "../Query/BasicQuery.h"
#include "../KVstore/KVstore.h"
#include "../VSTree/VSTree.h"
#include "../Query/ResultFilter.h"
class Strategy
{
@ -26,7 +25,7 @@ public:
Strategy(KVstore*, VSTree*, TYPE_TRIPLE_NUM*, TYPE_PREDICATE_ID, TYPE_ENTITY_LITERAL_ID,TYPE_ENTITY_LITERAL_ID);
~Strategy();
//select efficient strategy to do the sparql query
bool handle(SPARQLquery&, ResultFilter* _result_filter = NULL);
bool handle(SPARQLquery&);
private:
int method;
@ -39,7 +38,7 @@ private:
//NOTICE: even if the ID type is int, there is no problem and no waste in using unsigned in the answer
//(because -1, -2 or other invalid IDs can not be in answer)
void handler0(BasicQuery*, vector<unsigned*>&, ResultFilter* _result_filter = NULL);
void handler0(BasicQuery*, vector<unsigned*>&);
void handler1(BasicQuery*, vector<unsigned*>&);
void handler2(BasicQuery*, vector<unsigned*>&);
void handler3(BasicQuery*, vector<unsigned*>&);

View File

@ -64,7 +64,10 @@ bool GeneralEvaluation::doQuery()
printf("=================\n");
this->rewriting_evaluation_stack.clear();
this->rewriting_evaluation_stack.push_back(this->query_tree.getGroupPattern());
this->rewriting_evaluation_stack.push_back(EvaluationStackStruct());
this->rewriting_evaluation_stack.back().grouppattern = this->query_tree.getGroupPattern();
this->rewriting_evaluation_stack.back().sparql_query = NULL;
this->temp_result = this->rewritingBasedQueryEvaluation(0);
}
else
@ -2013,7 +2016,7 @@ void GeneralEvaluation::TempResultSet::print()
GeneralEvaluation::TempResultSet* GeneralEvaluation::semanticBasedQueryEvaluation(QueryTree::GroupPattern &grouppattern)
{
TempResultSet* result = new TempResultSet();
TempResultSet *result = new TempResultSet();
grouppattern.initPatternBlockid();
@ -2262,7 +2265,7 @@ bool GeneralEvaluation::expanseFirstOuterUnionGroupPattern(QueryTree::GroupPatte
GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluation(int dep)
{
deque<QueryTree::GroupPattern> queue;
queue.push_back(this->rewriting_evaluation_stack[dep]);
queue.push_back(this->rewriting_evaluation_stack[dep].grouppattern);
vector<QueryTree::GroupPattern> grouppattern_union;
while (!queue.empty())
@ -2276,8 +2279,8 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
for (int i = 0; i < (int)grouppattern_union.size(); i++)
{
this->rewriting_evaluation_stack[dep] = grouppattern_union[i];
QueryTree::GroupPattern *grouppattern = &this->rewriting_evaluation_stack[dep];
this->rewriting_evaluation_stack[dep].grouppattern = grouppattern_union[i];
QueryTree::GroupPattern *grouppattern = &this->rewriting_evaluation_stack[dep].grouppattern;
grouppattern->getVarset();
for (int j = 0; j < 80; j++) printf("="); printf("\n");
@ -2287,7 +2290,6 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
TempResultSet *sub_result = new TempResultSet();
QueryTree::GroupPattern triplepattern;
int grouppattern_triple_num = 0;
for (int j = 0; j < (int)grouppattern->sub_grouppattern.size(); j++)
if (grouppattern->sub_grouppattern[j].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type)
{
@ -2296,24 +2298,6 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
QueryTree::GroupPattern::Pattern::Element(grouppattern->sub_grouppattern[j].pattern.predicate.value),
QueryTree::GroupPattern::Pattern::Element(grouppattern->sub_grouppattern[j].pattern.object.value)
));
grouppattern_triple_num++;
}
triplepattern.getVarset();
for (int j = 0; j < dep; j++)
{
QueryTree::GroupPattern &parrent_grouppattern = this->rewriting_evaluation_stack[j];
for (int k = 0; k < (int)parrent_grouppattern.sub_grouppattern.size(); k++)
if (parrent_grouppattern.sub_grouppattern[k].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type)
if (triplepattern.grouppattern_subject_object_maximal_varset.hasCommonVar(parrent_grouppattern.sub_grouppattern[k].pattern.subject_object_varset))
{
triplepattern.addOnePattern(QueryTree::GroupPattern::Pattern(
QueryTree::GroupPattern::Pattern::Element(parrent_grouppattern.sub_grouppattern[k].pattern.subject.value),
QueryTree::GroupPattern::Pattern::Element(parrent_grouppattern.sub_grouppattern[k].pattern.predicate.value),
QueryTree::GroupPattern::Pattern::Element(parrent_grouppattern.sub_grouppattern[k].pattern.object.value)
));
}
}
triplepattern.getVarset();
@ -2323,7 +2307,7 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
{
for (int j = 0; j < dep; j++)
{
QueryTree::GroupPattern &parrent_grouppattern = this->rewriting_evaluation_stack[j];
QueryTree::GroupPattern &parrent_grouppattern = this->rewriting_evaluation_stack[j].grouppattern;
for (int k = 0; k < (int)parrent_grouppattern.sub_grouppattern.size(); k++)
{
@ -2343,8 +2327,8 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
}
}
SPARQLquery sparql_query;
vector<vector<string> > encode_varset;
this->rewriting_evaluation_stack[dep].sparql_query = new SPARQLquery();
this->rewriting_evaluation_stack[dep].encode_varset.clear();
//get connected block
triplepattern.initPatternBlockid();
@ -2363,20 +2347,18 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
if (triplepattern.sub_grouppattern[j].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type)
if (triplepattern.getRootPatternBlockID(j) == j)
{
sparql_query.addBasicQuery();
this->rewriting_evaluation_stack[dep].sparql_query->addBasicQuery();
Varset occur;
for (int k = 0; k < (int)triplepattern.sub_grouppattern.size(); k++)
if (triplepattern.sub_grouppattern[k].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type)
if (triplepattern.getRootPatternBlockID(k) == j)
{
sparql_query.addTriple(Triple(
this->rewriting_evaluation_stack[dep].sparql_query->addTriple(Triple(
triplepattern.sub_grouppattern[k].pattern.subject.value,
triplepattern.sub_grouppattern[k].pattern.predicate.value,
triplepattern.sub_grouppattern[k].pattern.object.value
));
if (j < grouppattern_triple_num)
occur += triplepattern.sub_grouppattern[k].pattern.varset;
}
@ -2386,7 +2368,7 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
else
useful = occur;
encode_varset.push_back(useful.vars);
this->rewriting_evaluation_stack[dep].encode_varset.push_back(useful.vars);
printf("select vars: ");
useful.print();
@ -2404,27 +2386,68 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
}
long tv_begin = Util::get_cur_time();
sparql_query.encodeQuery(this->kvstore, encode_varset);
this->rewriting_evaluation_stack[dep].sparql_query->encodeQuery(this->kvstore, this->rewriting_evaluation_stack[dep].encode_varset);
long tv_encode = Util::get_cur_time();
printf("after Encode, used %ld ms.\n", tv_encode - tv_begin);
if (dep > 0)
this->strategy.handle(sparql_query, &this->result_filter);
else
this->strategy.handle(sparql_query);
{
SPARQLquery *this_sparql_query = this->rewriting_evaluation_stack[dep].sparql_query;
SPARQLquery *last_sparql_query = this->rewriting_evaluation_stack[dep - 1].sparql_query;
for (int j = 0; j < this_sparql_query->getBasicQueryNum(); j++)
{
BasicQuery &this_basic_query = this_sparql_query->getBasicQuery(j);
vector<string> &this_encode_varset = this->rewriting_evaluation_stack[dep].encode_varset[j];
for (int k = 0; k < last_sparql_query->getBasicQueryNum(); k++)
{
BasicQuery &last_basic_query = last_sparql_query->getBasicQuery(k);
vector<string> &last_encode_varset = this->rewriting_evaluation_stack[dep - 1].encode_varset[k];
for (int p = 0; p < (int)this_encode_varset.size(); p++)
{
for (int q = 0; q < (int)last_encode_varset.size(); q++)
if (this_encode_varset[p] == last_encode_varset[q])
{
vector<unsigned*> &result = last_basic_query.getResultList();
set<unsigned> result_set;
for (int l = 0; l < (int)result.size(); l++)
result_set.insert(result[l][q]);
vector<unsigned> result_vector;
result_vector.reserve(result_set.size());
for (set<unsigned>::iterator iter = result_set.begin(); iter != result_set.end(); iter++)
result_vector.push_back(*iter);
this_basic_query.getCandidateList(p).copy(result_vector);
this_basic_query.setReady(p);
printf("fill var %s CandidateList size %d\n", this_encode_varset[p].c_str(), (int)result_vector.size());
}
}
}
}
}
long tv_fillcand = Util::get_cur_time();
printf("after FillCand, used %ld ms.\n", tv_fillcand - tv_encode);
this->strategy.handle(*this->rewriting_evaluation_stack[dep].sparql_query);
long tv_handle = Util::get_cur_time();
printf("after Handle, used %ld ms.\n", tv_handle - tv_encode);
printf("after Handle, used %ld ms.\n", tv_handle - tv_fillcand);
//collect and join the result of each BasicQuery
for (int j = 0; j < sparql_query.getBasicQueryNum(); j++)
for (int j = 0; j < this->rewriting_evaluation_stack[dep].sparql_query->getBasicQueryNum(); j++)
{
TempResultSet *temp = new TempResultSet();
temp->results.push_back(TempResult());
temp->results[0].id_varset = Varset(encode_varset[j]);
int varnum = (int)encode_varset[j].size();
temp->results[0].id_varset = Varset(this->rewriting_evaluation_stack[dep].encode_varset[j]);
int varnum = (int)this->rewriting_evaluation_stack[dep].encode_varset[j].size();
vector<unsigned*> &basicquery_result = sparql_query.getBasicQuery(j).getResultList();
vector<unsigned*> &basicquery_result = this->rewriting_evaluation_stack[dep].sparql_query->getBasicQuery(j).getResultList();
int basicquery_result_num = (int)basicquery_result.size();
temp->results[0].result.reserve(basicquery_result_num);
@ -2494,24 +2517,17 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
if (!sub_result->results[0].result.empty())
{
bool has_changed = false;
for (int j = 0; j < (int)grouppattern->sub_grouppattern.size(); j++)
if (grouppattern->sub_grouppattern[j].type == QueryTree::GroupPattern::SubGroupPattern::Optional_type)
{
if (!has_changed)
{
this->result_filter.changeResultHashTable(sparql_query, 1);
has_changed = true;
}
if ((int)this->rewriting_evaluation_stack.size() == dep + 1)
{
this->rewriting_evaluation_stack.push_back(QueryTree::GroupPattern());
grouppattern = &this->rewriting_evaluation_stack[dep];
this->rewriting_evaluation_stack.push_back(EvaluationStackStruct());
this->rewriting_evaluation_stack.back().sparql_query = NULL;
grouppattern = &this->rewriting_evaluation_stack[dep].grouppattern;
}
this->rewriting_evaluation_stack[dep + 1] = grouppattern->sub_grouppattern[j].optional;
this->rewriting_evaluation_stack[dep + 1].grouppattern = grouppattern->sub_grouppattern[j].optional;
TempResultSet *temp = rewritingBasedQueryEvaluation(dep + 1);
@ -2525,9 +2541,6 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
sub_result = new_result;
}
if (has_changed)
this->result_filter.changeResultHashTable(sparql_query, -1);
}
for (int j = 0; j < (int)grouppattern->sub_grouppattern.size(); j++)
@ -2562,6 +2575,9 @@ GeneralEvaluation::TempResultSet* GeneralEvaluation::rewritingBasedQueryEvaluati
result = new_result;
}
delete this->rewriting_evaluation_stack[dep].sparql_query;
this->rewriting_evaluation_stack[dep].sparql_query = NULL;
}
return result;

View File

@ -22,7 +22,6 @@
#include "QueryTree.h"
#include "Varset.h"
#include "RegexExpression.h"
#include "ResultFilter.h"
#include "../Util/Triple.h"
class GeneralEvaluation
@ -34,7 +33,6 @@ class GeneralEvaluation
KVstore *kvstore;
StringIndex *stringindex;
Strategy strategy;
ResultFilter result_filter;
TYPE_TRIPLE_NUM *pre2num;
TYPE_PREDICATE_ID limitID_predicate;
@ -194,8 +192,15 @@ class GeneralEvaluation
};
private:
TempResultSet* temp_result;
std::vector<QueryTree::GroupPattern> rewriting_evaluation_stack;
TempResultSet *temp_result;
struct EvaluationStackStruct
{
QueryTree::GroupPattern grouppattern;
SPARQLquery *sparql_query;
vector<vector<string> > encode_varset;
};
std::vector<EvaluationStackStruct> rewriting_evaluation_stack;
public:
TempResultSet* semanticBasedQueryEvaluation(QueryTree::GroupPattern &grouppattern);

View File

@ -97,11 +97,6 @@ bool StringIndexFile::randomAccess(unsigned id, string *str)
this->buffer[length] = '\0';
*str = string(this->buffer);
//cout<<"check: read from string index - "<<id<<" "<<*str<<endl;
//if(*str == "")
//{
//cout<<"ERROR in StringIndex - "<<id<<endl;
//}
return true;
}
@ -113,9 +108,16 @@ void StringIndexFile::addRequest(unsigned id, std::string *str)
void StringIndexFile::trySequenceAccess()
{
long max_end = 0;
long min_begin = -1, max_end = 0;
for (int i = 0; i < (int)this->request.size(); i++)
{
if (min_begin == -1)
min_begin = this->request[i].offset;
else
min_begin = min(min_begin, this->request[i].offset);
max_end = max(max_end, this->request[i].offset + long(this->request[i].length));
}
if (this->type == Entity)
cout << "Entity StringIndex ";
@ -123,37 +125,43 @@ void StringIndexFile::trySequenceAccess()
cout << "Literal StringIndex ";
if (this->type == Predicate)
cout << "Predicate StringIndex ";
if (max_end / (long)1e5 < (long)this->request.size())
if ((max_end - min_begin) / 800000L < (long)this->request.size())
{
cout << "sequence access." << endl;
sort(this->request.begin(), this->request.end());
int pos = 0;
fseek(this->value_file, 0, SEEK_SET);
char *block = new char[MAX_BLOCK_SIZE];
long current_block_begin = 0;
long current_block_begin = min_begin;
fseek(this->value_file, current_block_begin, SEEK_SET);
while (current_block_begin < max_end)
{
long current_block_end = min(current_block_begin + MAX_BLOCK_SIZE, max_end);
if (current_block_end <= this->request[pos].offset)
{
current_block_begin = this->request[pos].offset;
fseek(this->value_file, current_block_begin, SEEK_SET);
current_block_end = min(current_block_begin + MAX_BLOCK_SIZE, max_end);
}
fread(block, sizeof(char), current_block_end - current_block_begin, this->value_file);
while (pos < (int)this->request.size())
{
long offset = this->request[pos].offset;
long length = this->request[pos].length;
if (offset >= current_block_end)
break;
if (current_block_begin <= offset && offset + length <= current_block_end)
{
allocBuffer(length);
memcpy(this->buffer, &block[offset - current_block_begin], length);
this->buffer[length] = '\0';
*this->request[pos].str = string(this->buffer);
//if(string(this->buffer) == "")
//{
//cout<<"Error in StringIndex"<<endl;
//}
pos++;
}
else if (current_block_begin <= offset)
@ -163,10 +171,6 @@ void StringIndexFile::trySequenceAccess()
memcpy(this->buffer, &block[offset - current_block_begin], length);
this->buffer[length] = '\0';
*this->request[pos].str = string(this->buffer);
//if(string(this->buffer) == "")
//{
//cout<<"Error in StringIndex"<<endl;
//}
break;
}
else if (offset + length <= current_block_end)
@ -180,10 +184,6 @@ void StringIndexFile::trySequenceAccess()
while (pos < (int)this->request.size() && this->request[pos - 1].offset == this->request[pos].offset)
{
*this->request[pos].str = *this->request[pos - 1].str;
//if(*this->request[pos].str == "")
//{
//cout<<"Error in StringIndex"<<endl;
//}
pos++;
}
}
@ -194,10 +194,6 @@ void StringIndexFile::trySequenceAccess()
memcpy(this->buffer, block, length);
this->buffer[length] = '\0';
*this->request[pos].str += string(this->buffer);
//if(*this->request[pos].str == "")
//{
//cout<<"Error in StringIndex"<<endl;
//}
break;
}
}
@ -221,14 +217,6 @@ void StringIndexFile::change(unsigned id, KVstore &kv_store)
{
//DEBUG: for predicate, -1 when invalid
if (id == INVALID) return;
//if(this->type == Predicate)
//{
//if (id < 0) return;
//}
//else
//{
//if (id == INVALID) return;
//}
if (this->num <= id)
{
@ -271,14 +259,6 @@ void StringIndexFile::disable(unsigned id)
{
//DEBUG: for predicate, -1 when invalid
if (id >= this->num) return ;
//if(this->type == Predicate)
//{
//if (id < 0 || id >= this->num) return ;
//}
//else
//{
//if (id == INVALID) return;
//}
this->index_table[id] = IndexInfo();
@ -346,10 +326,6 @@ bool StringIndex::randomAccess(unsigned id, string *str, bool is_entity_or_liter
{
return true;
}
//else
//{
//cout<<"check: not found in string buffer - "<<id<<endl;
//}
if (id < Util::LITERAL_FIRST_ID)
{
@ -357,7 +333,6 @@ bool StringIndex::randomAccess(unsigned id, string *str, bool is_entity_or_liter
}
else
{
//cout<<"check: to search literal "<<id-Util::LITERAL_FIRST_ID<<endl;
return this->literal.randomAccess(id - Util::LITERAL_FIRST_ID, str);
}
}

View File

@ -78,7 +78,7 @@ kvstoreobj = $(objdir)KVstore.o $(sitreeobj) $(istreeobj) $(ivtreeobj) #$(sstree
utilobj = $(objdir)Util.o $(objdir)Bstr.o $(objdir)Stream.o $(objdir)Triple.o $(objdir)BloomFilter.o $(objdir)VList.o
queryobj = $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o $(objdir)IDList.o \
$(objdir)Varset.o $(objdir)QueryTree.o $(objdir)ResultFilter.o $(objdir)GeneralEvaluation.o
$(objdir)Varset.o $(objdir)QueryTree.o $(objdir)GeneralEvaluation.o
signatureobj = $(objdir)SigEntry.o $(objdir)Signature.o
@ -277,7 +277,7 @@ $(objdir)Join.o: Database/Join.cpp Database/Join.h $(objdir)IDList.o $(objdir)Ba
$(CC) $(CFLAGS) Database/Join.cpp $(inc) -o $(objdir)Join.o
$(objdir)Strategy.o: Database/Strategy.cpp Database/Strategy.h $(objdir)SPARQLquery.o $(objdir)BasicQuery.o \
$(objdir)Triple.o $(objdir)IDList.o $(objdir)KVstore.o $(objdir)VSTree.o $(objdir)Util.o $(objdir)Join.o $(objdir)ResultFilter.o
$(objdir)Triple.o $(objdir)IDList.o $(objdir)KVstore.o $(objdir)VSTree.o $(objdir)Util.o $(objdir)Join.o
$(CC) $(CFLAGS) Database/Strategy.cpp $(inc) -o $(objdir)Strategy.o
#objects in Database/ end
@ -303,12 +303,9 @@ $(objdir)Varset.o: Query/Varset.cpp Query/Varset.h
$(objdir)QueryTree.o: Query/QueryTree.cpp Query/QueryTree.h $(objdir)Varset.o
$(CC) $(CFLAGS) Query/QueryTree.cpp $(inc) -o $(objdir)QueryTree.o
$(objdir)ResultFilter.o: Query/ResultFilter.cpp Query/ResultFilter.h $(objdir)BasicQuery.o $(objdir)SPARQLquery.o $(objdir)Util.o
$(CC) $(CFLAGS) Query/ResultFilter.cpp $(inc) -o $(objdir)ResultFilter.o
#no more using $(objdir)Database.o
$(objdir)GeneralEvaluation.o: Query/GeneralEvaluation.cpp Query/GeneralEvaluation.h $(objdir)QueryParser.o $(objdir)QueryTree.o \
$(objdir)SPARQLquery.o $(objdir)Varset.o $(objdir)KVstore.o $(objdir)ResultFilter.o $(objdir)Strategy.o $(objdir)StringIndex.o
$(objdir)SPARQLquery.o $(objdir)Varset.o $(objdir)KVstore.o $(objdir)Strategy.o $(objdir)StringIndex.o
$(CC) $(CFLAGS) Query/GeneralEvaluation.cpp $(inc) -o $(objdir)GeneralEvaluation.o
#objects in Query/ end