refactor: merge code of chenjiaqi;

implement Bind and GroupBy in SPARQL;

by zengli
This commit is contained in:
bookug 2017-07-14 16:42:47 +08:00
parent a2cce262ad
commit 9ca106ca7e
16 changed files with 2757 additions and 2240 deletions

View File

@ -739,12 +739,12 @@ string tstr;
//cout<<"string index: "<<tstr<<endl;
////cout<<"kvstore: "<<this->kvstore->getPredicateByID(pid)<<endl;
cout<<"right pair: "<<62<<" "<<"<http://www.Department0.University0.edu/GraduateCourse11>"<<endl;
unsigned sid = this->kvstore->getIDByEntity("<http://www.Department0.University0.edu/GraduateCourse11>");
cout<<"check: sub "<<sid<<endl;
this->stringindex->randomAccess(sid, &tstr, true);
cout<<"string index: "<<tstr<<endl;
cout<<"kvstore: "<<this->kvstore->getEntityByID(sid)<<endl;
//cout<<"right pair: "<<62<<" "<<"<http://www.Department0.University0.edu/GraduateCourse11>"<<endl;
//unsigned sid = this->kvstore->getIDByEntity("<http://www.Department0.University0.edu/GraduateCourse11>");
//cout<<"check: sub "<<sid<<endl;
//this->stringindex->randomAccess(sid, &tstr, true);
//cout<<"string index: "<<tstr<<endl;
//cout<<"kvstore: "<<this->kvstore->getEntityByID(sid)<<endl;
//unsigned oid = this->kvstore->getIDByString("<http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#Course>");
//cout<<"check: obj "<<oid<<endl;
@ -881,7 +881,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
long tv_begin = Util::get_cur_time();
if (!general_evaluation.parseQuery(_query))
return false;
return -101;
long tv_parse = Util::get_cur_time();
cout << "after Parsing, used " << (tv_parse - tv_begin) << "ms." << endl;
@ -889,6 +889,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
//for select, -100 by default, -101 means error
//for update, non-negative means true(and the num is updated triples num), -1 means error
int success_num = -100;
bool need_output_answer = false;
//Query
if (general_evaluation.getQueryTree().getUpdateType() == QueryTree::Not_Update)
@ -905,7 +906,8 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
cout << "after getFinalResult, used " << (tv_afget - tv_bfget) << "ms." << endl;
if(_fp != NULL)
general_evaluation.setNeedOutputAnswer();
need_output_answer = true;
//general_evaluation.setNeedOutputAnswer();
}
//Update
else
@ -919,22 +921,23 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
QueryTree::GroupPattern &update_pattern = general_evaluation.getQueryTree().getUpdateType() == QueryTree::Insert_Data ?
general_evaluation.getQueryTree().getInsertPatterns() : general_evaluation.getQueryTree().getDeletePatterns();
update_triple_num = update_pattern.patterns.size();
update_triple_num = update_pattern.sub_grouppattern.size();
update_triple = new TripleWithObjType[update_triple_num];
//for (int i = 0; i < (int)update_pattern.patterns.size(); i++)
for (TYPE_TRIPLE_NUM i = 0; i < update_triple_num; ++i)
{
TripleWithObjType::ObjectType object_type = TripleWithObjType::None;
if (update_pattern.patterns[i].object.value[0] == '<')
object_type = TripleWithObjType::Entity;
else
object_type = TripleWithObjType::Literal;
for (TYPE_TRIPLE_NUM i = 0; i < update_triple_num; i++)
if (update_pattern.sub_grouppattern[i].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type)
{
TripleWithObjType::ObjectType object_type = TripleWithObjType::None;
if (update_pattern.sub_grouppattern[i].pattern.object.value[0] == '<')
object_type = TripleWithObjType::Entity;
else
object_type = TripleWithObjType::Literal;
update_triple[i] = TripleWithObjType(update_pattern.patterns[i].subject.value,
update_pattern.patterns[i].predicate.value,
update_pattern.patterns[i].object.value, object_type);
}
update_triple[i] = TripleWithObjType(update_pattern.sub_grouppattern[i].pattern.subject.value,
update_pattern.sub_grouppattern[i].pattern.predicate.value,
update_pattern.sub_grouppattern[i].pattern.object.value, object_type);
}
else throw "Database::query failed";
if (general_evaluation.getQueryTree().getUpdateType() == QueryTree::Insert_Data)
{
@ -963,18 +966,19 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
}
}
general_evaluation.releaseResultStack();
general_evaluation.releaseResult();
delete[] update_triple;
}
long tv_final = Util::get_cur_time();
cout << "Total time used: " << (tv_final - tv_begin) << "ms." << endl;
if (general_evaluation.needOutputAnswer())
//if (general_evaluation.needOutputAnswer())
if (need_output_answer)
{
unsigned ans_num = max((long long)_result_set.ansNum - _result_set.output_offset, (long long)0);
long long ans_num = max((long long)_result_set.ansNum - _result_set.output_offset, 0LL);
if (_result_set.output_limit != -1)
ans_num = min(ans_num, _result_set.output_limit);
ans_num = min(ans_num, (long long)_result_set.output_limit);
cout << "There has answer: " << ans_num << endl;
cout << "final result is : " << endl;
_result_set.output(_fp);
@ -985,6 +989,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
#ifdef DEBUG
cout<<"query success_num: "<<success_num<<endl;
#endif
//cout<<"to check: "<<this->kvstore->getEntityByID(0)<<endl;
return success_num;
}

View File

@ -2,7 +2,7 @@
# Filename: QueryParser.cpp
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-07-14
# Last Modified: 2017-03-13
# Description: implement functions in QueryParser.h
=============================================================================*/
@ -10,29 +10,20 @@
using namespace std;
//Default constructor of QueryParser; the only statement shown empties the prefix map.
//NOTE(review): this listing interleaves pre- and post-change lines of a commit. The member is
//spelled _prefix_map here but this->prefix_map in other lines of the same listing, so the
//clear() call below may belong to the pre-change version only -- confirm against the real file.
QueryParser::QueryParser()
{
_prefix_map.clear();
}
void QueryParser::SPARQLParse(const string &query, QueryTree &querytree)
{
//uncompress before use
dfa34_Table_uncompress();
pANTLR3_INPUT_STREAM input;
pSparqlLexer lex;
pANTLR3_COMMON_TOKEN_STREAM tokens;
pSparqlParser parser;
input = antlr3StringStreamNew((ANTLR3_UINT8 *)(query.c_str()), ANTLR3_ENC_UTF8, query.length(), (ANTLR3_UINT8 *)"QueryString");
pANTLR3_INPUT_STREAM input = antlr3StringStreamNew((ANTLR3_UINT8 *)(query.c_str()), ANTLR3_ENC_UTF8, query.length(), (ANTLR3_UINT8 *) "QueryString");
//input = antlr3FileStreamNew((pANTLR3_UINT8)filePath,ANTLR3_ENC_8BIT);
lex = SparqlLexerNew(input);
tokens = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT,TOKENSOURCE(lex));
parser = SparqlParserNew(tokens);
pSparqlLexer lex = SparqlLexerNew(input);
pANTLR3_COMMON_TOKEN_STREAM tokens = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT,TOKENSOURCE(lex));
pSparqlParser parser = SparqlParserNew(tokens);
SparqlParser_workload_return r = parser->workload(parser);
pANTLR3_BASE_TREE root = r.tree;
SparqlParser_workload_return workload_ret = parser->workload(parser);
pANTLR3_BASE_TREE root = workload_ret.tree;
if (printNode(root) > 0)
throw "[ERROR] Some errors are found in the SPARQL query request.";
@ -49,20 +40,20 @@ void QueryParser::SPARQLParse(const string &query, QueryTree &querytree)
//Recursively dumps the parse tree rooted at node to stdout and counts its error nodes.
//node: subtree to print.
//dep:  depth of node; one indent unit is printed per level before the node's own line.
//Each node is printed as "<type>: <text>". A node whose type is 0 counts as one error node.
//Returns the number of type-0 nodes found in the subtree (this node plus all descendants).
//NOTE(review): this listing shows pre- and post-change lines of a commit side by side, so
//several statements below appear twice in slightly different forms (text pointer, error
//flag initialisation, indent loop, printf) -- only one of each pair exists in the real file.
int QueryParser::printNode(pANTLR3_BASE_TREE node, int dep)
{
const char* s = (const char*) node->getText(node)->chars;
const char *s = (const char *) node->getText(node)->chars;
ANTLR3_UINT32 treeType = node->getType(node);
//type 0 marks this node itself as an error node
int hasErrorNode = 0;
if (treeType == 0) hasErrorNode = 1;
int hasErrorNode = (treeType == 0 ? 1 : 0);
//indent according to depth, then print "<type>: <text>"
for (int i = 0; i < dep; i++) printf(" ");
printf("%d: %s\n",treeType,s);
for (int t = 0; t < dep; t++) printf("\t");
printf("%d: %s\n", treeType, s);
//visit every child one level deeper and accumulate their error counts
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
hasErrorNode += printNode(childNode, dep + 1);
}
return hasErrorNode;
}
@ -77,9 +68,8 @@ void QueryParser::parseWorkload(pANTLR3_BASE_TREE node, QueryTree &querytree)
{
parseQuery(childNode, querytree);
}
else
//update 196
if (childNode->getType(childNode) == 196)
else if (childNode->getType(childNode) == 196)
{
parseUpdate(childNode, querytree);
}
@ -98,42 +88,41 @@ void QueryParser::parseQuery(pANTLR3_BASE_TREE node, QueryTree &querytree)
{
parsePrologue(childNode);
}
else
//select 155
if (childNode->getType(childNode) == 155)
else if (childNode->getType(childNode) == 155)
{
querytree.setQueryForm(QueryTree::Select_Query);
parseQuery(childNode, querytree);
}
else
//ask 13
if (childNode->getType(childNode) == 13)
else if (childNode->getType(childNode) == 13)
{
querytree.setQueryForm(QueryTree::Ask_Query);
querytree.setProjectionAsterisk();
parseQuery(childNode, querytree);
}
else
//select clause 156
if (childNode->getType(childNode) == 156)
else if (childNode->getType(childNode) == 156)
{
parseSelectClause(childNode, querytree);
}
else
//group graph pattern 77
if (childNode->getType(childNode) == 77)
else if (childNode->getType(childNode) == 77)
{
parseGroupPattern(childNode, querytree.getGroupPattern());
}
else
//group by 75
else if (childNode->getType(childNode) == 75)
{
parseGroupBy(childNode, querytree);
}
//order by 127
if (childNode->getType(childNode) == 127)
else if (childNode->getType(childNode) == 127)
{
parseOrderBy(childNode, querytree);
}
else
//offset 120 limit 102
if (childNode->getType(childNode) == 120 || childNode->getType(childNode) == 102)
else if (childNode->getType(childNode) == 120 || childNode->getType(childNode) == 102)
{
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
@ -181,6 +170,7 @@ void QueryParser::parsePrefix(pANTLR3_BASE_TREE node)
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
//prefix namespace 136
if (childNode->getType(childNode) == 136)
parseString(childNode, key, 0);
@ -189,7 +179,8 @@ void QueryParser::parsePrefix(pANTLR3_BASE_TREE node)
if (childNode->getType(childNode) == 89)
parseString(childNode, value, 0);
}
_prefix_map.insert(make_pair(key, value));
this->prefix_map[key] = value;
}
void QueryParser::replacePrefix(string &str)
@ -198,20 +189,21 @@ void QueryParser::replacePrefix(string &str)
{
int sep = str.find(":");
if (sep == -1) return;
string prefix = str.substr(0, sep + 1);
//blank node
if (prefix == "_:") return;
cout << "prefix = " << prefix << endl;
if (_prefix_map.find(prefix) != _prefix_map.end())
printf("prefix = %s\n", prefix.c_str());
if (this->prefix_map.count(prefix) != 0)
{
str = _prefix_map[prefix].substr(0, _prefix_map[prefix].length() - 1) + str.substr(sep + 1 ,str.length() - sep - 1) + ">";
cout << "str = " << str << endl;
str = this->prefix_map[prefix].substr(0, this->prefix_map[prefix].length() - 1) + str.substr(sep + 1 ,str.length() - sep - 1) + ">";
printf("str = %s\n",str.c_str());
}
else
{
cout << "prefix not found..." << endl;
printf("prefix not found...\n");
throw "[ERROR] Prefix is not found, please define it before use.";
}
}
@ -254,11 +246,10 @@ void QueryParser::parseSelectVar(pANTLR3_BASE_TREE node, QueryTree &querytree)
if (childNode->getType(childNode) == 200)
{
querytree.addProjectionVar();
QueryTree::ProjectionVar &var = querytree.getLastProjectionVar();
var.aggregate_type = QueryTree::ProjectionVar::None_type;
QueryTree::ProjectionVar &proj_var = querytree.getLastProjectionVar();
proj_var.aggregate_type = QueryTree::ProjectionVar::None_type;
parseString(childNode, var.var, 0);
querytree.addProjectionUsefulVar(var.var);
parseString(childNode, proj_var.var, 0);
}
}
}
@ -283,7 +274,7 @@ void QueryParser::parseSelectAggregateFunction(pANTLR3_BASE_TREE node, QueryTree
childNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
//count 39
if (childNode->getType(childNode) != 39)
throw "[ERROR] The supported aggregate functions now is COUNT only.";
throw "[ERROR] The supported aggregate function now is COUNT only.";
bool distinct = false;
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
@ -299,26 +290,25 @@ void QueryParser::parseSelectAggregateFunction(pANTLR3_BASE_TREE node, QueryTree
while (gchildNode->getType(gchildNode) == 190)
gchildNode = (pANTLR3_BASE_TREE) gchildNode->getChild(gchildNode, 0);
if (gchildNode->getType(gchildNode) != 200 && gchildNode->getType(gchildNode) != 14)
throw "[ERROR] The aggregate function COUNT can accepts only one var or *.";
throw "[ERROR] The aggregate function COUNT can accept only one var or *.";
querytree.addProjectionVar();
QueryTree::ProjectionVar &var = querytree.getLastProjectionVar();
var.aggregate_type = QueryTree::ProjectionVar::Count_type;
var.distinct = distinct;
QueryTree::ProjectionVar &proj_var = querytree.getLastProjectionVar();
proj_var.aggregate_type = QueryTree::ProjectionVar::Count_type;
proj_var.distinct = distinct;
if (gchildNode->getType(gchildNode) == 200)
{
parseString(gchildNode, var.aggregate_var, 0);
querytree.addProjectionUsefulVar(var.aggregate_var);
parseString(gchildNode, proj_var.aggregate_var, 0);
}
if (gchildNode->getType(gchildNode) == 14)
{
parseString(gchildNode, var.aggregate_var, 0); //for convenience, set aggregate_var *
parseString(gchildNode, proj_var.aggregate_var, 0); //for convenience, set aggregate_var *
querytree.setProjectionAsterisk();
}
childNode = (pANTLR3_BASE_TREE) node->getChild(node, 1);
parseString(childNode, var.var, 0);
parseString(childNode, proj_var.var, 0);
}
void QueryParser::parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern)
@ -354,6 +344,12 @@ void QueryParser::parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPatt
parseFilter(childNode, grouppattern);
}
//bind 17
if (childNode->getType(childNode) == 17)
{
parseBind(childNode, grouppattern);
}
//group graph pattern 77
//redundant {}
if (childNode->getType(childNode) == 77)
@ -368,7 +364,6 @@ void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &
printf("parsePattern\n");
string subject, predicate, object;
for (unsigned int i = 0; i < node->getChildCount(node); i++)
{
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, i);
@ -384,6 +379,7 @@ void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &
if (childNode->getType(childNode) == 142)
{
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
//var 200
if (gchildNode->getType(gchildNode) == 200)
{
@ -407,18 +403,30 @@ void QueryParser::parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &
if (i != 0 && i % 2 == 0) //triples same subject
{
grouppattern.addOnePattern(QueryTree::GroupPattern::Pattern( QueryTree::GroupPattern::Pattern::Element(subject),
QueryTree::GroupPattern::Pattern::Element(predicate),
QueryTree::GroupPattern::Pattern::Element(object)));
grouppattern.addOnePattern(QueryTree::GroupPattern::Pattern(QueryTree::GroupPattern::Pattern::Element(subject),
QueryTree::GroupPattern::Pattern::Element(predicate),
QueryTree::GroupPattern::Pattern::Element(object)));
//scope of filter
for (int j = (int)grouppattern.sub_grouppattern.size() - 1; j > 0; j--)
if (grouppattern.sub_grouppattern[j].type == QueryTree::GroupPattern::SubGroupPattern::Pattern_type &&
grouppattern.sub_grouppattern[j - 1].type == QueryTree::GroupPattern::SubGroupPattern::Filter_type)
{
QueryTree::GroupPattern::SubGroupPattern tmp(grouppattern.sub_grouppattern[j - 1]);
grouppattern.sub_grouppattern[j - 1] = grouppattern.sub_grouppattern[j];
grouppattern.sub_grouppattern[j] = tmp;
}
else break;
}
}
}
void QueryParser::parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern)
{
//optional 124 minus 108
//optional 124
if (node->getType(node) == 124)
printf("parseOptional\n");
//minus 108
else if (node->getType(node) == 108)
printf("parseMinus\n");
@ -430,11 +438,11 @@ void QueryParser::parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupP
if (childNode->getType(childNode) == 77)
{
if (node->getType(node) == 124)
grouppattern.addOneOptionalOrMinus('o');
grouppattern.addOneOptional(QueryTree::GroupPattern::SubGroupPattern::Optional_type);
else if (node->getType(node) == 108)
grouppattern.addOneOptionalOrMinus('m');
grouppattern.addOneOptional(QueryTree::GroupPattern::SubGroupPattern::Minus_type);
parseGroupPattern(childNode, grouppattern.getLastOptionalOrMinus());
parseGroupPattern(childNode, grouppattern.getLastOptional());
}
}
}
@ -474,54 +482,58 @@ void QueryParser::parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &g
while (childNode->getType(childNode) == 190)
childNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
grouppattern.addOneFilterTree();
parseFilterTree(childNode, grouppattern, grouppattern.getLastFilterTree());
grouppattern.addOneFilter();
parseFilterTree(childNode, grouppattern.getLastFilter().root);
}
}
void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter)
void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTree::FilterTreeNode &filter)
{
printf("parseFilterTree\n");
switch (node->getType(node))
{
//! 192
case 192: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
case 192: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Not_type; break;
//not 115
case 115: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
case 115: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Not_type; break;
//or 125
case 125: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Or_type; break;
case 125: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Or_type; break;
//and 8
case 8: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::And_type; break;
case 8: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::And_type; break;
//equal 62
case 62: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Equal_type; break;
case 62: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Equal_type; break;
//not equal 116
case 116: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::NotEqual_type; break;
case 116: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::NotEqual_type; break;
//less 100
case 100: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Less_type; break;
case 100: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Less_type; break;
//less equal 101
case 101: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::LessOrEqual_type; break;
case 101: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::LessOrEqual_type; break;
//greater 72
case 72: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Greater_type; break;
case 72: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Greater_type; break;
//greater equal 73
case 73: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::GreaterOrEqual_type; break;
case 73: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::GreaterOrEqual_type; break;
//regex 150
case 150: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type; break;
case 150: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_regex_type; break;
//str 167
case 167: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_str_type; break;
case 167: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_str_type; break;
//isIRI 92
case 92: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_isiri_type; break;
//isURI 95
case 95: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_isuri_type; break;
//isLiteral 93
case 93: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_isliteral_type; break;
//isNumeric 94
case 94: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_isnumeric_type; break;
//lang 96
case 96: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type; break;
//langmatches 97
case 97: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type; break;
case 96: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_lang_type; break;
//langMatches 97
case 97: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_langmatches_type; break;
//bound 23
case 23: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type; break;
case 23: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_bound_type; break;
//in 81
case 81: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type; break;
//exists 63
case 63: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_exists_type; break;
//not exists 117
case 117: filter.oper_type = QueryTree::GroupPattern::FilterTreeNode::Not_type; break;
case 81: filter.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_in_type; break;
default:
return;
@ -536,10 +548,10 @@ void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPatter
//in 81
if (childNode->getType(childNode) == 81)
{
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child[0].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type;
filter.child[0].node.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type;
parseVarInExpressionList(node, grouppattern, filter.child[0].node);
filter.child.push_back(QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild());
filter.child[0].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::Tree_type;
filter.child[0].node.oper_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::Builtin_in_type;
parseVarInExpressionList(node, filter.child[0].node);
return;
}
@ -548,27 +560,7 @@ void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPatter
//in 81
if (node->getType(node) == 81)
{
parseVarInExpressionList(node, grouppattern, filter);
return;
}
//not exists 117
if (node->getType(node) == 117)
{
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child[0].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type;
filter.child[0].node.oper_type = QueryTree::GroupPattern::FilterTreeNode::Builtin_exists_type;
parseExistsGroupPattern(node, grouppattern, filter.child[0].node);
return;
}
//exists 63
if (node->getType(node) == 63)
{
parseExistsGroupPattern(node, grouppattern, filter);
parseVarInExpressionList(node, filter);
return;
}
@ -581,35 +573,36 @@ void QueryParser::parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPatter
while (childNode->getType(childNode) == 190)
{
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
if (gchildNode->getChildCount(gchildNode) != 0)
childNode = gchildNode;
else break;
}
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child.push_back(QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild());
//unary 190
if (childNode->getType(childNode) == 190)
{
filter.child[i].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type;
parseString(childNode, filter.child[i].arg, 1);
replacePrefix(filter.child[i].arg);
filter.child[i].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::String_type;
parseString(childNode, filter.child[i].str, 1);
replacePrefix(filter.child[i].str);
}
else if (childNode->getChildCount(childNode) == 0)
{
filter.child[i].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type;
parseString(childNode, filter.child[i].arg, 0);
replacePrefix(filter.child[i].arg);
filter.child[i].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::String_type;
parseString(childNode, filter.child[i].str, 0);
replacePrefix(filter.child[i].str);
}
else
{
filter.child[i].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type;
parseFilterTree(childNode, grouppattern, filter.child[i].node);
filter.child[i].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::Tree_type;
parseFilterTree(childNode, filter.child[i].node);
}
}
}
void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter)
void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTree::FilterTreeNode &filter)
{
printf("parseVarInExpressionList\n");
@ -628,31 +621,32 @@ void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::Gr
while (childNode->getType(childNode) == 190)
{
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
if (gchildNode->getChildCount(gchildNode) != 0)
childNode = gchildNode;
else break;
}
int last = filter.child.size();
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child.push_back(QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild());
//unary 190
if (childNode->getType(childNode) == 190)
{
filter.child[last].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type;
parseString(childNode, filter.child[last].arg, 1);
replacePrefix(filter.child[last].arg);
filter.child[last].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::String_type;
parseString(childNode, filter.child[last].str, 1);
replacePrefix(filter.child[last].str);
}
else if (childNode->getChildCount(childNode) == 0)
{
filter.child[last].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type;
parseString(childNode, filter.child[last].arg, 0);
replacePrefix(filter.child[last].arg);
filter.child[last].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::String_type;
parseString(childNode, filter.child[last].str, 0);
replacePrefix(filter.child[last].str);
}
else
{
filter.child[last].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type;
parseFilterTree(childNode, grouppattern, filter.child[last].node);
filter.child[last].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::Tree_type;
parseFilterTree(childNode, filter.child[last].node);
}
}
@ -667,51 +661,102 @@ void QueryParser::parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::Gr
while (gchildNode->getType(gchildNode) == 190)
{
pANTLR3_BASE_TREE ggchildNode = (pANTLR3_BASE_TREE) gchildNode->getChild(gchildNode, 0);
if (ggchildNode->getChildCount(ggchildNode) != 0)
gchildNode = ggchildNode;
else break;
}
int last = filter.child.size();
filter.child.push_back(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild());
filter.child.push_back(QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild());
//unary 190
if (gchildNode->getType(gchildNode) == 190)
{
filter.child[last].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type;
parseString(gchildNode, filter.child[last].arg, 1);
replacePrefix(filter.child[last].arg);
filter.child[last].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::String_type;
parseString(gchildNode, filter.child[last].str, 1);
replacePrefix(filter.child[last].str);
}
else if (gchildNode->getChildCount(gchildNode) == 0)
{
filter.child[last].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type;
parseString(gchildNode, filter.child[last].arg, 0);
replacePrefix(filter.child[last].arg);
filter.child[last].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::String_type;
parseString(gchildNode, filter.child[last].str, 0);
replacePrefix(filter.child[last].str);
}
else
{
filter.child[last].node_type = QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type;
parseFilterTree(gchildNode, grouppattern, filter.child[last].node);
filter.child[last].node_type = QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild::Tree_type;
parseFilterTree(gchildNode, filter.child[last].node);
}
}
}
}
}
void QueryParser::parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter)
//Parses a BIND node and appends the resulting binding to grouppattern.
//node:         the BIND subtree; child 0 is read as the value being bound, child 1 as the
//              "as" part (type 11) that names the target variable.
//grouppattern: group pattern that receives the new entry via addOneBind()/getLastBind().
//Throws a C-string message when the value is still an inner node (has children) after the
//unary wrappers have been peeled off, i.e. when it is an expression rather than a single term.
//If child 1 is not of type 11, var stays empty and the Bind is still added.
//NOTE(review): this listing interleaves pre- and post-change lines of a commit. The lines that
//mention parseExistsGroupPattern, type 77, exists_grouppattern_id / getLastExistsGroupPattern,
//and the bare "return;" inside the loop look like leftovers of the function this one replaced
//-- confirm against the real file before relying on them.
void QueryParser::parseBind(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern)
{
printf("parseExistsGroupPattern\n");
printf("parseBind\n");
string str, var;
pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, 0);
//group graph pattern 77
if (childNode->getType(childNode) == 77)
//unary 190
//descend through nested unary wrappers, but stop on a wrapper whose child has no children
while (childNode->getType(childNode) == 190)
{
grouppattern.addOneExistsGroupPattern();
filter.exists_grouppattern_id = (int)grouppattern.filter_exists_grouppatterns[(int)grouppattern.filter_exists_grouppatterns.size() - 1].size() - 1;
parseGroupPattern(childNode, grouppattern.getLastExistsGroupPattern());
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
return;
if (gchildNode->getChildCount(gchildNode) != 0)
childNode = gchildNode;
else break;
}
//unary 190
//still a unary wrapper: read its text with parseString depth 1, then expand any prefix
if (childNode->getType(childNode) == 190)
{
parseString(childNode, str, 1);
replacePrefix(str);
}
//childless node: take its text directly (depth 0), then expand any prefix
else if (childNode->getChildCount(childNode) == 0)
{
parseString(childNode, str, 0);
replacePrefix(str);
}
//anything else still has children, i.e. is an expression, which is rejected
else
{
throw "[ERROR] The BIND operator can't assign an expression to a var.";
}
childNode = (pANTLR3_BASE_TREE) node->getChild(node, 1);
//as 11
if (childNode->getType(childNode) == 11)
{
parseString(childNode, var, 1);
}
//append an empty Bind slot, then overwrite it with the parsed (str, var) pair
grouppattern.addOneBind();
grouppattern.getLastBind() = QueryTree::GroupPattern::Bind(str, var);
}
//Registers the GROUP BY keys of a query in the query tree.
//node:      the GROUP BY subtree; each of its children is one grouping key.
//querytree: receives every key, in child order, through addGroupByVar().
//Only plain variables (token type 200) are accepted as keys; the first child of any other
//type aborts parsing by throwing a C-string error message. Keys seen before that child have
//already been added to querytree.
void QueryParser::parseGroupBy(pANTLR3_BASE_TREE node, QueryTree &querytree)
{
	printf("parseGroupBy\n");

	unsigned int idx = 0;
	while (idx < node->getChildCount(node))
	{
		pANTLR3_BASE_TREE keyNode = (pANTLR3_BASE_TREE) node->getChild(node, idx);

		//var 200 is the only supported key kind
		if (keyNode->getType(keyNode) != 200)
			throw "[ERROR] The supported GROUP BY key is var only.";

		string groupVar;
		parseString(keyNode, groupVar, 0);
		querytree.addGroupByVar(groupVar);

		++idx;
	}
}
@ -728,9 +773,10 @@ void QueryParser::parseOrderBy(pANTLR3_BASE_TREE node, QueryTree &querytree)
{
string var;
bool desending = false;
for (unsigned int k = 0; k < childNode->getChildCount(childNode); k++)
for (unsigned int j = 0; j < childNode->getChildCount(childNode); j++)
{
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, k);
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, j);
//unary 190
while (gchildNode->getType(gchildNode) == 190)
@ -750,7 +796,7 @@ void QueryParser::parseOrderBy(pANTLR3_BASE_TREE node, QueryTree &querytree)
}
if (var.length() > 0)
querytree.addOrder(var, desending);
querytree.addOrderVar(var, desending);
}
}
}
@ -759,7 +805,7 @@ void QueryParser::parseString(pANTLR3_BASE_TREE node, string &str, int dep)
{
if (dep == 0)
{
str = (const char*) node->getText(node)->chars;
str = (const char *) node->getText(node)->chars;
return;
}
@ -789,7 +835,7 @@ void QueryParser::parseString(pANTLR3_BASE_TREE node, string &str, int dep)
//'''''' 172
//"""""" 173
string substr = (const char*) childNode->getText(childNode)->chars;
string substr = (const char *) childNode->getText(childNode)->chars;
if (childNode->getType(childNode) == 170)
substr = "\"" + substr.substr(1, substr.length() - 2) + "\"";
if (childNode->getType(childNode) == 172)
@ -827,9 +873,8 @@ void QueryParser::parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree)
{
parsePrologue(childNode);
}
else
//insert 82
if (childNode->getType(childNode) == 82)
else if (childNode->getType(childNode) == 82)
{
//INSERT
//DATA
@ -838,13 +883,13 @@ void QueryParser::parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree)
querytree.setUpdateType(QueryTree::Insert_Data);
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 1);
//triples template 186
if (gchildNode->getType(gchildNode) == 186)
parseTripleTemplate(gchildNode, querytree.getInsertPatterns());
}
else
//delete 48
if (childNode->getType(childNode) == 48 && childNode->getChildCount(childNode) > 0)
else if (childNode->getType(childNode) == 48 && childNode->getChildCount(childNode) > 0)
{
//DELETE
//DELETE
@ -852,6 +897,7 @@ void QueryParser::parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree)
//TRIPLES_TEMPLATE
pANTLR3_BASE_TREE gchildNode = (pANTLR3_BASE_TREE) childNode->getChild(childNode, 0);
//data 41
if (gchildNode->getType(gchildNode) == 41)
{
@ -862,9 +908,8 @@ void QueryParser::parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree)
if (gchildNode->getType(gchildNode) == 186)
parseTripleTemplate(gchildNode, querytree.getDeletePatterns());
}
else
//where 203
if (gchildNode->getType(gchildNode) == 203)
else if (gchildNode->getType(gchildNode) == 203)
{
querytree.setUpdateType(QueryTree::Delete_Where);
@ -877,9 +922,8 @@ void QueryParser::parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree)
}
}
}
else
//modify 110
if (childNode->getType(childNode) == 110)
else if (childNode->getType(childNode) == 110)
{
parseModify(childNode, querytree);
}
@ -937,9 +981,9 @@ void QueryParser::parseTripleTemplate(pANTLR3_BASE_TREE node, QueryTree::GroupPa
if (j != 0 && j % 2 == 0) //triples same subject
{
grouppattern.addOnePattern(QueryTree::GroupPattern::Pattern( QueryTree::GroupPattern::Pattern::Element(subject),
QueryTree::GroupPattern::Pattern::Element(predicate),
QueryTree::GroupPattern::Pattern::Element(object)));
grouppattern.addOnePattern(QueryTree::GroupPattern::Pattern(QueryTree::GroupPattern::Pattern::Element(subject),
QueryTree::GroupPattern::Pattern::Element(predicate),
QueryTree::GroupPattern::Pattern::Element(object)));
}
}
}
@ -968,27 +1012,24 @@ void QueryParser::parseModify(pANTLR3_BASE_TREE node, QueryTree &querytree)
{
querytree.setUpdateType(QueryTree::Delete_Clause);
}
else
//insert 82
if (childNode->getType(childNode) == 82)
else if (childNode->getType(childNode) == 82)
{
if (querytree.getUpdateType() == QueryTree::Not_Update)
querytree.setUpdateType(QueryTree::Insert_Clause);
else if (querytree.getUpdateType() == QueryTree::Delete_Clause)
querytree.setUpdateType(QueryTree::Modify_Clause);
}
else
//triples template 186
if (childNode->getType(childNode) == 186)
else if (childNode->getType(childNode) == 186)
{
if (querytree.getUpdateType() == QueryTree::Delete_Clause)
parseTripleTemplate(childNode, querytree.getDeletePatterns());
else if (querytree.getUpdateType() == QueryTree::Insert_Clause || querytree.getUpdateType() == QueryTree::Modify_Clause)
parseTripleTemplate(childNode, querytree.getInsertPatterns());
}
else
//where 203
if (childNode->getType(childNode) == 203)
else if (childNode->getType(childNode) == 203)
{
//WHERE
//GROUP_GRAPH_PATTERN

View File

@ -2,7 +2,7 @@
# Filename: QueryParser.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-07-14
# Last Modified: 2017-03-13
# Description:
=============================================================================*/
@ -14,40 +14,40 @@
#include "SparqlParser.h"
#include "SparqlLexer.h"
// Declaration of the SPARQL parser. SPARQLParse() receives the query text and
// the QueryTree to fill; every parse* member takes an ANTLR3 tree node plus the
// QueryTree / GroupPattern / filter node it writes into. printNode() takes a
// node and a depth that defaults to 0.
// NOTE(review): this span looks like a rendered diff in which the pre-change
// and post-change declarations are interleaved (two class heads, two prefix-map
// members, repeated prototypes, two parseFilterTree signatures). Confirm which
// set is current before relying on any single line.
class QueryParser{
private:
std::map<std::string,std::string> _prefix_map;
class QueryParser
{
public:
void SPARQLParse(const std::string &query, QueryTree &querytree);
int printNode(pANTLR3_BASE_TREE node,int dep = 0);
private:
std::map<std::string, std::string> prefix_map;
void parseWorkload(pANTLR3_BASE_TREE node, QueryTree &querytree);
int printNode(pANTLR3_BASE_TREE node, int dep = 0);
void parseQuery(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parsePrologue(pANTLR3_BASE_TREE node);
void parsePrefix(pANTLR3_BASE_TREE node);
void replacePrefix(std::string &str);
void parseSelectClause(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseSelectVar(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseSelectAggregateFunction(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
void parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
void parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
void parseOrderBy(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseString(pANTLR3_BASE_TREE node, std::string &str, int dep);
void parseWorkload(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseTripleTemplate(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseModify(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseQuery(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parsePrologue(pANTLR3_BASE_TREE node);
void parsePrefix(pANTLR3_BASE_TREE node);
void replacePrefix(std::string &str);
void parseSelectClause(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseSelectVar(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseSelectAggregateFunction(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTree::FilterTreeNode &filter);
void parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern::FilterTree::FilterTreeNode &filter);
// BIND and GROUP BY parsing entry points (named in the commit message as the
// features being implemented).
void parseBind(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseGroupBy(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseOrderBy(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseString(pANTLR3_BASE_TREE node, std::string &str, int dep);
public:
QueryParser();
void SPARQLParse(const std::string &query, QueryTree &querytree);
void parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseTripleTemplate(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseModify(pANTLR3_BASE_TREE node, QueryTree &querytree);
};
#endif /* QUERYPARSER_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
# Filename: GeneralEvaluation.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-09-12
# Last Modified: 2017-05-05
# Description:
=============================================================================*/
@ -30,35 +30,27 @@ class GeneralEvaluation
private:
QueryParser query_parser;
QueryTree query_tree;
SPARQLquery sparql_query;
std::vector <Varset> sparql_query_varset;
VSTree *vstree;
KVstore *kvstore;
TYPE_TRIPLE_NUM* pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
StringIndex *stringindex;
Strategy strategy;
ResultFilter result_filter;
bool need_output_answer;
TYPE_TRIPLE_NUM *pre2num;
TYPE_PREDICATE_ID limitID_predicate;
TYPE_ENTITY_LITERAL_ID limitID_literal;
public:
// Constructor: copies the supplied VSTree / KVstore / StringIndex pointers, the
// pre2num pointer and the two ID limits into the members of the same name; the
// body itself is empty.
// NOTE(review): two signature + initializer-list pairs appear back to back
// (one ending in need_output_answer(false), one ending in temp_result(NULL)).
// This looks like pre- and post-change diff lines interleaved; confirm which
// pair is current.
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM* _pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal):
vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), need_output_answer(false)
GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TYPE_TRIPLE_NUM *_pre2num, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal):
vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), temp_result(NULL)
{
}
std::vector<std::vector<std::string> > getSPARQLQueryVarset();
bool parseQuery(const std::string &_query);
QueryTree& getQueryTree();
bool doQuery();
void getBasicQuery(QueryTree::GroupPattern &grouppattern);
class FilterExistsGroupPatternResultSetRecord;
class FilterEvaluationMultitypeValue
{
public:
@ -141,32 +133,41 @@ class GeneralEvaluation
FilterEvaluationMultitypeValue():datatype(rdf_term), int_value(0), flt_value(0), dbl_value(0){}
};
class TempResult
{
public:
Varset var;
std::vector<int*> res;
// One row of a TempResult: a raw pointer to unsigned values plus a vector of
// strings. Presumably `id` holds the columns of id_varset and `str` the columns
// of str_varset -- confirm against TempResult's implementation.
// The default constructor sets `id` to NULL and leaves `str` empty; this class
// has no destructor, so it never frees `id` itself.
class ResultPair
{
	public:
		unsigned *id;
		// Fully qualified so the header does not depend on a using-directive
		// being in effect at the point of inclusion.
		std::vector<std::string> str;
		ResultPair():id(NULL){}
};
Varset id_varset, str_varset;
std::vector<ResultPair> result;
Varset getAllVarset();
void release();
static int compareFunc(int *a, std::vector<int> &p, int *b, std::vector<int> &q);
void sort(int l, int r, std::vector<int> &p);
int findLeftBounder(std::vector<int> &p, int *b, std::vector<int> &q);
int findRightBounder(std::vector<int> &p, int *b, std::vector<int> &q);
static int compareRow(const ResultPair &x, const int x_id_cols, const std::vector<int> &x_pos,
const ResultPair &y, const int y_id_cols, const std::vector<int> &y_pos);
void sort(int l, int r, const std::vector<int> &this_pos);
int findLeftBounder(const std::vector<int> &this_pos, const ResultPair &x, const int x_id_cols, const std::vector<int> &x_pos) const;
int findRightBounder(const std::vector<int> &this_pos, const ResultPair &x, const int x_id_cols, const std::vector<int> &x_pos) const;
void convertId2Str(Varset convert_varset, StringIndex *stringindex, Varset &entity_literal_varset);
void doJoin(TempResult &x, TempResult &r);
void doUnion(TempResult &rt);
void doUnion(TempResult &r);
void doOptional(std::vector<bool> &binding, TempResult &x, TempResult &rn, TempResult &ra, bool add_no_binding);
void doMinus(TempResult &x, TempResult &r);
void doDistinct(TempResult &r);
void mapFilterTree2Varset(QueryTree::GroupPattern::FilterTreeNode &filter, Varset &v, Varset &entity_literal_varset);
void doFilter(QueryTree::GroupPattern::FilterTreeNode &filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, TempResult &r, StringIndex *stringindex, Varset &entity_literal_varset);
void getFilterString(QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild &child, FilterEvaluationMultitypeValue &femv, int *row, StringIndex *stringindex);
FilterEvaluationMultitypeValue matchFilterTree(QueryTree::GroupPattern::FilterTreeNode &filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, int *row, StringIndex *stringindex);
void getFilterString(QueryTree::GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild &child, FilterEvaluationMultitypeValue &femv, ResultPair &row, int id_cols, StringIndex *stringindex);
FilterEvaluationMultitypeValue matchFilterTree(QueryTree::GroupPattern::FilterTree::FilterTreeNode &filter, ResultPair &row, int id_cols, StringIndex *stringindex);
void doFilter(QueryTree::GroupPattern::FilterTree::FilterTreeNode &filter, TempResult &r, StringIndex *stringindex, Varset &entity_literal_varset);
void print();
void print(int no=-1);
};
class TempResultSet
@ -176,68 +177,32 @@ class GeneralEvaluation
void release();
int findCompatibleResult(Varset &_varset);
int findCompatibleResult(Varset &_id_varset, Varset &_str_varset);
void doJoin(TempResultSet &x, TempResultSet &r);
void doJoin(TempResultSet &x, TempResultSet &r, StringIndex *stringindex, Varset &entity_literal_varset);
void doUnion(TempResultSet &x, TempResultSet &r);
void doOptional(TempResultSet &x, TempResultSet &r);
void doMinus(TempResultSet &x, TempResultSet &r);
void doDistinct(Varset &projection, TempResultSet &r);
void doOptional(TempResultSet &x, TempResultSet &r, StringIndex *stringindex, Varset &entity_literal_varset);
void doMinus(TempResultSet &x, TempResultSet &r, StringIndex *stringindex, Varset &entity_literal_varset);
void doFilter(QueryTree::GroupPattern::FilterTree::FilterTreeNode &filter, TempResultSet &r, StringIndex *stringindex, Varset &entity_literal_varset);
void doFilter(QueryTree::GroupPattern::FilterTreeNode &filter, FilterExistsGroupPatternResultSetRecord &filter_exists_grouppattern_resultset_record, TempResultSet &r, StringIndex *stringindex, Varset &entity_literal_varset);
void doProjection1(Varset &proj, TempResultSet &r, StringIndex *stringindex, Varset &entity_literal_varset);
void doDistinct1(TempResultSet &r);
void print();
};
// A tagged, untyped pointer: a one-character type code plus an opaque pointer.
// The pointer defaults to NULL when only the type code is supplied. The class
// stores the pointer as given and never dereferences or frees it.
class EvaluationUnit
{
	private:
		char type;
		void *p;
	public:
		EvaluationUnit(char _type, void *_p = NULL):type(_type), p(_p){}
		// Accessors are const-qualified so they can be used on const objects
		// and through const references.
		char getType() const
		{ return type; }
		void *getPointer() const
		{ return p; }
};
private:
TempResultSet* temp_result;
std::vector<QueryTree::GroupPattern> rewriting_evaluation_stack;
std::vector<EvaluationUnit> semantic_evaluation_plan;
void generateEvaluationPlan(QueryTree::GroupPattern &grouppattern);
void dfsJoinableResultGraph(int x, vector < pair<char, int> > &node_info, vector < vector<int> > &edge, QueryTree::GroupPattern &grouppattern);
std::stack<TempResultSet*> semantic_evaluation_result_stack;
// Data-only record with three public containers; the same statement also
// declares the member object filter_exists_grouppattern_resultset_record.
// Judging by the names this caches results for FILTER EXISTS group patterns
// (resultset), the shared variable sets (common) and a pair of index vectors
// per entry (common2resultset) -- TODO confirm against the implementation.
// `resultset` holds raw TempResultSet pointers; nothing in this declaration
// releases them.
class FilterExistsGroupPatternResultSetRecord
{
public:
std::vector<TempResultSet*> resultset;
std::vector< std::vector<Varset> > common;
std::vector< std::vector< std::pair< std::vector<int>, std::vector<int> > > > common2resultset;
} filter_exists_grouppattern_resultset_record;
int countFilterExistsGroupPattern(QueryTree::GroupPattern::FilterTreeNode &filter);
void doEvaluationPlan();
// Bundles a group pattern, its SPARQLquery and a result-set pointer.
// Only `result` is explicitly initialised (to NULL); the other two members are
// default-constructed.
class ExpansionEvaluationStackUnit
{
	public:
		QueryTree::GroupPattern grouppattern;
		SPARQLquery sparql_query;
		TempResultSet *result;

		ExpansionEvaluationStackUnit()
			: result(NULL)
		{
		}
};
std::vector <ExpansionEvaluationStackUnit> expansion_evaluation_stack;
public:
TempResultSet* semanticBasedQueryEvaluation(QueryTree::GroupPattern &grouppattern);
bool expanseFirstOuterUnionGroupPattern(QueryTree::GroupPattern &grouppattern, std::deque<QueryTree::GroupPattern> &queue);
void queryRewriteEncodeRetrieveJoin(int dep);
TempResultSet* rewritingBasedQueryEvaluation(int dep);
bool needOutputAnswer();
void setNeedOutputAnswer();
void getFinalResult(ResultSet &result_str);
void releaseResultStack();
void getFinalResult(ResultSet &ret_result);
void releaseResult();
void prepareUpdateTriple(QueryTree::GroupPattern &update_pattern, TripleWithObjType *&update_triple, unsigned &update_triple_num);
};

View File

@ -2,7 +2,7 @@
# Filename: QueryTree.cpp
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-07-14
# Last Modified: 2017-03-13
# Description: implement functions in QueryTree.h
=============================================================================*/
@ -10,80 +10,126 @@
using namespace std;
// Collects the variables referenced by this filter node into `varset`:
// a string-typed child whose text starts with '?' is added as a variable, and a
// tree-typed child is visited recursively.
// NOTE(review): two signatures and two spellings of each test (`arg` vs `str`,
// long vs short enum qualification) appear here; this looks like pre- and
// post-change diff lines interleaved. Confirm which set is current.
void QueryTree::GroupPattern::FilterTreeNode::getVarset(Varset &varset)
void QueryTree::GroupPattern::FilterTree::FilterTreeNode::getVarset(Varset &varset)
{
for (int i = 0; i < (int)this->child.size(); i++)
{
if (this->child[i].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type && this->child[i].arg[0] == '?')
varset.addVar(this->child[i].arg);
if (this->child[i].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
if (this->child[i].node_type == FilterTreeChild::String_type && this->child[i].str[0] == '?')
varset.addVar(this->child[i].str);
if (this->child[i].node_type == FilterTreeChild::Tree_type)
this->child[i].node.getVarset(varset);
}
}
void QueryTree::GroupPattern::FilterTreeNode::print(vector<GroupPattern> &exist_grouppatterns, int dep)
// Resolves the variable operands of this filter subtree against `varset`.
// For a string-typed child starting with '?', `pos` is set to the first element
// of Varset(str).mapTo(varset) and `isel` to entity_literal_varset.findVar(str);
// tree-typed children are visited recursively.
//   - Not_type:                       recurses into child[0].node
//   - Or_type / And_type:             recurses into child[0].node and child[1].node
//   - Equal_type..GreaterOrEqual_type: handles child[0] and child[1]
//   - the listed Builtin_* types:     handles child[0] only
// NOTE(review): the builtin branch maps only child[0]; variables in later
// operands (e.g. of IN, REGEX or LANGMATCHES) are not touched here -- confirm
// that is intended.
// NOTE(review): the printf lines and the long-qualified Builtin_in_type test
// below look like pre-change lines of a diff (from a print routine) interleaved
// with this function; confirm which statements are current.
void QueryTree::GroupPattern::FilterTree::FilterTreeNode::mapVarPos2Varset(Varset &varset, Varset &entity_literal_varset)
{
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Not_type) printf("!");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type) printf("REGEX");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_str_type) printf("STR");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type) printf("LANG");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type) printf("LANGMATCHES");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type) printf("BOUND");
if (this->oper_type == Not_type)
{
this->child[0].node.mapVarPos2Varset(varset, entity_literal_varset);
}
else if (this->oper_type == Or_type || this->oper_type == And_type)
{
this->child[0].node.mapVarPos2Varset(varset, entity_literal_varset);
this->child[1].node.mapVarPos2Varset(varset, entity_literal_varset);
}
else if (Equal_type <= this->oper_type && this->oper_type <= GreaterOrEqual_type)
{
if (this->child[0].node_type == FilterTreeChild::Tree_type)
this->child[0].node.mapVarPos2Varset(varset, entity_literal_varset);
else if (this->child[0].node_type == FilterTreeChild::String_type && this->child[0].str[0] == '?')
{
this->child[0].pos = Varset(this->child[0].str).mapTo(varset)[0];
this->child[0].isel = entity_literal_varset.findVar(this->child[0].str);
}
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type)
if (this->child[1].node_type == FilterTreeChild::Tree_type)
this->child[1].node.mapVarPos2Varset(varset, entity_literal_varset);
else if (this->child[1].node_type == FilterTreeChild::String_type && this->child[1].str[0] == '?')
{
this->child[1].pos = Varset(this->child[1].str).mapTo(varset)[0];
this->child[1].isel = entity_literal_varset.findVar(this->child[1].str);
}
}
else if (this->oper_type == Builtin_regex_type ||
this->oper_type == Builtin_str_type ||
this->oper_type == Builtin_isiri_type ||
this->oper_type == Builtin_isuri_type ||
this->oper_type == Builtin_isliteral_type ||
this->oper_type == Builtin_isnumeric_type ||
this->oper_type == Builtin_lang_type ||
this->oper_type == Builtin_langmatches_type ||
this->oper_type == Builtin_bound_type ||
this->oper_type == Builtin_in_type)
{
if (this->child[0].node_type == FilterTreeChild::Tree_type)
this->child[0].node.mapVarPos2Varset(varset, entity_literal_varset);
else if (this->child[0].node_type == FilterTreeChild::String_type && this->child[0].str[0] == '?')
{
this->child[0].pos = Varset(this->child[0].str).mapTo(varset)[0];
this->child[0].isel = entity_literal_varset.findVar(this->child[0].str);
}
}
}
void QueryTree::GroupPattern::FilterTree::FilterTreeNode::print(int dep)
{
if (this->oper_type == Not_type) printf("!");
if (this->oper_type == Builtin_regex_type) printf("REGEX");
if (this->oper_type == Builtin_str_type) printf("STR");
if (this->oper_type == Builtin_isiri_type) printf("ISIRI");
if (this->oper_type == Builtin_isuri_type) printf("ISURI");
if (this->oper_type == Builtin_isliteral_type) printf("ISLITERAL");
if (this->oper_type == Builtin_isnumeric_type) printf("ISNUMERIC");
if (this->oper_type == Builtin_lang_type) printf("LANG");
if (this->oper_type == Builtin_langmatches_type) printf("LANGMATCHES");
if (this->oper_type == Builtin_bound_type) printf("BOUND");
if (this->oper_type == Builtin_in_type)
{
printf("(");
if (this->child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type) printf("%s", this->child[0].arg.c_str());
if (this->child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type) this->child[0].node.print(exist_grouppatterns, dep);
if (this->child[0].node_type == FilterTreeChild::String_type) printf("%s", this->child[0].str.c_str());
if (this->child[0].node_type == FilterTreeChild::Tree_type) this->child[0].node.print(dep);
printf(" IN (");
for (int i = 1; i < (int)this->child.size(); i++)
{
if (i != 1) printf(" , ");
if (this->child[i].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type) printf("%s", this->child[i].arg.c_str());
if (this->child[i].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type) this->child[i].node.print(exist_grouppatterns, dep);
if (i != 1) printf(", ");
if (this->child[i].node_type == FilterTreeChild::String_type) printf("%s", this->child[i].str.c_str());
if (this->child[i].node_type == FilterTreeChild::Tree_type) this->child[i].node.print(dep);
}
printf("))");
return;
}
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_exists_type)
{
printf("EXISTS");
exist_grouppatterns[this->exists_grouppattern_id].print(dep);
return;
}
printf("(");
if ((int)this->child.size() >= 1)
{
if (this->child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type) printf("%s", this->child[0].arg.c_str());
if (this->child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type) this->child[0].node.print(exist_grouppatterns, dep);
if (this->child[0].node_type == FilterTreeChild::String_type) printf("%s", this->child[0].str.c_str());
if (this->child[0].node_type == FilterTreeChild::Tree_type) this->child[0].node.print(dep);
}
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Or_type) printf(" || ");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::And_type) printf(" && ");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Equal_type) printf(" = ");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::NotEqual_type) printf(" != ");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Less_type) printf(" < ");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::LessOrEqual_type) printf(" <= ");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Greater_type) printf(" > ");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::GreaterOrEqual_type) printf(" >= ");
if (this->oper_type == Or_type) printf(" || ");
if (this->oper_type == And_type) printf(" && ");
if (this->oper_type == Equal_type) printf(" = ");
if (this->oper_type == NotEqual_type) printf(" != ");
if (this->oper_type == Less_type) printf(" < ");
if (this->oper_type == LessOrEqual_type) printf(" <= ");
if (this->oper_type == Greater_type) printf(" > ");
if (this->oper_type == GreaterOrEqual_type) printf(" >= ");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type || this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type) printf(", ");
if (this->oper_type == Builtin_regex_type || this->oper_type == Builtin_langmatches_type) printf(", ");
if ((int)this->child.size() >= 2)
{
if (this->child[1].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type) printf("%s", this->child[1].arg.c_str());
if (this->child[1].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type) this->child[1].node.print(exist_grouppatterns, dep);
if (this->child[1].node_type == FilterTreeChild::String_type) printf("%s", this->child[1].str.c_str());
if (this->child[1].node_type == FilterTreeChild::Tree_type) this->child[1].node.print(dep);
}
if ((int)this->child.size() >= 3)
{
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type && this->child[2].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type)
printf(", %s", this->child[2].arg.c_str());
if (this->oper_type == FilterTreeNode::Builtin_regex_type && this->child[2].node_type == FilterTreeChild::String_type)
printf(", %s", this->child[2].str.c_str());
}
printf(")");
@ -93,285 +139,280 @@ void QueryTree::GroupPattern::FilterTreeNode::print(vector<GroupPattern> &exist_
// Records one triple pattern in this group pattern. The visible statements push
// `_pattern` onto `patterns`, and append a Pattern_type SubGroupPattern whose
// `pattern` field is then assigned `_pattern`.
// NOTE(review): the `patterns` push and the `sub_grouppattern` lines look like
// pre- and post-change diff lines interleaved; confirm which is current.
void QueryTree::GroupPattern::addOnePattern(Pattern _pattern)
{
this->patterns.push_back(_pattern);
this->sub_grouppattern.push_back(SubGroupPattern(SubGroupPattern::Pattern_type));
this->sub_grouppattern.back().pattern = _pattern;
}
// Opens a new UNION group. The visible statements push a GroupPatternUnions
// built from (patterns.size() - 1) onto `unions`, and append a Union_type
// SubGroupPattern to `sub_grouppattern`.
// NOTE(review): these two statements look like pre- and post-change diff lines
// interleaved; confirm which is current.
void QueryTree::GroupPattern::addOneGroupUnion()
{
this->unions.push_back(GroupPatternUnions((int)this->patterns.size() - 1));
this->sub_grouppattern.push_back(SubGroupPattern(SubGroupPattern::Union_type));
}
// Adds one empty GroupPattern branch to the most recently opened UNION group.
// Throws a string literal (const char *) when the last sub_grouppattern entry
// is not Union_type.
// NOTE(review): back() is called with no emptiness check, and unions[n - 1]
// assumes `unions` is non-empty; both rely on a UNION group having been opened
// first -- confirm callers guarantee that.
// NOTE(review): the `unions[n - 1]` lines and the `sub_grouppattern` lines look
// like pre- and post-change diff lines interleaved; confirm which is current.
void QueryTree::GroupPattern::addOneUnion()
{
int n = (int)this->unions.size();
this->unions[n - 1].grouppattern_vec.push_back(GroupPattern());
if (this->sub_grouppattern.back().type != SubGroupPattern::Union_type)
throw "QueryTree::GroupPattern::addOneUnion failed";
this->sub_grouppattern.back().unions.push_back(GroupPattern());
}
// Returns a reference to the most recently added branch of the most recent
// UNION group. Throws a string literal (const char *) when the last
// sub_grouppattern entry is not Union_type or has no branches yet.
// NOTE(review): sub_grouppattern.back() is evaluated with no emptiness check on
// sub_grouppattern itself -- confirm callers never reach this on an empty group.
// NOTE(review): the index-based `unions[n - 1]...[m - 1]` lines and the
// `sub_grouppattern` lines look like pre- and post-change diff lines
// interleaved; confirm which is current.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastUnion()
{
int n = (int)this->unions.size();
int m = (int)this->unions[n - 1].grouppattern_vec.size();
return this->unions[n - 1].grouppattern_vec[m - 1];
if (this->sub_grouppattern.back().type != SubGroupPattern::Union_type || this->sub_grouppattern.back().unions.empty())
throw "QueryTree::GroupPattern::getLastUnion failed";
return this->sub_grouppattern.back().unions.back();
}
// Appends an OPTIONAL or MINUS sub-pattern. In the int-typed variant `_type` is
// cast to SubGroupPatternType; any value other than Optional_type or Minus_type
// makes the function throw a string literal (const char *).
// NOTE(review): two signatures (char-typed addOneOptionalOrMinus, int-typed
// addOneOptional) and both the `optionals` push and the `sub_grouppattern` push
// appear here; this looks like pre- and post-change diff lines interleaved.
// Confirm which is current.
void QueryTree::GroupPattern::addOneOptionalOrMinus(char _type)
void QueryTree::GroupPattern::addOneOptional(int _type)
{
this->optionals.push_back(OptionalOrMinusGroupPattern((int)this->patterns.size() - 1, (int)this->unions.size() - 1, _type));
SubGroupPattern::SubGroupPatternType type = (SubGroupPattern::SubGroupPatternType)_type;
if (type != SubGroupPattern::Optional_type && type != SubGroupPattern::Minus_type)
throw "QueryTree::GroupPattern::addOneOptional failed";
this->sub_grouppattern.push_back(SubGroupPattern(type));
}
// Returns a reference to the group pattern of the most recently added
// OPTIONAL/MINUS entry. Throws a string literal (const char *) when the last
// sub_grouppattern entry is neither Optional_type nor Minus_type.
// NOTE(review): back() is called with no emptiness check -- confirm callers
// always add an OPTIONAL/MINUS entry first.
// NOTE(review): two signatures and both an `optionals[n - 1]` return and a
// `sub_grouppattern.back()` return appear here; this looks like pre- and
// post-change diff lines interleaved. Confirm which is current.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastOptionalOrMinus()
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastOptional()
{
int n = (int)this->optionals.size();
return this->optionals[n - 1].grouppattern;
if (this->sub_grouppattern.back().type != SubGroupPattern::Optional_type && this->sub_grouppattern.back().type != SubGroupPattern::Minus_type)
throw "QueryTree::GroupPattern::getLastOptional failed";
return this->sub_grouppattern.back().optional;
}
// Appends a new, empty FILTER entry. The visible statements push a
// FilterTreeRoot onto `filters` together with an empty vector onto
// `filter_exists_grouppatterns`, and append a Filter_type SubGroupPattern.
// NOTE(review): two signatures (addOneFilterTree / addOneFilter) and both
// storage schemes appear here; this looks like pre- and post-change diff lines
// interleaved. Confirm which is current.
void QueryTree::GroupPattern::addOneFilterTree()
void QueryTree::GroupPattern::addOneFilter()
{
this->filters.push_back(FilterTreeRoot());
this->filter_exists_grouppatterns.push_back(vector<GroupPattern>());
this->sub_grouppattern.push_back(SubGroupPattern(SubGroupPattern::Filter_type));
}
// Returns a reference to the most recently added filter. The FilterTree variant
// throws a string literal (const char *) when the last sub_grouppattern entry
// is not Filter_type.
// NOTE(review): back() is called with no emptiness check -- confirm callers
// always add a filter first.
// NOTE(review): two signatures with different return types (FilterTreeNode& /
// FilterTree&) and two return statements appear here; this looks like pre- and
// post-change diff lines interleaved. Confirm which is current.
QueryTree::GroupPattern::FilterTreeNode& QueryTree::GroupPattern::getLastFilterTree()
QueryTree::GroupPattern::FilterTree& QueryTree::GroupPattern::getLastFilter()
{
return this->filters[(int)(this->filters.size()) - 1].root;
if (this->sub_grouppattern.back().type != SubGroupPattern::Filter_type)
throw "QueryTree::GroupPattern::getLastFilter failed";
return this->sub_grouppattern.back().filter;
}
// Appends a new BIND entry (a Bind_type SubGroupPattern) to this group pattern.
// NOTE(review): a second signature (addOneExistsGroupPattern) and statements
// that push onto filter_exists_grouppatterns[n - 1] also appear here; this
// looks like pre- and post-change diff lines interleaved. Confirm which is
// current.
void QueryTree::GroupPattern::addOneExistsGroupPattern()
void QueryTree::GroupPattern::addOneBind()
{
int n = (int)this->filter_exists_grouppatterns.size();
this->filter_exists_grouppatterns[n - 1].push_back(GroupPattern());
this->sub_grouppattern.push_back(SubGroupPattern(SubGroupPattern::Bind_type));
}
// Returns a reference to the most recently added BIND entry. Throws a string
// literal (const char *) when the last sub_grouppattern entry is not Bind_type.
// NOTE(review): back() is called with no emptiness check -- confirm callers
// always call addOneBind() first.
// NOTE(review): a second signature (getLastExistsGroupPattern) and an
// index-based return on filter_exists_grouppatterns also appear here; this
// looks like pre- and post-change diff lines interleaved. Confirm which is
// current.
QueryTree::GroupPattern& QueryTree::GroupPattern::getLastExistsGroupPattern()
QueryTree::GroupPattern::Bind& QueryTree::GroupPattern::getLastBind()
{
int n = (int)this->filter_exists_grouppatterns.size();
int m = (int)this->filter_exists_grouppatterns[n - 1].size();
return this->filter_exists_grouppatterns[n - 1][m - 1];
if (this->sub_grouppattern.back().type != SubGroupPattern::Bind_type)
throw "QueryTree::GroupPattern::getLastBind failed";
return this->sub_grouppattern.back().bind;
}
// Computes the variable sets of this group pattern (and, recursively, of its
// nested group patterns). In the sub_grouppattern-based statements each entry
// is handled by type:
//   - Pattern_type:  subject/predicate/object values starting with '?' are
//                    added to the pattern's own varset(s) and to the group-level
//                    subject_object / predicate maximal sets; the pattern's
//                    varset is then added to both the minimal and maximal
//                    result sets.
//   - Union_type:    every branch is visited; the branches' minimal sets are
//                    combined with operator* (presumably intersection -- confirm
//                    in Varset) and the result is added to the minimal set; the
//                    branches' maximal sets are added to the maximal sets.
//   - Optional_type: the nested pattern is visited and contributes only to the
//                    maximal sets.
//   - Minus_type:    the nested pattern is visited; nothing is added here.
//   - Filter_type:   the filter tree's variables are collected into
//                    filter.varset.
//   - Bind_type:     bind.varset is built from bind.var and added to both the
//                    minimal and maximal result sets.
// NOTE(review): loops over `patterns`, `unions`, `optionals`, `filters` and
// `filter_exists_grouppatterns` are interleaved with the sub_grouppattern-based
// statements; this looks like pre- and post-change diff lines mixed together.
// Confirm which statements are current before editing.
void QueryTree::GroupPattern::getVarset()
{
for (int i = 0; i < (int)this->patterns.size(); i++)
{
if (this->patterns[i].subject.value[0] == '?')
for (int i = 0; i < (int)this->sub_grouppattern.size(); i++)
if (this->sub_grouppattern[i].type == SubGroupPattern::Pattern_type)
{
this->patterns[i].varset.addVar(this->patterns[i].subject.value);
this->grouppattern_subject_object_maximal_varset.addVar(this->patterns[i].subject.value);
if (this->sub_grouppattern[i].pattern.subject.value[0] == '?')
{
this->sub_grouppattern[i].pattern.varset.addVar(this->sub_grouppattern[i].pattern.subject.value);
this->sub_grouppattern[i].pattern.subject_object_varset.addVar(this->sub_grouppattern[i].pattern.subject.value);
this->grouppattern_subject_object_maximal_varset.addVar(this->sub_grouppattern[i].pattern.subject.value);
}
if (this->sub_grouppattern[i].pattern.predicate.value[0] == '?')
{
this->sub_grouppattern[i].pattern.varset.addVar(this->sub_grouppattern[i].pattern.predicate.value);
this->grouppattern_predicate_maximal_varset.addVar(this->sub_grouppattern[i].pattern.predicate.value);
}
if (this->sub_grouppattern[i].pattern.object.value[0] == '?')
{
this->sub_grouppattern[i].pattern.varset.addVar(this->sub_grouppattern[i].pattern.object.value);
this->sub_grouppattern[i].pattern.subject_object_varset.addVar(this->sub_grouppattern[i].pattern.object.value);
this->grouppattern_subject_object_maximal_varset.addVar(this->sub_grouppattern[i].pattern.object.value);
}
this->grouppattern_resultset_minimal_varset += this->sub_grouppattern[i].pattern.varset;
this->grouppattern_resultset_maximal_varset += this->sub_grouppattern[i].pattern.varset;
}
if (this->patterns[i].predicate.value[0] == '?')
else if (this->sub_grouppattern[i].type == SubGroupPattern::Union_type)
{
this->patterns[i].varset.addVar(this->patterns[i].predicate.value);
this->grouppattern_predicate_maximal_varset.addVar(this->patterns[i].predicate.value);
}
if (this->patterns[i].object.value[0] == '?')
{
this->patterns[i].varset.addVar(this->patterns[i].object.value);
this->grouppattern_subject_object_maximal_varset.addVar(this->patterns[i].object.value);
}
this->grouppattern_resultset_minimal_varset = this->grouppattern_resultset_minimal_varset + this->patterns[i].varset;
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->patterns[i].varset;
}
Varset minimal_varset;
for (int i = 0; i < (int)this->unions.size(); i++)
{
Varset minimal_varset;
for (int j = 0; j < (int)this->unions[i].grouppattern_vec.size(); j++)
{
this->unions[i].grouppattern_vec[j].getVarset();
if (j == 0)
minimal_varset = minimal_varset + this->unions[i].grouppattern_vec[j].grouppattern_resultset_minimal_varset;
else
minimal_varset = minimal_varset * this->unions[i].grouppattern_vec[j].grouppattern_resultset_minimal_varset;
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->unions[i].grouppattern_vec[j].grouppattern_resultset_maximal_varset;
this->grouppattern_subject_object_maximal_varset = this->grouppattern_subject_object_maximal_varset + this->unions[i].grouppattern_vec[j].grouppattern_subject_object_maximal_varset;
this->grouppattern_predicate_maximal_varset = this->grouppattern_predicate_maximal_varset + this->unions[i].grouppattern_vec[j].grouppattern_predicate_maximal_varset;
for (int j = 0; j < (int)this->sub_grouppattern[i].unions.size(); j++)
{
this->sub_grouppattern[i].unions[j].getVarset();
if (j == 0)
minimal_varset = this->sub_grouppattern[i].unions[j].grouppattern_resultset_minimal_varset;
else
minimal_varset = minimal_varset * this->sub_grouppattern[i].unions[j].grouppattern_resultset_minimal_varset;
this->grouppattern_resultset_maximal_varset += this->sub_grouppattern[i].unions[j].grouppattern_resultset_maximal_varset;
this->grouppattern_subject_object_maximal_varset += this->sub_grouppattern[i].unions[j].grouppattern_subject_object_maximal_varset;
this->grouppattern_predicate_maximal_varset += this->sub_grouppattern[i].unions[j].grouppattern_predicate_maximal_varset;
}
this->grouppattern_resultset_minimal_varset += minimal_varset;
}
this->grouppattern_resultset_minimal_varset = this->grouppattern_resultset_minimal_varset + minimal_varset;
}
for (int i = 0; i < (int)this->optionals.size(); i++)
{
this->optionals[i].grouppattern.getVarset();
if (this->optionals[i].type == 'o')
else if (this->sub_grouppattern[i].type == SubGroupPattern::Optional_type)
{
this->grouppattern_resultset_maximal_varset = this->grouppattern_resultset_maximal_varset + this->optionals[i].grouppattern.grouppattern_resultset_maximal_varset;
this->grouppattern_subject_object_maximal_varset = this->grouppattern_subject_object_maximal_varset + this->optionals[i].grouppattern.grouppattern_subject_object_maximal_varset;
this->grouppattern_predicate_maximal_varset = this->grouppattern_predicate_maximal_varset + this->optionals[i].grouppattern.grouppattern_predicate_maximal_varset;
this->sub_grouppattern[i].optional.getVarset();
this->grouppattern_resultset_maximal_varset += this->sub_grouppattern[i].optional.grouppattern_resultset_maximal_varset;
this->grouppattern_subject_object_maximal_varset += this->sub_grouppattern[i].optional.grouppattern_subject_object_maximal_varset;
this->grouppattern_predicate_maximal_varset += this->sub_grouppattern[i].optional.grouppattern_predicate_maximal_varset;
}
}
for (int i = 0; i < (int)this->filters.size(); i++)
{
this->filters[i].root.getVarset(this->filters[i].varset);
}
for(int i = 0; i < (int)this->filter_exists_grouppatterns.size(); i++)
for (int j = 0; j < (int)this->filter_exists_grouppatterns[i].size(); j++)
else if (this->sub_grouppattern[i].type == SubGroupPattern::Minus_type)
{
this->filter_exists_grouppatterns[i][j].getVarset();
this->sub_grouppattern[i].optional.getVarset();
}
else if (this->sub_grouppattern[i].type == SubGroupPattern::Filter_type)
{
this->sub_grouppattern[i].filter.root.getVarset(this->sub_grouppattern[i].filter.varset);
}
else if (this->sub_grouppattern[i].type == SubGroupPattern::Bind_type)
{
this->sub_grouppattern[i].bind.varset = Varset(this->sub_grouppattern[i].bind.var);
this->grouppattern_resultset_minimal_varset += this->sub_grouppattern[i].bind.varset;
this->grouppattern_resultset_maximal_varset += this->sub_grouppattern[i].bind.varset;
}
}
// Returns true when this group pattern, and every group pattern nested in its
// UNION branches and OPTIONAL parts, contains no MINUS part and no FILTER
// EXISTS group pattern.
bool QueryTree::GroupPattern::checkOnlyUnionOptionalFilterNoExists()
{
	// Every branch of every UNION has to pass the same check.
	const int union_count = (int)this->unions.size();
	for (int u = 0; u < union_count; ++u)
	{
		const int branch_count = (int)this->unions[u].grouppattern_vec.size();
		int b = 0;
		while (b < branch_count)
		{
			if (!this->unions[u].grouppattern_vec[b].checkOnlyUnionOptionalFilterNoExists())
				return false;
			++b;
		}
	}

	// Entries of `optionals` must be of type 'o' (anything else is rejected)
	// and their nested group pattern has to pass as well.
	const int optional_count = (int)this->optionals.size();
	for (int k = 0; k < optional_count; ++k)
	{
		const bool is_optional = (this->optionals[k].type == 'o');
		if (!is_optional)
			return false;
		if (!this->optionals[k].grouppattern.checkOnlyUnionOptionalFilterNoExists())
			return false;
	}

	// Any filter that carries an EXISTS group pattern disqualifies the pattern.
	const int filter_count = (int)this->filter_exists_grouppatterns.size();
	for (int f = 0; f < filter_count; ++f)
	{
		if ((int)this->filter_exists_grouppatterns[f].size() != 0)
			return false;
	}

	return true;
}
// Recursive check used by QueryTree::checkWellDesigned().
//
// Returns a pair (occur varset, ban varset) and reports violations by setting
// check_condition to false. If check_condition is already false on entry, an
// empty pair is returned immediately.
//
// As visible in this span, check_condition is cleared when:
//   - a triple pattern or a BIND variable shares a variable with the ban set,
//   - an OPTIONAL's returned ban set shares a variable with what has occurred,
//   - a MINUS sub-pattern is encountered,
//   - a FILTER's varset is not contained in the occurred variables.
//
// NOTE(review): this span carries two signatures and two sets of local names
// (occur/ban/this_ban vs. occur_varset/ban_varset/new_ban_varset), i.e. both
// the lastpattern/lastunions/lastoptional walk and the sub_grouppattern walk.
// Confirm which variant is the intended one before editing the logic.
pair<Varset, Varset> QueryTree::GroupPattern::checkOptionalGroupPatternVarsAndSafeFilter(Varset occur , Varset ban, bool &check_condition)
pair<Varset, Varset> QueryTree::GroupPattern::checkNoMinusAndOptionalVarAndSafeFilter(Varset occur_varset, Varset ban_varset, bool &check_condition)
//return occur varset and ban varset
{
if (!check_condition) return make_pair(Varset(), Varset());
Varset this_ban;
Varset new_ban_varset;
int lastpattern = -1, lastunions = -1, lastoptional = -1;
while (check_condition && (lastpattern + 1 < (int)this->patterns.size() || lastunions + 1 < (int)this->unions.size() || lastoptional + 1 < (int)this->optionals.size()))
{
if (lastoptional + 1 < (int)this->optionals.size() && this->optionals[lastoptional + 1].lastpattern == lastpattern && this->optionals[lastoptional + 1].lastunions == lastunions)
//optional
for (int i = 0; i < (int)this->sub_grouppattern.size(); i++)
if (!check_condition) break;
else if (this->sub_grouppattern[i].type == SubGroupPattern::Pattern_type)
{
pair<Varset, Varset> sub_grouppattern_return_varset = this->optionals[lastoptional + 1].grouppattern.checkOptionalGroupPatternVarsAndSafeFilter(Varset(), ban, check_condition);
if (occur.hasCommonVar(sub_grouppattern_return_varset.second))
if (this->sub_grouppattern[i].pattern.varset.hasCommonVar(ban_varset))
check_condition = false;
Varset out = this->optionals[lastoptional + 1].grouppattern.grouppattern_resultset_maximal_varset - occur;
occur = occur + sub_grouppattern_return_varset.first;
this_ban = this_ban + sub_grouppattern_return_varset.second;
this_ban = this_ban + out;
ban = ban + this_ban;
lastoptional++;
occur_varset += this->sub_grouppattern[i].pattern.varset;
}
else if (lastunions + 1 < (int)this->unions.size() && this->unions[lastunions + 1].lastpattern == lastpattern)
//union
else if (this->sub_grouppattern[i].type == SubGroupPattern::Union_type)
{
Varset sub_grouppattern_occur, sub_grouppattern_ban;
Varset sub_occur_varset, sub_ban_varset;
for (int i = 0; i < (int)this->unions[lastunions + 1].grouppattern_vec.size(); i++)
for (int j = 0; j < (int)this->sub_grouppattern[i].unions.size(); j++)
{
pair<Varset, Varset> sub_grouppattern_result = this->unions[lastunions + 1].grouppattern_vec[i].checkOptionalGroupPatternVarsAndSafeFilter(occur, ban, check_condition);
pair<Varset, Varset> sub_return_varset =
this->sub_grouppattern[i].unions[j].checkNoMinusAndOptionalVarAndSafeFilter(occur_varset, ban_varset, check_condition);
// The first branch seeds the per-union occur set; later branches are combined
// into it with operator*, while the ban sets are accumulated with + / +=.
if (i == 0)
sub_grouppattern_occur = sub_grouppattern_occur + sub_grouppattern_result.first;
if (j == 0)
sub_occur_varset = sub_return_varset.first;
else
sub_grouppattern_occur = sub_grouppattern_occur * sub_grouppattern_result.first;
sub_grouppattern_ban = sub_grouppattern_ban + sub_grouppattern_result.second;
sub_occur_varset = sub_occur_varset * sub_return_varset.first;
sub_ban_varset += sub_return_varset.second;
}
occur = occur + sub_grouppattern_occur;
this_ban = this_ban + sub_grouppattern_ban;
ban = ban + this_ban;
lastunions++;
new_ban_varset += sub_ban_varset;
occur_varset += sub_occur_varset;
ban_varset += new_ban_varset;
}
else
//triple pattern
else if (this->sub_grouppattern[i].type == SubGroupPattern::Optional_type)
{
if (this->patterns[lastpattern + 1].varset.hasCommonVar(ban))
// The OPTIONAL body is checked starting from an empty occur set.
pair<Varset, Varset> sub_return_varset =
this->sub_grouppattern[i].optional.checkNoMinusAndOptionalVarAndSafeFilter(Varset(), ban_varset, check_condition);
//occur before
if (occur_varset.hasCommonVar(sub_return_varset.second))
check_condition = false;
occur = occur + this->patterns[lastpattern + 1].varset;
lastpattern++;
new_ban_varset += sub_return_varset.second;
// Variables of the OPTIONAL's maximal result varset that have not occurred
// so far are added to the ban set.
new_ban_varset += this->sub_grouppattern[i].optional.grouppattern_resultset_maximal_varset - occur_varset;
occur_varset += sub_return_varset.first;
ban_varset += new_ban_varset;
}
}
//filter
for (int i = 0; i < (int)this->filters.size(); i++)
if (!this->filters[i].varset.belongTo(occur))
{
check_condition = false;
break;
}
else if (this->sub_grouppattern[i].type == SubGroupPattern::Minus_type)
{
// Any MINUS fails the check.
check_condition = false;
}
else if (this->sub_grouppattern[i].type == SubGroupPattern::Filter_type)
{
// The filter's variables must all be among the variables that occurred so far.
if (!this->sub_grouppattern[i].filter.varset.belongTo(occur_varset))
check_condition = false;
}
else if (this->sub_grouppattern[i].type == SubGroupPattern::Bind_type)
{
if (this->sub_grouppattern[i].bind.varset.hasCommonVar(ban_varset))
check_condition = false;
return make_pair(occur, this_ban);
occur_varset += this->sub_grouppattern[i].bind.varset;
}
return make_pair(occur_varset, new_ban_varset);
}
// Resets the block ids so that every pattern starts in its own block: entry i
// is assigned id i. An id equal to the entry's own index is what
// getRootPatternBlockID() treats as "this entry is a root".
// NOTE(review): both the pattern_blockid vector and the per-pattern `blockid`
// field are initialised in this span — confirm which storage is current.
void QueryTree::GroupPattern::initPatternBlockid()
{
this->pattern_blockid.clear();
for (int i = 0; i < (int)this->patterns.size(); i++)
this->pattern_blockid.push_back(i);
// Only Pattern_type entries carry a block id; other sub-pattern kinds are skipped.
for (int i = 0; i < (int)this->sub_grouppattern.size(); i++)
if (this->sub_grouppattern[i].type == SubGroupPattern::Pattern_type)
this->sub_grouppattern[i].pattern.blockid = i;
}
// Returns the root block id of entry x by following block ids until an entry
// whose id equals its own index is reached. On the way back, the id stored for
// x is overwritten with the root, so later lookups for x are direct.
// The sub_grouppattern variant throws a string literal if x is not a
// Pattern_type entry.
// NOTE(review): as shown, every statement after the first `return
// this->pattern_blockid[x];` is unreachable — confirm which variant is intended.
int QueryTree::GroupPattern::getRootPatternBlockID(int x)
{
if (this->pattern_blockid[x] == x) return x;
this->pattern_blockid[x] = getRootPatternBlockID(this->pattern_blockid[x]);
return this->pattern_blockid[x];
if (this->sub_grouppattern[x].type != SubGroupPattern::Pattern_type)
throw "QueryTree::GroupPattern::getRootPatternBlockID failed";
if (this->sub_grouppattern[x].pattern.blockid == x)
return x;
this->sub_grouppattern[x].pattern.blockid = this->getRootPatternBlockID(this->sub_grouppattern[x].pattern.blockid);
return this->sub_grouppattern[x].pattern.blockid;
}
// Merges the block containing x into the block containing y: the root of x's
// block gets the root of y's block as its block id.
// NOTE(review): px and py are declared twice in this scope as shown, once per
// storage variant (pattern_blockid vs. sub_grouppattern[].pattern.blockid).
void QueryTree::GroupPattern::mergePatternBlockID(int x, int y)
{
int px = getRootPatternBlockID(x);
int py = getRootPatternBlockID(y);
this->pattern_blockid[px] = py;
int px = this->getRootPatternBlockID(x);
int py = this->getRootPatternBlockID(y);
this->sub_grouppattern[px].pattern.blockid = py;
}
// Prints this group pattern to stdout with printf.
// The pattern is wrapped in a "{" line and a "}" line, each preceded by `dep`
// tabs; its contents are preceded by dep + 1 tabs, and nested group patterns
// are printed recursively with dep + 1.
// Output per sub-pattern kind: triple patterns as "subject\tpredicate\tobject.",
// UNION branches separated by a "UNION" line, "OPTIONAL"/"MINUS" followed by
// the nested pattern, "FILTER\t" followed by the filter tree, and
// "BIND(<str>\tAS\t<var>)".
// NOTE(review): this span contains both the lastpattern/lastunions/lastoptional
// walk and the sub_grouppattern walk — confirm which variant is current.
void QueryTree::GroupPattern::print(int dep)
{
// Only the first printf belongs to the for loop; the brace is printed once.
for (int t = 0; t < dep; t++) printf("\t"); printf("{\n");
int lastpattern = -1, lastunions = -1, lastoptional = -1;
while (lastpattern + 1 < (int)this->patterns.size() || lastunions + 1 < (int)this->unions.size() || lastoptional + 1 < (int)this->optionals.size())
{
if (lastoptional + 1 < (int)this->optionals.size() && this->optionals[lastoptional + 1].lastpattern == lastpattern && this->optionals[lastoptional + 1].lastunions == lastunions)
//optional
for (int i = 0; i < (int)this->sub_grouppattern.size(); i++)
if (this->sub_grouppattern[i].type == SubGroupPattern::Pattern_type)
{
for (int t = 0; t <= dep; t++) printf("\t");
if (this->optionals[lastoptional + 1].type == 'o') printf("OPTIONAL\n");
if (this->optionals[lastoptional + 1].type == 'm') printf("MINUS\n");
this->optionals[lastoptional + 1].grouppattern.print(dep + 1);
lastoptional++;
printf("%s\t%s\t%s.\n", this->sub_grouppattern[i].pattern.subject.value.c_str(),
this->sub_grouppattern[i].pattern.predicate.value.c_str(),
this->sub_grouppattern[i].pattern.object.value.c_str());
}
else if (lastunions + 1 < (int)this->unions.size() && this->unions[lastunions + 1].lastpattern == lastpattern)
//union
else if (this->sub_grouppattern[i].type == SubGroupPattern::Union_type)
{
for (int i = 0; i < (int)this->unions[lastunions + 1].grouppattern_vec.size(); i++)
for (int j = 0; j < (int)this->sub_grouppattern[i].unions.size(); j++)
{
// "UNION" is printed between branches, i.e. before every branch but the first.
if (i != 0)
if (j != 0)
{
for (int t = 0; t <= dep; t++) printf("\t"); printf("UNION\n");
}
this->unions[lastunions + 1].grouppattern_vec[i].print(dep + 1);
this->sub_grouppattern[i].unions[j].print(dep + 1);
}
lastunions++;
}
else
//triple pattern
else if (this->sub_grouppattern[i].type == SubGroupPattern::Optional_type || this->sub_grouppattern[i].type == SubGroupPattern::Minus_type)
{
for (int t = 0; t <= dep; t++) printf("\t");
printf("%s\t%s\t%s.\n", this->patterns[lastpattern + 1].subject.value.c_str(), this->patterns[lastpattern + 1].predicate.value.c_str(), this->patterns[lastpattern + 1].object.value.c_str());
lastpattern++;
// OPTIONAL and MINUS both keep their nested pattern in the `optional` member.
if (this->sub_grouppattern[i].type == SubGroupPattern::Optional_type) printf("OPTIONAL\n");
if (this->sub_grouppattern[i].type == SubGroupPattern::Minus_type) printf("MINUS\n");
this->sub_grouppattern[i].optional.print(dep + 1);
}
else if (this->sub_grouppattern[i].type == SubGroupPattern::Filter_type)
{
for (int t = 0; t <= dep; t++) printf("\t"); printf("FILTER\t");
this->sub_grouppattern[i].filter.root.print(dep + 1);
printf("\n");
}
else if (this->sub_grouppattern[i].type == SubGroupPattern::Bind_type)
{
for (int t = 0; t <= dep; t++) printf("\t");
printf("BIND(%s\tAS\t%s)", this->sub_grouppattern[i].bind.str.c_str(), this->sub_grouppattern[i].bind.var.c_str());
printf("\n");
}
}
//filter
for (int i = 0; i < (int)this->filters.size(); i++)
{
for (int t = 0; t <= dep; t++) printf("\t"); printf("FILTER\t");
this->filters[i].root.print(this->filter_exists_grouppatterns[i], dep + 1);
printf("\n");
}
for (int t = 0; t < dep; t++) printf("\t"); printf("}\n");
}
@ -390,7 +431,7 @@ QueryTree::QueryForm QueryTree::getQueryForm()
// Stores the projection modifier of the query in the projection_modifier member.
// NOTE(review): the assignment appears twice (with and without `this->`);
// both write the same member.
void QueryTree::setProjectionModifier(ProjectionModifier _projection_modifier)
{
projection_modifier = _projection_modifier;
this->projection_modifier = _projection_modifier;
}
QueryTree::ProjectionModifier QueryTree::getProjectionModifier()
@ -405,8 +446,7 @@ void QueryTree::addProjectionVar()
// Returns a reference to the last element of `projection`.
// Neither variant checks for an empty vector, so `projection` must not be empty
// when this is called.
// NOTE(review): the second return (`projection.back()`) is unreachable as shown.
QueryTree::ProjectionVar& QueryTree::getLastProjectionVar()
{
int n = (int)this->projection.size();
return this->projection[n - 1];
return this->projection.back();
}
vector<QueryTree::ProjectionVar>& QueryTree::getProjection()
@ -414,24 +454,27 @@ vector<QueryTree::ProjectionVar>& QueryTree::getProjection()
return this->projection;
}
// Collects the `var` name of every projection entry, in projection order.
// NOTE(review): two signatures, two accumulators and two return statements are
// present — a vector<string> variant (getProjectionVar) and a Varset variant
// (getProjectionVarset). As written only the push_back is the loop body.
vector<string> QueryTree::getProjectionVar()
Varset QueryTree::getProjectionVarset()
{
vector<string> var_vec;
Varset varset;
for (int i = 0; i < (int)this->projection.size(); i++)
var_vec.push_back(this->projection[i].var);
varset.addVar(this->projection[i].var);
return var_vec;
return varset;
}
// getResultProjectionVarset: builds a Varset from the projection list. A plain
// projection (aggregate_type == None_type) contributes its own `var`; an
// aggregate contributes its `aggregate_var`, unless that is "*".
// NOTE(review): this span also contains the bodies of addProjectionUsefulVar
// and getProjectionUsefulVar (both operating on projection_useful_varset), so
// the braces do not delimit a single function as shown.
void QueryTree::addProjectionUsefulVar(string &_var)
Varset QueryTree::getResultProjectionVarset()
{
this->projection_useful_varset.addVar(_var);
}
Varset varset;
Varset& QueryTree::getProjectionUsefulVar()
{
return this->projection_useful_varset;
for (int i = 0; i < (int)this->projection.size(); i++)
if (this->projection[i].aggregate_type == ProjectionVar::None_type)
varset.addVar(this->projection[i].var);
else if (this->projection[i].aggregate_var != "*")
varset.addVar(this->projection[i].aggregate_var);
return varset;
}
void QueryTree::setProjectionAsterisk()
@ -444,41 +487,34 @@ bool QueryTree::checkProjectionAsterisk()
return this->projection_asterisk;
}
// addGroupByVar: adds _var to the group_by varset.
// checkSelectCompatibility: returns true for an asterisk projection, or when
// the projection list is homogeneous — all plain variables or all aggregates.
// NOTE(review): both functions share one body in this span; the addVar call
// sits after a return statement as shown.
bool QueryTree::checkSelectCompatibility()
void QueryTree::addGroupByVar(string &_var)
{
bool all_var = true, all_aggregate_function = true;
if (this->checkProjectionAsterisk())
return true;
for (int i = 0; i < (int)this->projection.size(); i++)
{
if (this->projection[i].aggregate_type != ProjectionVar::None_type)
all_var = false;
if (this->projection[i].aggregate_type == ProjectionVar::None_type)
all_aggregate_function = false;
}
return all_var || all_aggregate_function;
this->group_by.addVar(_var);
}
// getGroupByVarset: returns a reference to the group_by varset.
// atLeastOneAggregateFunction: returns true if any projection entry has an
// aggregate_type other than None_type.
// NOTE(review): both functions share one body in this span; `return
// this->group_by;` follows `return false;` as shown.
bool QueryTree::atLeastOneAggregateFunction()
Varset& QueryTree::getGroupByVarset()
{
for (int i = 0; i < (int)this->projection.size(); i++)
if (this->projection[i].aggregate_type != ProjectionVar::None_type)
return true;
return false;
return this->group_by;
}
// Appends an ordering key: variable _var, sorted descending when _descending
// is true and ascending otherwise.
// NOTE(review): two signatures (addOrder / addOrderVar) and two target vectors
// (order / order_by) appear in this span.
void QueryTree::addOrder(string &_var, bool _descending)
void QueryTree::addOrderVar(string &_var, bool _descending)
{
this->order.push_back(Order(_var, _descending));
this->order_by.push_back(Order(_var, _descending));
}
// Returns a mutable reference to the list of ordering keys.
// NOTE(review): two signatures (getOrder / getOrderVarVector) and two members
// (order / order_by) appear in this span; the second return is unreachable.
vector<QueryTree::Order>& QueryTree::getOrder()
vector<QueryTree::Order>& QueryTree::getOrderVarVector()
{
return this->order;
return this->order_by;
}
// Builds a Varset holding the variable of every ORDER BY key, visiting the
// keys in the order they are stored in order_by.
Varset QueryTree::getOrderByVarset()
{
	Varset order_vars;

	const int key_count = (int)this->order_by.size();
	int idx = 0;
	while (idx < key_count)
	{
		order_vars.addVar(this->order_by[idx].var);
		++idx;
	}

	return order_vars;
}
void QueryTree::setOffset(int _offset)
@ -528,102 +564,135 @@ QueryTree::GroupPattern& QueryTree::getDeletePatterns()
// Returns whether the query's group pattern passes the well-designed check.
// The recursive check starts with empty occur and ban varsets and reports
// violations through check_condition, which is returned; the returned pair of
// varsets is ignored here.
// NOTE(review): this span contains both check variants — the
// checkOnlyUnionOptionalFilterNoExists + checkOptionalGroupPatternVarsAndSafeFilter
// pair and the single checkNoMinusAndOptionalVarAndSafeFilter call.
bool QueryTree::checkWellDesigned()
{
if (!this->getGroupPattern().checkOnlyUnionOptionalFilterNoExists())
return false;
bool check_condition = true;
this->getGroupPattern().checkOptionalGroupPatternVarsAndSafeFilter(Varset(), Varset(), check_condition);
this->grouppattern.checkNoMinusAndOptionalVarAndSafeFilter(Varset(), Varset(), check_condition);
return check_condition;
}
// Returns true if at least one projected item is an aggregate, i.e. its
// aggregate_type differs from ProjectionVar::None_type.
bool QueryTree::checkAtLeastOneAggregateFunction()
{
	const int projection_count = (int)this->projection.size();

	bool found_aggregate = false;
	// Scanning stops at the first aggregate.
	for (int k = 0; !found_aggregate && k < projection_count; ++k)
		found_aggregate = (this->projection[k].aggregate_type != ProjectionVar::None_type);

	return found_aggregate;
}
// Validates how plain variables and aggregates are combined in the SELECT list.
//   - Without GROUP BY: once any aggregate is projected, a plain
//     (non-aggregate) projected variable makes the query invalid.
//   - With GROUP BY: every plain projected variable must be one of the
//     grouping variables.
// Returns true when the projection satisfies the applicable rule.
bool QueryTree::checkSelectAggregateFunctionGroupByValid()
{
	const bool has_group_by = !this->group_by.empty();

	// Neither grouping nor aggregates: there is nothing to validate.
	if (!has_group_by && !this->checkAtLeastOneAggregateFunction())
		return true;

	const int projection_count = (int)this->projection.size();
	for (int k = 0; k < projection_count; ++k)
	{
		// Aggregates are always acceptable; only plain variables are restricted.
		if (this->projection[k].aggregate_type != ProjectionVar::None_type)
			continue;

		// Plain variable next to an aggregate, with no GROUP BY.
		if (!has_group_by)
			return false;

		// Plain variable that is not a grouping variable.
		if (!this->group_by.findVar(this->projection[k].var))
			return false;
	}

	return true;
}
// Dumps the parsed query to stdout with printf, framed by two lines of 80 '='
// characters.
//   - Non-update queries: "SELECT" (plus " DISTINCT" when set) with the
//     projection list, or "ASK"; then the group pattern; for SELECT also
//     GROUP BY, ORDER BY, OFFSET (when non-zero) and LIMIT (when not -1).
//   - Updates: "UPDATE" followed by the Delete / Insert / GroupPattern sections
//     that apply to the update type.
// NOTE(review): most statements appear twice in this span, once going through
// getters (getUpdateType(), getProjection(), ...) and once reading the members
// directly — confirm which variant is current before editing.
void QueryTree::print()
{
// Only the first printf is the loop body; the newline is printed once.
for (int j = 0; j < 80; j++) printf("="); printf("\n");
if (this->getUpdateType() == Not_Update)
if (this->update_type == Not_Update)
{
if (this->getQueryForm() == Select_Query)
if (this->query_form == Select_Query)
{
printf("SELECT");
if (this->getProjectionModifier() == Modifier_Distinct)
if (this->projection_modifier == Modifier_Distinct)
printf(" DISTINCT");
printf("\n");
printf("Var: \t");
vector<ProjectionVar> &proj = this->getProjection();
for (int i = 0; i < (int)proj.size(); i++)
for (int i = 0; i < (int)this->projection.size(); i++)
{
// Plain variables are printed as-is; aggregates as "(FUNC([DISTINCT ]arg) AS var)".
if (proj[i].aggregate_type == QueryTree::ProjectionVar::None_type)
printf("%s\t", proj[i].var.c_str());
if (this->projection[i].aggregate_type == QueryTree::ProjectionVar::None_type)
printf("%s\t", this->projection[i].var.c_str());
else
{
printf("(");
if (proj[i].aggregate_type == QueryTree::ProjectionVar::Count_type)
if (this->projection[i].aggregate_type == QueryTree::ProjectionVar::Count_type)
printf("COUNT(");
if (proj[i].aggregate_type == QueryTree::ProjectionVar::Sum_type)
if (this->projection[i].aggregate_type == QueryTree::ProjectionVar::Sum_type)
printf("SUM(");
if (proj[i].aggregate_type == QueryTree::ProjectionVar::Min_type)
if (this->projection[i].aggregate_type == QueryTree::ProjectionVar::Min_type)
printf("MIN(");
if (proj[i].aggregate_type == QueryTree::ProjectionVar::Max_type)
if (this->projection[i].aggregate_type == QueryTree::ProjectionVar::Max_type)
printf("MAX(");
if (proj[i].aggregate_type == QueryTree::ProjectionVar::Avg_type)
if (this->projection[i].aggregate_type == QueryTree::ProjectionVar::Avg_type)
printf("AVG(");
if (proj[i].distinct)
if (this->projection[i].distinct)
printf("DISTINCT ");
printf("%s) AS %s)\t", proj[i].aggregate_var.c_str(), proj[i].var.c_str());
printf("%s) AS %s)\t", this->projection[i].aggregate_var.c_str(), this->projection[i].var.c_str());
}
}
// "*" is printed only for an asterisk projection without any aggregate.
if (this->checkProjectionAsterisk() && !this->atLeastOneAggregateFunction())
if (this->projection_asterisk && !this->checkAtLeastOneAggregateFunction())
printf("*");
printf("\n");
}
else printf("ASK\n");
printf("GroupPattern:\n");
this->getGroupPattern().print(0);
this->grouppattern.print(0);
if (this->getQueryForm() == Select_Query)
if (this->query_form == Select_Query)
{
if ((int)this->getOrder().size() > 0)
if (!this->group_by.empty())
{
printf("GROUP BY\t");
for (int i = 0; i < (int)this->group_by.vars.size(); i++)
printf("%s\t", this->group_by.vars[i].c_str());
printf("\n");
}
if (!this->order_by.empty())
{
printf("ORDER BY\t");
vector<QueryTree::Order>&order = this->getOrder();
for (int i = 0; i < (int)order.size(); i++)
for (int i = 0; i < (int)this->order_by.size(); i++)
{
if (!order[i].descending) printf("ASC(");
if (!this->order_by[i].descending) printf("ASC(");
else printf("DESC(");
printf("%s)\t", order[i].var.c_str());
printf("%s)\t", this->order_by[i].var.c_str());
}
printf("\n");
}
// OFFSET is printed only when non-zero, LIMIT only when it is not -1.
if (this->getOffset() != 0)
printf("OFFSET\t%d\n", this->getOffset());
if (this->getLimit() != -1)
printf("LIMIT\t%d\n", this->getLimit());
if (this->offset != 0)
printf("OFFSET\t%d\n", this->offset);
if (this->limit != -1)
printf("LIMIT\t%d\n", this->limit);
}
}
else
{
printf("UPDATE\n");
if (this->getUpdateType() == Delete_Data || this->getUpdateType() == Delete_Where ||
this->getUpdateType() == Delete_Clause || this->getUpdateType() == Modify_Clause)
if (this->update_type == Delete_Data || this->update_type == Delete_Where ||
this->update_type == Delete_Clause || this->update_type == Modify_Clause)
{
printf("Delete:\n");
this->getDeletePatterns().print(0);
this->delete_patterns.print(0);
}
if (this->getUpdateType() == Insert_Data || this->getUpdateType() == Insert_Clause || this->getUpdateType() == Modify_Clause)
if (this->update_type == Insert_Data || this->update_type == Insert_Clause || this->update_type == Modify_Clause)
{
printf("Insert:\n");
this->getInsertPatterns().print(0);
this->insert_patterns.print(0);
}
if (this->getUpdateType() == Delete_Where || this->getUpdateType() == Insert_Clause ||
this->getUpdateType() == Delete_Clause || this->getUpdateType() == Modify_Clause)
if (this->update_type == Delete_Where || this->update_type == Insert_Clause ||
this->update_type == Delete_Clause || this->update_type == Modify_Clause)
{
printf("GroupPattern:\n");
this->getGroupPattern().print(0);
this->grouppattern.print(0);
}
}
for (int j = 0; j < 80; j++) printf("="); printf("\n");
}

View File

@ -2,7 +2,7 @@
# Filename: QueryTree.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-07-14
# Last Modified: 2017-03-13
# Description:
=============================================================================*/
@ -15,72 +15,40 @@
class QueryTree
{
public:
QueryTree():
query_form(Select_Query), update_type(Not_Update), projection_modifier(Modifier_None), projection_asterisk(false), offset(0), limit(-1){}
enum QueryForm {Select_Query, Ask_Query};
enum ProjectionModifier {Modifier_None, Modifier_Distinct, Modifier_Reduced, Modifier_Count, Modifier_Duplicates};
class GroupPattern
{
public:
class Pattern
{
public:
class Element
{
public:
/*
enum Type { Variable, Literal, IRI };
enum SubType { None, CustomLanguage, CustomType };
Type type;
SubType subType;
std::string subTypeValue;
*/
std::string value;
Element(const std::string &_value):
value(_value){}
};
Element subject, predicate, object;
Varset varset;
Pattern(const Element _subject, const Element _predicate,const Element _object):subject(_subject), predicate(_predicate), object(_object){}
};
class Pattern;
class FilterTree;
class Bind;
class SubGroupPattern;
class GroupPatternUnions;
class OptionalOrMinusGroupPattern;
class FilterTreeNode;
class FilterTreeRoot;
std::vector<Pattern> patterns;
std::vector<GroupPatternUnions> unions;
std::vector<OptionalOrMinusGroupPattern> optionals;
std::vector<FilterTreeRoot> filters;
std::vector<std::vector<GroupPattern> > filter_exists_grouppatterns;
std::vector<SubGroupPattern> sub_grouppattern;
Varset grouppattern_resultset_minimal_varset, grouppattern_resultset_maximal_varset;
Varset grouppattern_subject_object_maximal_varset, grouppattern_predicate_maximal_varset;
std::vector<int> pattern_blockid;
void addOnePattern(Pattern _pattern);
void addOneGroupUnion();
void addOneUnion();
GroupPattern& getLastUnion();
void addOneOptionalOrMinus(char _type);
GroupPattern& getLastOptionalOrMinus();
void addOneOptional(int _type);
GroupPattern& getLastOptional();
void addOneFilterTree();
FilterTreeNode& getLastFilterTree();
void addOneExistsGroupPattern();
GroupPattern& getLastExistsGroupPattern();
void addOneFilter();
FilterTree& getLastFilter();
void addOneBind();
Bind& getLastBind();
void getVarset();
bool checkOnlyUnionOptionalFilterNoExists();
std::pair<Varset, Varset> checkOptionalGroupPatternVarsAndSafeFilter(Varset occur , Varset ban, bool &check_condition);
std::pair<Varset, Varset> checkNoMinusAndOptionalVarAndSafeFilter(Varset occur_varset, Varset ban_varset, bool &check_condition);
void initPatternBlockid();
int getRootPatternBlockID(int x);
@ -89,78 +57,115 @@ class QueryTree
void print(int dep);
};
class GroupPattern::GroupPatternUnions
class GroupPattern::Pattern
{
public:
std::vector<GroupPattern> grouppattern_vec;
int lastpattern;
GroupPatternUnions(int _lastpattern):
lastpattern(_lastpattern){}
};
class GroupPattern::OptionalOrMinusGroupPattern
{
public:
GroupPattern grouppattern;
int lastpattern, lastunions;
char type;
OptionalOrMinusGroupPattern(int _lastpattern, int _lastunions, char _type):
grouppattern(GroupPattern()), lastpattern(_lastpattern), lastunions(_lastunions), type(_type){}
};
class GroupPattern::FilterTreeNode
{
public:
enum FilterOperationType
class Element
{
None_type, Or_type, And_type, Equal_type, NotEqual_type, Less_type, LessOrEqual_type, Greater_type, GreaterOrEqual_type,
Plus_type, Minus_type, Mul_type, Div_type, Not_type, UnaryPlus_type, UnaryMinus_type, Literal_type, Variable_type, IRI_type,
Function_type, ArgumentList_type,Builtin_str_type, Builtin_lang_type, Builtin_langmatches_type, Builtin_datatype_type, Builtin_bound_type,
Builtin_sameterm_type,Builtin_isiri_type, Builtin_isblank_type, Builtin_isliteral_type, Builtin_regex_type, Builtin_in_type, Builtin_exists_type
public:
//enum Type { Variable, Literal, IRI };
//enum SubType { None, CustomLanguage, CustomType };
//Type type;
//SubType subType;
//std::string subTypeValue;
std::string value;
Element(){}
Element(const std::string &_value):value(_value){}
};
FilterOperationType oper_type;
Element subject, predicate, object;
Varset varset, subject_object_varset;
int blockid;
class FilterTreeChild;
std::vector<FilterTreeChild> child;
int exists_grouppattern_id;
FilterTreeNode():
oper_type(None_type), exists_grouppattern_id(-1){}
void getVarset(Varset &varset);
void print(std::vector<GroupPattern> &exist_grouppatterns, int dep);
Pattern():blockid(-1){}
Pattern(const Element _subject, const Element _predicate, const Element _object):
subject(_subject), predicate(_predicate), object(_object), blockid(-1){}
};
class GroupPattern::FilterTreeNode::FilterTreeChild
class GroupPattern::FilterTree
{
public:
enum FilterTreeChildNodeType {None_type, Tree_type, String_type};
FilterTreeChildNodeType node_type;
class FilterTreeNode
{
public:
enum FilterOperationType
{
None_type, Or_type, And_type, Equal_type, NotEqual_type, Less_type, LessOrEqual_type, Greater_type, GreaterOrEqual_type,
Plus_type, Minus_type, Mul_type, Div_type, Not_type, UnaryPlus_type, UnaryMinus_type, Literal_type, Variable_type, IRI_type,
Function_type, ArgumentList_type, Builtin_str_type, Builtin_lang_type, Builtin_langmatches_type, Builtin_datatype_type, Builtin_bound_type,
Builtin_sameterm_type, Builtin_isiri_type, Builtin_isuri_type, Builtin_isblank_type, Builtin_isliteral_type, Builtin_isnumeric_type,
Builtin_regex_type, Builtin_in_type, Builtin_exists_type
};
FilterOperationType oper_type;
FilterTreeNode node;
std::string arg;
int pos;
bool isel;
class FilterTreeChild;
FilterTreeChild():
node_type(None_type), pos(-1), isel(true){}
};
std::vector<FilterTreeChild> child;
FilterTreeNode():oper_type(None_type){}
void getVarset(Varset &varset);
void mapVarPos2Varset(Varset &varset, Varset &entity_literal_varset);
void print(int dep);
};
class GroupPattern::FilterTreeRoot
{
public:
FilterTreeNode root;
Varset varset;
bool done;
FilterTreeRoot():done(false){}
FilterTree():done(false){}
};
// One child slot of a FilterTreeNode. node_type tells which payload is in use:
// Tree_type for the nested `node`, String_type for `str`; None_type is the
// default-constructed state.
class GroupPattern::FilterTree::FilterTreeNode::FilterTreeChild
{
public:
enum FilterTreeChildNodeType {None_type, Tree_type, String_type};
FilterTreeChildNodeType node_type;
FilterTreeNode node;
std::string str;
// NOTE(review): the meaning of pos and isel is not visible in this span;
// presumably they are filled in by FilterTreeNode::mapVarPos2Varset — confirm.
int pos;
bool isel;
// Defaults: no payload, pos = -1, isel = true.
FilterTreeChild():node_type(None_type), pos(-1), isel(true){}
};
// A BIND clause: `str` is bound to the variable `var`
// (GroupPattern::print renders it as "BIND(str AS var)").
class GroupPattern::Bind
{
public:
Bind(){}
Bind(const std::string &_str, const std::string &_var):str(_str), var(_var){}
std::string str, var;
// Not set by the constructors; GroupPattern::getVarset() assigns Varset(var) to it.
Varset varset;
};
// One element of a GroupPattern's sub_grouppattern list. `type` selects which
// of the payload members is meaningful; all payload members are always present
// (this is a plain class, not a union).
class GroupPattern::SubGroupPattern
{
public:
enum SubGroupPatternType{Pattern_type, Union_type, Optional_type, Minus_type, Filter_type, Bind_type};
SubGroupPatternType type;
Pattern pattern;
std::vector<GroupPattern> unions;
// Nested group pattern used by both Optional_type and Minus_type entries.
GroupPattern optional;
FilterTree filter;
Bind bind;
SubGroupPattern(SubGroupPatternType _type):type(_type){}
// Copy constructor: the payload members are default-constructed first and then
// assigned from _sgp member by member.
SubGroupPattern(const SubGroupPattern& _sgp):type(_sgp.type)
{
pattern = _sgp.pattern;
unions = _sgp.unions;
optional = _sgp.optional;
filter = _sgp.filter;
bind = _sgp.bind;
}
};
class ProjectionVar
{
public:
enum AggregateType{None_type, Count_type, Sum_type, Min_type, Max_type, Avg_type};
enum AggregateType{None_type, Count_type, Sum_type, Min_type, Max_type, Avg_type};
AggregateType aggregate_type;
std::string var, aggregate_var;
@ -174,19 +179,20 @@ class QueryTree
public:
std::string var;
bool descending;
Order(std::string &_var, bool _descending):
var(_var), descending(_descending){}
Order(std::string &_var, bool _descending):var(_var), descending(_descending){}
};
enum UpdateType {Not_Update, Insert_Data, Delete_Data, Delete_Where, Insert_Clause, Delete_Clause, Modify_Clause};
enum UpdateType {Not_Update, Insert_Data, Delete_Data, Delete_Where, Insert_Clause, Delete_Clause, Modify_Clause};
private:
QueryForm query_form;
ProjectionModifier projection_modifier;
std::vector<ProjectionVar> projection;
Varset projection_useful_varset;
bool projection_asterisk;
std::vector<Order> order;
Varset group_by;
std::vector<Order> order_by;
int offset, limit;
GroupPattern grouppattern;
@ -199,22 +205,31 @@ class QueryTree
GroupPattern insert_patterns, delete_patterns;
public:
QueryTree():
query_form(Select_Query), projection_modifier(Modifier_None), projection_asterisk(false), offset(0), limit(-1), update_type(Not_Update){}
void setQueryForm(QueryForm _queryform);
QueryForm getQueryForm();
void setProjectionModifier(ProjectionModifier _projection_modifier);
ProjectionModifier getProjectionModifier();
void addProjectionVar();
ProjectionVar& getLastProjectionVar();
std::vector<ProjectionVar>& getProjection();
std::vector<std::string> getProjectionVar();
void addProjectionUsefulVar(std::string &_var);
Varset& getProjectionUsefulVar();
Varset getProjectionVarset();
Varset getResultProjectionVarset();
void setProjectionAsterisk();
bool checkProjectionAsterisk();
bool checkSelectCompatibility();
bool atLeastOneAggregateFunction();
void addOrder(std::string &_var, bool _descending);
std::vector<Order>& getOrder();
void addGroupByVar(std::string &_var);
Varset& getGroupByVarset();
void addOrderVar(std::string &_var, bool _descending);
std::vector<Order>& getOrderVarVector();
Varset getOrderByVarset();
void setOffset(int _offset);
int getOffset();
void setLimit(int _limit);
@ -228,6 +243,8 @@ class QueryTree
GroupPattern& getDeletePatterns();
bool checkWellDesigned();
bool checkAtLeastOneAggregateFunction();
bool checkSelectAggregateFunctionGroupByValid();
void print();
};

View File

@ -23,8 +23,8 @@ void ResultFilter::changeResultHashTable(SPARQLquery &query, int value)
for (int i = 0; i < query.getBasicQueryNum(); i++)
{
BasicQuery &basicquery = query.getBasicQuery(i);
vector<unsigned*> &basicquery_result =basicquery.getResultList();
int result_num = basicquery_result.size();
vector<unsigned*> &basicquery_result = basicquery.getResultList();
unsigned result_num = basicquery_result.size();
int var_num = basicquery.getVarNum();
for (int j = 0; j < var_num; j++)
@ -37,7 +37,7 @@ void ResultFilter::changeResultHashTable(SPARQLquery &query, int value)
refer.push_back(&this->result_filter[basicquery.getVarName(j)].second);
}
for (int j = 0; j < result_num; j++)
for (unsigned j = 0; j < result_num; j++)
for (int k = 0; k < var_num; k++)
{
(*refer[k])[this->hash(basicquery_result[j][k])] += value;
@ -45,7 +45,7 @@ void ResultFilter::changeResultHashTable(SPARQLquery &query, int value)
}
long tv_end = Util::get_cur_time();
printf("after ResultFilter::change, used %d ms.\n", tv_end - tv_begin);
printf("after ResultFilter::change, used %ld ms.\n", tv_end - tv_begin);
}
void ResultFilter::candFilterWithResultHashTable(BasicQuery &basicquery)
@ -64,9 +64,9 @@ void ResultFilter::candFilterWithResultHashTable(BasicQuery &basicquery)
printf("before candFilter, size = %d\n", idlist.size());
long tv_begin = Util::get_cur_time();
for (int k = 0; k < idlist.size(); k++)
for (unsigned k = 0; k < idlist.size(); k++)
{
int id = idlist.getID(k);
unsigned id = idlist.getID(k);
if (col[hash(id)] > 0)
{
new_idlist.addID(id);
@ -75,8 +75,7 @@ void ResultFilter::candFilterWithResultHashTable(BasicQuery &basicquery)
idlist = new_idlist;
long tv_end = Util::get_cur_time();
printf("after candFilter, size = %d, used %d ms.\n", idlist.size(), tv_end - tv_begin);
printf("after candFilter, size = %d, used %ld ms.\n", idlist.size(), tv_end - tv_begin);
}
}
}

View File

@ -13,13 +13,11 @@
#include "SPARQLquery.h"
#include "../Util/Util.h"
//TODO: adjust the type to unsigned
class ResultFilter
{
private:
static const int MAX_SIZE = 1048576;
inline int hash(unsigned x)
static const unsigned MAX_SIZE = 1048576;
inline unsigned hash(unsigned x)
{
x = (x + 0x7ed55d16) + (x << 12);
x = (x ^ 0xc761c23c) ^ (x >> 19);

View File

@ -68,7 +68,7 @@ ResultSet::checkUseStream()
}
void
ResultSet::setOutputOffsetLimit(unsigned _output_offset, unsigned _output_limit)
ResultSet::setOutputOffsetLimit(int _output_offset, int _output_limit)
{
this->output_offset = _output_offset;
this->output_limit = _output_limit;
@ -89,9 +89,9 @@ ResultSet::setVar(const vector<string> & _var_names)
string
ResultSet::to_str()
{
unsigned ans_num = max((long long)this->ansNum - this->output_offset, (long long)0);
long long ans_num = max((long long)this->ansNum - this->output_offset, 0LL);
if (this->output_limit != -1)
ans_num = min(ans_num, this->output_limit);
ans_num = min(ans_num, (long long)this->output_limit);
if(ans_num == 0)
{
return "[empty result]\n";
@ -110,8 +110,8 @@ ResultSet::to_str()
if (this->useStream)
this->resetStream();
const Bstr* bp;
for(unsigned i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
const Bstr* bp = NULL;
for(long long i = (!this->useStream ? this->output_offset : 0LL); i < this->ansNum; i++)
{
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;
@ -160,8 +160,8 @@ ResultSet::to_JSON()
if (this->useStream)
this->resetStream();
const Bstr* bp;
for(unsigned i = (!this->useStream ? this->output_offset : 0); i < this->ansNum; i++)
const Bstr* bp = NULL;
for(long long i = (!this->useStream ? this->output_offset : 0LL); i < this->ansNum; i++)
{
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;
@ -199,14 +199,14 @@ ResultSet::to_JSON()
_buf << "\"" + this->var_name[j].substr(1) + "\": { ";
_buf << "\"type\": \"" + ans_type + "\", \"value\": \"" + Util::node2string(ans_str.c_str()) + "\" }";
}
else if (ans_str[0] == '"' && ans_str.find("\"^^<") == -1 && ans_str[ans_str.length() - 1] != '>' )
else if (ans_str[0] == '"' && ans_str.find("\"^^<") == string::npos && ans_str[ans_str.length() - 1] != '>' )
{
ans_type = "literal";
ans_str = ans_str.substr(1, ans_str.rfind('"') - 1);
_buf << "\"" + this->var_name[j].substr(1) + "\": { ";
_buf << "\"type\": \"" + ans_type + "\", \"value\": \"" + Util::node2string(ans_str.c_str()) + "\" }";
}
else if (ans_str[0] == '"' && ans_str.find("\"^^<") != -1 && ans_str[ans_str.length() - 1] == '>' )
else if (ans_str[0] == '"' && ans_str.find("\"^^<") != string::npos && ans_str[ans_str.length() - 1] == '>' )
{
ans_type = "typed-literal";
int pos = ans_str.find("\"^^<");
@ -234,9 +234,9 @@ ResultSet::output(FILE* _fp)
{
if (this->useStream)
{
unsigned ans_num = max((long long)this->ansNum - this->output_offset, (long long)0);
long long ans_num = max((long long)this->ansNum - this->output_offset, 0LL);
if (this->output_limit != -1)
ans_num = min(ans_num, this->output_limit);
ans_num = min(ans_num, (long long)this->output_limit);
if(ans_num == 0)
{
fprintf(_fp, "[empty result]\n");
@ -251,7 +251,7 @@ ResultSet::output(FILE* _fp)
fprintf(_fp, "\n");
const Bstr* bp;
for(unsigned i = 0; i < this->ansNum; i++)
for(long long i = 0; i < this->ansNum; i++)
{
if (this->output_limit != -1 && i == this->output_offset + this->output_limit)
break;

View File

@ -20,10 +20,10 @@ private:
bool useStream;
public:
int select_var_num;
std::string* var_name;
std::string* var_name;
unsigned ansNum;
std::string** answer;
unsigned output_offset, output_limit;
int output_offset, output_limit;
ResultSet();
~ResultSet();
@ -31,7 +31,7 @@ public:
void setUseStream();
bool checkUseStream();
void setOutputOffsetLimit(unsigned _output_offset, unsigned _output_limit);
void setOutputOffsetLimit(int _output_offset, int _output_limit);
//convert to binary string
//Bstr* to_bstr();

View File

@ -2,7 +2,7 @@
# Filename: Varset.cpp
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Last Modified: 2017-03-10
# Description: implement functions in Varset.h
=============================================================================*/
@ -10,100 +10,138 @@
using namespace std;
// Construct a variable set that initially holds the single variable _var.
Varset::Varset(string & _var)
Varset::Varset(const string &_var)
{
addVar(_var);
this->addVar(_var);
}
// Construct a variable set from a list of names; each name goes through
// addVar, which skips names that are already present.
Varset::Varset(vector<string> & _varset)
Varset::Varset(const vector<string> &_vars)
{
for (int i = 0; i < (int)_varset.size(); i++)
addVar(_varset[i]);
for (int i = 0; i < (int)_vars.size(); i++)
this->addVar(_vars[i]);
}
bool Varset::findVar(string& _var)
bool Varset::empty() const
{
if ((int)this->varset.size() == 0) return false;
vector<string>::iterator i = find(this->varset.begin(), this->varset.end(), _var);
return (i != this->varset.end());
return this->vars.empty();
}
void Varset::addVar(string& _var)
int Varset::getVarsetSize() const
{
if (!this->findVar(_var))
this->varset.push_back(_var);
return this->vars.size();
}
Varset Varset::operator + (Varset& x)
bool Varset::findVar(const string &_var) const
{
Varset r;
for (int i = 0; i < (int)this->varset.size(); i++)
r.addVar(this->varset[i]);
for (int i = 0; i < (int)x.varset.size(); i++)
r.addVar(x.varset[i]);
return r;
};
if (this->vars.empty())
return false;
Varset Varset::operator * (Varset& x)
{
Varset r;
for (int i = 0; i < (int)this->varset.size(); i++)
if (x.findVar(this->varset[i]))
r.addVar(this->varset[i]);
return r;
}
for (int i = 0; i < (int)this->vars.size(); i++)
if (this->vars[i] == _var) return true;
Varset Varset::operator - (Varset& x)
{
Varset r;
for (int i = 0; i < (int)this->varset.size(); i++)
if (!x.findVar(this->varset[i]))
r.addVar(this->varset[i]);
return r;
}
bool Varset::operator ==(Varset &x)
{
if ((int)this->varset.size() != (int)x.varset.size()) return false;
for (int i = 0; i < (int)this->varset.size(); i++)
if (!x.findVar(this->varset[i])) return false;
return true;
}
bool Varset::hasCommonVar(Varset &x)
{
for (int i = 0; i < (int)this->varset.size(); i++)
if (x.findVar(this->varset[i])) return true;
return false;
}
bool Varset::belongTo(Varset &x)
void Varset::addVar(const string &_var)
{
for (int i = 0; i < (int)this->varset.size(); i++)
if (!x.findVar(this->varset[i])) return false;
return true;
if (!this->findVar(_var))
this->vars.push_back(_var);
}
vector <int> Varset::mapTo(Varset& x)
Varset Varset::operator + (const Varset &_varset) const
{
vector<int> r;
for (int i = 0; i < (int)this->varset.size(); i++)
{
r.push_back(-1);
for (int j = 0; j < (int)x.varset.size(); j++)
if (this->varset[i] == x.varset[j])
r[i] = j;
}
Varset r(*this);
for (int i = 0; i < (int)_varset.vars.size(); i++)
r.addVar(_varset.vars[i]);
return r;
};
// In-place union: append every variable of _varset to this set (addVar drops
// names that are already present) and return this set to allow chaining.
Varset& Varset::operator += (const Varset &_varset)
{
    int pos = 0;
    while (pos < (int)_varset.vars.size())
    {
        this->addVar(_varset.vars[pos]);
        pos++;
    }
    return *this;
}
// Intersection: build a new set from the variables of this set that also
// occur in _varset, keeping this set's order.
Varset Varset::operator * (const Varset &_varset) const
{
    Varset common;
    for (std::vector<std::string>::const_iterator it = this->vars.begin(); it != this->vars.end(); ++it)
    {
        if (!_varset.findVar(*it))
            continue;
        common.addVar(*it);
    }
    return common;
}
void Varset::print()
// Difference: build a new set from the variables of this set that do NOT
// occur in _varset, keeping this set's order.
Varset Varset::operator - (const Varset &_varset) const
{
    Varset remaining;
    std::vector<std::string>::const_iterator it = this->vars.begin();
    while (it != this->vars.end())
    {
        const bool excluded = _varset.findVar(*it);
        if (!excluded)
            remaining.addVar(*it);
        ++it;
    }
    return remaining;
}
// Order-insensitive equality: both sets must have the same number of
// variables and every variable of this set must be found in _varset.
bool Varset::operator == (const Varset &_varset) const
{
    const int own_count = (int)this->vars.size();
    const int other_count = (int)_varset.vars.size();
    if (own_count != other_count)
    {
        return false;
    }

    for (int idx = 0; idx < own_count; ++idx)
    {
        const bool shared = _varset.findVar(this->vars[idx]);
        if (!shared)
        {
            return false;
        }
    }
    return true;
}
// Returns true as soon as one variable of this set is also found in _varset;
// false when the two sets share nothing (including when this set is empty).
bool Varset::hasCommonVar(const Varset &_varset) const
{
    bool overlap = false;
    for (std::vector<std::string>::const_iterator it = this->vars.begin(); !overlap && it != this->vars.end(); ++it)
    {
        overlap = _varset.findVar(*it);
    }
    return overlap;
}
// Subset test: true when every variable of this set is found in _varset
// (so an empty set belongs to any set); false at the first missing variable.
bool Varset::belongTo(const Varset &_varset) const
{
    std::vector<std::string>::const_iterator it = this->vars.begin();
    while (it != this->vars.end())
    {
        if (_varset.findVar(*it))
        {
            ++it;
            continue;
        }
        return false;
    }
    return true;
}
// For each variable of this set (by position), report the position of the
// same variable inside _varset, or -1 when _varset does not contain it.
// The inner scan never stops early, so if _varset.vars held the same name
// more than once the last matching position is the one reported.
vector<int> Varset::mapTo(const Varset &_varset) const
{
    // Start with "not found" everywhere, then fill in the matches.
    vector<int> positions(this->vars.size(), -1);

    for (int src = 0; src < (int)this->vars.size(); src++)
    {
        const std::string &name = this->vars[src];
        for (int dst = 0; dst < (int)_varset.vars.size(); dst++)
        {
            if (name == _varset.vars[dst])
            {
                positions[src] = dst;
            }
        }
    }

    return positions;
}
// Debug helper: writes "Varset: " followed by every variable name (each
// followed by a single space) and a trailing newline to stdout via printf.
void Varset::print() const
{
printf("Varset: ");
for (int i = 0; i < (int)this->varset.size(); i++)
for (int i = 0; i < (int)this->vars.size(); i++)
{
printf("%s ", this->varset[i].c_str());
printf("%s ", this->vars[i].c_str());
}
printf("\n");
}

View File

@ -2,7 +2,7 @@
# Filename: Varset.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-03-02 20:35
# Last Modified: 2017-03-10
# Description:
=============================================================================*/
@ -14,26 +14,30 @@
// An insertion-ordered collection of variable names. addVar() refuses names
// that are already present, so the collection behaves like a set as long as
// the public vector is only modified through addVar().
class Varset
{
public:
// Underlying storage of the variable names, in insertion order.
std::vector <std::string> varset;
std::vector<std::string> vars;
Varset(){};
Varset(std::string & _var);
Varset(std::vector<std::string> & _varset);
// Build a set from one name, or from a list of names (duplicates skipped).
Varset(const std::string &_var);
Varset(const std::vector<std::string> &_vars);
bool findVar(std::string& _var);
void addVar(std::string& _var);
// Size queries.
bool empty() const;
int getVarsetSize() const;
// Membership test, and insertion that skips names already present.
bool findVar(const std::string &_var) const;
void addVar(const std::string &_var);
Varset operator + (Varset& x);
Varset operator * (Varset& x);
Varset operator - (Varset& x);
bool operator ==(Varset &x);
bool hasCommonVar(Varset &x);
bool belongTo(Varset &x);
// Set algebra: + is union, += is in-place union, * is intersection,
// - is difference (names of this set that are not in _varset).
Varset operator + (const Varset &_varset) const;
Varset& operator += (const Varset &_varset);
Varset operator * (const Varset &_varset) const;
Varset operator - (const Varset &_varset) const;
std::vector <int> mapTo(Varset& x);
// Order-insensitive equality, overlap test, and subset test.
bool operator ==(const Varset &_varset) const;
bool hasCommonVar(const Varset &_varset) const;
bool belongTo(const Varset &_varset) const;
void print();
// mapTo: for each name of this set, its position in _varset or -1.
std::vector<int> mapTo(const Varset &_varset) const;
// Print the names to stdout (debugging aid).
void print() const;
};
#endif // _QUERY_VARSET_H

View File

@ -30,7 +30,7 @@ void StringIndexFile::save(KVstore &kv_store)
return;
}
fwrite(&this->num, sizeof(int), 1, this->index_file);
fwrite(&this->num, sizeof(unsigned), 1, this->index_file);
long offset = 0;
for (unsigned i = 0; i < this->num; i++)
@ -45,7 +45,7 @@ void StringIndexFile::save(KVstore &kv_store)
unsigned length = str.length();
fwrite(&offset, sizeof(long), 1, this->index_file);
fwrite(&length, sizeof(int), 1, this->index_file);
fwrite(&length, sizeof(unsigned), 1, this->index_file);
offset += length;
fwrite(str.c_str(), sizeof(char), length, this->value_file);
}
@ -71,21 +71,19 @@ void StringIndexFile::load()
return;
}
fread(&this->num, sizeof(int), 1, this->index_file);
fread(&this->num, sizeof(unsigned), 1, this->index_file);
this->index_table.resize(this->num);
for (unsigned i = 0; i < this->num; i++)
{
fread(&this->index_table[i].offset, sizeof(long), 1, this->index_file);
fread(&this->index_table[i].length, sizeof(int), 1, this->index_file);
fread(&this->index_table[i].length, sizeof(unsigned), 1, this->index_file);
this->empty_offset = max(this->empty_offset, this->index_table[i].offset + (long)this->index_table[i].length);
}
}
bool StringIndexFile::randomAccess(unsigned id, string *str)
{
//DEBUG: int or unsigned here???
//if (id < 0 || id >= this->num)
if (id >= this->num)
return false;
@ -116,7 +114,7 @@ void StringIndexFile::addRequest(unsigned id, std::string *str)
void StringIndexFile::trySequenceAccess()
{
long max_end = 0;
for (unsigned i = 0; i < this->request.size(); i++)
for (int i = 0; i < (int)this->request.size(); i++)
max_end = max(max_end, this->request[i].offset + long(this->request[i].length));
if (this->type == Entity)
@ -131,7 +129,7 @@ void StringIndexFile::trySequenceAccess()
sort(this->request.begin(), this->request.end());
unsigned pos = 0;
int pos = 0;
fseek(this->value_file, 0, SEEK_SET);
char *block = new char[MAX_BLOCK_SIZE];
long current_block_begin = 0;
@ -212,7 +210,7 @@ void StringIndexFile::trySequenceAccess()
{
cout << "random access." << endl;
for (unsigned i = 0; i < (int)this->request.size(); i++)
for (int i = 0; i < (int)this->request.size(); i++)
this->randomAccess(this->request[i].id, this->request[i].str);
}
this->request.clear();
@ -221,14 +219,16 @@ void StringIndexFile::trySequenceAccess()
void StringIndexFile::change(unsigned id, KVstore &kv_store)
{
if(this->type == Predicate)
{
if (id < 0) return;
}
else
{
if (id == INVALID) return;
}
//DEBUG: for predicate, -1 when invalid
if (id == INVALID) return;
//if(this->type == Predicate)
//{
//if (id < 0) return;
//}
//else
//{
//if (id == INVALID) return;
//}
if (this->num <= id)
{
@ -236,15 +236,15 @@ void StringIndexFile::change(unsigned id, KVstore &kv_store)
{
this->index_table.push_back(IndexInfo());
fseek(this->index_file, sizeof(int) + this->num * (sizeof(long) + sizeof(int)), SEEK_SET);
fseek(this->index_file, sizeof(unsigned) + this->num * (sizeof(long) + sizeof(unsigned)), SEEK_SET);
fwrite(&this->index_table[this->num].offset, sizeof(long), 1, this->index_file);
fwrite(&this->index_table[this->num].length, sizeof(int), 1, this->index_file);
fwrite(&this->index_table[this->num].length, sizeof(unsigned), 1, this->index_file);
this->num++;
}
fseek(this->index_file, 0, SEEK_SET);
fwrite(&this->num, sizeof(int), 1, this->index_file);
fwrite(&this->num, sizeof(unsigned), 1, this->index_file);
}
string str;
@ -259,9 +259,9 @@ void StringIndexFile::change(unsigned id, KVstore &kv_store)
this->index_table[id].length = str.length();
this->empty_offset += this->index_table[id].length;
fseek(this->index_file, sizeof(int) + id * (sizeof(long) + sizeof(int)), SEEK_SET);
fseek(this->index_file, sizeof(unsigned) + id * (sizeof(long) + sizeof(unsigned)), SEEK_SET);
fwrite(&this->index_table[id].offset, sizeof(long), 1, this->index_file);
fwrite(&this->index_table[id].length, sizeof(int), 1, this->index_file);
fwrite(&this->index_table[id].length, sizeof(unsigned), 1, this->index_file);
fseek(this->value_file, this->index_table[id].offset, SEEK_SET);
fwrite(str.c_str(), sizeof(char), this->index_table[id].length, this->value_file);
@ -269,20 +269,22 @@ void StringIndexFile::change(unsigned id, KVstore &kv_store)
// Invalidate the index entry of `id`: reset the in-memory record to a
// default IndexInfo (offset 0, length 0) and overwrite the matching record
// in the index file. Nothing is written to the value file here.
// Ids that are out of range (id >= num) are ignored.
void StringIndexFile::disable(unsigned id)
{
if(this->type == Predicate)
{
// NOTE: id is unsigned, so the `id < 0` half of this test can never be true.
if (id < 0 || id >= this->num) return ;
}
else
{
if (id == INVALID) return;
}
//DEBUG: for predicate, -1 when invalid
// Single range check for every file type; it also guards the index_table access below.
if (id >= this->num) return ;
//if(this->type == Predicate)
//{
//if (id < 0 || id >= this->num) return ;
//}
//else
//{
//if (id == INVALID) return;
//}
this->index_table[id] = IndexInfo();
// The index file starts with the entry count, followed by one
// (long offset, unsigned length) record per id; seek to the record of `id`.
fseek(this->index_file, sizeof(int) + id * (sizeof(long) + sizeof(int)), SEEK_SET);
fseek(this->index_file, sizeof(unsigned) + id * (sizeof(long) + sizeof(unsigned)), SEEK_SET);
fwrite(&this->index_table[id].offset, sizeof(long), 1, this->index_file);
fwrite(&this->index_table[id].length, sizeof(int), 1, this->index_file);
fwrite(&this->index_table[id].length, sizeof(unsigned), 1, this->index_file);
}
//----------------------------------------------------------------------------------------------------------------------------------------------------
@ -344,13 +346,15 @@ bool StringIndex::randomAccess(unsigned id, string *str, bool is_entity_or_liter
{
return true;
}
else
{
//else
//{
//cout<<"check: not found in string buffer - "<<id<<endl;
}
//}
if (id < Util::LITERAL_FIRST_ID)
{
return this->entity.randomAccess(id, str);
}
else
{
//cout<<"check: to search literal "<<id-Util::LITERAL_FIRST_ID<<endl;

View File

@ -12,8 +12,6 @@
#include "../KVstore/KVstore.h"
#include "../Util/Util.h"
//TODO: adjust the type
class StringIndexFile
{
public:
@ -29,9 +27,9 @@ class StringIndexFile
class IndexInfo
{
public:
IndexInfo(long _offset = 0, unsigned _length = 0):offset(_offset), length(_length){}
long offset;
unsigned length;
IndexInfo(long _offset = 0, unsigned _length = 0):offset(_offset), length(_length){}
};
std::vector<IndexInfo> index_table;
@ -55,7 +53,7 @@ class StringIndexFile
std::vector<AccessRequest> request;
public:
StringIndexFile(StringIndexFileType _type, std::string _dir, unsigned _num):type(_type), num(_num), empty_offset(0), index_file(NULL), value_file(NULL), buffer_size(0), buffer(NULL)
StringIndexFile(StringIndexFileType _type, std::string _dir, unsigned _num):type(_type), num(_num), empty_offset(0), index_file(NULL), value_file(NULL), buffer_size(0), buffer(NULL)
{
if (this->type == Entity)
this->loc = _dir + "/entity_";
@ -105,7 +103,7 @@ class StringIndex
Buffer* literal_buffer;
unsigned literal_buffer_size;
public:
StringIndex(std::string _dir, unsigned _entity_num = 0, unsigned _literal_num = 0, unsigned _predicate_num = 0) :
StringIndex(std::string _dir, unsigned _entity_num = 0, unsigned _literal_num = 0, unsigned _predicate_num = 0):
entity(StringIndexFile::Entity, _dir, _entity_num), literal(StringIndexFile::Literal, _dir, _literal_num), predicate(StringIndexFile::Predicate, _dir, _predicate_num){}
void setBuffer(Buffer* _ebuf, Buffer* _lbuf)

View File

@ -24,6 +24,7 @@ debug_level = simple
# This option sets the directory in which your databases are placed (the directory will be created if it does not exist)
# NOTICE: by default, this is the root of the gStore system directory
# db_home = .
#db_home = /home/ssd
# user_home = .
# Which suffix do you want to add to your database name? Set it here