change memory-disk swap to all-memory in VSTree::LRUCache;

deal with extra () in sparql;
support long string with multiple lines in sparql;
support str(IRI) in sparql;
This commit is contained in:
bookug 2017-02-26 17:18:21 +08:00
parent 74598bf66e
commit e3b09c6529
16 changed files with 1948 additions and 492 deletions

View File

@ -2328,8 +2328,17 @@ Database::remove(const TripleWithObjType* _triples, int _triple_num)
{
string sub = _triples[i].getSubject();
subid = this->kvstore->getIDByEntity(sub);
if(subid == -1)
{
continue;
}
string pre = _triples[i].getPredicate();
preid = this->kvstore->getIDByPredicate(pre);
if(preid == -1)
{
continue;
}
is_obj_entity = _triples[i].isObjEntity();
string obj = _triples[i].getObject();
@ -2341,11 +2350,15 @@ Database::remove(const TripleWithObjType* _triples, int _triple_num)
{
objid = this->kvstore->getIDByLiteral(obj);
}
if (subid == -1 || preid == -1 || objid == -1)
if(objid == -1)
{
continue;
}
//if (subid == -1 || preid == -1 || objid == -1)
//{
//continue;
//}
bool _exist_triple = this->exist_triple(subid, preid, objid);
if (!_exist_triple)
{

View File

@ -480,6 +480,7 @@ Join::pre_var_handler()
//we need to copy only the selected ones in copyToResult
int size = valid_ans.size();
//BETTER:only add pre vars which are selected or linked with satellite
if (size > 0)
{
//if(!is_selected)

View File

@ -68,7 +68,14 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
int selected_pre_var_num = (*iter)->getSelectedPreVarNum();
int selected_var_num = (*iter)->getSelectVarNum();
if ((*iter)->getTripleNum() == 1 && pre_varNum == 1)
//NOTICE: special case - query vertices only connected via same variables
//all constant triples will be viewed as unconnected, if a triple has no variable,
//then this triple is a BGP(no other triples in this BGP)
if(total_num == 0 && pre_varNum == 0)
{
this->method = 5;
}
else if ((*iter)->getTripleNum() == 1 && pre_varNum == 1)
{
this->method = 4;
}
@ -122,6 +129,9 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
case 4:
this->handler4(*iter, result_list);
break;
case 5:
this->handler5(*iter, result_list);
break;
default:
cout << "not support this method" << endl;
@ -166,6 +176,7 @@ Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list, ResultFilter* _r
long tv_handle = Util::get_cur_time();
int varNum = _bq->getVarNum(); //the num of vars needing to be joined
//TODO:parallel by pthread
for (int i = 0; i < varNum; ++i)
{
if (_bq->if_need_retrieve(i) == false)
@ -186,7 +197,7 @@ Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list, ResultFilter* _r
}
}
//TODO:end directly if one is empty!
//BETTER:end directly if one is empty!
long tv_retrieve = Util::get_cur_time();
cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;
@ -457,3 +468,46 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
delete[] id_list;
}
//TODO:if any constants in a query are not found in kvstore, then this BGP should end to speed up the processing
// Special-case handler for a basic query whose single triple is fully constant.
// Resolves subject, predicate and object to ids through the kvstore; if any of
// them is unknown the result list is left empty. Otherwise the object id is
// searched in the (subject, predicate) object list and, on a hit, one
// heap-allocated record {subject id, predicate id, object id} is appended to
// _result_list.
void
Strategy::handler5(BasicQuery* _bq, vector<int*>& _result_list)
{
	cout<<"Special Case: consider constant triple"<<endl;
	Triple const_triple = _bq->getTriple(0);
	_result_list.clear();

	// subject and predicate must both be known, otherwise there is no answer
	int s_id = this->kvstore->getIDByEntity(const_triple.subject);
	if (s_id == -1)
		return;
	int p_id = this->kvstore->getIDByPredicate(const_triple.predicate);
	if (p_id == -1)
		return;

	// the object is looked up as an entity first and as a literal second
	int o_id = this->kvstore->getIDByEntity(const_triple.object);
	if (o_id == -1)
	{
		o_id = this->kvstore->getIDByLiteral(const_triple.object);
		if (o_id == -1)
			return;
	}

	int* obj_ids = NULL;
	int obj_ids_num = 0;
	(this->kvstore)->getobjIDlistBysubIDpreID(s_id, p_id, obj_ids, obj_ids_num);

	bool triple_found = (Util::bsearch_int_uporder(o_id, obj_ids, obj_ids_num) != -1);
	if (triple_found)
	{
		int* row = new int[3];
		row[0] = s_id;
		row[1] = p_id;
		row[2] = o_id;
		_result_list.push_back(row);
	}

	delete[] obj_ids;
}

View File

@ -41,6 +41,7 @@ private:
void handler2(BasicQuery*, vector<int*>&);
void handler3(BasicQuery*, vector<int*>&);
void handler4(BasicQuery*, vector<int*>&);
void handler5(BasicQuery*, vector<int*>&);
//QueryHandler *dispatch;
//void prepare_handler();
};

View File

@ -20,7 +20,7 @@ main(int argc, char * argv[])
#ifdef DEBUG
Util util;
#endif
if(argc == 1) //./gbuild
if(argc < 3) //./gbuild
{
//output help info here
cout << "the usage of gbuild: " << endl;

View File

@ -5,22 +5,61 @@
方式是控制Util/Util.h中OUTPUT_QUERY_RESULT宏的开启
在使用gserver时不能在数据库没有unload时再用gbuild或其他命令修改数据库仅限于C/S模式
将IRC聊天放到gstore文档上freenode #gStore
# 推广
必须建立一个官方网站可以展示下团队、demo需要建立社区/论坛并维护
另外要有桌面应用或者网页应用以可视化的方式操作数据库类似virtuoso和neo4j那种
server 118.89.115.42 gstore-pku.com
自己的网站可以用实验室的服务器gstore网站最好用云服务图个稳定
但用实验室主机,备案时是否更麻烦?得以企业为单位,而且解析是否更麻烦?
gstore网站中的demo应用的主体可以放在实验室主机上至少是gstore数据库应抽离出来但若实验室主机不开外网应如何而配置代理
demo应用全部外链具体服务放在实验室公开的主机上通过ip:port连接
---
# 并行策略- 线程控制模块
不宜使用并行框架,可使用C的pthread、boost的thread库,或者启用C++11(gcc编译器需要高于4.8.1才能完整支持C++11)
但boost的安装更麻烦而且没有C++11安全所以综合考虑还是选择C++11
而且现在C++14标准都已经出来11标准也已经得到比较广的应用了而像openmp这种高级并行框架也是不合适的因为不方便统筹规划每一线程
但如果只在 Linux 下编程,不用考虑平台兼容性,那么 C++11 的 thread 的附加值几乎为零(我认为它过度设计了,同时损失了一些功能),你自己把 Pthreads 封装成一个简单好用的线程库只需要两三百行代码,用十几个 pthreads 函数实现 4 个 class(thread、mutex、lock_guard、condvar),而且 RAII 的好处也享受到了。
Linux上最好还是使用POSIX标准的pthread库由glibc提供
编译时采用-pthread选项
目前B+树本身还无法并行,哪怕只有查询,也可能因为内外存交换导致并行出错。
可以让陈语嫣先做预加载并进行两表分块并行的处理之后再使B+树支持多线程
目前vstree没有内外存交换只读时完全可以支持多线程
但实际应用中必然存在着读写可能和写写可能,后期必须要引入事务
对于读写问题,应该使用读写锁,采用强读者的模式,即优先将锁分配给读者
另一种方式是不用锁,为索引建立溢出页,这是一种比较好的处理并行的方式
# TODO
删光了树叶应该在包括kv和vs等
清空后LRUcache::flush 没法打开文件
join缓存
彭鹏数据集的两个bug
成组插删还没完全支持和测试需要修改KVstore.cpp
要在单机支持到10亿triple最坏情况下最多有20亿entity和20亿literal目前的编号方式是不行的
可考虑将entity和literal彻底分开也可以将int扩展为unsigned但这样不好判断无效ID
gstore后续需要开发的地方
数据库连接池 保持连接而不是每次都用socket
事务操作
安全备份 数据库的多版本备份,动态删除
多领域多库解决方案。
分页查询
任务分配:
---
JSON格式传输(陈佳棋)
数据库连接池 保持连接而不是每次都用socket(http?)
分页查询(先将整个查询结果缓存,需要考虑内外存交换)
陈佳棋的任务s和p对应同一个实体应该先重命名再过滤。还有一种情况是两者只是名字相同实则并无关系
高阶谓词逻辑 <type> <type> <predicate>
---
陈佳棋找人负责:
模仿海量图大作业基于gStore开发一个社交应用要求可以批量导入且实时查询
@ -28,6 +67,7 @@ JSON格式传输(陈佳棋)
多查询优化
---
王力博:
安全备份 数据库的多版本备份,动态删除
gserver for multiple users, not load db for each connection, but get the same db pointer
assign the pointer to the user who require this db, and ensure that each db is just kept once in memory
有什么办法去检测一个db是否存在呢(首先要支持导入多个数据库)
@ -40,22 +80,33 @@ or the reason maybe gserver still dealing with the previous job, then a new conn
彭鹏师兄的数据集bug
优化谓词查询谓词少而entity/literal多所以先过滤得到谓词的解是一种可以考虑的策略
以谓词为节点以s/o为信息来过滤得到谓词的结果
需要一个查询计划进行选择,可能有些?p应该先做有些?s/?o应该先做
---
陈语嫣,李荆
陈语嫣:
网站设计以及和外包方的联系
之后用pthread将join_two函数内部的拼接并行化先实现一个最基本的版本即可
---
张雨的任务:单起点单终点的正则表达式路径问题,如果是多起点多终点?
下学期的任务:提取相关联的几个方向写论文
将IRC聊天放到gstore文档上freenode #gStore
---
WARN:B+树删除时,向旁边兄弟借或者合并,要注意兄弟的定义,应该是同一父节点才对!
考虑使用 sigmod2016 那篇图同构的论文方法实现一套join
但那个是基于路径的BFS和索引连接的思想值得借用可作为另外一套join方法
@hulin
叶子节点是否也可以先过滤先join或者说非核心节点sparql查询图中算度数时是否不要考虑常量
新的方法基于谓词的频率动态调整顺序但中间结果的保留是另一个问题是用中间表还是索引连接是否需要multiJoin
另外超级点和超级边的概念也需要被提出
---
李荆的任务:
考虑出入度数编码为1的个数应该不用在编码的邻居信息中能够得到体现。
第二步编码应该更长,点和边对应着放在一起。按出入边分区,一步点,二步边分区和二步点。
对一个节点保留其最长链信息没啥用,因为数据图基本是连通的,最长链就是图的最长链。
多步编码不应分开,而应在编码逐一扩展,第二步可以依旧保留详细信息,最好用更长编码,因为信息更多。
可能有相同的谓词对应多个邻居,同样的谓词只要保留一个即可,不同邻居可以重合。
第三步可以只记谓词,第四步可以只记边的出入向。
vstree并行过滤
---
实现其他的join思路比如基于过滤效果
pthread写多线程
如何在preFilter和join的开销之间做平衡
preFilter中的限制条件是否过于严格
@ -68,18 +119,7 @@ for virtuoso, better to reset each time and the given configure file(need to res
load过程先导入满足内存的内容或者先来几轮搜索但不输出结果避免开头的查询要读磁盘。vstree直接全导入内存
先完成合并测试再测lubm500M和bsbm500M -- 90 server
两表拼接时多线程分块join
jemalloc??
vstree并行分块
@lijing
考虑出入度数编码为1的个数应该不用在编码的邻居信息中能够得到体现。
第二步编码应该更长,点和边对应着放在一起。按出入边分区,一步点,二步边分区和二步点。
对一个节点保留其最长链信息没啥用,因为数据图基本是连通的,最长链就是图的最长链。
多步编码不应分开,而应在编码逐一扩展,第二步可以依旧保留详细信息,最好用更长编码,因为信息更多。
可能有相同的谓词对应多个邻居,同样的谓词只要保留一个即可,不同邻居可以重合。
第三步可以只记谓词,第四步可以只记边的出入向。
各版本对比的表格中应加一列几何平均数,现实中大多数查询是简单查询,最好还有一个平均数,对应着把数据做归一化后求和
dbpedia q6现在应该统计并对比结果了包含在测试结果中

View File

@ -306,6 +306,46 @@ GeneralEvaluation::FilterEvaluationMultitypeValue::EffectiveBooleanValue
}
// Returns true only when this value has datatype `literal` and its str_value
// is at least two characters long, beginning and ending with a double quote.
bool GeneralEvaluation::FilterEvaluationMultitypeValue::isSimpleLiteral()
{
	if (this->datatype != literal)
		return false;

	int len = this->str_value.length();
	if (len < 2)
		return false;

	return this->str_value[0] == '"' && this->str_value[len - 1] == '"';
}
// Brings *this and x to a common datatype: the larger of the two DataType
// enum values. For each operand the stored value is copied into the field
// used by the target type (int_value -> flt_value for decimal/float targets,
// int_value or flt_value -> dbl_value for a double target), then the
// operand's datatype is overwritten with the target. *this is handled first,
// x second.
void GeneralEvaluation::FilterEvaluationMultitypeValue::getSameNumericType (FilterEvaluationMultitypeValue &x)
{
	DataType target = max(this->datatype, x.datatype);

	FilterEvaluationMultitypeValue* operands[2] = { this, &x };
	for (int k = 0; k < 2; ++k)
	{
		FilterEvaluationMultitypeValue &v = *operands[k];

		if (v.datatype == xsd_integer)
		{
			// decimal and float share flt_value as their storage
			if (target == xsd_decimal || target == xsd_float)
				v.flt_value = v.int_value;
			else if (target == xsd_double)
				v.dbl_value = v.int_value;
		}
		else if (v.datatype == xsd_decimal || v.datatype == xsd_float)
		{
			if (target == xsd_double)
				v.dbl_value = v.flt_value;
		}

		v.datatype = target;
	}
}
GeneralEvaluation::FilterEvaluationMultitypeValue
GeneralEvaluation::FilterEvaluationMultitypeValue::operator !()
{
@ -348,35 +388,6 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
return ret_femv;
}
// Brings *this and x to a common datatype: the larger of the two DataType
// enum values. For each operand the stored value is copied into the field
// used by the target type (int_value -> flt_value for decimal/float targets,
// int_value or flt_value -> dbl_value for a double target), then the
// operand's datatype is overwritten with the target. *this is handled first,
// x second.
void GeneralEvaluation::FilterEvaluationMultitypeValue::getSameNumericType (FilterEvaluationMultitypeValue &x)
{
	DataType target = max(this->datatype, x.datatype);

	FilterEvaluationMultitypeValue* operands[2] = { this, &x };
	for (int k = 0; k < 2; ++k)
	{
		FilterEvaluationMultitypeValue &v = *operands[k];

		if (v.datatype == xsd_integer)
		{
			// decimal and float share flt_value as their storage
			if (target == xsd_decimal || target == xsd_float)
				v.flt_value = v.int_value;
			else if (target == xsd_double)
				v.dbl_value = v.int_value;
		}
		else if (v.datatype == xsd_decimal || v.datatype == xsd_float)
		{
			if (target == xsd_double)
				v.dbl_value = v.flt_value;
		}

		v.datatype = target;
	}
}
GeneralEvaluation::FilterEvaluationMultitypeValue
GeneralEvaluation::FilterEvaluationMultitypeValue::operator == (FilterEvaluationMultitypeValue &x)
{
@ -406,26 +417,26 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
return ret_femv;
}
if (this->datatype == simple_literal && x.datatype == simple_literal)
{
ret_femv.bool_value = (this->str_value == x.str_value);
return ret_femv;
}
if (this->isSimpleLiteral() && x.isSimpleLiteral())
{
ret_femv.bool_value = (this->str_value == x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value == x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value == x.str_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value == x.dt_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value == x.dt_value);
return ret_femv;
}
ret_femv.bool_value = (this->term_value == x.term_value);
return ret_femv;
ret_femv.bool_value = (this->term_value == x.term_value);
return ret_femv;
}
GeneralEvaluation::FilterEvaluationMultitypeValue
@ -457,26 +468,26 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
return ret_femv;
}
if (this->datatype == simple_literal && x.datatype == simple_literal)
{
ret_femv.bool_value = (this->str_value != x.str_value);
return ret_femv;
}
if (this->isSimpleLiteral() && x.isSimpleLiteral())
{
ret_femv.bool_value = (this->str_value != x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value != x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value != x.str_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value != x.dt_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value != x.dt_value);
return ret_femv;
}
ret_femv.bool_value = (this->term_value != x.term_value);
return ret_femv;
ret_femv.bool_value = (this->term_value != x.term_value);
return ret_femv;
}
GeneralEvaluation::FilterEvaluationMultitypeValue
@ -508,25 +519,25 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
return ret_femv;
}
if (this->datatype == simple_literal && x.datatype == simple_literal)
{
ret_femv.bool_value = (this->str_value < x.str_value);
return ret_femv;
}
if (this->isSimpleLiteral() && x.isSimpleLiteral())
{
ret_femv.bool_value = (this->str_value < x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value < x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value < x.str_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value < x.dt_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value < x.dt_value);
return ret_femv;
}
return ret_femv;
return ret_femv;
}
GeneralEvaluation::FilterEvaluationMultitypeValue
@ -558,25 +569,25 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
return ret_femv;
}
if (this->datatype == simple_literal && x.datatype == simple_literal)
{
ret_femv.bool_value = (this->str_value <= x.str_value);
return ret_femv;
}
if (this->isSimpleLiteral() && x.isSimpleLiteral())
{
ret_femv.bool_value = (this->str_value <= x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value <= x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value <= x.str_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value <= x.dt_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value <= x.dt_value);
return ret_femv;
}
return ret_femv;
return ret_femv;
}
GeneralEvaluation::FilterEvaluationMultitypeValue
@ -608,25 +619,25 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
return ret_femv;
}
if (this->datatype == simple_literal && x.datatype == simple_literal)
{
ret_femv.bool_value = (this->str_value > x.str_value);
return ret_femv;
}
if (this->isSimpleLiteral() && x.isSimpleLiteral())
{
ret_femv.bool_value = (this->str_value > x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value > x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value > x.str_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value > x.dt_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value > x.dt_value);
return ret_femv;
}
return ret_femv;
return ret_femv;
}
GeneralEvaluation::FilterEvaluationMultitypeValue
@ -658,25 +669,25 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
return ret_femv;
}
if (this->datatype == simple_literal && x.datatype == simple_literal)
{
ret_femv.bool_value = (this->str_value >= x.str_value);
return ret_femv;
}
if (this->isSimpleLiteral() && x.isSimpleLiteral())
{
ret_femv.bool_value = (this->str_value >= x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value >= x.str_value);
return ret_femv;
}
if (this->datatype == xsd_string && x.datatype == xsd_string)
{
ret_femv.bool_value = (this->str_value >= x.str_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value >= x.dt_value);
return ret_femv;
}
if (this->datatype == xsd_datetime && x.datatype == xsd_datetime)
{
ret_femv.bool_value = (this->dt_value >= x.dt_value);
return ret_femv;
}
return ret_femv;
return ret_femv;
}
//----------------------------------------------------------------------------------------------------------------------------------------------------
@ -970,7 +981,7 @@ void GeneralEvaluation::TempResult::mapFilterTree2Varset(QueryTree::GroupPattern
else filter.child[1].isel = false;
}
}
else if (filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type)
else if (filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_str_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type)
{
if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
mapFilterTree2Varset(filter.child[0].node, v, entity_literal_varset);
@ -1024,13 +1035,6 @@ void GeneralEvaluation::TempResult::getFilterString(QueryTree::GroupPattern::Fil
}
else femv.term_value = child.arg;
//' to "
if (femv.term_value[0] == '\'')
{
femv.term_value[0] = '"';
femv.term_value[femv.term_value.rfind('\'')] = '"';
}
if (femv.term_value[0] == '<' && femv.term_value[femv.term_value.length() - 1] == '>')
{
femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::iri;
@ -1039,7 +1043,7 @@ void GeneralEvaluation::TempResult::getFilterString(QueryTree::GroupPattern::Fil
if (femv.term_value[0] == '"' && femv.term_value.find("\"^^<") == -1 && femv.term_value[femv.term_value.length() - 1] != '>' )
{
femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal;
femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
femv.str_value = femv.term_value;
}
@ -1304,7 +1308,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
getFilterString(filter.child[0], x, row, stringindex);
else if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
x = matchFilterTree(filter.child[0].node, filter_exists_grouppattern_resultset_record, row, stringindex);
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::literal || x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::xsd_string)
{
t = x.str_value;
t = t.substr(1, t.rfind('"') - 1);
@ -1316,7 +1320,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
getFilterString(filter.child[1], y, row, stringindex);
else if (filter.child[1].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
y = matchFilterTree(filter.child[1].node, filter_exists_grouppattern_resultset_record, row, stringindex);
if (y.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
if (y.isSimpleLiteral())
{
p = y.str_value;
p = p.substr(1, p.rfind('"') - 1);
@ -1330,7 +1334,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
getFilterString(filter.child[2], z, row, stringindex);
else if (filter.child[2].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
z = matchFilterTree(filter.child[2].node, filter_exists_grouppattern_resultset_record, row, stringindex);
if (z.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
if (z.isSimpleLiteral())
{
f = z.str_value;
f = f.substr(1, f.rfind('"') - 1);
@ -1348,6 +1352,43 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
return ret_femv;
}
if (filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_str_type)
{
FilterEvaluationMultitypeValue x;
if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type)
getFilterString(filter.child[0], x, row, stringindex);
else if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
x = matchFilterTree(filter.child[0].node, filter_exists_grouppattern_resultset_record, row, stringindex);
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::literal)
{
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
ret_femv.str_value = x.str_value.substr(0, x.str_value.rfind('"') + 1);
return ret_femv;
}
else if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::iri)
{
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
ret_femv.str_value = "\"" + x.str_value.substr(1, x.str_value.length() - 2) + "\"";
return ret_femv;
}
else if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::xsd_string)
{
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
ret_femv.str_value = x.str_value;
return ret_femv;
}
else
return ret_femv;
}
if (filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type)
{
FilterEvaluationMultitypeValue x;
@ -1357,9 +1398,9 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
else if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
x = matchFilterTree(filter.child[0].node, filter_exists_grouppattern_resultset_record, row, stringindex);
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::literal)
{
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal;
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
int p = x.str_value.rfind('@');
if (p != -1)
@ -1381,14 +1422,14 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
getFilterString(filter.child[0], x, row, stringindex);
else if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
x = matchFilterTree(filter.child[0].node, filter_exists_grouppattern_resultset_record, row, stringindex);
if (x.datatype != GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
if (!x.isSimpleLiteral())
return ret_femv;
if (filter.child[1].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type)
getFilterString(filter.child[1], y, row, stringindex);
else if (filter.child[1].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
y = matchFilterTree(filter.child[1].node, filter_exists_grouppattern_resultset_record, row, stringindex);
if (y.datatype != GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
if (!y.isSimpleLiteral())
return ret_femv;
ret_femv.bool_value = ((x.str_value == y.str_value) || (x.str_value.length() > 0 && y.str_value == "\"*\""));

View File

@ -45,8 +45,8 @@ class GeneralEvaluation
public:
explicit GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, TNUM* _pre2num, int _limitID_predicate, int _limitID_literal):
vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), pre2num(_pre2num), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), need_output_answer(false)
{
}
{
}
std::vector<std::vector<std::string> > getSPARQLQueryVarset();
@ -114,7 +114,7 @@ class GeneralEvaluation
EffectiveBooleanValue operator >= (const DateTime &x);
};
enum DataType {rdf_term, iri, simple_literal, xsd_string,
enum DataType {rdf_term, iri, literal, xsd_string,
xsd_boolean, xsd_integer, xsd_decimal, xsd_float, xsd_double,
xsd_datetime};
@ -126,10 +126,11 @@ class GeneralEvaluation
double dbl_value;
DateTime dt_value;
bool isSimpleLiteral();
void getSameNumericType (FilterEvaluationMultitypeValue &x);
FilterEvaluationMultitypeValue operator !();
FilterEvaluationMultitypeValue operator || (FilterEvaluationMultitypeValue &x);
FilterEvaluationMultitypeValue operator && (FilterEvaluationMultitypeValue &x);
void getSameNumericType (FilterEvaluationMultitypeValue &x);
FilterEvaluationMultitypeValue operator == (FilterEvaluationMultitypeValue &x);
FilterEvaluationMultitypeValue operator != (FilterEvaluationMultitypeValue &x);
FilterEvaluationMultitypeValue operator < (FilterEvaluationMultitypeValue &x);

1002
Query/QueryParser.cpp Normal file

File diff suppressed because it is too large Load Diff

53
Query/QueryParser.h Normal file
View File

@ -0,0 +1,53 @@
/*=============================================================================
# Filename: QueryParser.h
# Author: Jiaqi, Chen
# Mail: chenjiaqi93@163.com
# Last Modified: 2016-07-14
# Description:
=============================================================================*/
#ifndef QUERYPARSER_H_
#define QUERYPARSER_H_
#include "../Util/Util.h"
#include "../Query/QueryTree.h"
#include "SparqlParser.h"
#include "SparqlLexer.h"
// Declares the SPARQL parser front-end. The only public operation is
// SPARQLParse(query, querytree); every private helper takes an ANTLR3 tree
// node plus the QueryTree / GroupPattern / FilterTreeNode it works on.
// NOTE(review): the implementations live in QueryParser.cpp, which is not
// visible here — the per-method notes below are inferred from names and
// signatures only; confirm against the implementation.
class QueryParser{
private:
// string -> string map; presumably prefix label -> expanded IRI (TODO confirm)
std::map<std::string,std::string> _prefix_map;
// debugging aid by the look of it; dep defaults to 0 (presumably the tree depth)
int printNode(pANTLR3_BASE_TREE node,int dep = 0);
// top-level dispatch helpers operating on the whole QueryTree
void parseWorkload(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseQuery(pANTLR3_BASE_TREE node, QueryTree &querytree);
// prologue / prefix handling; these take no QueryTree, so they presumably
// only touch _prefix_map (TODO confirm)
void parsePrologue(pANTLR3_BASE_TREE node);
void parsePrefix(pANTLR3_BASE_TREE node);
// modifies str in place (non-const reference)
void replacePrefix(std::string &str);
// SELECT clause helpers
void parseSelectClause(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseSelectVar(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseSelectAggregateFunction(pANTLR3_BASE_TREE node, QueryTree &querytree);
// group-pattern helpers; each receives the GroupPattern by non-const reference
void parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
// FILTER helpers; the three-argument forms also receive the FilterTreeNode
void parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
void parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
void parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
void parseOrderBy(pANTLR3_BASE_TREE node, QueryTree &querytree);
// str is a non-const reference, presumably the output; dep has no default here
void parseString(pANTLR3_BASE_TREE node, std::string &str, int dep);
// update-statement helpers
void parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree);
void parseTripleTemplate(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
void parseModify(pANTLR3_BASE_TREE node, QueryTree &querytree);
public:
QueryParser();
// Public entry point: takes the query text by const reference and a QueryTree
// by non-const reference (presumably populated by the parse — TODO confirm).
void SPARQLParse(const std::string &query, QueryTree &querytree);
};
#endif /* QUERYPARSER_H_ */

View File

@ -25,20 +25,24 @@ void QueryTree::GroupPattern::FilterTreeNode::print(vector<GroupPattern> &exist_
{
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Not_type) printf("!");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type) printf("REGEX");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_str_type) printf("STR");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type) printf("LANG");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type) printf("LANGMATCHES");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type) printf("BOUND");
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type)
{
printf("(");
if (this->child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type) printf("%s", this->child[0].arg.c_str());
if (this->child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type) this->child[0].node.print(exist_grouppatterns, dep);
printf(" IN (");
for (int i = 1; i < (int)this->child.size(); i++)
{
if (i != 1) printf(" , ");
if (this->child[i].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type) printf("%s", this->child[i].arg.c_str());
if (this->child[i].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type) this->child[i].node.print(exist_grouppatterns, dep);
}
printf(")");
printf("))");
return;
}

View File

@ -403,7 +403,10 @@ Server::importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_p
//if (this->database != NULL && this->database->getName() != _db_name)
if (this->database != NULL)
{
delete this->database;
//delete this->database;
//NOTICE:if there is a db loaded, we should not build directly, tell user to unload it first
_ret_msg = "please unload the current db first: " + this->database->getName();
return false;
}
this->database = new Database(_db_name);

View File

@ -10,7 +10,10 @@
using namespace std;
int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000;
//NOTICE: we aim to support 1 billion triples in a single machine, whose entity num
//can not exceed the 2 billion limit, and the maximum VNODE num is 2000000000/100=20000000=20M
int LRUCache::DEFAULT_CAPACITY = 20000000;
//int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000;
LRUCache::LRUCache(int _capacity)
{
@ -66,8 +69,9 @@ bool LRUCache::loadCache(string _filePath)
return false;
}
//BETTER:increase this parameter if memory allows
int defaultLoadSize = this->capacity / 2;
//NOTICE:here we set it to the maxium, to ensure all VNODE in memory
int defaultLoadSize = this->capacity;
//int defaultLoadSize = this->capacity / 2;
size_t vNodeSize = sizeof(VNode);
int flag = 0;
@ -137,6 +141,8 @@ bool LRUCache::createCache(string _filePath)
return true;
}
//DEBUG+WARN: the memory-disk swap strategy has serious bugs; however, we do not really use this strategy now!!!
//
//set the key(node's file line) and value(node's pointer). if the key exists now, the value of this key will be overwritten.
bool LRUCache::set(int _key, VNode * _value)
{
@ -158,6 +164,7 @@ bool LRUCache::set(int _key, VNode * _value)
// if the cache is full, should swap out the least recently used one to hard disk.
else
{
cout<<"memory-disk swap hadppened in VSTree - LRUCache"<<endl;
// write out and free the memory of the least recently used one.
int pos = this->next[LRUCache::START_INDEX];
this->writeOut(pos, this->keys[pos]);
@ -222,6 +229,7 @@ VNode* LRUCache::get(int _key)
// if the memory pool is full now, should swap out the least recently used one, and swap in the required value.
else
{
cout<<"memory-disk swap hadppened in VSTree - LRUCache"<<endl;
int pos = this->next[LRUCache::START_INDEX];
this->writeOut(pos, this->keys[pos]);
this->freeElem(pos);

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -26,6 +26,13 @@
#TODO:the dependences are not complete!
#TODO: parallel -pthread
#TODO: judge and decide using which program
#CC=$(shell which clang 2>/dev/null || which gcc)
#ccache, readline, gcov lcov
#http://blog.csdn.net/u012421852/article/details/52138960
#compile parameters
CC = ccache g++
@ -401,7 +408,7 @@ dist: clean
tarball:
tar -czvf devGstore.tar.gz api bin lib tools .debug .tmp .objs test docs data makefile \
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex Coverage
APIexample: $(api_cpp) $(api_java)
$(MAKE) -C api/cpp/example