change memory-disk swap to all-memory in VSTree::LRUCache;
deal with extra () in sparql; support long string with multiple lines in sparql; support str(IRI) in sparql;
This commit is contained in:
parent
74598bf66e
commit
e3b09c6529
|
@ -2328,8 +2328,17 @@ Database::remove(const TripleWithObjType* _triples, int _triple_num)
|
|||
{
|
||||
string sub = _triples[i].getSubject();
|
||||
subid = this->kvstore->getIDByEntity(sub);
|
||||
if(subid == -1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
string pre = _triples[i].getPredicate();
|
||||
preid = this->kvstore->getIDByPredicate(pre);
|
||||
if(preid == -1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
is_obj_entity = _triples[i].isObjEntity();
|
||||
string obj = _triples[i].getObject();
|
||||
|
@ -2341,11 +2350,15 @@ Database::remove(const TripleWithObjType* _triples, int _triple_num)
|
|||
{
|
||||
objid = this->kvstore->getIDByLiteral(obj);
|
||||
}
|
||||
|
||||
if (subid == -1 || preid == -1 || objid == -1)
|
||||
if(objid == -1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
//if (subid == -1 || preid == -1 || objid == -1)
|
||||
//{
|
||||
//continue;
|
||||
//}
|
||||
bool _exist_triple = this->exist_triple(subid, preid, objid);
|
||||
if (!_exist_triple)
|
||||
{
|
||||
|
|
|
@ -480,6 +480,7 @@ Join::pre_var_handler()
|
|||
//we need to copy only the selected ones in copyToResult
|
||||
int size = valid_ans.size();
|
||||
|
||||
//BETTER:only add pre vars which are selected or linked with satellite
|
||||
if (size > 0)
|
||||
{
|
||||
//if(!is_selected)
|
||||
|
|
|
@ -68,7 +68,14 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
|
|||
int selected_pre_var_num = (*iter)->getSelectedPreVarNum();
|
||||
int selected_var_num = (*iter)->getSelectVarNum();
|
||||
|
||||
if ((*iter)->getTripleNum() == 1 && pre_varNum == 1)
|
||||
//NOTICE: special case - query vertices only connected via same variables
|
||||
//all constant triples will be viewed as unconnected, if a triple has no variable,
|
||||
//then this triple is a BGP(no other triples in this BGP)
|
||||
if(total_num == 0 && pre_varNum == 0)
|
||||
{
|
||||
this->method = 5;
|
||||
}
|
||||
else if ((*iter)->getTripleNum() == 1 && pre_varNum == 1)
|
||||
{
|
||||
this->method = 4;
|
||||
}
|
||||
|
@ -122,6 +129,9 @@ Strategy::handle(SPARQLquery& _query, ResultFilter* _result_filter)
|
|||
case 4:
|
||||
this->handler4(*iter, result_list);
|
||||
break;
|
||||
case 5:
|
||||
this->handler5(*iter, result_list);
|
||||
break;
|
||||
default:
|
||||
cout << "not support this method" << endl;
|
||||
|
||||
|
@ -166,6 +176,7 @@ Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list, ResultFilter* _r
|
|||
|
||||
long tv_handle = Util::get_cur_time();
|
||||
int varNum = _bq->getVarNum(); //the num of vars needing to be joined
|
||||
//TODO:parallel by pthread
|
||||
for (int i = 0; i < varNum; ++i)
|
||||
{
|
||||
if (_bq->if_need_retrieve(i) == false)
|
||||
|
@ -186,7 +197,7 @@ Strategy::handler0(BasicQuery* _bq, vector<int*>& _result_list, ResultFilter* _r
|
|||
}
|
||||
}
|
||||
|
||||
//TODO:end directly if one is empty!
|
||||
//BETTER:end directly if one is empty!
|
||||
|
||||
long tv_retrieve = Util::get_cur_time();
|
||||
cout << "after Retrieve, used " << (tv_retrieve - tv_handle) << "ms." << endl;
|
||||
|
@ -457,3 +468,46 @@ Strategy::handler4(BasicQuery* _bq, vector<int*>& _result_list)
|
|||
delete[] id_list;
|
||||
}
|
||||
|
||||
//TODO:if any constants in a query are not found in kvstore, then this BGP should end to speed up the processing
|
||||
|
||||
void
|
||||
Strategy::handler5(BasicQuery* _bq, vector<int*>& _result_list)
|
||||
{
|
||||
cout<<"Special Case: consider constant triple"<<endl;
|
||||
Triple triple = _bq->getTriple(0);
|
||||
_result_list.clear();
|
||||
|
||||
int subid = this->kvstore->getIDByEntity(triple.subject);
|
||||
if(subid == -1) //not found
|
||||
{
|
||||
return;
|
||||
}
|
||||
int preid = this->kvstore->getIDByPredicate(triple.predicate);
|
||||
if(preid == -1) //not found
|
||||
{
|
||||
return;
|
||||
}
|
||||
int objid = this->kvstore->getIDByEntity(triple.object);
|
||||
if(objid == -1)
|
||||
{
|
||||
objid = this->kvstore->getIDByLiteral(triple.object);
|
||||
}
|
||||
if(objid == -1)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
int* id_list = NULL;
|
||||
int id_list_len = 0;
|
||||
(this->kvstore)->getobjIDlistBysubIDpreID(subid, preid, id_list, id_list_len);
|
||||
if (Util::bsearch_int_uporder(objid, id_list, id_list_len) != -1)
|
||||
{
|
||||
int* record = new int[3];
|
||||
record[0] = subid;
|
||||
record[1] = preid;
|
||||
record[2] = objid;
|
||||
_result_list.push_back(record);
|
||||
}
|
||||
delete[] id_list;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ private:
|
|||
void handler2(BasicQuery*, vector<int*>&);
|
||||
void handler3(BasicQuery*, vector<int*>&);
|
||||
void handler4(BasicQuery*, vector<int*>&);
|
||||
void handler5(BasicQuery*, vector<int*>&);
|
||||
//QueryHandler *dispatch;
|
||||
//void prepare_handler();
|
||||
};
|
||||
|
|
|
@ -20,7 +20,7 @@ main(int argc, char * argv[])
|
|||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
if(argc == 1) //./gbuild
|
||||
if(argc < 3) //./gbuild
|
||||
{
|
||||
//output help info here
|
||||
cout << "the usage of gbuild: " << endl;
|
||||
|
|
80
NOTES.md
80
NOTES.md
|
@ -5,22 +5,61 @@
|
|||
方式是控制Util/Util.h中OUTPUT_QUERY_RESULT宏的开启
|
||||
|
||||
在使用gserver时,不能在数据库没有unload时再用gbuild或其他命令修改数据库,仅限于C/S模式
|
||||
将IRC聊天放到gstore文档上,freenode #gStore
|
||||
|
||||
# 推广
|
||||
|
||||
必须建立一个官方网站,可以展示下团队、demo,需要建立社区/论坛并维护
|
||||
另外要有桌面应用或者网页应用,以可视化的方式操作数据库,类似virtuoso和neo4j那种
|
||||
server 118.89.115.42 gstore-pku.com
|
||||
|
||||
自己的网站可以用实验室的服务器,gstore网站最好用云服务,图个稳定
|
||||
但用实验室主机,备案时是否更麻烦?得以企业为单位,而且解析是否更麻烦?
|
||||
gstore网站中的demo应用的主体可以放在实验室主机上,至少是gstore数据库应抽离出来,但若实验室主机不开外网,应如何配置代理?
|
||||
demo应用全部外链,具体服务放在实验室公开的主机上,通过ip:port连接
|
||||
|
||||
---
|
||||
|
||||
# 并行策略- 线程控制模块
|
||||
|
||||
不宜使用并行框架,可使用C的pthread,boost的thread库,或者启用C++11,gcc编译器需要高于4.8.1才能完整支持C++11
|
||||
但boost的安装更麻烦,而且没有C++11安全,所以综合考虑,还是选择C++11
|
||||
而且现在C++14标准都已经出来,11标准也已经得到比较广的应用了,而像openmp这种高级并行框架也是不合适的,因为不方便统筹规划每一线程
|
||||
但如果只在 Linux 下编程,不用考虑平台兼容性,那么 C++11 的 thread 的附加值几乎为零(我认为它过度设计了,同时损失了一些功能),你自己把 Pthreads 封装成一个简单好用的线程库只需要两三百行代码,用十几个 pthreads 函数实现 4 个 class:thread、mutex、lock_guard、condvar,而且 RAII 的好处也享受到了。
|
||||
Linux上最好还是使用POSIX标准的pthread库,由glibc提供
|
||||
编译时采用-pthread选项
|
||||
|
||||
目前B+树本身还无法并行,哪怕只有查询,也可能因为内外存交换导致并行出错。
|
||||
可以让陈语嫣先做预加载,并进行两表分块并行的处理,之后再使B+树支持多线程
|
||||
目前vstree没有内外存交换,只读时完全可以支持多线程
|
||||
但实际应用中必然存在着读写可能和写写可能,后期必须要引入事务
|
||||
对于读写问题,应该使用读写锁,采用强读者的模式,即优先将锁分配给读者
|
||||
另一种方式是不用锁,为索引建立溢出页,这是一种比较好的处理并行的方式
|
||||
|
||||
|
||||
# TODO
|
||||
|
||||
删光了,树叶应该在,包括kv和vs等
|
||||
清空后,LRUcache::flush 没法打开文件
|
||||
|
||||
join缓存
|
||||
彭鹏数据集的两个bug
|
||||
成组插删还没完全支持和测试,需要修改KVstore.cpp
|
||||
|
||||
要在单机支持到10亿triple,最坏情况下最多有20亿entity和20亿literal,目前的编号方式是不行的
|
||||
可考虑将entity和literal彻底分开,也可以将int扩展为unsigned,但这样不好判断无效ID
|
||||
|
||||
gstore后续需要开发的地方:
|
||||
数据库连接池 保持连接而不是每次都用socket
|
||||
事务操作
|
||||
安全备份 数据库的多版本备份,动态删除
|
||||
多领域多库解决方案。
|
||||
分页查询
|
||||
|
||||
任务分配:
|
||||
---
|
||||
JSON格式传输(陈佳棋)
|
||||
数据库连接池 保持连接而不是每次都用socket(http?)
|
||||
分页查询(先将整个查询结果缓存,需要考虑内外存交换)
|
||||
陈佳棋的任务:s和p对应同一个实体,应该先重命名,再过滤。还有一种情况是两者只是名字相同,实则并无关系
|
||||
高阶谓词逻辑 <type> <type> <predicate>
|
||||
---
|
||||
陈佳棋找人负责:
|
||||
模仿海量图大作业,基于gStore开发一个社交应用,要求可以批量导入且实时查询
|
||||
|
@ -28,6 +67,7 @@ JSON格式传输(陈佳棋)
|
|||
多查询优化
|
||||
---
|
||||
王力博:
|
||||
安全备份 数据库的多版本备份,动态删除
|
||||
gserver for multiple users, not load db for each connection, but get the same db pointer
|
||||
assign the pointer to the user who require this db, and ensure that each db is just kept once in memory
|
||||
有什么办法去检测一个db是否存在呢?(首先要支持导入多个数据库)
|
||||
|
@ -40,22 +80,33 @@ or the reason maybe gserver still dealing with the previous job, then a new conn
|
|||
彭鹏师兄的数据集bug
|
||||
优化谓词查询,谓词少而entity/literal多,所以先过滤得到谓词的解是一种可以考虑的策略
|
||||
以谓词为节点,以s/o为信息,来过滤得到谓词的结果
|
||||
需要一个查询计划进行选择,可能有些?p应该先做,有些?s/?o应该先做
|
||||
---
|
||||
陈语嫣,李荆
|
||||
陈语嫣:
|
||||
网站设计以及和外包方的联系
|
||||
之后用pthread将join_two函数内部的拼接并行化,先实现一个最基本的版本即可
|
||||
---
|
||||
张雨的任务:单起点单终点的正则表达式路径问题,如果是多起点多终点?
|
||||
下学期的任务:提取相关联的几个方向写论文
|
||||
将IRC聊天放到gstore文档上,freenode #gStore
|
||||
|
||||
---
|
||||
WARN:B+树删除时,向旁边兄弟借或者合并,要注意兄弟的定义,应该是同一父节点才对!
|
||||
考虑使用 sigmod2016 那篇图同构的论文方法,实现一套join
|
||||
但那个是基于路径的,BFS和索引连接的思想值得借用,可作为另外一套join方法
|
||||
@hulin
|
||||
叶子节点是否也可以先过滤先join,或者说非核心节点,sparql查询图中算度数时是否不要考虑常量?
|
||||
新的方法:基于谓词的频率动态调整顺序,但中间结果的保留是另一个问题,是用中间表还是索引连接,是否需要multiJoin?
|
||||
另外超级点和超级边的概念也需要被提出
|
||||
---
|
||||
李荆的任务:
|
||||
考虑出入度数,编码为1的个数?应该不用,在编码的邻居信息中能够得到体现。
|
||||
第二步编码应该更长,点和边对应着放在一起。按出入边分区,一步点,二步边分区和二步点。
|
||||
对一个节点保留其最长链信息没啥用,因为数据图基本是连通的,最长链就是图的最长链。
|
||||
多步编码不应分开,而应在编码逐一扩展,第二步可以依旧保留详细信息,最好用更长编码,因为信息更多。
|
||||
可能有相同的谓词对应多个邻居,同样的谓词只要保留一个即可,不同邻居可以重合。
|
||||
第三步可以只记谓词,第四步可以只记边的出入向。
|
||||
vstree并行过滤
|
||||
---
|
||||
实现其他的join思路,比如基于过滤效果
|
||||
|
||||
pthread写多线程
|
||||
|
||||
如何在preFilter和join的开销之间做平衡
|
||||
preFilter中的限制条件是否过于严格
|
||||
|
||||
|
@ -68,18 +119,7 @@ for virtuoso, better to reset each time and the given configure file(need to res
|
|||
load过程先导入满足内存的内容,或者先来几轮搜索但不输出结果,避免开头的查询要读磁盘。vstree直接全导入内存?
|
||||
先完成合并测试,再测lubm500M和bsbm500M -- 90 server
|
||||
|
||||
两表拼接时多线程分块join
|
||||
jemalloc??
|
||||
vstree并行分块
|
||||
|
||||
|
||||
@lijing
|
||||
考虑出入度数,编码为1的个数?应该不用,在编码的邻居信息中能够得到体现。
|
||||
第二步编码应该更长,点和边对应着放在一起。按出入边分区,一步点,二步边分区和二步点。
|
||||
对一个节点保留其最长链信息没啥用,因为数据图基本是连通的,最长链就是图的最长链。
|
||||
多步编码不应分开,而应在编码逐一扩展,第二步可以依旧保留详细信息,最好用更长编码,因为信息更多。
|
||||
可能有相同的谓词对应多个邻居,同样的谓词只要保留一个即可,不同邻居可以重合。
|
||||
第三步可以只记谓词,第四步可以只记边的出入向。
|
||||
|
||||
各版本对比的表格中应加一列几何平均数,现实中大多数查询是简单查询,最好还有一个平均数,对应着把数据做归一化后求和
|
||||
dbpedia q6现在应该统计并对比结果了,包含在测试结果中
|
||||
|
|
|
@ -306,6 +306,46 @@ GeneralEvaluation::FilterEvaluationMultitypeValue::EffectiveBooleanValue
|
|||
}
|
||||
|
||||
|
||||
bool GeneralEvaluation::FilterEvaluationMultitypeValue::isSimpleLiteral()
|
||||
{
|
||||
if (this->datatype == literal)
|
||||
{
|
||||
int length = this->str_value.length();
|
||||
if (length >= 2 && this->str_value[0] == '"' && this->str_value[length - 1] == '"')
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//Brings *this and x to one common datatype: the larger of the two DataType
//enum values (i.e. the one declared later in the enum).
//Before retagging, the stored number is copied into the field the target
//type reads from:
//  xsd_integer -> xsd_decimal / xsd_float : int_value copied to flt_value
//  xsd_integer -> xsd_double              : int_value copied to dbl_value
//  xsd_decimal / xsd_float -> xsd_double  : flt_value copied to dbl_value
//Any other combination only changes the datatype tag.
//NOTE(review): presumably callers guarantee both operands are numeric - confirm.
void GeneralEvaluation::FilterEvaluationMultitypeValue::getSameNumericType (FilterEvaluationMultitypeValue &x)
{
	DataType to_type = max(this->datatype, x.datatype);

	//handle *this first and x second, the same order as two separate passes
	FilterEvaluationMultitypeValue *operands[2] = {this, &x};

	for (int i = 0; i < 2; i++)
	{
		FilterEvaluationMultitypeValue &operand = *operands[i];

		if (operand.datatype == xsd_integer)
		{
			if (to_type == xsd_decimal || to_type == xsd_float)
				operand.flt_value = operand.int_value;
			else if (to_type == xsd_double)
				operand.dbl_value = operand.int_value;
		}
		else if ((operand.datatype == xsd_decimal || operand.datatype == xsd_float) && to_type == xsd_double)
		{
			operand.dbl_value = operand.flt_value;
		}

		operand.datatype = to_type;
	}
}
|
||||
|
||||
GeneralEvaluation::FilterEvaluationMultitypeValue
|
||||
GeneralEvaluation::FilterEvaluationMultitypeValue::operator !()
|
||||
{
|
||||
|
@ -348,35 +388,6 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
return ret_femv;
|
||||
}
|
||||
|
||||
//Brings *this and x to one common datatype: the larger of the two DataType
//enum values (i.e. the one declared later in the enum).
//Before retagging, the stored number is copied into the field the target
//type reads from:
//  xsd_integer -> xsd_decimal / xsd_float : int_value copied to flt_value
//  xsd_integer -> xsd_double              : int_value copied to dbl_value
//  xsd_decimal / xsd_float -> xsd_double  : flt_value copied to dbl_value
//Any other combination only changes the datatype tag.
//NOTE(review): presumably callers guarantee both operands are numeric - confirm.
void GeneralEvaluation::FilterEvaluationMultitypeValue::getSameNumericType (FilterEvaluationMultitypeValue &x)
{
	DataType to_type = max(this->datatype, x.datatype);

	//handle *this first and x second, the same order as two separate passes
	FilterEvaluationMultitypeValue *operands[2] = {this, &x};

	for (int i = 0; i < 2; i++)
	{
		FilterEvaluationMultitypeValue &operand = *operands[i];

		if (operand.datatype == xsd_integer)
		{
			if (to_type == xsd_decimal || to_type == xsd_float)
				operand.flt_value = operand.int_value;
			else if (to_type == xsd_double)
				operand.dbl_value = operand.int_value;
		}
		else if ((operand.datatype == xsd_decimal || operand.datatype == xsd_float) && to_type == xsd_double)
		{
			operand.dbl_value = operand.flt_value;
		}

		operand.datatype = to_type;
	}
}
|
||||
|
||||
GeneralEvaluation::FilterEvaluationMultitypeValue
|
||||
GeneralEvaluation::FilterEvaluationMultitypeValue::operator == (FilterEvaluationMultitypeValue &x)
|
||||
{
|
||||
|
@ -406,7 +417,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
return ret_femv;
|
||||
}
|
||||
|
||||
if (this->datatype == simple_literal && x.datatype == simple_literal)
|
||||
if (this->isSimpleLiteral() && x.isSimpleLiteral())
|
||||
{
|
||||
ret_femv.bool_value = (this->str_value == x.str_value);
|
||||
return ret_femv;
|
||||
|
@ -457,7 +468,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
return ret_femv;
|
||||
}
|
||||
|
||||
if (this->datatype == simple_literal && x.datatype == simple_literal)
|
||||
if (this->isSimpleLiteral() && x.isSimpleLiteral())
|
||||
{
|
||||
ret_femv.bool_value = (this->str_value != x.str_value);
|
||||
return ret_femv;
|
||||
|
@ -508,7 +519,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
return ret_femv;
|
||||
}
|
||||
|
||||
if (this->datatype == simple_literal && x.datatype == simple_literal)
|
||||
if (this->isSimpleLiteral() && x.isSimpleLiteral())
|
||||
{
|
||||
ret_femv.bool_value = (this->str_value < x.str_value);
|
||||
return ret_femv;
|
||||
|
@ -558,7 +569,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
return ret_femv;
|
||||
}
|
||||
|
||||
if (this->datatype == simple_literal && x.datatype == simple_literal)
|
||||
if (this->isSimpleLiteral() && x.isSimpleLiteral())
|
||||
{
|
||||
ret_femv.bool_value = (this->str_value <= x.str_value);
|
||||
return ret_femv;
|
||||
|
@ -608,7 +619,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
return ret_femv;
|
||||
}
|
||||
|
||||
if (this->datatype == simple_literal && x.datatype == simple_literal)
|
||||
if (this->isSimpleLiteral() && x.isSimpleLiteral())
|
||||
{
|
||||
ret_femv.bool_value = (this->str_value > x.str_value);
|
||||
return ret_femv;
|
||||
|
@ -658,7 +669,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
return ret_femv;
|
||||
}
|
||||
|
||||
if (this->datatype == simple_literal && x.datatype == simple_literal)
|
||||
if (this->isSimpleLiteral() && x.isSimpleLiteral())
|
||||
{
|
||||
ret_femv.bool_value = (this->str_value >= x.str_value);
|
||||
return ret_femv;
|
||||
|
@ -970,7 +981,7 @@ void GeneralEvaluation::TempResult::mapFilterTree2Varset(QueryTree::GroupPattern
|
|||
else filter.child[1].isel = false;
|
||||
}
|
||||
}
|
||||
else if (filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type)
|
||||
else if (filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_str_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type || filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type)
|
||||
{
|
||||
if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
|
||||
mapFilterTree2Varset(filter.child[0].node, v, entity_literal_varset);
|
||||
|
@ -1024,13 +1035,6 @@ void GeneralEvaluation::TempResult::getFilterString(QueryTree::GroupPattern::Fil
|
|||
}
|
||||
else femv.term_value = child.arg;
|
||||
|
||||
//' to "
|
||||
if (femv.term_value[0] == '\'')
|
||||
{
|
||||
femv.term_value[0] = '"';
|
||||
femv.term_value[femv.term_value.rfind('\'')] = '"';
|
||||
}
|
||||
|
||||
if (femv.term_value[0] == '<' && femv.term_value[femv.term_value.length() - 1] == '>')
|
||||
{
|
||||
femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::iri;
|
||||
|
@ -1039,7 +1043,7 @@ void GeneralEvaluation::TempResult::getFilterString(QueryTree::GroupPattern::Fil
|
|||
|
||||
if (femv.term_value[0] == '"' && femv.term_value.find("\"^^<") == -1 && femv.term_value[femv.term_value.length() - 1] != '>' )
|
||||
{
|
||||
femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal;
|
||||
femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
|
||||
femv.str_value = femv.term_value;
|
||||
}
|
||||
|
||||
|
@ -1304,7 +1308,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
getFilterString(filter.child[0], x, row, stringindex);
|
||||
else if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
|
||||
x = matchFilterTree(filter.child[0].node, filter_exists_grouppattern_resultset_record, row, stringindex);
|
||||
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
|
||||
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::literal || x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::xsd_string)
|
||||
{
|
||||
t = x.str_value;
|
||||
t = t.substr(1, t.rfind('"') - 1);
|
||||
|
@ -1316,7 +1320,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
getFilterString(filter.child[1], y, row, stringindex);
|
||||
else if (filter.child[1].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
|
||||
y = matchFilterTree(filter.child[1].node, filter_exists_grouppattern_resultset_record, row, stringindex);
|
||||
if (y.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
|
||||
if (y.isSimpleLiteral())
|
||||
{
|
||||
p = y.str_value;
|
||||
p = p.substr(1, p.rfind('"') - 1);
|
||||
|
@ -1330,7 +1334,7 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
getFilterString(filter.child[2], z, row, stringindex);
|
||||
else if (filter.child[2].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
|
||||
z = matchFilterTree(filter.child[2].node, filter_exists_grouppattern_resultset_record, row, stringindex);
|
||||
if (z.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
|
||||
if (z.isSimpleLiteral())
|
||||
{
|
||||
f = z.str_value;
|
||||
f = f.substr(1, f.rfind('"') - 1);
|
||||
|
@ -1348,6 +1352,43 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
return ret_femv;
|
||||
}
|
||||
|
||||
if (filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_str_type)
|
||||
{
|
||||
FilterEvaluationMultitypeValue x;
|
||||
|
||||
if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type)
|
||||
getFilterString(filter.child[0], x, row, stringindex);
|
||||
else if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
|
||||
x = matchFilterTree(filter.child[0].node, filter_exists_grouppattern_resultset_record, row, stringindex);
|
||||
|
||||
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::literal)
|
||||
{
|
||||
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
|
||||
|
||||
ret_femv.str_value = x.str_value.substr(0, x.str_value.rfind('"') + 1);
|
||||
|
||||
return ret_femv;
|
||||
}
|
||||
else if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::iri)
|
||||
{
|
||||
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
|
||||
|
||||
ret_femv.str_value = "\"" + x.str_value.substr(1, x.str_value.length() - 2) + "\"";
|
||||
|
||||
return ret_femv;
|
||||
}
|
||||
else if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::xsd_string)
|
||||
{
|
||||
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
|
||||
|
||||
ret_femv.str_value = x.str_value;
|
||||
|
||||
return ret_femv;
|
||||
}
|
||||
else
|
||||
return ret_femv;
|
||||
}
|
||||
|
||||
if (filter.oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type)
|
||||
{
|
||||
FilterEvaluationMultitypeValue x;
|
||||
|
@ -1357,9 +1398,9 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
else if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
|
||||
x = matchFilterTree(filter.child[0].node, filter_exists_grouppattern_resultset_record, row, stringindex);
|
||||
|
||||
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
|
||||
if (x.datatype == GeneralEvaluation::FilterEvaluationMultitypeValue::literal)
|
||||
{
|
||||
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal;
|
||||
ret_femv.datatype = GeneralEvaluation::FilterEvaluationMultitypeValue::literal;
|
||||
|
||||
int p = x.str_value.rfind('@');
|
||||
if (p != -1)
|
||||
|
@ -1381,14 +1422,14 @@ GeneralEvaluation::FilterEvaluationMultitypeValue
|
|||
getFilterString(filter.child[0], x, row, stringindex);
|
||||
else if (filter.child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
|
||||
x = matchFilterTree(filter.child[0].node, filter_exists_grouppattern_resultset_record, row, stringindex);
|
||||
if (x.datatype != GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
|
||||
if (!x.isSimpleLiteral())
|
||||
return ret_femv;
|
||||
|
||||
if (filter.child[1].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type)
|
||||
getFilterString(filter.child[1], y, row, stringindex);
|
||||
else if (filter.child[1].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type)
|
||||
y = matchFilterTree(filter.child[1].node, filter_exists_grouppattern_resultset_record, row, stringindex);
|
||||
if (y.datatype != GeneralEvaluation::FilterEvaluationMultitypeValue::simple_literal)
|
||||
if (!y.isSimpleLiteral())
|
||||
return ret_femv;
|
||||
|
||||
ret_femv.bool_value = ((x.str_value == y.str_value) || (x.str_value.length() > 0 && y.str_value == "\"*\""));
|
||||
|
|
|
@ -114,7 +114,7 @@ class GeneralEvaluation
|
|||
EffectiveBooleanValue operator >= (const DateTime &x);
|
||||
};
|
||||
|
||||
enum DataType {rdf_term, iri, simple_literal, xsd_string,
|
||||
enum DataType {rdf_term, iri, literal, xsd_string,
|
||||
xsd_boolean, xsd_integer, xsd_decimal, xsd_float, xsd_double,
|
||||
xsd_datetime};
|
||||
|
||||
|
@ -126,10 +126,11 @@ class GeneralEvaluation
|
|||
double dbl_value;
|
||||
DateTime dt_value;
|
||||
|
||||
bool isSimpleLiteral();
|
||||
void getSameNumericType (FilterEvaluationMultitypeValue &x);
|
||||
FilterEvaluationMultitypeValue operator !();
|
||||
FilterEvaluationMultitypeValue operator || (FilterEvaluationMultitypeValue &x);
|
||||
FilterEvaluationMultitypeValue operator && (FilterEvaluationMultitypeValue &x);
|
||||
void getSameNumericType (FilterEvaluationMultitypeValue &x);
|
||||
FilterEvaluationMultitypeValue operator == (FilterEvaluationMultitypeValue &x);
|
||||
FilterEvaluationMultitypeValue operator != (FilterEvaluationMultitypeValue &x);
|
||||
FilterEvaluationMultitypeValue operator < (FilterEvaluationMultitypeValue &x);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,53 @@
|
|||
/*=============================================================================
|
||||
# Filename: QueryParser.h
|
||||
# Author: Jiaqi, Chen
|
||||
# Mail: chenjiaqi93@163.com
|
||||
# Last Modified: 2016-07-14
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef QUERYPARSER_H_
|
||||
#define QUERYPARSER_H_
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Query/QueryTree.h"
|
||||
#include "SparqlParser.h"
|
||||
#include "SparqlLexer.h"
|
||||
|
||||
class QueryParser{
|
||||
private:
|
||||
std::map<std::string,std::string> _prefix_map;
|
||||
|
||||
int printNode(pANTLR3_BASE_TREE node,int dep = 0);
|
||||
|
||||
void parseWorkload(pANTLR3_BASE_TREE node, QueryTree &querytree);
|
||||
|
||||
void parseQuery(pANTLR3_BASE_TREE node, QueryTree &querytree);
|
||||
void parsePrologue(pANTLR3_BASE_TREE node);
|
||||
void parsePrefix(pANTLR3_BASE_TREE node);
|
||||
void replacePrefix(std::string &str);
|
||||
void parseSelectClause(pANTLR3_BASE_TREE node, QueryTree &querytree);
|
||||
void parseSelectVar(pANTLR3_BASE_TREE node, QueryTree &querytree);
|
||||
void parseSelectAggregateFunction(pANTLR3_BASE_TREE node, QueryTree &querytree);
|
||||
void parseGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
|
||||
void parsePattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
|
||||
void parseOptionalOrMinus(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
|
||||
void parseUnion(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
|
||||
void parseFilter(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
|
||||
void parseFilterTree(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
|
||||
void parseVarInExpressionList(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
|
||||
void parseExistsGroupPattern(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern, QueryTree::GroupPattern::FilterTreeNode &filter);
|
||||
void parseOrderBy(pANTLR3_BASE_TREE node, QueryTree &querytree);
|
||||
void parseString(pANTLR3_BASE_TREE node, std::string &str, int dep);
|
||||
|
||||
void parseUpdate(pANTLR3_BASE_TREE node, QueryTree &querytree);
|
||||
void parseTripleTemplate(pANTLR3_BASE_TREE node, QueryTree::GroupPattern &grouppattern);
|
||||
void parseModify(pANTLR3_BASE_TREE node, QueryTree &querytree);
|
||||
|
||||
public:
|
||||
QueryParser();
|
||||
|
||||
void SPARQLParse(const std::string &query, QueryTree &querytree);
|
||||
};
|
||||
|
||||
#endif /* QUERYPARSER_H_ */
|
|
@ -25,20 +25,24 @@ void QueryTree::GroupPattern::FilterTreeNode::print(vector<GroupPattern> &exist_
|
|||
{
|
||||
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Not_type) printf("!");
|
||||
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_regex_type) printf("REGEX");
|
||||
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_str_type) printf("STR");
|
||||
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_lang_type) printf("LANG");
|
||||
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_langmatches_type) printf("LANGMATCHES");
|
||||
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_bound_type) printf("BOUND");
|
||||
|
||||
if (this->oper_type == QueryTree::GroupPattern::FilterTreeNode::Builtin_in_type)
|
||||
{
|
||||
printf("(");
|
||||
if (this->child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type) printf("%s", this->child[0].arg.c_str());
|
||||
if (this->child[0].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type) this->child[0].node.print(exist_grouppatterns, dep);
|
||||
printf(" IN (");
|
||||
for (int i = 1; i < (int)this->child.size(); i++)
|
||||
{
|
||||
if (i != 1) printf(" , ");
|
||||
if (this->child[i].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::String_type) printf("%s", this->child[i].arg.c_str());
|
||||
if (this->child[i].node_type == QueryTree::GroupPattern::FilterTreeNode::FilterTreeChild::Tree_type) this->child[i].node.print(exist_grouppatterns, dep);
|
||||
}
|
||||
printf(")");
|
||||
printf("))");
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -403,7 +403,10 @@ Server::importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_p
|
|||
//if (this->database != NULL && this->database->getName() != _db_name)
|
||||
if (this->database != NULL)
|
||||
{
|
||||
delete this->database;
|
||||
//delete this->database;
|
||||
//NOTICE:if there is a db loaded, we should not build directly, tell user to unload it first
|
||||
_ret_msg = "please unload the current db first: " + this->database->getName();
|
||||
return false;
|
||||
}
|
||||
|
||||
this->database = new Database(_db_name);
|
||||
|
|
|
@ -10,7 +10,10 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000;
|
||||
//NOTICE: we aim to support 1 billion triples in a single machine, whose entity num
|
||||
//can not exceed the 2 billion limit, and the maximum VNODE num is 2000000000/100=20000000=20M
|
||||
int LRUCache::DEFAULT_CAPACITY = 20000000;
|
||||
//int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000;
|
||||
|
||||
LRUCache::LRUCache(int _capacity)
|
||||
{
|
||||
|
@ -66,8 +69,9 @@ bool LRUCache::loadCache(string _filePath)
|
|||
return false;
|
||||
}
|
||||
|
||||
//BETTER:increase this parameter if memory allows
|
||||
int defaultLoadSize = this->capacity / 2;
|
||||
//NOTICE: here we set it to the maximum, to ensure all VNODEs stay in memory
|
||||
int defaultLoadSize = this->capacity;
|
||||
//int defaultLoadSize = this->capacity / 2;
|
||||
size_t vNodeSize = sizeof(VNode);
|
||||
int flag = 0;
|
||||
|
||||
|
@ -137,6 +141,8 @@ bool LRUCache::createCache(string _filePath)
|
|||
return true;
|
||||
}
|
||||
|
||||
//DEBUG+WARN: the memory-disk swap strategy has serious bugs; however, we do not really use this strategy now!!!
|
||||
//
|
||||
//set the key(node's file line) and value(node's pointer). if the key exists now, the value of this key will be overwritten.
|
||||
bool LRUCache::set(int _key, VNode * _value)
|
||||
{
|
||||
|
@ -158,6 +164,7 @@ bool LRUCache::set(int _key, VNode * _value)
|
|||
// if the cache is full, should swap out the least recently used one to hard disk.
|
||||
else
|
||||
{
|
||||
cout<<"memory-disk swap hadppened in VSTree - LRUCache"<<endl;
|
||||
// write out and free the memory of the least recently used one.
|
||||
int pos = this->next[LRUCache::START_INDEX];
|
||||
this->writeOut(pos, this->keys[pos]);
|
||||
|
@ -222,6 +229,7 @@ VNode* LRUCache::get(int _key)
|
|||
// if the memory pool is full now, should swap out the least recently used one, and swap in the required value.
|
||||
else
|
||||
{
|
||||
cout<<"memory-disk swap hadppened in VSTree - LRUCache"<<endl;
|
||||
int pos = this->next[LRUCache::START_INDEX];
|
||||
this->writeOut(pos, this->keys[pos]);
|
||||
this->freeElem(pos);
|
||||
|
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
9
makefile
9
makefile
|
@ -26,6 +26,13 @@
|
|||
|
||||
#TODO: the dependencies are not complete!
|
||||
|
||||
#TODO: parallel -pthread
|
||||
|
||||
#TODO: judge and decide using which program
|
||||
#CC=$(shell which clang 2>/dev/null || which gcc)
|
||||
#ccache, readline, gcov lcov
|
||||
#http://blog.csdn.net/u012421852/article/details/52138960
|
||||
|
||||
#compile parameters
|
||||
|
||||
CC = ccache g++
|
||||
|
@ -401,7 +408,7 @@ dist: clean
|
|||
|
||||
tarball:
|
||||
tar -czvf devGstore.tar.gz api bin lib tools .debug .tmp .objs test docs data makefile \
|
||||
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex
|
||||
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex Coverage
|
||||
|
||||
APIexample: $(api_cpp) $(api_java)
|
||||
$(MAKE) -C api/cpp/example
|
||||
|
|
Loading…
Reference in New Issue