Merge branch 'dev' of https://github.com/pkumod/gStore into cyy
This commit is contained in:
commit
65e4bcc026
|
@ -0,0 +1,52 @@
|
|||
# https://git-scm.com/docs/gitattributes
|
||||
|
||||
# Set the default behavior, in case people don't have core.autocrlf set.
|
||||
* text=auto
|
||||
*.txt text
|
||||
*.vcproj text eol=crlf
|
||||
*.sh text eol=lf
|
||||
*.py eol=lf
|
||||
*.c text eol=lf
|
||||
*.h text eol=lf
|
||||
*.cpp text eol=lf
|
||||
*.cu text eol=lf
|
||||
*.md text eol=lf
|
||||
*.nt text eol=lf
|
||||
*.sql text eol=lf
|
||||
#*.jpg -text
|
||||
|
||||
# Explicitly declare text files you want to always be normalized and converted
|
||||
# to native line endings on checkout.
|
||||
#*.c text
|
||||
#*.h text
|
||||
*.md text
|
||||
*.js text
|
||||
*.json text
|
||||
*.wxss text
|
||||
*.wxml text
|
||||
|
||||
# Declare files that will always have CRLF line endings on checkout.
|
||||
#*.sln text eol=crlf
|
||||
|
||||
# Denote all files that are truly binary and should not be modified.
|
||||
*.png binary
|
||||
*.jpg binary
|
||||
|
||||
# Below denotes which file to use git-lfs, large files
|
||||
#*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
#*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
||||
#*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
#*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
#*.torrent filter=lfs diff=lfs merge=lfs -text
|
||||
#*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
#*.jpg filter=lfs diff=lfs merge=lfs -text
|
||||
#*.jpeg filter=lfs diff=lfs merge=lfs -text
|
||||
#*.png filter=lfs diff=lfs merge=lfs -text
|
||||
#*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
#*.mp3 filter=lfs diff=lfs merge=lfs -text
|
||||
#*.wav filter=lfs diff=lfs merge=lfs -text
|
||||
#*.mp4 filter=lfs diff=lfs merge=lfs -text
|
||||
#*.rmvb filter=lfs diff=lfs merge=lfs -text
|
||||
#*.nt filter=lfs diff=lfs merge=lfs -text
|
||||
#*.n3 filter=lfs diff=lfs merge=lfs -text
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
*.x86_64
|
||||
*.hex
|
||||
bin/g*
|
||||
scripts/update_test
|
||||
api/cpp/example/example
|
||||
|
||||
Parser/Sparql*
|
||||
|
|
|
@ -1107,6 +1107,9 @@ Database::load_vstree(unsigned _vstree_size)
|
|||
cout<<"vstree loaded"<<endl;
|
||||
}
|
||||
|
||||
// @author bookug
|
||||
// @email bookug@qq.com
|
||||
// @function check some parameters, status and correctness of the database
|
||||
void
|
||||
Database::check()
|
||||
{
|
||||
|
@ -1116,6 +1119,19 @@ cout<<"entity num: "<<this->entity_num<<endl;
|
|||
cout<<"literal num: "<<this->literal_num<<endl;
|
||||
|
||||
string tstr;
|
||||
//unsigned lid1 = this->kvstore->getIDByLiteral("\"111\"");
|
||||
//cout<<"check: "<<lid1<<endl;
|
||||
//unsigned lid2 = this->kvstore->getIDByLiteral("\"222\"");
|
||||
//cout<<"check: "<<lid2<<endl;
|
||||
//unsigned lid3 = this->kvstore->getIDByLiteral("\"Bookug Lobert\"");
|
||||
//cout<<"check: "<<lid3<<endl;
|
||||
//unsigned eid = this->kvstore->getIDByEntity("<bookug>");
|
||||
//cout<<"check: "<<eid<<endl;
|
||||
//tstr = this->kvstore->getLiteralByID(2000000004);
|
||||
//cout<<"check: "<<tstr<<endl;
|
||||
//tstr = this->kvstore->getEntityByID(14);
|
||||
//cout<<"check: "<<tstr<<endl;
|
||||
|
||||
//unsigned pid = this->kvstore->getIDByPredicate("<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>");
|
||||
//cout<<"check: pre "<<pid<<endl;
|
||||
//this->stringindex->randomAccess(pid, &tstr, false);
|
||||
|
@ -1181,9 +1197,6 @@ string tstr;
|
|||
//}
|
||||
//delete[] thr_si;
|
||||
|
||||
//TODO: each thread for a sparql query, support by assigning a thread for each query in ghttp(better to set timeout)
|
||||
//and test stringIndex::addRequest(),
|
||||
//the request array maybe not right, request.clear()
|
||||
string spq[6];
|
||||
spq[0] = "select ?x where { ?x <ub:name> <FullProfessor0> . }";
|
||||
spq[1] = "select distinct ?x where { ?x <rdf:type> <ub:GraduateStudent>. ?y <rdf:type> <ub:University>. ?z <rdf:type> <ub:Department>. ?x <ub:memberOf> ?z. ?z <ub:subOrganizationOf> ?y. ?x <ub:undergraduateDegreeFrom> ?y. }";
|
||||
|
@ -2129,8 +2142,8 @@ Database::build_s2xx(ID_TUPLE* _p_id_tuples)
|
|||
//qsort(_p_id_tuples, this->triples_num, sizeof(int*), Util::_spo_cmp);
|
||||
|
||||
//remove duplicates from the id tables
|
||||
int j = 1;
|
||||
for(int i = 1; i < this->triples_num; ++i)
|
||||
TYPE_TRIPLE_NUM j = 1;
|
||||
for(TYPE_TRIPLE_NUM i = 1; i < this->triples_num; ++i)
|
||||
{
|
||||
if(!Util::equal(_p_id_tuples[i], _p_id_tuples[i-1]))
|
||||
{
|
||||
|
@ -2497,7 +2510,7 @@ Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file)
|
|||
//_id_tuples_max = _new_tuples_len;
|
||||
//}
|
||||
|
||||
//TODO: use 3 threads to deal with sub, obj, pre separately
|
||||
//BETTER: use 3 threads to deal with sub, obj, pre separately
|
||||
//However, the cost of new /delete threads may be high
|
||||
//We need a thread pool!
|
||||
|
||||
|
@ -2712,13 +2725,7 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
|
|||
_is_new_sub = true;
|
||||
//_sub_id = this->entity_num++;
|
||||
_sub_id = this->allocEntityID();
|
||||
//cout<<"this is a new sub id"<<endl;
|
||||
//if(_sub_id == 14912)
|
||||
//{
|
||||
//cout<<"get the error one"<<endl;
|
||||
//cout<<_sub_id<<endl<<_triple.subject<<endl;
|
||||
//cout<<_triple.predicate<<endl<<_triple.object<<endl;
|
||||
//}
|
||||
//cout<<"this is a new sub id: "<<_sub_id<<endl;
|
||||
this->sub_num++;
|
||||
(this->kvstore)->setIDByEntity(_triple.subject, _sub_id);
|
||||
(this->kvstore)->setEntityByID(_sub_id, _triple.subject);
|
||||
|
@ -2780,6 +2787,7 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
|
|||
else
|
||||
{
|
||||
_obj_id = (this->kvstore)->getIDByLiteral(_triple.object);
|
||||
//cout<<"check: "<<_obj_id<<" "<<INVALID_ENTITY_LITERAL_ID<<endl;
|
||||
|
||||
//if (_obj_id == -1)
|
||||
if (_obj_id == INVALID_ENTITY_LITERAL_ID)
|
||||
|
@ -2787,8 +2795,11 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
|
|||
_is_new_obj = true;
|
||||
//_obj_id = Util::LITERAL_FIRST_ID + this->literal_num;
|
||||
_obj_id = this->allocLiteralID();
|
||||
//cout<<"this is a new obj id: "<<_obj_id<<endl;
|
||||
(this->kvstore)->setIDByLiteral(_triple.object, _obj_id);
|
||||
(this->kvstore)->setLiteralByID(_obj_id, _triple.object);
|
||||
//cout<<this->kvstore->getLiteralByID(_obj_id)<<endl;
|
||||
//cout<<this->kvstore->getIDByLiteral(_triple.object)<<endl;
|
||||
|
||||
//update the string buffer
|
||||
//TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID;
|
||||
|
@ -2944,6 +2955,7 @@ Database::removeTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
|
|||
if (obj_degree == 0)
|
||||
{
|
||||
this->kvstore->subLiteralByID(_obj_id);
|
||||
//cout<<"check after subLiteralByID: "<<_obj_id<<" "<<this->kvstore->getLiteralByID(_obj_id)<<endl;
|
||||
this->kvstore->subIDByLiteral(_triple.object);
|
||||
this->freeLiteralID(_obj_id);
|
||||
//update the string buffer
|
||||
|
@ -3230,7 +3242,7 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
|
|||
is_new_sub = true;
|
||||
subid = this->allocEntityID();
|
||||
#ifdef DEBUG
|
||||
cout << "this is a new subject: " << sub << " " << subid << endl;
|
||||
//cout << "this is a new subject: " << sub << " " << subid << endl;
|
||||
#endif
|
||||
this->sub_num++;
|
||||
this->kvstore->setIDByEntity(sub, subid);
|
||||
|
@ -3265,7 +3277,7 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
|
|||
is_new_obj = true;
|
||||
objid = this->allocEntityID();
|
||||
#ifdef DEBUG
|
||||
cout << "this is a new object: " << obj << " " << objid << endl;
|
||||
//cout << "this is a new object: " << obj << " " << objid << endl;
|
||||
#endif
|
||||
//this->obj_num++;
|
||||
this->kvstore->setIDByEntity(obj, objid);
|
||||
|
@ -3663,7 +3675,7 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
|
|||
//SigEntry _sig(it->first, it->second);
|
||||
//this->vstree->insertEntry(_sig);
|
||||
//}
|
||||
#else
|
||||
#else //USE_GROUP_INSERT
|
||||
//NOTICE:we deal with insertions one by one here
|
||||
//Callers should save the vstree(node and info) after calling this function
|
||||
for (TYPE_TRIPLE_NUM i = 0; i < _triple_num; ++i)
|
||||
|
@ -3674,7 +3686,7 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
|
|||
valid_num++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif //USE_GROUP_INSERT
|
||||
|
||||
//update string index
|
||||
this->stringindex->change(vertices, *this->kvstore, true);
|
||||
|
|
|
@ -16,7 +16,6 @@ ISArray::ISArray()
|
|||
dir_path = "";
|
||||
ISfile_name = "";
|
||||
BM = NULL;
|
||||
CurKeyNum = 0;
|
||||
CurEntryNum = 0;
|
||||
CurCacheSize = 0;
|
||||
CurEntryNumChange = false;
|
||||
|
@ -55,7 +54,6 @@ ISArray::ISArray(string _dir_path, string _filename, string mode, unsigned long
|
|||
|
||||
// temp is the smallest number >= _key_num and mod SET_KEY_INC = 0
|
||||
unsigned temp = ((_key_num + (1 << 10) - 1) >> 10) << 10;
|
||||
CurKeyNum = 0;
|
||||
CurEntryNum = max(temp, SETKEYNUM);
|
||||
CurEntryNumChange = true;
|
||||
|
||||
|
@ -341,7 +339,6 @@ ISArray::insert(unsigned _key, char *_str, unsigned _len)
|
|||
}
|
||||
|
||||
bool Alloc = false;
|
||||
CurKeyNum++;
|
||||
//if (CurKeyNum >= CurEntryNum) // need to realloc
|
||||
if (_key >= CurEntryNum)
|
||||
{
|
||||
|
@ -391,7 +388,6 @@ ISArray::remove(unsigned _key)
|
|||
return false;
|
||||
}
|
||||
|
||||
CurKeyNum--;
|
||||
|
||||
unsigned store = array[_key].getStore();
|
||||
BM->FreeBlocks(store);
|
||||
|
|
|
@ -35,7 +35,6 @@ private:
|
|||
string dir_path;
|
||||
ISBlockManager *BM;
|
||||
unsigned CurEntryNum; // how many entries are available
|
||||
unsigned CurKeyNum; // how many keys are stored
|
||||
bool CurEntryNumChange;
|
||||
|
||||
//Cache
|
||||
|
|
|
@ -16,7 +16,6 @@ IVArray::IVArray()
|
|||
dir_path = "";
|
||||
IVfile_name = "";
|
||||
BM = NULL;
|
||||
CurKeyNum = 0;
|
||||
CurEntryNum = 0;
|
||||
CurCacheSize = 0;
|
||||
CurEntryNumChange = false;
|
||||
|
@ -63,7 +62,6 @@ IVArray::IVArray(string _dir_path, string _filename, string mode, unsigned long
|
|||
|
||||
// temp is the smallest number >= _key_num and mod SET_KEY_INC = 0
|
||||
unsigned temp = ((_key_num + (1 << 10) - 1) >> 10) << 10;
|
||||
CurKeyNum = 0;
|
||||
CurEntryNum = max(temp, SETKEYNUM);
|
||||
CurEntryNumChange = true;
|
||||
|
||||
|
@ -346,7 +344,6 @@ IVArray::insert(unsigned _key, char *_str, unsigned _len)
|
|||
return false;
|
||||
}
|
||||
|
||||
CurKeyNum++;
|
||||
//if (CurKeyNum >= CurEntryNum) // need to realloc
|
||||
if (_key >= CurEntryNum)
|
||||
{
|
||||
|
@ -402,7 +399,6 @@ IVArray::remove(unsigned _key)
|
|||
return false;
|
||||
}
|
||||
|
||||
CurKeyNum--;
|
||||
|
||||
unsigned store = array[_key].getStore();
|
||||
BM->FreeBlocks(store);
|
||||
|
|
|
@ -36,7 +36,6 @@ private:
|
|||
string dir_path;
|
||||
IVBlockManager *BM;
|
||||
unsigned CurEntryNum; // how many entries are available
|
||||
unsigned CurKeyNum; // how many keys are stored
|
||||
bool CurEntryNumChange;
|
||||
|
||||
//Cache
|
||||
|
|
|
@ -346,8 +346,10 @@ IVBlockManager::FreeBlocks(const unsigned index)
|
|||
if (curlen + cur_index == it->first) // block after is free
|
||||
{
|
||||
curlen += it->second;
|
||||
auto tmp_key = it->second, tmp_val = it->first;
|
||||
index_len_map.erase(it);
|
||||
len_index_map.erase(make_pair(it->second,it->first));
|
||||
//len_index_map.erase(make_pair(it->second,it->first));
|
||||
len_index_map.erase(make_pair(tmp_key,tmp_val));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -929,7 +929,6 @@ KVstore::updateInsert_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector<
|
|||
return true;
|
||||
}
|
||||
|
||||
//TODO: TO BE IMPROVED
|
||||
bool
|
||||
KVstore::updateRemove_o2values(TYPE_ENTITY_LITERAL_ID _objid, const std::vector<unsigned>& _pidsidlist)
|
||||
{
|
||||
|
@ -1027,7 +1026,6 @@ KVstore::updateRemove_p2values(TYPE_ENTITY_LITERAL_ID _sub_id, TYPE_PREDICATE_ID
|
|||
return true;
|
||||
}
|
||||
|
||||
//TODO: TO BE IMPROVED
|
||||
bool
|
||||
KVstore::updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector<unsigned>& _sidoidlist)
|
||||
{
|
||||
|
@ -1042,7 +1040,6 @@ KVstore::updateInsert_p2values(TYPE_PREDICATE_ID _preid, const std::vector<unsig
|
|||
return true;
|
||||
}
|
||||
|
||||
//TODO: TO BE IMPROVED
|
||||
bool
|
||||
KVstore::updateRemove_p2values(TYPE_PREDICATE_ID _preid, const std::vector<unsigned>& _sidoidlist)
|
||||
{
|
||||
|
@ -1103,7 +1100,8 @@ KVstore::subIDByEntity(string _entity)
|
|||
//so _entity.c_str() is a valid const char*
|
||||
//this->load_trie();
|
||||
_entity = trie->Compress(_entity);
|
||||
return this->entity2id->remove(_entity.c_str(), _entity.length());
|
||||
//return this->entity2id->remove(_entity.c_str(), _entity.length());
|
||||
return this->removeKey(this->entity2id, _entity.c_str(), _entity.length());
|
||||
}
|
||||
|
||||
TYPE_ENTITY_LITERAL_ID
|
||||
|
@ -1170,7 +1168,8 @@ KVstore::close_id2entity()
|
|||
bool
|
||||
KVstore::subEntityByID(TYPE_ENTITY_LITERAL_ID _id)
|
||||
{
|
||||
return this->id2entity->remove(_id);
|
||||
//return this->id2entity->remove(_id);
|
||||
return this->removeKey(this->id2entity, _id);
|
||||
}
|
||||
|
||||
string
|
||||
|
@ -1262,7 +1261,8 @@ KVstore::subIDByPredicate(string _predicate)
|
|||
{
|
||||
//this->load_trie();
|
||||
_predicate = trie->Compress(_predicate);
|
||||
return this->predicate2id->remove(_predicate.c_str(), _predicate.length());
|
||||
//return this->predicate2id->remove(_predicate.c_str(), _predicate.length());
|
||||
return this->removeKey(this->predicate2id, _predicate.c_str(), _predicate.length());
|
||||
}
|
||||
|
||||
TYPE_PREDICATE_ID
|
||||
|
@ -1328,7 +1328,8 @@ KVstore::close_id2predicate()
|
|||
bool
|
||||
KVstore::subPredicateByID(TYPE_PREDICATE_ID _id)
|
||||
{
|
||||
return this->id2predicate->remove(_id);
|
||||
//return this->id2predicate->remove(_id);
|
||||
return this->removeKey(this->id2predicate, _id);
|
||||
}
|
||||
|
||||
string
|
||||
|
@ -1402,7 +1403,8 @@ KVstore::subIDByLiteral(string _literal)
|
|||
{
|
||||
//this->load_trie();
|
||||
_literal = trie->Compress(_literal);
|
||||
return this->literal2id->remove(_literal.c_str(), _literal.length());
|
||||
//return this->literal2id->remove(_literal.c_str(), _literal.length());
|
||||
return this->removeKey(this->literal2id, _literal.c_str(), _literal.length());
|
||||
}
|
||||
|
||||
TYPE_ENTITY_LITERAL_ID
|
||||
|
@ -1411,6 +1413,11 @@ KVstore::getIDByLiteral(string _literal) const
|
|||
//this->load_trie();
|
||||
_literal = trie->Compress(_literal);
|
||||
return this->getIDByStr(this->literal2id, _literal.c_str(), _literal.length());
|
||||
//TYPE_ENTITY_LITERAL_ID id = this->getIDByStr(this->literal2id, _literal.c_str(), _literal.length());
|
||||
//if(id != INVALID)
|
||||
//{
|
||||
//id += Util::LITERAL_FIRST_ID;
|
||||
//}
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -1468,7 +1475,8 @@ KVstore::close_id2literal()
|
|||
bool
|
||||
KVstore::subLiteralByID(TYPE_ENTITY_LITERAL_ID _id)
|
||||
{
|
||||
return this->id2literal->remove(_id);
|
||||
//return this->id2literal->remove(_id - Util::LITERAL_FIRST_ID);
|
||||
return this->removeKey(this->id2literal, _id);
|
||||
}
|
||||
|
||||
string
|
||||
|
|
119
Main/gbuild.cpp
119
Main/gbuild.cpp
|
@ -1,9 +1,9 @@
|
|||
/*=============================================================================
|
||||
# Filename: gbuild.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-24 19:27
|
||||
# Description: firstly written by liyouhuan, modified by zengli
|
||||
# Author: Bookug Lobert suxunbin
|
||||
# Mail: 1181955272@qq.com suxunbin@pku.edu.cn
|
||||
# Last Modified: 2018-10-19 20:30
|
||||
# Description: firstly written by liyouhuan, modified by zengli and suxunbin
|
||||
TODO: add -h/--help for help message
|
||||
=============================================================================*/
|
||||
|
||||
|
@ -11,6 +11,7 @@ TODO: add -h/--help for help message
|
|||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
#define SYSTEM_PATH "data/system/system.nt"
|
||||
|
||||
//[0]./gbuild [1]data_folder_path [2]rdf_file_path
|
||||
int
|
||||
|
@ -44,15 +45,39 @@ main(int argc, char * argv[])
|
|||
return -1;
|
||||
}
|
||||
|
||||
//check if the db_name is system
|
||||
if (_db_path == "system")
|
||||
{
|
||||
cout<< "Your database's name can not be system."<<endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
//if(_db_path[0] != '/' && _db_path[0] != '~') //using relative path
|
||||
//{
|
||||
//_db_path = string("../") + _db_path;
|
||||
//}
|
||||
string _rdf = string(argv[2]);
|
||||
|
||||
//check if the db_path is the path of system.nt
|
||||
if (_rdf == SYSTEM_PATH)
|
||||
{
|
||||
cout<< "You have no rights to access system files"<<endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
//if(_rdf[0] != '/' && _rdf[0] != '~') //using relative path
|
||||
//{
|
||||
//_rdf = string("../") + _rdf;
|
||||
//}
|
||||
|
||||
//check if the database is already built
|
||||
int isbuilt;
|
||||
if (boost::filesystem::exists(_db_path + ".db"))
|
||||
isbuilt = 1;
|
||||
else
|
||||
isbuilt = 0;
|
||||
|
||||
//build database
|
||||
Database _db(_db_path);
|
||||
bool flag = _db.build(_rdf);
|
||||
if (flag)
|
||||
|
@ -62,12 +87,96 @@ main(int argc, char * argv[])
|
|||
f.open("./"+ _db_path +".db/success.txt");
|
||||
f.close();
|
||||
}
|
||||
else
|
||||
else //if fails, drop database and return
|
||||
{
|
||||
cout << "import RDF file to database failed." << endl;
|
||||
string cmd = "rm -r " + _db_path + ".db";
|
||||
system(cmd.c_str());
|
||||
return 0;
|
||||
}
|
||||
if (!boost::filesystem::exists("system.db"))
|
||||
return 0;
|
||||
//system("clock");
|
||||
|
||||
Database system_db("system");
|
||||
system_db.load();
|
||||
|
||||
//if isbuilt is false, add database information to system.db
|
||||
if (isbuilt == 0)
|
||||
{
|
||||
string time = Util::get_date_time();
|
||||
string sparql = "INSERT DATA {<" + _db_path + "> <database_status> \"already_built\"." + "<" + _db_path + "> <built_by> <root>."
|
||||
+ "<" + _db_path + "> <built_time> \"" + time + "\".}";
|
||||
ResultSet _rs;
|
||||
FILE* ofp = stdout;
|
||||
string msg;
|
||||
int ret = system_db.query(sparql, _rs, ofp);
|
||||
if (ret <= -100) // select query
|
||||
{
|
||||
if (ret == -100)
|
||||
msg = _rs.to_str();
|
||||
else //query error
|
||||
msg = "query failed";
|
||||
}
|
||||
else //update query
|
||||
{
|
||||
if (ret >= 0)
|
||||
msg = "update num : " + Util::int2string(ret);
|
||||
else //update error
|
||||
msg = "update failed.";
|
||||
if (ret != -100)
|
||||
cout << msg << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
else //if isbuilt is true, update built_time of the database
|
||||
{
|
||||
string sparql = "DELETE {<" + _db_path + "> <built_time> ?t .}"
|
||||
+ "WHERE{<" + _db_path + "> <built_time> ?t .}";
|
||||
ResultSet _rs;
|
||||
FILE* ofp = stdout;
|
||||
string msg;
|
||||
int ret = system_db.query(sparql, _rs, ofp);
|
||||
if (ret <= -100) // select query
|
||||
{
|
||||
if (ret == -100)
|
||||
msg = _rs.to_str();
|
||||
else //query error
|
||||
msg = "query failed";
|
||||
}
|
||||
else //update query
|
||||
{
|
||||
if (ret >= 0)
|
||||
msg = "update num : " + Util::int2string(ret);
|
||||
else //update error
|
||||
msg = "update failed.";
|
||||
if (ret != -100)
|
||||
cout << msg << endl;
|
||||
}
|
||||
cout << "delete successfully" << endl;
|
||||
}
|
||||
string time = Util::get_date_time();
|
||||
string sparql = "INSERT DATA {<" + _db_path + "> <built_time> \"" + time + "\".}";
|
||||
ResultSet _rs;
|
||||
FILE* ofp = stdout;
|
||||
string msg;
|
||||
int ret = system_db.query(sparql, _rs, ofp);
|
||||
if (ret <= -100) // select query
|
||||
{
|
||||
if (ret == -100)
|
||||
msg = _rs.to_str();
|
||||
else //query error
|
||||
msg = "query failed";
|
||||
}
|
||||
else //update query
|
||||
{
|
||||
if (ret >= 0)
|
||||
msg = "update num : " + Util::int2string(ret);
|
||||
else //update error
|
||||
msg = "update failed.";
|
||||
if (ret != -100)
|
||||
cout << msg << endl;
|
||||
}
|
||||
cout << "insert successfully" << endl;
|
||||
return 0;
|
||||
}
|
||||
|
|
800
Main/ghttp.cpp
800
Main/ghttp.cpp
File diff suppressed because it is too large
Load Diff
|
@ -1,3 +1,11 @@
|
|||
/*=============================================================================
|
||||
# Filename: ginit.cpp
|
||||
# Author: suxunbin
|
||||
# Mail: suxunbin@pku.edu.cn
|
||||
# Last Modified: 2018-10-17 14:59
|
||||
# Description: used to initialize the system.db
|
||||
=============================================================================*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
|
@ -10,11 +18,13 @@ int main(int argc, char * argv[])
|
|||
if(boost::filesystem::exists("system.db"))
|
||||
return 0;
|
||||
}
|
||||
|
||||
//build system.db
|
||||
Util util;
|
||||
string _db_path = "system";
|
||||
string _rdf = "data/system/system.nt";
|
||||
Database _db(_db_path);
|
||||
bool flag = _db.build(_rdf);
|
||||
Database* _db = new Database(_db_path);
|
||||
bool flag = _db->build(_rdf);
|
||||
if (flag)
|
||||
{
|
||||
cout << "import RDF file to database done." << endl;
|
||||
|
@ -22,11 +32,44 @@ int main(int argc, char * argv[])
|
|||
f.open("./"+ _db_path +".db/success.txt");
|
||||
f.close();
|
||||
}
|
||||
else
|
||||
else //if fails, drop system.db and return
|
||||
{
|
||||
cout << "import RDF file to database failed." << endl;
|
||||
string cmd = "rm -r " + _db_path + ".db";
|
||||
system(cmd.c_str());
|
||||
delete _db;
|
||||
_db = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//insert built_time of system.db
|
||||
delete _db;
|
||||
_db = new Database(_db_path);
|
||||
_db->load();
|
||||
string time = Util::get_date_time();
|
||||
string sparql = "INSERT DATA {<system> <built_time> \"" + time + "\".}";
|
||||
ResultSet _rs;
|
||||
FILE* ofp = stdout;
|
||||
string msg;
|
||||
int ret = _db->query(sparql, _rs, ofp);
|
||||
if (ret <= -100) // select query
|
||||
{
|
||||
if (ret == -100)
|
||||
msg = _rs.to_str();
|
||||
else //query error
|
||||
msg = "query failed";
|
||||
}
|
||||
else //update query
|
||||
{
|
||||
if (ret >= 0)
|
||||
msg = "update num : " + Util::int2string(ret);
|
||||
else //update error
|
||||
msg = "update failed.";
|
||||
if (ret != -100)
|
||||
cout << msg << endl;
|
||||
}
|
||||
delete _db;
|
||||
_db = NULL;
|
||||
cout << "system.db is built successfully!" << endl;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,8 +1,19 @@
|
|||
/*=============================================================================
|
||||
# Filename: shutdown.cpp
|
||||
# Author: suxunbin
|
||||
# Mail: suxunbin@pku.edu.cn
|
||||
# Last Modified: 2018-10-16 16:15
|
||||
# Description: used to stop the ghttp server
|
||||
=============================================================================*/
|
||||
|
||||
#include "../api/http/cpp/client.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define ROOT_USERNAME "root"
|
||||
#define ROOT_PASSWORD "123456"
|
||||
|
||||
bool isNum(char *str)
|
||||
{
|
||||
for(int i = 0; i < strlen(str); i++)
|
||||
|
@ -49,6 +60,6 @@ int main(int argc, char *argv[])
|
|||
CHttpClient hc;
|
||||
string res;
|
||||
int ret;
|
||||
ret = hc.Get("http://127.0.0.1:"+port+"/?operation=stop", res);
|
||||
ret = hc.Get("http://127.0.0.1:" + port + "/?operation=stop&username=" + ROOT_USERNAME + "&password=" + ROOT_PASSWORD, res);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1078,7 +1078,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result)
|
|||
|
||||
if (!ret_result.checkUseStream())
|
||||
{
|
||||
cout << "flag2" << endl;
|
||||
//cout << "flag2" << endl;
|
||||
for (unsigned i = 0; i < ret_result.ansNum; i++)
|
||||
{
|
||||
ret_result.answer[i] = new string [ret_result.select_var_num];
|
||||
|
@ -1117,7 +1117,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result)
|
|||
}
|
||||
else
|
||||
{
|
||||
cout << "flag3" << endl;
|
||||
//cout << "flag3" << endl;
|
||||
for (unsigned i = 0; i < ret_result.ansNum; i++)
|
||||
for (int j = 0; j < ret_result.select_var_num; j++)
|
||||
{
|
||||
|
|
|
@ -20,7 +20,10 @@ This system is really user-friendly and you can pick it up in several minutes. R
|
|||
|
||||
- fork this repository in your github account
|
||||
|
||||
Then you need to compile the project, just type `make` in the gStore root directory, then all executables will be generated.
|
||||
Then you need to compile the project. The first time, you need to type `make pre` to prepare the `ANTLR` library and some Lexer/Parser programs.
|
||||
After that, you do not need to type this command again; just use the `make` command in the home directory of gStore, and all executables will be generated.
|
||||
(For faster compilation, use `make -j4` instead; how many threads to use depends on your machine.)
|
||||
To check the correctness of the program, please type `make test` command.
|
||||
|
||||
The first strategy is suggested to get the source code because you can easily acquire the updates of the code by typing `git pull` in the home directory of gStore repository.
|
||||
In addition, you can directly check the version of the code by typing `git log` to see the commit logs.
|
||||
|
@ -62,6 +65,9 @@ If you want to understand the details of the gStore system, or you want to try s
|
|||
|
||||
## Other Business
|
||||
|
||||
Bugs are recorded in [BUG REPORT](docs/BUGS.md).
|
||||
You are welcome to submit the bugs you discover if they are not already listed in this file.
|
||||
|
||||
We have written a series of short essays addressing recurring challenges in using gStore to realize applications, which are placed in [Recipe Book](docs/TIPS.md).
|
||||
|
||||
You are welcome to report any advice or errors in the GitHub Issues section of this repository if you do not require a timely reply. However, if you need us to deal with your reports urgently, please email <bookug@qq.com> to submit your suggestions, and report bugs to us by emailing <gStoreDB@gmail.com>. A full list of our whole team is in [Mailing List](docs/MAIL.md).
|
||||
|
|
|
@ -81,7 +81,7 @@ void StringIndexFile::load()
|
|||
this->empty_offset = max(this->empty_offset, (*this->index_table)[i].offset + (long)(*this->index_table)[i].length);
|
||||
}
|
||||
|
||||
trie->LoadTrie(dictionary_path);
|
||||
// trie->LoadTrie(dictionary_path);
|
||||
}
|
||||
|
||||
bool StringIndexFile::randomAccess(unsigned id, string *str, bool real)
|
||||
|
@ -110,10 +110,10 @@ bool StringIndexFile::randomAccess(unsigned id, string *str, bool real)
|
|||
//cout<<"check: "<<*str<<endl;
|
||||
//}
|
||||
|
||||
if (real)
|
||||
{
|
||||
*str = trie->Uncompress(*str, str->length());//Uncompresss
|
||||
}
|
||||
// if (real)
|
||||
// {
|
||||
// *str = trie->Uncompress(*str, str->length());//Uncompresss
|
||||
// }
|
||||
//if(id == 9)
|
||||
//{
|
||||
//cout<<"check: "<<*str<<endl;
|
||||
|
@ -194,8 +194,8 @@ void StringIndexFile::trySequenceAccess(bool real)
|
|||
this->buffer[length] = '\0';
|
||||
*this->request[pos].str = string(this->buffer);
|
||||
|
||||
*this->request[pos].str = trie->Uncompress(
|
||||
*this->request[pos].str, this->request[pos].str->length());
|
||||
// *this->request[pos].str = trie->Uncompress(
|
||||
// *this->request[pos].str, this->request[pos].str->length());
|
||||
|
||||
pos++;
|
||||
}
|
||||
|
@ -216,8 +216,8 @@ void StringIndexFile::trySequenceAccess(bool real)
|
|||
this->buffer[length] = '\0';
|
||||
*this->request[pos].str += string(this->buffer);
|
||||
|
||||
*this->request[pos].str = trie->Uncompress(
|
||||
*this->request[pos].str, this->request[pos].str->length());
|
||||
// *this->request[pos].str = trie->Uncompress(
|
||||
// *this->request[pos].str, this->request[pos].str->length());
|
||||
|
||||
pos++;
|
||||
while (pos < (int)this->request.size() && this->request[pos - 1].offset == this->request[pos].offset)
|
||||
|
|
|
@ -52,10 +52,10 @@ class StringIndexFile
|
|||
};
|
||||
std::vector<AccessRequest> request;
|
||||
|
||||
std::string dictionary_path;
|
||||
// std::string dictionary_path;
|
||||
|
||||
public:
|
||||
Trie *trie;
|
||||
//Trie *trie;
|
||||
|
||||
StringIndexFile(StringIndexFileType _type, std::string _dir, unsigned _num):type(_type), num(_num), empty_offset(0), index_file(NULL), value_file(NULL), buffer_size(0), buffer(NULL)
|
||||
{
|
||||
|
@ -67,8 +67,8 @@ class StringIndexFile
|
|||
this->loc = _dir + "/predicate_";
|
||||
this->index_table = new std::vector<IndexInfo>;
|
||||
|
||||
dictionary_path = _dir + "/../dictionary.dc";
|
||||
trie = new Trie;
|
||||
//dictionary_path = _dir + "/../dictionary.dc";
|
||||
//trie = new Trie;
|
||||
}
|
||||
~StringIndexFile()
|
||||
{
|
||||
|
@ -79,8 +79,8 @@ class StringIndexFile
|
|||
if (this->buffer != NULL)
|
||||
delete[] this->buffer;
|
||||
delete this->index_table;
|
||||
if (this->trie != NULL)
|
||||
delete trie;
|
||||
// if (this->trie != NULL)
|
||||
// delete trie;
|
||||
}
|
||||
|
||||
void clear()
|
||||
|
@ -88,7 +88,7 @@ class StringIndexFile
|
|||
this->index_file = NULL;
|
||||
this->value_file = NULL;
|
||||
this->index_table = NULL;
|
||||
this->trie = NULL;
|
||||
// this->trie = NULL;
|
||||
}
|
||||
|
||||
void setNum(unsigned _num);
|
||||
|
@ -132,12 +132,12 @@ class StringIndex
|
|||
unsigned literal_buffer_size;
|
||||
|
||||
public:
|
||||
Trie *trie;
|
||||
// Trie *trie;
|
||||
|
||||
StringIndex(std::string _dir, unsigned _entity_num = 0, unsigned _literal_num = 0, unsigned _predicate_num = 0):
|
||||
entity(StringIndexFile::Entity, _dir, _entity_num), literal(StringIndexFile::Literal, _dir, _literal_num), predicate(StringIndexFile::Predicate, _dir, _predicate_num)
|
||||
{
|
||||
trie = entity.trie;
|
||||
// trie = entity.trie;
|
||||
}
|
||||
|
||||
void clear()
|
||||
|
|
|
@ -999,6 +999,8 @@ Util::getItemsFromDir(string _path)
|
|||
//
|
||||
//http://www.cnblogs.com/wuchanming/p/3784862.html
|
||||
//http://www.cnblogs.com/sky-heaven/p/4687489.html
|
||||
//
|
||||
//BETTER: system() may have some risks, using popen() instead?
|
||||
string
|
||||
Util::getSystemOutput(string cmd)
|
||||
{
|
||||
|
@ -1007,7 +1009,7 @@ Util::getSystemOutput(string cmd)
|
|||
file += "ans.txt";
|
||||
cmd += " > ";
|
||||
cmd += file;
|
||||
cerr << cmd << endl;
|
||||
//cerr << cmd << endl;
|
||||
int ret = system(cmd.c_str());
|
||||
cmd = "rm -rf " + file;
|
||||
if(ret < 0)
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
<root> <has_password> "123456" .
|
||||
<system> <built_by> <root> .
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
<s0> <p0> <o0>.
|
|
@ -1,3 +1,23 @@
|
|||
## Oct 14, 2018
|
||||
|
||||
Xunbin, Su adds the function of shutting down the `ghttp` server. In addition, he adds the `ginit` command to the system and prepares the Node.js API.
|
||||
What is more, `drop` command is supported in `ghttp` by Xunbin, Su.
|
||||
|
||||
Chaofan, Yang fixes the bug in the encoder part of `ghttp` api.
|
||||
|
||||
Li, Zeng fixes bugs in the build process and in the indices module, and adds some scripts for the demo.
|
||||
|
||||
Yinnian, Lin finds a way to scale the contributions of all developers and Li, Zeng adds this function in Makefile.
|
||||
|
||||
Yuyan, Chen adds help documents to the system, in both Chinese and English.
|
||||
|
||||
Jing, Li uses JSON format in responses to client requests, and adds `system.db` to store information of multiple users.
|
||||
|
||||
Wenjie, Li reviews the code of the gStore system and proposes many feasible suggestions for optimization. We have made a plan to optimize the system in all aspects, and the effects of this plan will be seen in the next release version.
|
||||
When the release version rises to 1.0.0, that is the time we will call the system a formal release rather than a pre-release.
|
||||
|
||||
---
|
||||
|
||||
## Sep 20, 2018
|
||||
|
||||
Zongyue, Qin fixes the remaining bugs in Trie, ISArray and IVArray.
|
||||
|
|
23
docs/FAQ.md
23
docs/FAQ.md
|
@ -1,3 +1,26 @@
|
|||
#### How can I redirect the output from the console to disk files, even if the content is output to stderr?
|
||||
|
||||
You can redirect the output of the whole console and use `tail -f` to see the immediate updates to the disk file.
|
||||
For example, if you are using `gquery` console, you can do it in the way below:
|
||||
|
||||
```
|
||||
In one terminal, named x
|
||||
bin/gquery ${YOUR_DATABASE} >& log.txt
|
||||
type console commands in terminal x
|
||||
In another terminal y
|
||||
tail -f log.txt
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### Why can't the system run correctly after I run `make` again, even though the new code has fixed bugs or added new features?
|
||||
|
||||
Sometimes file dependency is not complete, and some objects are not re-compiled in practice.
|
||||
In this case you are advised to run `make clean` first, and then run `make`.
|
||||
|
||||
---
|
||||
|
||||
#### When I use a newer gStore system to query a database built by an older version, why do errors occur?
|
||||
|
||||
The database produced by gStore contains several indexes, whose structures may have been changed in the new gStore version. So, please rebuild your dataset just in case.
|
||||
|
|
|
@ -4,13 +4,19 @@ gStore is a green software, and you just need to compile it with two commands. P
|
|||
|
||||
```
|
||||
sudo ./scripts/setup/setup_$(ARCH).sh
|
||||
make pre
|
||||
make
|
||||
|
||||
```
|
||||
in the gStore home directory to compile the gStore code, link the ANTLR lib, and build executable "gbuild", "gquery", "ghttp", "gserver", "gclient", "gconsole".
|
||||
in the gStore home directory to prepare the dependency, link the ANTLR lib, compile the gStore code, and build executable "gbuild", "gquery", "ghttp", "gserver", "gclient", "gconsole".
|
||||
(Please substitute the $(ARCH) with your system version, like setup_archlinux.sh, setup_centos.sh and setup_ubuntu.sh)
|
||||
What is more, the api of gStore is also built now.
|
||||
|
||||
Setup scripts and dependency preparation only need to be done once, later you can directly use `make` to compile the code.
|
||||
(For faster compilation, use `make -j4` instead; how many threads to use depends on your machine)
|
||||
To check the correctness of the program, please type `make test` command.
|
||||
You need to run the `make pre` command again only after you have used the `make dist` command.
|
||||
|
||||
If you want to use API examples of gStore, please run `make APIexample` to compile example codes for both C++ API and Java API. For details of API, please visit [API](API.md) chapter.
|
||||
|
||||
Use `make clean` command to clean all objects, executables, and use `make dist` command to clean all objects, executables, libs, datasets, databases, debug logs, temp/text files in the gStore root directory.
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
#### Figures
|
||||
|
||||
The whole architecture of gStore system is presented in [Architecture](png/系统架构图_en.png).
|
||||
The thread model of 'ghttp' can be viewed in [EN](png/ghttp-thread.png) and [ZH](png/ghttp-线程.png), which shows the relationship among main process, sever thread, query thread and so on.
|
||||
The flow of answering a SPARQL query is given in [SPARQL Processing](png/查询处理过程.png), and the subprocess, which only targets at the BGP(Basic Graph Pattern) processing, is drawed in [BGP Processing](png/BGP.png).
|
||||
The whole architecture of gStore system is presented in [Architecture](png/system_architecture_en.png).
|
||||
The thread model of 'ghttp' can be viewed in [EN](png/ghttp_thread_model_en.png) and [ZH](png/ghttp_thread_model_zh.png), which shows the relationship among main process, server thread, query thread and so on.
|
||||
The flow of answering a SPARQL query is given in [SPARQL Processing](png/sparql_processing.png), and the subprocess, which only targets at the BGP(Basic Graph Pattern) processing, is drawn in [BGP Processing](png/BGP.png).
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -11,6 +11,11 @@ Tabs, '<' and '>' are not allowed to appear in entity, literal or predicates of
|
|||
|
||||
#### 1. gbuild
|
||||
|
||||
As long as you download and compile the code of gStore system, a database named `system`(the real directory name is `system.db`) will be created automatically.
|
||||
This is the database that manages the information of system statistics, including all users and all databases.
|
||||
You can query this database using `gquery` command, but you are forbidden to modify it using editors.
|
||||
The only way to change this database is the operations on users and databases, which are sent to `ghttp` server.
|
||||
|
||||
gbuild is used to build a new database from a RDF triple format file.
|
||||
|
||||
`# bin/gbuild db_name rdf_triple_file_path`
|
||||
|
@ -85,6 +90,8 @@ Notice:
|
|||
|
||||
- path completion is supported for utility. (not built-in command completion)
|
||||
|
||||
- To output the result to disk files, use `sparql ${YOUR_QUERY} > ${YOUR_FILE}` in the console.
|
||||
|
||||
- - -
|
||||
|
||||
#### 3. ghttp
|
||||
|
@ -127,10 +134,13 @@ cout << answer << std::endl;
|
|||
gc.unload("lubm", "root", "123456");
|
||||
|
||||
// show all databases already built and if they are loaded
|
||||
gc.show();
|
||||
gc.show("root", "123456");
|
||||
|
||||
// show statistical information of a loaded database
|
||||
gc.monitor("lubm");
|
||||
gc.monitor("lubm", "root", "123456");
|
||||
|
||||
// save updates of a loaded database
|
||||
gc.checkpoint("lubm", "root", "123456");
|
||||
|
||||
//add a user(with username: Jack, password: 2)
|
||||
answer = gc.user("add_user", "root", "123456", "Jack", "2");
|
||||
|
@ -323,7 +333,29 @@ If you want to restore the initial configuration of the ghttp server, type `bin/
|
|||
|
||||
#### 13. test utilities
|
||||
|
||||
A series of test program are placed in the scripts/ folder, and we will introduce the two useful ones: gtest.cpp and full_test.sh
|
||||
A series of test programs are placed in the `scripts/` folder, and we will introduce several useful ones: `full_test.sh`, `basic_test.sh`, `update_test.cpp` and `gtest.cpp`.
|
||||
|
||||
**`full_test.sh` is used to compare the performance of gStore and other database systems on multiple datasets and queries.**
|
||||
|
||||
To use `full_test.sh` utility, please download the database system which you want to test and compare, and set the exact position of database systems and datasets in this script. The name strategy should be the same as the requirements of gtest, as well as the logs strategy.
|
||||
|
||||
Only gStore and Jena are tested and compared in this script, but it is easy to add other database systems, if you would like to spend some time on reading this script. You may go to [test report](pdf/gstore测试报告.pdf) or [Frequently Asked Questions](FAQ.md) for help if you encounter a problem.
|
||||
|
||||
**`basic_test.sh` is used to verify the correctness of build/query/add/sub on several small datasets.**
|
||||
|
||||
Just run `bash scripts/basic_test.sh` to use this script.
|
||||
In fact, `make test` will conduct `basic_test.sh` above and `update_test.cpp` below.
|
||||
You are advised to finish this verification each time after you add some modifications and compile again(including the case that you update the code using `git pull`).
|
||||
|
||||
**`update_test.cpp` is used to verify the correctness of repeated insertion/deletion.**
|
||||
|
||||
To use this utility, you will find `update_test` executable under the `bin/` directory after you compile the whole project with `make`.
|
||||
Run `bin/update_test > /dev/null` to finish this test, and you will see the output in the end indicating whether successful or not.
|
||||
This command will test 10000 groups of insertions/deletions by default, to change the group number you can run in the way below:
|
||||
|
||||
```
|
||||
bin/update_test ${YOUR_GROUP_NUMBER}
|
||||
```
|
||||
|
||||
**gtest is used to test gStore with multiple datasets and queries.**
|
||||
|
||||
|
@ -343,9 +375,3 @@ Then you can run the gtest program with specified parameters, and the output wil
|
|||
|
||||
All logs produced by this program are in TSV format(separated with '\t'), you can load them into Calc/Excel/Gnumeric directly. Notice that time unit is ms, and space unit is kb.
|
||||
|
||||
**full_test.sh is used to compare the performance of gStore and other database systems on multiple datasets and queries.**
|
||||
|
||||
To use full_test.sh utility, please download the database system which you want to tats and compare, and set the exact position of database systems and datasets in this script. The name strategy should be the same as the requirements of gtest, as well as the logs strategy.
|
||||
|
||||
Only gStore and Jena are tested and compared in this script, but it is easy to add other database systems, if you would like to spend some time on reading this script. You may go to [test report](pdf/gstore测试报告.pdf) or [Frequently Asked Questions](FAQ.md) for help if you encounter a problem.
|
||||
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
Binary file not shown.
After Width: | Height: | Size: 30 KiB |
Binary file not shown.
After Width: | Height: | Size: 117 KiB |
96
makefile
96
makefile
|
@ -18,20 +18,16 @@
|
|||
#(also include good comments norm)
|
||||
#http://blog.csdn.net/u010740725/article/details/51387810
|
||||
|
||||
#NOTICE: to speed up the make process, use make -j4
|
||||
#use -j8 or higher may cause error
|
||||
#http://blog.csdn.net/cscrazybing/article/details/50789482
|
||||
#http://blog.163.com/liuhonggaono1@126/blog/static/10497901201210254622141/
|
||||
|
||||
|
||||
#TODO:the dependences are not complete!
|
||||
|
||||
#TODO: parallel -pthread
|
||||
|
||||
#TODO: judge and decide using which program
|
||||
#CC=$(shell which clang 2>/dev/null || which gcc)
|
||||
#ccache, readline, gcov lcov
|
||||
#http://blog.csdn.net/u012421852/article/details/52138960
|
||||
#
|
||||
# How to speed up the compilation
|
||||
# https://blog.csdn.net/a_little_a_day/article/details/78251928
|
||||
# use make -j4, if error then use make utilizing only one thread
|
||||
#use -j8 or higher may cause error
|
||||
#http://blog.csdn.net/cscrazybing/article/details/50789482
|
||||
#http://blog.163.com/liuhonggaono1@126/blog/static/10497901201210254622141/
|
||||
|
||||
#compile parameters
|
||||
|
||||
|
@ -44,11 +40,11 @@ CC = g++
|
|||
#NOTICE: -O2 is recommended, while -O3(add loop-unroll and inline-function) is dangerous
|
||||
#when developing, not use -O because it will disturb the normal
|
||||
#routine. use it for test and release.
|
||||
#CFLAGS = -c -Wall -O2 -pthread -std=c++11
|
||||
#XEFLAG = -O2 -pthread -std=c++11
|
||||
CFLAGS = -c -Wall -O2 -pthread -std=c++11
|
||||
EXEFLAG = -O2 -pthread -std=c++11
|
||||
#-coverage
|
||||
CFLAGS = -c -Wall -pthread -g -std=c++11
|
||||
EXEFLAG = -pthread -g -std=c++11
|
||||
#CFLAGS = -c -Wall -pthread -g -std=c++11 -pg
|
||||
#EXEFLAG = -pthread -g -std=c++11 -pg
|
||||
|
||||
#add -lreadline [-ltermcap] if using readline or objs contain readline
|
||||
library = -lreadline -L./lib -L/usr/local/lib -lantlr -lgcov -lboost_thread -lboost_filesystem -lboost_system -lboost_regex -lpthread -I/usr/local/include/boost -lcurl
|
||||
|
@ -63,6 +59,8 @@ objdir = .objs/
|
|||
|
||||
exedir = bin/
|
||||
|
||||
testdir = scripts/
|
||||
|
||||
lib_antlr = lib/libantlr.a
|
||||
|
||||
api_cpp = api/socket/cpp/lib/libgstoreconnector.a
|
||||
|
@ -115,12 +113,11 @@ inc = -I./tools/libantlr3c-3.4/ -I./tools/libantlr3c-3.4/include
|
|||
|
||||
#gtest
|
||||
|
||||
TARGET = $(exedir)gbuild $(exedir)gserver $(exedir)gserver_backup_scheduler $(exedir)gclient $(exedir)gquery $(exedir)gconsole $(api_java) $(exedir)gadd $(exedir)gsub $(exedir)ghttp $(exedir)gmonitor $(exedir)gshow $(exedir)shutdown $(exedir)ginit
|
||||
TARGET = $(exedir)gbuild $(exedir)gserver $(exedir)gserver_backup_scheduler $(exedir)gclient $(exedir)gquery $(exedir)gconsole $(api_java) $(exedir)gadd $(exedir)gsub $(exedir)ghttp $(exedir)gmonitor $(exedir)gshow $(exedir)shutdown $(exedir)ginit $(exedir)update_test
|
||||
|
||||
all: $(TARGET)
|
||||
bash scripts/test.sh
|
||||
test_index: test_index.cpp
|
||||
$(CC) $(EXEFLAG) -o test_index test_index.cpp $(objfile) $(library) $(openmp)
|
||||
@echo "Compilation ends successfully!"
|
||||
@bash scripts/init.sh
|
||||
|
||||
#BETTER: use for loop to reduce the lines
|
||||
#NOTICE: g++ -MM will run error if linking failed, like Database.h/../SparlParser.h/../antlr3.h
|
||||
|
@ -132,8 +129,8 @@ test_index: test_index.cpp
|
|||
$(exedir)ginit: $(lib_antlr) $(objdir)ginit.o $(objfile)
|
||||
$(CC) $(EXEFLAG) -o $(exedir)ginit $(objdir)ginit.o $(objfile) $(library) $(openmp)
|
||||
|
||||
$(exedir)shutdown: $(lib_antlr) $(objdir)shutdown.o $(objfile)
|
||||
$(CC) $(EXEFLAG) -o $(exedir)shutdown $(objdir)shutdown.o $(objfile) $(library) $(openmp) -L./api/http/cpp/lib -lclient
|
||||
$(exedir)shutdown: $(lib_antlr) $(objdir)shutdown.o $(objfile) $(api_cpp)
|
||||
$(CC) $(EXEFLAG) -o $(exedir)shutdown $(objdir)shutdown.o $(objfile) $(openmp) -L./api/http/cpp/lib -lclient $(library)
|
||||
|
||||
$(exedir)gmonitor: $(lib_antlr) $(objdir)gmonitor.o $(objfile)
|
||||
$(CC) $(EXEFLAG) -o $(exedir)gmonitor $(objdir)gmonitor.o $(objfile) $(library) $(openmp)
|
||||
|
@ -162,22 +159,23 @@ $(exedir)gconsole: $(lib_antlr) $(objdir)gconsole.o $(objfile) $(api_cpp)
|
|||
$(exedir)ghttp: $(lib_antlr) $(objdir)ghttp.o ./Server/server_http.hpp ./Server/client_http.hpp $(objfile)
|
||||
$(CC) $(EXEFLAG) -o $(exedir)ghttp $(objdir)ghttp.o $(objfile) $(library) $(inc) -DUSE_BOOST_REGEX $(openmp)
|
||||
|
||||
|
||||
$(exedir)update_test: $(lib_antlr) $(objdir)update_test.o $(objfile)
|
||||
$(CC) $(EXEFLAG) -o $(exedir)update_test $(objdir)update_test.o $(objfile) $(library) $(openmp)
|
||||
#executables end
|
||||
|
||||
|
||||
#objects in Main/ begin
|
||||
|
||||
$(objdir)ginit.o: Main/ginit.cpp $(lib_antlr)
|
||||
$(objdir)ginit.o: Main/ginit.cpp Database/Database.h Util/Util.h $(lib_antlr)
|
||||
$(CC) $(CFLAGS) Main/ginit.cpp $(inc) -o $(objdir)ginit.o $(openmp)
|
||||
|
||||
$(objdir)shutdown.o: Main/shutdown.cpp $(lib_antlr)
|
||||
$(objdir)shutdown.o: Main/shutdown.cpp Database/Database.h Util/Util.h $(lib_antlr)
|
||||
$(CC) $(CFLAGS) Main/shutdown.cpp $(inc) -o $(objdir)shutdown.o $(openmp)
|
||||
|
||||
$(objdir)gmonitor.o: Main/gmonitor.cpp $(lib_antlr)
|
||||
$(objdir)gmonitor.o: Main/gmonitor.cpp Database/Database.h Util/Util.h $(lib_antlr)
|
||||
$(CC) $(CFLAGS) Main/gmonitor.cpp $(inc) -o $(objdir)gmonitor.o $(openmp)
|
||||
|
||||
$(objdir)gshow.o: Main/gshow.cpp $(lib_antlr)
|
||||
$(objdir)gshow.o: Main/gshow.cpp Database/Database.h Util/Util.h $(lib_antlr)
|
||||
$(CC) $(CFLAGS) Main/gshow.cpp $(inc) -o $(objdir)gshow.o $(openmp)
|
||||
|
||||
$(objdir)gbuild.o: Main/gbuild.cpp Database/Database.h Util/Util.h $(lib_antlr)
|
||||
|
@ -204,6 +202,11 @@ $(objdir)ghttp.o: Main/ghttp.cpp Server/server_http.hpp Server/client_http.hpp D
|
|||
|
||||
#objects in Main/ end
|
||||
|
||||
#objects in scripts/ begin
|
||||
$(objdir)update_test.o: scripts/update_test.cpp Database/Database.h Util/Util.h $(lib_antlr)
|
||||
$(CC) $(CFLAGS) scripts/update_test.cpp $(inc) -o $(objdir)update_test.o $(openmp)
|
||||
#objects in scripts/ end
|
||||
|
||||
|
||||
#objects in kvstore/ begin
|
||||
|
||||
|
@ -484,16 +487,24 @@ $(objdir)Client.o: Server/Client.cpp Server/Client.h $(objdir)Socket.o $(objdir)
|
|||
#objects in Server/ end
|
||||
|
||||
|
||||
$(lib_antlr):
|
||||
pre:
|
||||
rm -rf tools/libantlr3c-3.4/
|
||||
cd tools; tar -xzvf libantlr3c-3.4.tar.gz;
|
||||
cd tools; cd libantlr3c-3.4/; ./configure -enable-64bit; make;
|
||||
rm -rf lib/libantlr.a
|
||||
ar -crv lib/libantlr.a tools/libantlr3c-3.4/*.o
|
||||
#NOTICE: update the sparql.tar.gz if Sparql* in Parser are changed manually
|
||||
##NOTICE: update the sparql.tar.gz if Sparql* in Parser are changed manually
|
||||
rm -rf Parser/Sparql*
|
||||
cd tools; tar -xzvf sparql.tar.gz; mv Sparql* ../Parser/;
|
||||
|
||||
# DEBUG: below not works properly
|
||||
#Parser/SparqlLexer.c Parser/SparqlLexer.h Parser/SparqlParser.h Parser/SparqlParser.c: unpack_sparql
|
||||
#.INTERMEDIATE: unpack_sparql
|
||||
#unpack_sparql: tools/sparql.tar.gz
|
||||
##NOTICE: update the sparql.tar.gz if Sparql* in Parser are changed manually
|
||||
#rm -rf Parser/Sparql*
|
||||
#cd tools; tar -xzvf sparql.tar.gz; mv Sparql* ../Parser/;
|
||||
|
||||
$(api_cpp): $(objdir)Socket.o
|
||||
$(MAKE) -C api/socket/cpp/src
|
||||
$(MAKE) -C api/http/cpp/
|
||||
|
@ -503,10 +514,16 @@ $(api_java):
|
|||
$(MAKE) -C api/socket/java/src
|
||||
$(MAKE) -C api/http/java/src
|
||||
|
||||
.PHONY: clean dist tarball api_example gtest sumlines
|
||||
.PHONY: clean dist tarball api_example gtest sumlines contribution test
|
||||
|
||||
test: $(TARGET)
|
||||
@echo "basic build/query/add/sub test"
|
||||
@bash scripts/basic_test.sh
|
||||
@echo "repeatedly insertion/deletion test"
|
||||
@bin/update_test > /dev/null
|
||||
|
||||
clean:
|
||||
rm -rf lib/libantlr.a
|
||||
#rm -rf lib/libantlr.a
|
||||
$(MAKE) -C api/socket/cpp/src clean
|
||||
$(MAKE) -C api/socket/cpp/example clean
|
||||
$(MAKE) -C api/socket/java/src clean
|
||||
|
@ -516,22 +533,24 @@ clean:
|
|||
$(MAKE) -C api/http/java/src clean
|
||||
$(MAKE) -C api/http/java/example clean
|
||||
#$(MAKE) -C KVstore clean
|
||||
rm -rf $(exedir)g* $(objdir)*.o $(exedir).gserver*
|
||||
rm -rf $(exedir)g* $(objdir)*.o $(exedir).gserver* $(exedir)shutdown $(exedir).gconsole*
|
||||
rm -rf bin/*.class
|
||||
rm -rf bin/update_test
|
||||
#rm -rf .project .cproject .settings just for eclipse
|
||||
#rm -rf cscope* just for vim
|
||||
rm -rf logs/*.log
|
||||
rm -rf *.out # gmon.out for gprof with -pg
|
||||
|
||||
dist: clean
|
||||
rm -rf *.nt *.n3 .debug/*.log .tmp/*.dat *.txt *.db
|
||||
rm -rf tools/libantlr3c-3.4 lib/libantlr.a Parser/Sparql*
|
||||
#rm -rf Parser/SparqlLexer* Parser/SparlParser.cpp
|
||||
rm -rf cscope* .cproject .settings tags
|
||||
rm -rf *.info
|
||||
rm -rf backups/*.db
|
||||
|
||||
tarball:
|
||||
tar -czvf devGstore.tar.gz api bin lib tools .debug .tmp .objs scripts garbage docs data makefile \
|
||||
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex COVERAGE
|
||||
tar -czvf gstore.tar.gz api backups bin lib tools .debug .tmp .objs scripts garbage docs data logs \
|
||||
Main Database KVstore Util Query Signature VSTree Parser Server README.md init.conf NOTES.md StringIndex COVERAGE \
|
||||
Dockerfile LICENSE makefile Trie
|
||||
|
||||
APIexample: $(api_cpp) $(api_java)
|
||||
$(MAKE) -C api/socket/cpp/example
|
||||
|
@ -564,7 +583,7 @@ $(objdir)gsub.o: Main/gsub.cpp
|
|||
$(CC) $(CFLAGS) Main/gsub.cpp $(inc) -o $(objdir)gsub.o $(openmp)
|
||||
|
||||
sumlines:
|
||||
bash scripts/sumline.sh
|
||||
@bash scripts/sumline.sh
|
||||
|
||||
tag:
|
||||
ctags -R
|
||||
|
@ -587,3 +606,8 @@ fulltest:
|
|||
test-kvstore:
|
||||
# test/kvstore_test.cpp
|
||||
echo "TODO"
|
||||
|
||||
# https://segmentfault.com/a/1190000008542123
|
||||
contribution:
|
||||
bash scripts/contribution.sh
|
||||
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
|
||||
#set -v
|
||||
|
||||
#initialize system.db
|
||||
"bin/ginit" "--make" >& /dev/null
|
||||
|
||||
#test
|
||||
db=("bbug" "lubm" "num" "small")
|
||||
op=("bin/gbuild" "bin/gquery" "bin/gadd" "bin/gsub")
|
||||
path="./data/"
|
||||
|
@ -141,6 +145,3 @@ then
|
|||
else
|
||||
echo "Test failed!"
|
||||
fi
|
||||
|
||||
"bin/ginit" "--make"
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# NOTICE: we adopt the scoring strategy that the lines added and removed by a developer are accumulated as their contribution
|
||||
git log --format='%aN' | sort -u | while read name; do echo -en "$name\t"; git log --author="$name" --pretty=tformat: --numstat | awk '{ add += $1; subs += $2; loc += $1 + $2 } END { printf "added lines: %s, removed lines: %s, total lines: %s\n", add, subs, loc }' -; done
|
||||
#git log --format='%aN' | sort -u | while read name; do echo -en "$name\t"; git log --author="$name" --pretty=tformat: --numstat | awk '{ add += $1; subs += $2; loc += $1 - $2 } END { printf "added lines: %s, removed lines: %s, total lines: %s\n", add, subs, loc }' -; done
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
|
||||
|
||||
#set -v
|
||||
|
||||
#initialize system.db
|
||||
"bin/ginit" "--make" >& /dev/null
|
||||
|
|
@ -35,7 +35,7 @@ cd boost_1_54_0
|
|||
# by default: /usr/local/include and /usr/local/lib
|
||||
./bootstrap.sh
|
||||
./b2
|
||||
sudo ./b2 install
|
||||
./b2 install
|
||||
echo "/usr/local/lib" >> /etc/ld.so.conf
|
||||
cd ../..
|
||||
rm -rf boost
|
||||
|
|
|
@ -0,0 +1,196 @@
|
|||
/*=============================================================================
|
||||
# Filename: update_test.cpp
|
||||
# Author: suxunbin
|
||||
# Mail: suxunbin@pku.edu.cn
|
||||
# Last Modified: 2018-10-25 21:25
|
||||
# Description: used to test the correctness of update triples
|
||||
=============================================================================*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//triple information
|
||||
class triple{
|
||||
public:
|
||||
string subject;
|
||||
string predicate;
|
||||
string object;
|
||||
|
||||
triple(int s, int p, int o){
|
||||
subject = "<s" + Util::int2string(s) + ">";
|
||||
predicate = "<p" + Util::int2string(p) + ">";
|
||||
object = "<o" + Util::int2string(o) + ">";
|
||||
}
|
||||
triple(string s, string p, string o){
|
||||
subject = s;
|
||||
predicate = p;
|
||||
object = o;
|
||||
}
|
||||
inline bool operator<(const triple& t) const
|
||||
{
|
||||
if (this->subject < t.subject)
|
||||
return true;
|
||||
else if (this->subject > t.subject)
|
||||
return false;
|
||||
else
|
||||
{
|
||||
if (this->predicate < t.predicate)
|
||||
return true;
|
||||
else if (this->predicate > t.predicate)
|
||||
return false;
|
||||
else
|
||||
{
|
||||
if (this->object < t.object)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
inline bool operator==(const triple& t) const
|
||||
{
|
||||
if (this->subject != t.subject)
|
||||
return false;
|
||||
else
|
||||
{
|
||||
if (this->predicate != t.predicate)
|
||||
return false;
|
||||
else
|
||||
{
|
||||
if (this->object != t.object)
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Expected database contents: main() inserts a triple here for every
// INSERT DATA it issues and erases it for every DELETE DATA.
std::set<triple> update_triples;
|
||||
// Actual database contents: refilled by main() from the result of a
// "select ?s ?p ?o" query after each group of updates.
std::set<triple> db_triples;
|
||||
// Handle to the database under test; allocated and released by main().
Database* db;
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
//build update_test.db
|
||||
Util util;
|
||||
int test_group_num = 10000;
|
||||
if(argc > 1)
|
||||
{
|
||||
test_group_num = atoi(argv[1]);
|
||||
}
|
||||
int test_group_size = 5;
|
||||
int test_value_region = 10;
|
||||
|
||||
string db_name = "update_test";
|
||||
string db_path = "data/update_test.nt";
|
||||
db = new Database(db_name);
|
||||
bool flag = db->build(db_path);
|
||||
if (flag)
|
||||
{
|
||||
cerr << "update_test.db is built done." << endl;
|
||||
ofstream f;
|
||||
f.open("./" + db_name + ".db/success.txt");
|
||||
f.close();
|
||||
}
|
||||
else //if fails, drop update_test.db and return
|
||||
{
|
||||
cerr << "update_test.db is built failed." << endl;
|
||||
string cmd = "rm -r " + db_name + ".db";
|
||||
system(cmd.c_str());
|
||||
delete db;
|
||||
db = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//load update_test.db
|
||||
delete db;
|
||||
db = new Database(db_name);
|
||||
db->load();
|
||||
|
||||
//update triples test
|
||||
srand((unsigned)time(NULL));
|
||||
update_triples.clear();
|
||||
triple temp(0, 0, 0);
|
||||
update_triples.insert(temp);
|
||||
for (int i = 0; i < test_group_num; i++)
|
||||
{
|
||||
int a = rand() % test_group_size + 1;
|
||||
int b = rand() % test_group_size + 1;
|
||||
for (int j = 0; j < a; j++)
|
||||
{
|
||||
int s = rand() % test_value_region;
|
||||
int p = rand() % test_value_region;
|
||||
int o = rand() % test_value_region;
|
||||
triple t(s, p, o);
|
||||
update_triples.insert(t);
|
||||
string query = "INSERT DATA{" + t.subject + " " + t.predicate + " " + t.object + ".}";
|
||||
ResultSet _rs;
|
||||
FILE* ofp = stdout;
|
||||
int ret = db->query(query, _rs, ofp);
|
||||
}
|
||||
for (int j = 0; j < b; j++)
|
||||
{
|
||||
int s = rand() % test_value_region;
|
||||
int p = rand() % test_value_region;
|
||||
int o = rand() % test_value_region;
|
||||
triple t(s, p, o);
|
||||
std::set<triple>::iterator it = update_triples.find(t);
|
||||
if (it != update_triples.end())
|
||||
update_triples.erase(it);
|
||||
string query = "DELETE DATA{" + t.subject + " " + t.predicate + " " + t.object + ".}";
|
||||
ResultSet _rs;
|
||||
FILE* ofp = stdout;
|
||||
int ret = db->query(query, _rs, ofp);
|
||||
}
|
||||
db_triples.clear();
|
||||
string query = "select ?s ?p ?o where{?s ?p ?o.}";
|
||||
ResultSet _rs;
|
||||
FILE* ofp = NULL;
|
||||
int ret = db->query(query, _rs, ofp);
|
||||
for (int i = 0; i < _rs.ansNum; i++)
|
||||
{
|
||||
string s = _rs.answer[i][0];
|
||||
string p = _rs.answer[i][1];
|
||||
string o = _rs.answer[i][2];
|
||||
triple t(s, p, o);
|
||||
db_triples.insert(t);
|
||||
}
|
||||
if (update_triples.size() != db_triples.size())
|
||||
{
|
||||
cerr << "Update triples exist errors." << endl;
|
||||
delete db;
|
||||
db = NULL;
|
||||
string cmd = "rm -r " + db_name + ".db";
|
||||
system(cmd.c_str());
|
||||
return 0;
|
||||
}
|
||||
std::set<triple>::iterator it1;
|
||||
std::set<triple>::iterator it2;
|
||||
for (it1 = update_triples.begin(), it2 = db_triples.begin(); it1 != update_triples.end(); it1++, it2++)
|
||||
{
|
||||
if (*it1 == *it2)
|
||||
continue;
|
||||
else
|
||||
{
|
||||
cerr << "Update triples exist errors." << endl;
|
||||
delete db;
|
||||
db = NULL;
|
||||
string cmd = "rm -r " + db_name + ".db";
|
||||
system(cmd.c_str());
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete db;
|
||||
db = NULL;
|
||||
string cmd = "rm -r " + db_name + ".db";
|
||||
system(cmd.c_str());
|
||||
cerr << "Test passed!" << endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue