Merge branch 'dev' into ywang

This commit is contained in:
ywang1111 2018-10-03 23:50:05 +08:00
commit 6e8f35629e
163 changed files with 1989 additions and 6121 deletions

View File

@ -2,7 +2,7 @@
# Filename: Database.cpp
# Author: Bookug Lobert
# Mail: 1181955272@qq.com
# Last Modified: 2016-09-11 15:27
# Last Modified: 2018-09-05 14:20
# Description: originally written by liyouhuan, modified by zengli and chenjiaqi
=============================================================================*/
@ -576,13 +576,15 @@ Database::setPreMap()
void
Database::setStringBuffer()
{
//TODO: assign according to memory manager
//BETTER: assign according to memory manager
//BETTER?maybe different size for entity and literal, maybe different offset should be used
this->entity_buffer_size = (this->limitID_entity<50000000) ? this->limitID_entity : 50000000;
this->literal_buffer_size = (this->limitID_literal<50000000) ? this->limitID_literal : 50000000;
this->entity_buffer = new Buffer(this->entity_buffer_size);
this->literal_buffer = new Buffer(this->literal_buffer_size);
//DEBUG: insert/delete we should update the size of buffer if adding new string
//WARN: after delete and insert, IDs may be not continuous, then the string buffer will cause errors!
TYPE_ENTITY_LITERAL_ID valid = 0, i;
string str;
for (i = 0; i < this->entity_buffer_size; ++i)
@ -700,13 +702,15 @@ Database::load()
id2literal_thread.join();
#endif
//TODO+BETTER: if we set string buffer using string index instead of B+Tree, then we can
//BETTER: if we set string buffer using string index instead of B+Tree, then we can
//avoid to load id2entity and id2literal in ONLY_READ mode
//generate the string buffer for entity and literal, no need for predicate
//NOTICE:the total string size should not exceed 20G, assume that most strings length < 500
//too many empty between entity and literal, so divide them
this->setStringBuffer();
//this->setStringBuffer();
//NOTICE: we should build string buffer from kvstore, not string index
//Because when searching in string index, it will first check if in buffer(but the buffer is being built)
@ -726,7 +730,7 @@ Database::load()
//vstree_thread.join();
#endif
//load cache of sub2values and obj2values
//this->load_cache();
this->load_cache();
//warm up always as finishing build(), to utilize the system buffer
//this->warmUp();
@ -748,7 +752,7 @@ Database::load()
cout << "finish load" << endl;
//TODO: for only-read application(like endpoint), 3 id2values trees can be closed now
//BETTER: for only-read application(like endpoint), 3 id2values trees can be closed now
//and we should load all trees on only READ mode
//HELP: just for checking infos(like kvstore)
@ -767,7 +771,7 @@ Database::load_cache()
{
// get important pre ID
// a pre whose degree is more than 50% of max pre degree is important pre
/* cout << "get important pre ID" << endl;
cout << "get important pre ID" << endl;
this->get_important_preID();
cout << "total preID num is " << pre_num << endl;
cout << "important pre ID is: ";
@ -776,7 +780,7 @@ Database::load_cache()
cout << endl;
this->load_candidate_pre2values();
this->load_important_sub2values();
this->load_important_obj2values();*/
this->load_important_obj2values();
}
void
@ -843,7 +847,7 @@ Database::get_candidate_preID()
_size = this->kvstore->getPreListSize(i);
if (!VList::isLongList(_size)) continue; // only long list need to be stored in cache
if (!VList::isLongList(_size) || _size >= max_total_size) continue; // only long list need to be stored in cache
_value = pre2num[i];
if (_value == 0) continue;
@ -888,56 +892,34 @@ Database::get_candidate_preID()
void
Database::build_CacheOfPre2values()
{
/* cout << "now add cache of preID2values..." << endl;
priority_queue <KEY_SIZE_VALUE, vector<KEY_SIZE_VALUE>, CmpByMod<2000> > temp_queue;
cout << "now add cache of preID2values..." << endl;
while (!candidate_preID.empty())
{
temp_queue.push(candidate_preID.top());
this->kvstore->AddIntoPreCache(candidate_preID.top().key);
candidate_preID.pop();
}
while (!temp_queue.empty())
{
//cout << "add key " << important_objID.top().key << " size: " << important_objID.top().size << endl;
this->kvstore->AddIntoPreCache(temp_queue.top().key);
temp_queue.pop();
}*/
}
void
Database::build_CacheOfObj2values()
{
/* cout << "now add cache of objID2values..." << endl;
// sort key according to their mod by 2000
priority_queue <KEY_SIZE_VALUE, vector<KEY_SIZE_VALUE>, CmpByMod<2000> > temp_queue;
cout << "now add cache of objID2values..." << endl;
while (!important_objID.empty())
{
temp_queue.push(important_objID.top());
this->kvstore->AddIntoObjCache(important_objID.top().key);
important_objID.pop();
}
while (!temp_queue.empty())
{
//cout << "add key " << important_objID.top().key << " size: " << important_objID.top().size << endl;
this->kvstore->AddIntoObjCache(temp_queue.top().key);
temp_queue.pop();
}*/
}
void
Database::build_CacheOfSub2values()
{
/* cout << "now add cache of subID2values..." << endl;
priority_queue <KEY_SIZE_VALUE, vector<KEY_SIZE_VALUE>, CmpByMod<2000> > temp_queue;
cout << "now add cache of subID2values..." << endl;
while (!important_subID.empty())
{
temp_queue.push(important_subID.top());
this->kvstore->AddIntoSubCache(important_subID.top().key);
important_subID.pop();
}
while (!temp_queue.empty())
{
//cout << "add key " << important_objID.top().key << " size: " << important_objID.top().size << endl;
this->kvstore->AddIntoSubCache(temp_queue.top().key);
temp_queue.pop();
}*/
}
void
@ -956,7 +938,7 @@ Database::get_important_subID()
unsigned _size = 0;
if (this->kvstore->getEntityByID(i) == invalid) continue;
_size = this->kvstore->getSubListSize(i);
if (!VList::isLongList(_size)) continue; // only long list need to be stored in cache
if (!VList::isLongList(_size) || _size >= max_total_size) continue; // only long list need to be stored in cache
for(unsigned j = 0; j < important_preID.size(); ++j)
{
@ -1019,7 +1001,7 @@ Database::get_important_objID()
if (_tmp == invalid) continue;
_size = this->kvstore->getObjListSize(i);
if (!VList::isLongList(_size)) continue; // only long list need to be stored in cache
if (!VList::isLongList(_size) || _size >= max_total_size) continue; // only long list need to be stored in cache
for(unsigned j = 0; j < important_preID.size(); ++j)
{
@ -1247,7 +1229,6 @@ Database::unload()
delete this->literal_buffer;
this->literal_buffer = NULL;
//TODO: fflush the database file
//this->vstree->saveTree();
//delete this->vstree;
//this->vstree = NULL;
@ -1283,10 +1264,7 @@ bool Database::save()
this->saveDBInfoFile();
this->saveIDinfo();
//TODO: fsync or using sync in Util
//should sync every file modified
//TODO: add flush for string index
//this->stringindex->flush();
this->stringindex->flush();
this->clear_update_log();
cerr<<"database checkpoint: "<<this->getName()<<endl;
@ -1347,6 +1325,60 @@ Database::getPreNum()
return this->pre_num;
}
// Accessor for the VS-Tree index owned by this Database.
VSTree* Database::getVSTree()
{
	return vstree;
}
// Accessor for the key-value store owned by this Database.
KVstore* Database::getKVstore()
{
	return kvstore;
}
// Accessor for the string index owned by this Database.
StringIndex* Database::getStringIndex()
{
	return stringindex;
}
// Accessor for the query cache owned by this Database.
QueryCache* Database::getQueryCache()
{
	return query_cache;
}
// Accessor for the predicate-ID -> triple-count table.
TYPE_TRIPLE_NUM* Database::getpre2num()
{
	return pre2num;
}
// Mutable reference to the literal-ID upper bound (callers may update it).
TYPE_ENTITY_LITERAL_ID& Database::getlimitID_literal()
{
	return limitID_literal;
}
// Mutable reference to the entity-ID upper bound (callers may update it).
TYPE_ENTITY_LITERAL_ID& Database::getlimitID_entity()
{
	return limitID_entity;
}
// Mutable reference to the predicate-ID upper bound (callers may update it).
TYPE_PREDICATE_ID& Database::getlimitID_predicate()
{
	return limitID_predicate;
}
// Expose the query-parse mutex so callers can serialize parsing themselves.
mutex& Database::get_query_parse_lock()
{
	return query_parse_lock;
}
int
Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
{
@ -1381,23 +1413,24 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
{
return -101;
}
cout<<"read lock acquired"<<endl;
cout<<"read priviledge of update lock acquired"<<endl;
//copy the string index for each query thread
StringIndex tmpsi = *this->stringindex;
tmpsi.emptyBuffer();
general_evaluation.setStringIndexPointer(&tmpsi);
//StringIndex tmpsi = *this->stringindex;
//tmpsi.emptyBuffer();
//general_evaluation.setStringIndexPointer(&tmpsi);
//TODO: withdraw this lock, and allow for multiple doQuery() to run in parallism
//we need to add lock in QueryCache's operations
this->debug_lock.lock();
// this->debug_lock.lock();
bool query_ret = general_evaluation.doQuery();
if(!query_ret)
{
success_num = -101;
}
this->debug_lock.unlock();
// this->debug_lock.unlock();
long tv_bfget = Util::get_cur_time();
//NOTICE: this lock lock ensures that StringIndex is visited sequentially
this->getFinalResult_lock.lock();
if (trie == NULL)
{
trie = new Trie;
@ -1408,9 +1441,8 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
}
trie->LoadDictionary();
}
long tv_bfget = Util::get_cur_time();
general_evaluation.getFinalResult(_result_set);
this->getFinalResult_lock.unlock();
long tv_afget = Util::get_cur_time();
cout << "after getFinalResult, used " << (tv_afget - tv_bfget) << "ms." << endl;
@ -1418,7 +1450,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
need_output_answer = true;
//general_evaluation.setNeedOutputAnswer();
tmpsi.clear();
//tmpsi.clear();
pthread_rwlock_unlock(&(this->update_lock));
}
//Update
@ -1434,6 +1466,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
cout<<"unable to write lock"<<endl;
return -101;
}
cout<<"write priviledge of update lock acquired"<<endl;
success_num = 0;
TripleWithObjType *update_triple = NULL;
@ -1499,11 +1532,19 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
if (general_evaluation.getQueryTree().getUpdateType() == QueryTree::Delete_Where || general_evaluation.getQueryTree().getUpdateType() == QueryTree::Delete_Clause || general_evaluation.getQueryTree().getUpdateType() == QueryTree::Modify_Clause)
{
general_evaluation.prepareUpdateTriple(general_evaluation.getQueryTree().getDeletePatterns(), update_triple, update_triple_num);
for(int i = 0; i < update_triple_num; i++)
{
update_triple[i] = trie->Compress(update_triple[i], Trie::QUERYMODE);
}
success_num = remove(update_triple, update_triple_num);
}
if (general_evaluation.getQueryTree().getUpdateType() == QueryTree::Insert_Clause || general_evaluation.getQueryTree().getUpdateType() == QueryTree::Modify_Clause)
{
general_evaluation.prepareUpdateTriple(general_evaluation.getQueryTree().getInsertPatterns(), update_triple, update_triple_num);
for(int i = 0; i < update_triple_num; i++)
{
update_triple[i] = trie->Compress(update_triple[i], Trie::QUERYMODE);
}
success_num = insert(update_triple, update_triple_num);
}
}
@ -1511,8 +1552,12 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp)
general_evaluation.releaseResult();
delete[] update_triple;
this->query_cache->clear();
cout<<"QueryCache cleared"<<endl;
//NOTICE: maybe no updates are really done!
if(success_num > 0)
{
this->query_cache->clear();
cout<<"QueryCache cleared"<<endl;
}
pthread_rwlock_unlock(&(this->update_lock));
}
@ -1626,6 +1671,12 @@ Database::build(const string& _rdf_file)
cout << "after build, used " << (tv_build_end - tv_build_begin) << "ms." << endl;
cout << "finish build VS-Tree." << endl;
cout << "finish sub2id pre2id obj2id" << endl;
cout << "tripleNum is " << this->triples_num << endl;
cout << "entityNum is " << this->entity_num << endl;
cout << "preNum is " << this->pre_num << endl;
cout << "literalNum is " << this->literal_num << endl;
//this->vstree->saveTree();
//delete this->vstree;
//this->vstree = NULL;
@ -2076,6 +2127,19 @@ Database::build_s2xx(ID_TUPLE* _p_id_tuples)
__gnu_parallel::sort(_p_id_tuples, _p_id_tuples + this->triples_num, Util::spo_cmp_idtuple);
#endif
//qsort(_p_id_tuples, this->triples_num, sizeof(int*), Util::_spo_cmp);
//remove duplicates from the id tables
int j = 1;
for(int i = 1; i < this->triples_num; ++i)
{
if(!Util::equal(_p_id_tuples[i], _p_id_tuples[i-1]))
{
_p_id_tuples[j] = _p_id_tuples[i];
++j;
}
}
this->triples_num = j;
this->kvstore->build_subID2values(_p_id_tuples, this->triples_num, this->entity_num);
//save all entity_signature into binary file
@ -2608,12 +2672,6 @@ Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file)
//delete _entity_bitset[i];
//}
//delete[] _entity_bitset;
cout << "finish sub2id pre2id obj2id" << endl;
cout << "tripleNum is " << this->triples_num << endl;
cout << "entityNum is " << this->entity_num << endl;
cout << "preNum is " << this->pre_num << endl;
cout << "literalNum is " << this->literal_num << endl;
//{
//stringstream _ss;
@ -2666,10 +2724,10 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
(this->kvstore)->setEntityByID(_sub_id, _triple.subject);
//update the string buffer
if (_sub_id < this->entity_buffer_size)
{
this->entity_buffer->set(_sub_id, _triple.subject);
}
//if (_sub_id < this->entity_buffer_size)
//{
//this->entity_buffer->set(_sub_id, _triple.subject);
//}
if (_vertices != NULL)
_vertices->push_back(_sub_id);
@ -2710,10 +2768,10 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
(this->kvstore)->setEntityByID(_obj_id, _triple.object);
//update the string buffer
if (_obj_id < this->entity_buffer_size)
{
this->entity_buffer->set(_obj_id, _triple.object);
}
//if (_obj_id < this->entity_buffer_size)
//{
//this->entity_buffer->set(_obj_id, _triple.object);
//}
if (_vertices != NULL)
_vertices->push_back(_obj_id);
@ -2733,11 +2791,11 @@ Database::insertTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
(this->kvstore)->setLiteralByID(_obj_id, _triple.object);
//update the string buffer
TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID;
if (tid < this->literal_buffer_size)
{
this->literal_buffer->set(tid, _triple.object);
}
//TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID;
//if (tid < this->literal_buffer_size)
//{
//this->literal_buffer->set(tid, _triple.object);
//}
if (_vertices != NULL)
_vertices->push_back(_obj_id);
@ -2850,10 +2908,10 @@ Database::removeTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
this->freeEntityID(_sub_id);
this->sub_num--;
//update the string buffer
if (_sub_id < this->entity_buffer_size)
{
this->entity_buffer->del(_sub_id);
}
//if (_sub_id < this->entity_buffer_size)
//{
//this->entity_buffer->del(_sub_id);
//}
if (_vertices != NULL)
_vertices->push_back(_sub_id);
}
@ -2872,10 +2930,10 @@ Database::removeTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
this->kvstore->subIDByEntity(_triple.object);
this->freeEntityID(_obj_id);
//update the string buffer
if (_obj_id < this->entity_buffer_size)
{
this->entity_buffer->del(_obj_id);
}
//if (_obj_id < this->entity_buffer_size)
//{
//this->entity_buffer->del(_obj_id);
//}
if (_vertices != NULL)
_vertices->push_back(_obj_id);
}
@ -2889,11 +2947,11 @@ Database::removeTriple(const TripleWithObjType& _triple, vector<unsigned>* _vert
this->kvstore->subIDByLiteral(_triple.object);
this->freeLiteralID(_obj_id);
//update the string buffer
TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID;
if (tid < this->literal_buffer_size)
{
this->literal_buffer->del(tid);
}
//TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID;
//if (tid < this->literal_buffer_size)
//{
//this->literal_buffer->del(tid);
//}
if (_vertices != NULL)
_vertices->push_back(_obj_id);
}
@ -3078,7 +3136,7 @@ Database::remove(std::string _rdf_file, bool _is_restore)
//triple_num -= parse_triple_num;
}
//TODO:better to free this just after id_tuples are ok
//BETTER: free this just after id_tuples are ok
//(only when using group insertion/deletion)
//or reduce the array size
delete[] triple_array;
@ -3171,17 +3229,19 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
{
is_new_sub = true;
subid = this->allocEntityID();
#ifdef DEBUG
cout << "this is a new subject: " << sub << " " << subid << endl;
#endif
this->sub_num++;
this->kvstore->setIDByEntity(sub, subid);
this->kvstore->setEntityByID(subid, sub);
new_entity.insert(subid);
//add info and update buffer
vertices.push_back(subid);
if (subid < this->entity_buffer_size)
{
this->entity_buffer->set(subid, sub);
}
//if (subid < this->entity_buffer_size)
//{
//this->entity_buffer->set(subid, sub);
//}
}
string pre = _triples[i].getPredicate();
@ -3204,17 +3264,19 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
{
is_new_obj = true;
objid = this->allocEntityID();
#ifdef DEBUG
cout << "this is a new object: " << obj << " " << objid << endl;
#endif
//this->obj_num++;
this->kvstore->setIDByEntity(obj, objid);
this->kvstore->setEntityByID(objid, obj);
new_entity.insert(objid);
//add info and update
vertices.push_back(objid);
if (objid < this->entity_buffer_size)
{
this->entity_buffer->set(objid, obj);
}
//if (objid < this->entity_buffer_size)
//{
//this->entity_buffer->set(objid, obj);
//}
}
}
else //isObjLiteral
@ -3229,11 +3291,11 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
this->kvstore->setLiteralByID(objid, obj);
//add info and update
vertices.push_back(objid);
int tid = objid - Util::LITERAL_FIRST_ID;
if (tid < this->literal_buffer_size)
{
this->literal_buffer->set(tid, obj);
}
//int tid = objid - Util::LITERAL_FIRST_ID;
//if (tid < this->literal_buffer_size)
//{
//this->literal_buffer->set(tid, obj);
//}
}
}
@ -3805,10 +3867,10 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
this->sub_num--;
//add info and update buffer
vertices.push_back(_sub_id);
if (_sub_id < this->entity_buffer_size)
{
this->entity_buffer->del(_sub_id);
}
//if (_sub_id < this->entity_buffer_size)
//{
//this->entity_buffer->del(_sub_id);
//}
}
else
{
@ -3895,15 +3957,15 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
this->freeEntityID(_obj_id);
//add info and update buffer
vertices.push_back(_obj_id);
if (_obj_id < this->entity_buffer_size)
{
this->entity_buffer->del(_obj_id);
}
//if (_obj_id < this->entity_buffer_size)
//{
//this->entity_buffer->del(_obj_id);
//}
}
else
{
tmpset.reset();
this->calculateEntityBitSet(_obj_id, tmpset);
//this->calculateEntityBitSet(_obj_id, tmpset);
//this->vstree->replaceEntry(_obj_id, tmpset);
}
}
@ -3918,11 +3980,11 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
this->freeLiteralID(_obj_id);
//add info and update buffer
vertices.push_back(_obj_id);
int tid = _obj_id - Util::LITERAL_FIRST_ID;
if (tid < this->literal_buffer_size)
{
this->literal_buffer->del(tid);
}
//int tid = _obj_id - Util::LITERAL_FIRST_ID;
//if (tid < this->literal_buffer_size)
//{
//this->literal_buffer->del(tid);
//}
}
}
}
@ -4036,7 +4098,6 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num,
return valid_num;
}
//TODO: check and improve the backup program
bool
Database::backup()
{

View File

@ -87,6 +87,16 @@ public:
//id tuples file
string getIDTuplesFile();
VSTree* getVSTree();
KVstore* getKVstore();
StringIndex* getStringIndex();
QueryCache* getQueryCache();
TYPE_TRIPLE_NUM* getpre2num();
TYPE_ENTITY_LITERAL_ID& getlimitID_literal();
TYPE_ENTITY_LITERAL_ID& getlimitID_entity();
TYPE_PREDICATE_ID& getlimitID_predicate();
mutex& get_query_parse_lock();
private:
string name;
string store_path;
@ -107,6 +117,8 @@ private:
pthread_rwlock_t update_lock;
//just for debug a block of code
mutex debug_lock;
// for getFinalResult
mutex getFinalResult_lock;
VSTree* vstree;
KVstore* kvstore;

View File

@ -285,17 +285,20 @@ ISArray::UpdateTime(unsigned _key)
bool
ISArray::search(unsigned _key, char *&_str, unsigned &_len)
{
// this->AccessLock.lock();
// printf("%s search %d: \n", filename.c_str(), _key);
if (_key >= CurEntryNum ||!array[_key].isUsed())
{
_str = NULL;
_len = 0;
// this->AccessLock.unlock();
return false;
}
// try to read in main memory
if (array[_key].inCache())
{
UpdateTime(_key);
// this->AccessLock.unlock();
return array[_key].getBstr(_str, _len);
}
// printf(" need to read disk ");
@ -305,6 +308,7 @@ ISArray::search(unsigned _key, char *&_str, unsigned &_len)
// printf("stored in block %d, ", store);
if (!BM->ReadValue(store, _str, _len))
{
// this->AccessLock.unlock();
return false;
}
@ -314,15 +318,17 @@ ISArray::search(unsigned _key, char *&_str, unsigned &_len)
_str = debug;
// printf("str = %s, len = %d\n", _str, _len);
// this->AccessLock.unlock();
return true;
}
bool
ISArray::insert(unsigned _key, char *_str, unsigned _len)
{
// this->AccessLock.lock();
if (_key < CurEntryNum && array[_key].isUsed())
{
// this->AccessLock.unlock();
return false;
}
@ -330,6 +336,7 @@ ISArray::insert(unsigned _key, char *_str, unsigned _len)
{
cout << _key << ' ' << MAX_KEY_NUM << endl;
cout << "ISArray insert error: Key is bigger than MAX_KEY_NUM" << endl;
// this->AccessLock.unlock();
return false;
}
@ -350,6 +357,7 @@ ISArray::insert(unsigned _key, char *_str, unsigned _len)
if (newp == NULL)
{
cout << "ISArray insert error: main memory full" << endl;
// this->AccessLock.unlock();
return false;
}
else
@ -369,14 +377,17 @@ ISArray::insert(unsigned _key, char *_str, unsigned _len)
AddInCache(_key, _str, _len);
array[_key].setUsedFlag(true);
array[_key].setDirtyFlag(true);
// this->AccessLock.unlock();
return true;
}
bool
ISArray::remove(unsigned _key)
{
// this->AccessLock.lock();
if (_key >= CurEntryNum || !array[_key].isUsed())
{
// this->AccessLock.unlock();
return false;
}
@ -391,6 +402,8 @@ ISArray::remove(unsigned _key)
if (array[_key].inCache())
{
RemoveFromLRUQueue(_key);
char *str = NULL;
unsigned len = 0;
array[_key].getBstr(str, len, false);
@ -400,6 +413,7 @@ ISArray::remove(unsigned _key)
array[_key].release();
// this->AccessLock.unlock();
return true;
}
@ -407,14 +421,18 @@ ISArray::remove(unsigned _key)
bool
ISArray::modify(unsigned _key, char *_str, unsigned _len)
{
// this->AccessLock.lock();
if (_key >= CurEntryNum ||!array[_key].isUsed())
{
// this->AccessLock.unlock();
return false;
}
array[_key].setDirtyFlag(true);
if (array[_key].inCache())
{
RemoveFromLRUQueue(_key);
char* str = NULL;
unsigned len = 0;
array[_key].getBstr(str, len, false);
@ -431,8 +449,41 @@ ISArray::modify(unsigned _key, char *_str, unsigned _len)
BM->FreeBlocks(store);
AddInCache(_key, _str, _len);
// this->AccessLock.unlock();
return true;
}
// Detach array[_key] from the doubly-linked LRU list that runs from
// cache_head to cache_tail_id. No-op when the entry is not cached.
// NOTE(review): this only unlinks the node and clears its cache flag;
// it does not release the entry's value or adjust any size counter --
// confirm callers handle that (remove()/modify() appear to).
void
ISArray::RemoveFromLRUQueue(unsigned _key)
{
if (!array[_key].inCache())
return;
// Neighbour IDs; -1 means "no neighbour" (entry is at head/tail).
int prevID = array[_key].getPrev();
int nextID = array[_key].getNext();
// Splice forward link: predecessor (or the head sentinel) skips this entry.
if (prevID == -1)
cache_head->setNext(nextID);
else
array[prevID].setNext(nextID);
//cout << "next ID: " << nextID << endl;
// Splice backward link; if this entry was the tail, the tail moves back.
if (nextID != -1)
array[nextID].setPrev(prevID); // since array[_key] is not tail, nextp will not be NULL
else
cache_tail_id = prevID;
// Mark the entry fully detached from the LRU structure.
array[_key].setCacheFlag(false);
array[_key].setPrev(-1);
array[_key].setNext(-1);
/* UpdateTime(_key);
unsigned PrevID = array[_key].getPrev();
cache_tail_id = PrevID;
if (PrevID == -1)
cache_head->setNext(-1);
else
array[PrevID].setNext(-1);
*/
return;
}

View File

@ -46,9 +46,12 @@ private:
bool AddInCache(unsigned _key, char *_str, unsigned _len);
bool SwapOut();
bool UpdateTime(unsigned _key);
void RemoveFromLRUQueue(unsigned _key);
bool PreLoad();
mutex AccessLock;
public:
ISArray();
ISArray(string _dir_path, string _filename, string mode, unsigned long long buffer_size, unsigned _key_num = 0);

View File

@ -51,6 +51,7 @@ IVArray::IVArray(string _dir_path, string _filename, string mode, unsigned long
//index_time_map.clear();
//time_index_map.clear();
MAX_CACHE_SIZE = buffer_size;
// MAX_CACHE_SIZE = 10 * (1 << 30);
cache_head = new IVEntry;
cache_tail_id = -1;
@ -130,34 +131,6 @@ IVArray::IVArray(string _dir_path, string _filename, string mode, unsigned long
// cout << _filename << " Done." << endl;
}
bool
IVArray::PreLoad()
{
if (array == NULL)
return false;
for(unsigned i = 0; i < CurEntryNum; i++)
{
if (!array[i].isUsed())
continue;
unsigned store = array[i].getStore();
char *str = NULL;
unsigned len = 0;
if (!BM->ReadValue(store, str, len))
return false;
if (CurCacheSize + len > (MAX_CACHE_SIZE >> 1))
break;
AddInCache(i, str, len);
delete [] str;
}
return true;
}
bool
IVArray::save()
{
@ -237,33 +210,7 @@ IVArray::SwapOut()
}
array[targetID].release();
array[targetID].setCacheFlag(false);
/* if (time_index_map.empty())
{
return false;
}
multimap <long, unsigned>::iterator it = time_index_map.begin();
unsigned key = it->second;
char *str = NULL;
unsigned len = 0;
array[key].getBstr(str, len, false);
if (array[key].isDirty() && array[key].inCache())
{
unsigned store = BM->WriteValue(str, len);
array[key].setStore(store);
}
CurCacheSize -= len;
array[key].release();
array[key].setCacheFlag(false);
//array[key].setTime(0);
//index_time_map.erase(key);
time_index_map.erase(it);
*/
return true;
}
@ -275,6 +222,8 @@ IVArray::AddInCache(unsigned _key, char *_str, unsigned _len)
{
return false;
}
// this->CacheLock.lock();
// ensure there is enough room in main memory
while (CurCacheSize + _len > MAX_CACHE_SIZE)
{
@ -285,9 +234,6 @@ IVArray::AddInCache(unsigned _key, char *_str, unsigned _len)
}
}
CurCacheSize += _len;
array[_key].setBstr(_str, _len);
array[_key].setCacheFlag(true);
if (cache_tail_id == -1)
cache_head->setNext(_key);
@ -298,21 +244,26 @@ IVArray::AddInCache(unsigned _key, char *_str, unsigned _len)
array[_key].setNext(-1);
cache_tail_id = _key;
//modify maps
// long time = Util::get_cur_time();
// array[_key].setTime(time);
// time_index_map.insert(make_pair(time, _key));
CurCacheSize += _len;
array[_key].setBstr(_str, _len);
array[_key].setCacheFlag(true);
// this->CacheLock.unlock();
return true;
}
//Update last used time of array[_key]
bool
IVArray::UpdateTime(unsigned _key)
IVArray::UpdateTime(unsigned _key, bool HasLock)
{
if (array[_key].isPined()) // the cache pined should not be swaped out
return true;
if (_key == (unsigned) cache_tail_id)// already most recent
return true;
// if (!HasLock)
// this->CacheLock.lock();
// cout << "UpdateTime: " << _key << endl;
int prevID = array[_key].getPrev();
int nextID = array[_key].getNext();
@ -328,115 +279,55 @@ IVArray::UpdateTime(unsigned _key)
array[_key].setNext(-1);
array[cache_tail_id].setNext(_key);
cache_tail_id = _key;
/*
//map <unsigned, long>::iterator it;
unsigned oldtime;
if ((oldtime = array[_key].getTime()) == 0)
{
return false;
}
//unsigned oldtime = it->second;
long time = Util::get_cur_time();
array[_key].setTime(time);
//it->second = time;
// pair < multimap<long, unsigned>::iterator, multimap<long, unsigned>::iterator > ret;
// ret = time_index_map.equal_range(oldtime);
multimap <long, unsigned>::iterator p = time_index_map.lower_bound(oldtime);
//for(p = ret.first; p != ret.second; p++)
for(p; p->first == oldtime; p++)
{
if (p->second == _key)
break;
}
//if (p == ret.second)
if (p->first != oldtime)
{
return false;
}
time_index_map.erase(p);
time_index_map.insert(make_pair(time, _key));
*/
// if (!HasLock)
// this->CacheLock.unlock();
return true;
}
bool
IVArray::search(unsigned _key, char *&_str, unsigned &_len)
{
this->CacheLock.lock();
//printf("%s search %d: ", filename.c_str(), _key);
if (_key >= CurEntryNum ||!array[_key].isUsed())
{
// cout << "IVArray " << filename << " Search Error: Key " << _key << " is not available." << endl;
_str = NULL;
_len = 0;
this->CacheLock.unlock();
return false;
}
// try to read in main memory
if (array[_key].inCache())
{
UpdateTime(_key);
return array[_key].getBstr(_str, _len);
bool ret = array[_key].getBstr(_str, _len);
this->CacheLock.unlock();
return ret;
}
// printf(" need to read disk ");
// read in disk
unsigned store = array[_key].getStore();
// cout << "store: " << store << endl;
// printf("stored in block %d, ", store);
if (!BM->ReadValue(store, _str, _len))
{
this->CacheLock.unlock();
return false;
}
// try to add the entry into cache
/* if (VList::isLongList(_len) && _len + CurCacheSize <= IVArray::MAX_CACHE_SIZE)
if(!VList::isLongList(_len))
{
array[_key].setBstr(_str, _len);
array[_key].setCacheFlag(true);
CurCacheSize += _len;
}*/
if (!VList::isLongList(_len))
{
AddInCache(_key, _str, _len);
char *debug = new char [_len];
memcpy(debug, _str, _len);
_str = debug;
}
// printf(" value is %s, length: %d\n", _str, _len);
// if (array[_key].Lock.try_lock())
// {
// if (array[_key].inCache())
// return true;
AddInCache(_key, _str, _len);
char *debug = new char [_len];
memcpy(debug, _str, _len);
_str = debug;
// array[_key].Lock.unlock();
// also read values near it so that we can take advantage of spatial locality
/* unsigned start = (_key / SEG_LEN) * SEG_LEN;
unsigned end = start + SEG_LEN;
for(unsigned i = start; i < end; i++)
{
unsigned store = array[i].getStore();
if (i == _key)
{
if (!BM->ReadValue(store, _str, _len))
return false;
//if (!VList::isLongList(_len))
AddInCache(_key, _str, _len);
//else
if (VList::isLongList(_len))
array[_key].setLongListFlag(true);
}
else if (!array[i].isLongList() && array[i].isUsed() && !array[i].inCache())
{
char *temp_str;
unsigned temp_len;
if (!BM->ReadValue(store, temp_str, temp_len))
continue;
if (!VList::isLongList(temp_len))
AddInCache(i, temp_str, temp_len);
else
array[_key].setLongListFlag(true);
delete [] temp_str;
}
}*/
// }
}
this->CacheLock.unlock();
return true;
}
@ -523,6 +414,8 @@ IVArray::remove(unsigned _key)
if (array[_key].inCache())
{
RemoveFromLRUQueue(_key);
if(array[_key].isPined())
array[_key].setCachePinFlag(false);
char *str = NULL;
unsigned len = 0;
@ -531,6 +424,9 @@ IVArray::remove(unsigned _key)
array[_key].setCacheFlag(false);
}
if (array[_key].isPined())
array[_key].setCachePinFlag(false);
array[_key].release();
return true;
@ -550,26 +446,13 @@ IVArray::modify(unsigned _key, char *_str, unsigned _len)
if (array[_key].inCache())
{
RemoveFromLRUQueue(_key);
if(array[_key].isPined())
array[_key].setCachePinFlag(false);
char* str = NULL;
unsigned len = 0;
array[_key].getBstr(str, len, false);
/* if (!VList::isLongList(_len))
{
CurCacheSize -= len;
CurCacheSize += _len;
array[_key].setBstr(_str, _len);
}
else
{
CurCacheSize -= len;
array[_key].release();
array[_key].setCacheFlag(false);
unsigned store = BM->WriteValue(_str, _len);
array[_key].setStore(store);
}
*/
array[_key].release();
CurCacheSize -= len;
AddInCache(_key, _str, _len);
@ -578,15 +461,6 @@ IVArray::modify(unsigned _key, char *_str, unsigned _len)
{
unsigned store = array[_key].getStore();
BM->FreeBlocks(store);
/*if (VList::isLongList(_len))
{
unsigned store = BM->WriteValue(_str, _len);
array[_key].setStore(store);
}
else
{
AddInCache(_key, _str, _len);
}*/
AddInCache(_key, _str, _len);
}
@ -594,12 +468,47 @@ IVArray::modify(unsigned _key, char *_str, unsigned _len)
}
// Pin the value for _key in memory so it is never swapped out.
// A pinned entry is removed from the LRU queue (RemoveFromLRUQueue) and
// flagged via setCachePinFlag(true), so SwapOut/UpdateTime skip it.
// Silently returns on any failure (bad key, unused entry, disk read error).
// NOTE(review): CurCacheSize is not increased when the value is loaded
// here, so pinned data lives outside the MAX_CACHE_SIZE budget -- confirm
// this is intended.
void
IVArray::PinCache(unsigned _key)
{
//printf("%s search %d: ", filename.c_str(), _key);
if (_key >= CurEntryNum ||!array[_key].isUsed())
{
return;
}
// Already resident: just unlink from the LRU queue and mark pinned.
if (array[_key].inCache())
{
RemoveFromLRUQueue(_key);
array[_key].setCachePinFlag(true);
return;
}
// Not resident: fetch the value from its block on disk.
unsigned store = array[_key].getStore();
char *_str = NULL;
unsigned _len = 0;
if (!BM->ReadValue(store, _str, _len))
{
return;
}
// Attach the value to the entry and mark it cached + pinned.
// NOTE(review): presumably setBstr takes over (or copies) _str;
// verify there is no leak of the buffer allocated by ReadValue.
array[_key].setBstr(_str, _len);
array[_key].setCacheFlag(true);
array[_key].setCachePinFlag(true);
return;
}
void
IVArray::RemoveFromLRUQueue(unsigned _key)
{
if (!array[_key].inCache())
if (!array[_key].inCache() || array[_key].isPined())
return;
//this->CacheLock.lock();
int prevID = array[_key].getPrev();
int nextID = array[_key].getNext();
@ -625,7 +534,6 @@ IVArray::RemoveFromLRUQueue(unsigned _key)
else
array[PrevID].setNext(-1);*/
//this->CacheLock.unlock();
return;
}

View File

@ -7,6 +7,7 @@
* a Key-Value Index for ID-Value pair in form of Array
* =======================================================================*/
#include "../../Util/Util.h"
#include "IVEntry.h"
#include "IVBlockManager.h"
@ -45,10 +46,11 @@ private:
bool AddInCache(unsigned _key, char *_str, unsigned _len);
bool SwapOut();
bool UpdateTime(unsigned _key);
bool UpdateTime(unsigned _key, bool HasLock = false);
bool PreLoad();
void RemoveFromLRUQueue(unsigned int);
void RemoveFromLRUQueue(unsigned _key);
mutex CacheLock;
public:
IVArray();
@ -60,4 +62,5 @@ public:
bool remove(unsigned _key);
bool insert(unsigned _key, char *_str, unsigned _len);
bool save();
void PinCache(unsigned _key);
};

View File

@ -18,6 +18,7 @@ IVEntry::IVEntry()
usedFlag = false;
dirtyFlag = true;
cacheFlag = false;
CachePinFlag = false;
prevID = nextID = -1;
}
@ -113,6 +114,18 @@ IVEntry::inCache() const
return cacheFlag;
}
// Set or clear the pin flag.
// A pinned entry is meant to stay in cache (IVArray::RemoveFromLRUQueue
// skips pinned entries, so they are never linked back for eviction).
void
IVEntry::setCachePinFlag(bool _flag)
{
	CachePinFlag = _flag;
}
// Return true if this entry is pinned in cache.
// NOTE(review): this is a pure getter and could be declared const
// (spelling "isPinned") — left as-is to match the declared interface.
bool
IVEntry::isPined()
{
	return CachePinFlag;
}
void
IVEntry::release()
{

View File

@ -16,6 +16,7 @@ class IVEntry
bool usedFlag; // mark if the entry is used
bool dirtyFlag;
bool cacheFlag;
bool CachePinFlag;
unsigned store; //index of block where value is stored
// pointer to id for LRU list
int prevID;
@ -42,6 +43,9 @@ public:
void setCacheFlag(bool _flag);
bool inCache() const;
void setCachePinFlag(bool _flag);
bool isPined();
void release();
void Copy(const IVEntry& _entry);

View File

@ -2738,22 +2738,29 @@ KVstore::isEntity(TYPE_ENTITY_LITERAL_ID id)
return id < Util::LITERAL_FIRST_ID;
}
/*void
void
KVstore::AddIntoPreCache(TYPE_PREDICATE_ID _pre_id)
{
this->preID2values->AddIntoCache(_pre_id);
this->preID2values->PinCache(_pre_id);
}
void
KVstore::AddIntoSubCache(TYPE_ENTITY_LITERAL_ID _entity_id)
{
this->subID2values->AddIntoCache(_entity_id);
this->subID2values->PinCache(_entity_id);
}
void
KVstore::AddIntoObjCache(TYPE_ENTITY_LITERAL_ID _entity_literal_id)
{
this->objID2values->AddIntoCache(_entity_literal_id);
}*/
if (Util::is_literal_ele(_entity_literal_id))
{
TYPE_ENTITY_LITERAL_ID _literal_id = _entity_literal_id
- Util::LITERAL_FIRST_ID;
objID2values_literal->PinCache(_literal_id);
}
this->objID2values->PinCache(_entity_literal_id);
}
unsigned
KVstore::getSubListSize(TYPE_ENTITY_LITERAL_ID _sub_id)

View File

@ -86,6 +86,7 @@ SITree::getHeight() const
void
SITree::setHeight(unsigned _h)
{
this->height = _h;
}
@ -107,10 +108,12 @@ SITree::prepare(SINode* _np)
bool
SITree::search(const char* _str, unsigned _len, unsigned* _val)
{
this->AccessLock.lock();
if (_str == NULL || _len == 0)
{
printf("error in SITree-search: empty string\n");
//*_val = -1;
this->AccessLock.unlock();
return false;
}
//this->CopyToTransfer(_str, _len, 1);
@ -123,26 +126,31 @@ SITree::search(const char* _str, unsigned _len, unsigned* _val)
if (ret == NULL || store == -1) //tree is empty or not found
{
//bstr.clear();
this->AccessLock.unlock();
return false;
}
const Bstr* tmp = ret->getKey(store);
if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found
{
this->AccessLock.unlock();
return false;
}
*_val = ret->getValue(store);
this->TSM->request(request);
//bstr.clear();
this->AccessLock.unlock();
return true;
}
bool
SITree::insert(char* _str, unsigned _len, unsigned _val)
{
this->AccessLock.lock();
if (_str == NULL || _len == 0)
{
printf("error in SITree-insert: empty string\n");
this->AccessLock.unlock();
return false;
}
//this->CopyToTransfer(_str, _len, 1);
@ -253,16 +261,18 @@ SITree::insert(char* _str, unsigned _len, unsigned _val)
this->TSM->request(request);
//bstr.clear(); //NOTICE: must be cleared!
this->AccessLock.unlock();
return !ifexist; //QUERY(which case:return false)
}
bool
SITree::modify(const char* _str, unsigned _len, unsigned _val)
{
this->AccessLock.lock();
if (_str == NULL || _len == 0)
{
printf("error in SITree-modify: empty string\n");
this->AccessLock.unlock();
return false;
}
//this->CopyToTransfer(_str, _len, 1);
@ -275,11 +285,13 @@ SITree::modify(const char* _str, unsigned _len, unsigned _val)
if (ret == NULL || store == -1) //tree is empty or not found
{
//bstr.clear();
this->AccessLock.unlock();
return false;
}
const Bstr* tmp = ret->getKey(store);
if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found
{
this->AccessLock.unlock();
return false;
}
@ -287,7 +299,7 @@ SITree::modify(const char* _str, unsigned _len, unsigned _val)
ret->setDirty();
this->TSM->request(request);
//bstr.clear();
this->AccessLock.unlock();
return true;
}
@ -341,9 +353,11 @@ SITree::find(unsigned _len, const char* _str, int* store) const
bool
SITree::remove(const char* _str, unsigned _len)
{
this->AccessLock.lock();
if (_str == NULL || _len == 0)
{
printf("error in SITree-remove: empty string\n");
this->AccessLock.unlock();
return false;
}
//this->CopyToTransfer(_str, _len, 1);
@ -352,8 +366,10 @@ SITree::remove(const char* _str, unsigned _len)
//const Bstr* _key = &transfer[1];
SINode* ret;
if (this->root == NULL) //tree is empty
{
this->AccessLock.unlock();
return false;
}
SINode* p = this->root;
SINode* q;
int i, j;
@ -424,20 +440,27 @@ SITree::remove(const char* _str, unsigned _len)
this->TSM->request(request);
//bstr.clear();
this->AccessLock.unlock();
return flag; //i == j, not found
}
//Persist the whole tree to disk through the tree storage manager.
//Thread-safe: holds AccessLock for the duration, like the other public methods.
//Returns true iff TSM->writeTree succeeds.
bool
SITree::save() //save the whole tree to disk
{
	//RAII lock replaces the manual unlock-on-every-return-path pattern
	std::lock_guard<std::mutex> guard(this->AccessLock);
#ifdef DEBUG_KVSTORE
	printf("now to save tree!\n");
#endif
	return TSM->writeTree(this->root);
}
void

View File

@ -57,18 +57,20 @@ private:
long long request;
void prepare(SINode* _np);
public:
SITree(); //always need to initial transfer
SITree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
std::mutex AccessLock;
unsigned getHeight() const;
void setHeight(unsigned _h);
SINode* getRoot() const;
SINode* find(const Bstr* _key, int* store, bool ifmodify);
SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify);
public:
SITree(); //always need to initial transfer
SITree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size);
//insert, search, remove, set
bool search(const char* _str, unsigned _len, unsigned* _val);
bool insert(char* _str, unsigned _len, unsigned _val);
bool modify(const char* _str, unsigned _len, unsigned _val);
SINode* find(const Bstr* _key, int* store, bool ifmodify);
SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify);
bool remove(const char* _str, unsigned _len);
bool save();
~SITree();

View File

@ -38,7 +38,7 @@ main(int argc, char * argv[])
string _db_path = string(argv[1]);
int len = _db_path.length();
if(_db_path.substr(len-3, 3) == ".db")
if(_db_path.length() > 3 && _db_path.substr(len-3, 3) == ".db")
{
cout<<"your database can not end with .db"<<endl;
return -1;

View File

@ -27,6 +27,7 @@ using namespace boost::property_tree;
typedef SimpleWeb::Server<SimpleWeb::HTTP> HttpServer;
typedef SimpleWeb::Client<SimpleWeb::HTTP> HttpClient;
#define THREAD_NUM 30
#define MAX_DATABASE_NUM 100
#define MAX_USER_NUM 1000
#define ROOT_USERNAME "root"
@ -81,6 +82,8 @@ bool user_handler(const HttpServer& server, const shared_ptr<HttpServer::Respons
bool showUser_handler(const HttpServer& server, const shared_ptr<HttpServer::Response>& response, const shared_ptr<HttpServer::Request>& request);
bool check_handler(const HttpServer& server, const shared_ptr<HttpServer::Response>& response, const shared_ptr<HttpServer::Request>& request);
void query_thread(string db_name, string format, string db_query, const shared_ptr<HttpServer::Response>& response, const shared_ptr<HttpServer::Request>& request);
//=============================================================================
//TODO: use a lock to protect logs when running in a multithreading environment
@ -271,6 +274,227 @@ string UrlDecode(string& SRC)
return (ret);
}
// A single SPARQL query job: the query parameters plus shared ownership of
// the HTTP request/response objects it must answer.
class Task
{
public:
	string db_name;  // target database name
	string format;   // requested result format
	string db_query; // SPARQL query text
	const shared_ptr<HttpServer::Response> response; // response the answer is written to
	const shared_ptr<HttpServer::Request> request;   // originating HTTP request
	Task(string name, string ft, string query, const shared_ptr<HttpServer::Response>& res, const shared_ptr<HttpServer::Request>& req);
	~Task();
	void run();
};

// Construct a query task.
// Uses the member initializer list instead of assignment in the body, so the
// string members are initialized directly rather than default-constructed
// and then overwritten.
Task::Task(string name, string ft, string query, const shared_ptr<HttpServer::Response>& res, const shared_ptr<HttpServer::Request>& req)
	: db_name(name), format(ft), db_query(query), response(res), request(req)
{
}

Task::~Task()
{
}

// Execute the task: run the query synchronously in the calling thread.
void Task::run()
{
	query_thread(db_name, format, db_query, response, request);
}
// A worker that runs one Task at a time on its own detached std::thread.
class Thread
{
public:
	thread TD;            // underlying std::thread; detached in start()
	int ID;               // unique id handed out from threadnum at construction
	static int threadnum; // global counter used to assign ids
	Task* task;           // task to execute on the next run()
	Thread();
	~Thread();
	int GetThreadID();
	void assign(Task* t);
	void run();
	void start();
	friend bool operator==(Thread t1, Thread t2);
	friend bool operator!=(Thread t1, Thread t2);
};
list<Thread*> busythreads;   // workers currently running a task (guarded by busy_mutex)
vector<Thread*> freethreads; // idle workers available for dispatch (guarded by free_mutex)
mutex busy_mutex; // protects busythreads
mutex free_mutex; // protects freethreads
mutex task_mutex; // protects ThreadPool::tasklines
// Move worker t from the busy list back to the free list.
// Called by the worker itself when its task finishes (see Thread::run).
// Uses lock_guard (RAII) so each mutex is released even if erase/push throws,
// and so the two locks are never held at the same time.
void BackToFree(Thread *t)
{
	{
		lock_guard<mutex> busy_guard(busy_mutex);
		busythreads.erase(find(busythreads.begin(), busythreads.end(), t));
	}
	lock_guard<mutex> free_guard(free_mutex);
	freethreads.push_back(t);
}
// Global id counter for workers.
// NOTE(review): incremented without a lock in the constructor — safe only if
// all Thread objects are constructed from a single thread (as the pool does).
int Thread::threadnum = 0;
// Assign this worker the next id from the global counter.
Thread::Thread()
{
	threadnum++;
	ID = threadnum;
}
Thread::~Thread()
{
}
// Return this worker's unique id.
int Thread::GetThreadID()
{
	return ID;
}
// Remember the task to execute on the next run().
void Thread::assign(Task* t)
{
	task = t;
}
// Execute the assigned task, free it, then put this worker back on the
// free list so the pool can reuse it.
void Thread::run()
{
	cout << "Thread:" << ID << " run\n";
	task->run();
	delete task;
	BackToFree(this);
}
// Launch run() on a new detached thread; the caller cannot join it afterwards.
void Thread::start()
{
	TD = thread(&Thread::run, this);
	TD.detach();
}
// Workers are equal iff their ids match.
// NOTE(review): both operators take Thread by value, which requires Thread's
// copy constructor; that constructor is implicitly deleted because member TD
// is a (non-copyable) std::thread, so any call to these operators will fail
// to compile. Consider taking const Thread& instead — confirm no callers
// currently rely on these.
bool operator==(Thread t1, Thread t2)
{
	return t1.ID == t2.ID;
}
bool operator!=(Thread t1, Thread t2)
{
	return !(t1.ID == t2.ID);
}
// Fixed-size pool of worker Threads fed by a FIFO task queue.
// A detached manager thread (created by create(), running start()) polls the
// queue and hands tasks to idle workers.
class ThreadPool
{
public:
	int ThreadNum;          // number of worker threads
	bool isclose;           // set by close() to stop the manager loop
	                        // NOTE(review): written and read from different
	                        // threads without synchronization — consider
	                        // std::atomic<bool>
	thread ThreadsManage;   // manager thread running start(); detached
	queue<Task*> tasklines; // pending tasks (guarded by task_mutex)
	ThreadPool();
	ThreadPool(int t);
	~ThreadPool();
	void create();
	void SetThreadNum(int t);
	int GetThreadNum();
	void AddTask(Task* t);
	void start();
	void close();
};
// Default pool: 10 worker threads.
// Delegates to ThreadPool(int) (C++11 delegating constructor, already the
// file's language level) so the set-up logic is not duplicated.
ThreadPool::ThreadPool() : ThreadPool(10)
{
}
// Build a pool with t worker threads, all initially idle.
// Resets the global busy/free lists and fills the free list with fresh workers.
ThreadPool::ThreadPool(int t)
{
	isclose = false;
	ThreadNum = t;
	busythreads.clear();
	freethreads.clear();
	for (int i = 0; i < ThreadNum; ++i)
	{
		freethreads.push_back(new Thread());
	}
}
// Destroy the pool, releasing the idle worker objects.
// NOTE(review): workers still listed in busythreads are not deleted here —
// TODO confirm the pool is only destroyed after close() has drained them.
ThreadPool::~ThreadPool()
{
	for (Thread* worker : freethreads)
	{
		delete worker;
	}
}
// Start the manager loop (start()) on its own detached thread.
// After detach() the pool cannot join it; shutdown is signalled via close().
void ThreadPool::create()
{
	ThreadsManage = thread(&ThreadPool::start, this);
	ThreadsManage.detach();
}
// Set the configured worker count.
// NOTE(review): changing this after construction does not resize the pool —
// the worker Threads were already created in the constructor.
void ThreadPool::SetThreadNum(int t)
{
	ThreadNum = t;
}
// Return the configured worker count.
int ThreadPool::GetThreadNum()
{
	return ThreadNum;
}
// Enqueue a task for the worker threads; ownership of t passes to the pool
// (the worker deletes it after running it — see Thread::run).
// Thread-safe: the queue is protected by task_mutex, held via RAII so it is
// released even if push throws.
void ThreadPool::AddTask(Task* t)
{
	lock_guard<mutex> guard(task_mutex);
	tasklines.push(t);
}
// Manager loop: repeatedly hand queued tasks to idle workers.
// NOTE(review): this is a busy-wait — the loop spins when there is no work or
// no free worker; a condition variable would avoid burning CPU.
void ThreadPool::start()
{
	while (true)
	{
		// Shutdown requested: keep looping until every busy worker finishes,
		// then exit the manager loop.
		if (isclose == true)
		{
			busy_mutex.lock();
			if (busythreads.size() != 0)
			{
				busy_mutex.unlock();
				continue;
			}
			busy_mutex.unlock();
			break;
		}
		// No idle worker available yet — spin.
		// (freethreads only grows between this check and the pop below,
		// since this loop is the sole consumer — confirm if that changes.)
		free_mutex.lock();
		if (freethreads.size() == 0)
		{
			free_mutex.unlock();
			continue;
		}
		free_mutex.unlock();
		// Nothing queued — spin.
		task_mutex.lock();
		if (tasklines.size() == 0)
		{
			task_mutex.unlock();
			continue;
		}
		// Dequeue the oldest task.
		Task *job = tasklines.front();
		tasklines.pop();
		task_mutex.unlock();
		// Take an idle worker and give it the job.
		free_mutex.lock();
		Thread *t = freethreads.back();
		freethreads.pop_back();
		t->assign(job);
		free_mutex.unlock();
		// Account it as busy, then launch it on a detached thread;
		// the worker re-registers itself as free via BackToFree().
		busy_mutex.lock();
		busythreads.push_back(t);
		busy_mutex.unlock();
		t->start();
	}
}
// Ask the manager loop to stop once all running tasks have finished.
// NOTE(review): isclose is a plain bool written here and read in start() from
// another thread without synchronization — formally a data race; consider
// std::atomic<bool>.
void ThreadPool::close()
{
	isclose = true;
}
// Global worker pool shared by all HTTP request handlers.
ThreadPool pool(THREAD_NUM);
int main(int argc, char *argv[])
{
Util util;
@ -448,7 +672,7 @@ int initialize(int argc, char *argv[])
//scheduler = start_thread(func_scheduler);
#endif
pool.create();
//pthread_rwlock_init(&database_load_lock, NULL);
#ifndef SPARQL_ENDPOINT
@ -604,12 +828,10 @@ int initialize(int argc, char *argv[])
// }
// };
//TODO: use db_name if multiple databases
server.resource["^/%3[F|f]operation%3[D|d]checkpoint%26db_name%3[D|d](.*)$"]["GET"]=[&server](shared_ptr<HttpServer::Response> response, shared_ptr<HttpServer::Request> request)
{
checkpoint_handler(server, response, request);
};
//TODO: use db_name if multiple databases
server.resource["^/?operation=checkpoint&db_name=(.*)$"]["GET"]=[&server](shared_ptr<HttpServer::Response> response, shared_ptr<HttpServer::Request> request)
{
checkpoint_handler(server, response, request);
@ -1560,9 +1782,8 @@ bool query_handler0(const HttpServer& server, const shared_ptr<HttpServer::Respo
// }
//doQuery(format, db_query, server, response, request);
query_num++;
thread t(&query_thread, db_name, format, db_query, response, request);
t.detach();
Task* task = new Task(db_name, format, db_query, response, request);
pool.AddTask(task);
}
bool query_handler1(const HttpServer& server, const shared_ptr<HttpServer::Response>& response, const shared_ptr<HttpServer::Request>& request)
@ -1616,8 +1837,10 @@ bool query_handler1(const HttpServer& server, const shared_ptr<HttpServer::Respo
//current_database = iter->second;
//doQuery(format, db_query, server, response, request);
query_num++;
thread t(&query_thread, db_name, format, db_query, response, request);
t.detach();
Task* task = new Task(db_name, format, db_query, response, request);
pool.AddTask(task);
//thread t(&query_thread, db_name, format, db_query, response, request);
//t.detach();
}
//void query_handler(const shared_ptr<HttpServer::Response>& response, const shared_ptr<HttpServer::Request>& request)

View File

@ -1078,7 +1078,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result)
if (!ret_result.checkUseStream())
{
// cout << "flag2" << endl;
cout << "flag2" << endl;
for (unsigned i = 0; i < ret_result.ansNum; i++)
{
ret_result.answer[i] = new string [ret_result.select_var_num];
@ -1095,7 +1095,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result)
//ret_result.answer[i][j] = trie->Uncompress(ret_result.answer[i][j], ret_result.answer[i][j].length());
}
}
else //TODO add Uncompress
else
{
// ret_result.answer[i][j] = trie->Uncompress(result0.result[i].str[k - id_cols],
//result0.result[i].str[k - id_cols].length());
@ -1117,7 +1117,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result)
}
else
{
// cout << "flag3" << endl;
cout << "flag3" << endl;
for (unsigned i = 0; i < ret_result.ansNum; i++)
for (int j = 0; j < ret_result.select_var_num; j++)
{
@ -1145,7 +1145,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result)
ret_result.resetStream();
}
}
// TODO: is this part need uncompression?
else if (this->query_tree.getQueryForm() == QueryTree::Ask_Query)
{
// cout << "flag4" << endl;

View File

@ -95,6 +95,8 @@ bool QueryCache::getMinimalRepresentation(const Patterns &triple_pattern, Patter
bool QueryCache::tryCaching(const Patterns &triple_pattern, const TempResult &temp_result, int eva_time)
{
lock_guard<mutex> (this->query_cache_lock); //when quit this scope the lock will be released
Patterns minimal_repre;
map<string, string> minimal_mapping;
@ -176,8 +178,12 @@ bool QueryCache::tryCaching(const Patterns &triple_pattern, const TempResult &te
return true;
}
//NOTICE: in this function we also modify some contents, so we must use mutex instead of rwlock
bool QueryCache::checkCached(const Patterns &triple_pattern, const Varset &varset, TempResult &temp_result)
{
//this->query_cache_lock.lock();
lock_guard<mutex> (this->query_cache_lock); //when quit this scope the lock will be released
Patterns minimal_repre;
map<string, string> minimal_mapping;

View File

@ -23,6 +23,8 @@ class QueryCache
const long long ITEM_MEMORY_LIMIT = 1000000LL;
const long long TOTAL_MEMORY_LIMIT = 100000000LL;
mutex query_cache_lock;
int time_now;
long long total_memory_used;

View File

@ -2,7 +2,7 @@
Gstore System(also called gStore) is a graph database engine for managing large graph-structured data, which is open-source and targets Linux operating systems. The whole project is written in C++, with the help of some libraries such as readline, antlr, and so on. Only source tarballs are provided currently, which means you have to compile the source code if you want to use our system.
**The formal help document is in [English(EN)](docs/help/gStore_help.pdf) and [中文(CN)](docs/help/gStore_help_CN.pdf).**
**The formal help document is in [English(EN)](docs/help/gStore_help.pdf) and [中文(ZH)](docs/help/gStore_help_CN.pdf).**
**The formal experiment result is in [Experiment](docs/test/formal_experiment.pdf).**
@ -14,14 +14,22 @@ Gstore System(also called gStore) is a graph database engine for managing large
### Compile from Source
This system is really user-friendly and you can pick it up in several minutes. Remember to check your platform where you want to run this system by viewing [System Requirements](docs/DEMAND.md). After all are verified, please get this project's source code. There are several ways to do this:
- (suggested)type `git clone https://github.com/Caesar11/gStore.git` in your terminal or use git GUI to acquire it
- download the zip from this repository and extract it
- fork this repository in your github account
- type `git clone https://github.com/Caesar11/gStore.git` in your terminal or use git GUI to acquire it
Then you need to compile the project, just type `make` in the gStore root directory, then all executables will be generated.
The first strategy is suggested to get the source code because you can easily acquire the updates of the code by typing `git pull` in the home directory of gStore repository.
In addition, you can directly check the version of the code by typing `git log` to see the commit logs.
If you want to use code from other branches instead of master branch, like 'dev' branch, then:
- clone the master branch and type `git checkout dev` in your terminal
- clone the dev branch directly by typing `git clone -b dev`
### Deploy via Docker
You can easily deploy gStore via Docker. We provide both of Dockerfile and docker image. Please see our [Docker Deployment Doc(EN)](docs/DOCKER_DEPLOY_EN.md) or [Docker部署文档(中文)](docs/DOCKER_DEPLOY_CN.md) for details.
@ -42,7 +50,7 @@ If you want to understand the details of the gStore system, or you want to try s
- [API Explanation](docs/API.md): guide you to develop applications based on our API
- [Project Structure](docs/STRUCT.md): show the whole structure and sequence of this project
- [Project Structure](docs/STRUCT.md): show the whole structure and process of this project
- [Related Essays](docs/ESSAY.md): contain essays and publications related with gStore
@ -56,7 +64,7 @@ If you want to understand the details of the gStore system, or you want to try s
We have written a series of short essays addressing recurring challenges in using gStore to realize applications, which are placed in [Recipe Book](docs/TIPS.md).
You are welcome to report any advice or errors in the github Issues part of this repository, if not requiring in-time reply. However, if you want to urgent on us to deal with your reports, please email to <zengli@bookug.cc> to submit your suggestions and report bugs to us by emailing to <gStoreDB@gmail.com>. A full list of our whole team is in [Mailing List](docs/MAIL.md).
You are welcome to report any advice or errors in the github Issues part of this repository, if not requiring an in-time reply. However, if you want us to deal with your reports urgently, please email <bookug@qq.com> to submit your suggestions, and report bugs to us by emailing <gStoreDB@gmail.com>. A full list of our whole team is in [Mailing List](docs/MAIL.md).
There are some restrictions when you use the current gStore project, you can see them on [Limit Description](docs/LIMIT.md).

View File

@ -138,7 +138,7 @@ namespace SimpleWeb {
/// Timeout on request handling. Defaults to 5 seconds.
size_t timeout_request=5;
/// Timeout on content handling. Defaults to 300 seconds.
size_t timeout_content=300;
size_t timeout_content=3600;
/// IPv4 address in dotted decimal form or IPv6 address in hexadecimal notation.
/// If empty, the address will be any address.
std::string address;

View File

@ -25,7 +25,7 @@
<li id="li_1" >
<label class="description" for="element_1">Database Name </label>
<div>
<input id="element_1" name="databasename" class="element text medium" type="text" maxlength="255" value="lubm" readonly>
<input id="element_1" name="databasename" class="element text medium" type="text" maxlength="255" value="lubm">
</input>
</div>

View File

@ -91,20 +91,33 @@ bool StringIndexFile::randomAccess(unsigned id, string *str, bool real)
long offset = (*this->index_table)[id].offset;
unsigned length = (*this->index_table)[id].length;
//if(id == 9)
//{
//cout<<"check: "<<offset<<" "<<length<<endl;
//}
allocBuffer(length);
//fseek(this->value_file, offset, SEEK_SET);
//fread(this->buffer, sizeof(char), length, this->value_file);
pread(fileno(value_file), this->buffer, sizeof(char)*length, offset);
//DEBUG: here a bug exists if we use pread instead of fread, the details are in BUG_StringIndex_pread of docs/BUGS.md
fseek(this->value_file, offset, SEEK_SET);
fread(this->buffer, sizeof(char), length, this->value_file);
//pread(fileno(value_file), this->buffer, sizeof(char)*length, offset);
this->buffer[length] = '\0';
*str = string(this->buffer);
//if(id == 9)
//{
//cout<<"check: "<<*str<<endl;
//}
if (real)
{
*str = trie->Uncompress(*str, str->length());//Uncompresss
}
//if(id == 9)
//{
//cout<<"check: "<<*str<<endl;
//}
return true;
}
@ -132,7 +145,7 @@ void StringIndexFile::trySequenceAccess(bool real)
if (this->type == Predicate)
cout << "Predicate StringIndex ";
long current_offset = 0;
//long current_offset = 0;
if ((max_end - min_begin) / 800000L < (long)this->request.size())
{
cout << "sequence access." << endl;
@ -147,8 +160,8 @@ void StringIndexFile::trySequenceAccess(bool real)
char *block = new char[MAX_BLOCK_SIZE];
long current_block_begin = min_begin;
//fseek(this->value_file, current_block_begin, SEEK_SET);
current_offset = current_block_begin;
fseek(this->value_file, current_block_begin, SEEK_SET);
//current_offset = current_block_begin;
while (current_block_begin < max_end)
{
@ -157,14 +170,14 @@ void StringIndexFile::trySequenceAccess(bool real)
if (current_block_end <= this->request[pos].offset)
{
current_block_begin = this->request[pos].offset;
//fseek(this->value_file, current_block_begin, SEEK_SET);
current_offset = current_block_begin;
fseek(this->value_file, current_block_begin, SEEK_SET);
//current_offset = current_block_begin;
current_block_end = min(current_block_begin + MAX_BLOCK_SIZE, max_end);
}
//fread(block, sizeof(char), current_block_end - current_block_begin, this->value_file);
pread(fileno(this->value_file), block, sizeof(char)*(current_block_end-current_block_begin), current_offset);
current_offset += sizeof(char)*(current_block_end-current_block_begin);
fread(block, sizeof(char), current_block_end - current_block_begin, this->value_file);
//pread(fileno(this->value_file), block, sizeof(char)*(current_block_end-current_block_begin), current_offset);
//current_offset += sizeof(char)*(current_block_end-current_block_begin);
while (pos < (int)this->request.size())
{
@ -279,6 +292,13 @@ void StringIndexFile::change(unsigned id, KVstore &kv_store)
fseek(this->value_file, (*this->index_table)[id].offset, SEEK_SET);
fwrite(str.c_str(), sizeof(char), (*this->index_table)[id].length, this->value_file);
//if(id == 9)
//{
//cout<<"check in change():9 "<<str<<endl;
//string str2;
//randomAccess(id, &str2);
//cout<<str2<<endl;
//}
}
void StringIndexFile::disable(unsigned id)
@ -348,12 +368,12 @@ bool StringIndex::randomAccess(unsigned id, string *str, bool is_entity_or_liter
if (is_entity_or_literal)
{
if(searchBuffer(id, str))
{
cout << "FLAG2" << endl;
*str = trie->Uncompress(*str, str->length());
return true;
}
//if(searchBuffer(id, str))
//{
//cout << "FLAG2" << endl;
//*str = trie->Uncompress(*str, str->length());
//return true;
//}
if (id < Util::LITERAL_FIRST_ID)
{
@ -374,11 +394,16 @@ void StringIndex::addRequest(unsigned id, std::string *str, bool is_entity_or_li
{
if (is_entity_or_literal)
{
if(searchBuffer(id, str))
{
// *str = trie->Uncompress(*str)
return;
}
//if(id == 9)
//{
//cout<<"to search 9 in string buffer"<<endl;
//}
//if(searchBuffer(id, str))
//{
//// *str = trie->Uncompress(*str)
//cout<<"found in string buffer"<<endl;
//return;
//}
if (id < Util::LITERAL_FIRST_ID)
this->entity.addRequest(id, str);
else

View File

@ -147,6 +147,11 @@ class StringIndex
predicate.clear();
}
// No-op: this index keeps nothing buffered that needs flushing.
// Presumably kept so callers can treat all index types uniformly — confirm
// against the call sites.
void flush()
{
	//nothing to do here
}
void emptyBuffer()
{
entity.emptyBuffer();

View File

@ -204,6 +204,9 @@ Trie::WriteDownNode(TrieNode *_node, ofstream& _fout, string _str)
TripleWithObjType
Trie::Compress(const TripleWithObjType &_in_triple, int MODE)
{
//use this to forbid the trie
return _in_triple;
int lowbound = (MODE == BUILDMODE) ? Trie::LOWBOUND : 0;
string _in_sub = _in_triple.getSubject();
@ -236,6 +239,9 @@ Trie::Compress(const TripleWithObjType &_in_triple, int MODE)
string
Trie::Compress(string _str)
{
//use this to forbid the trie
return _str;
int lowbound = 0;
if (Util::isLiteral(_str))
@ -258,8 +264,9 @@ Trie::LoadDictionary()
ifstream _fin(this->store_path.c_str());
if (!_fin)
{
cout << "Trie::LoadDictionary: Fail to open " << store_path
<< endl;
//cout << "Trie::LoadDictionary: Fail to open " << store_path
//<< " but it doesn't matter if you are building a database."
//<< endl;
return false;
}
@ -293,6 +300,9 @@ Trie::LoadDictionary()
string
Trie::Uncompress(const char *_str, const int len)
{
//use this to forbid the trie
return string(_str);
if (len == 0)
return "";
@ -360,6 +370,9 @@ Trie::Uncompress(const char *_str, const int len)
string
Trie::Uncompress(const string &_str, const int len)
{
//use this to forbid the trie
return _str;
return Uncompress(_str.data(), len);
}
@ -385,7 +398,7 @@ Trie::LoadTrie(string dictionary_path)
ifstream fin(store_path.c_str());
if (!fin)
{
cout << "Trie::LoadTrie: Fail to open " << store_path << endl;
//cout << "Trie::LoadTrie: Fail to open " << store_path << endl;
return false;
}

View File

@ -17,8 +17,11 @@ using namespace std;
class Trie
{
static const int SAMPLE_UPBOUND = 1000000;
static const int LOWBOUND = 100;//this param should change with data sets
//static const int SAMPLE_UPBOUND = 1000000;
//static const int LOWBOUND = 100;//this param should change with data sets
//NOTICE: set SAMPLE_UPBOUND to a tiny number to unuse the Trie structure
static const int SAMPLE_UPBOUND = 1;
static const int LOWBOUND = 1;//this param should change with data sets
//SAMPLE_UPBOUND = 1000000, LOWBOUND = 100 for LUBM500M
//SAMPLE_UPBOUND = 100000, LOWBOUND = 20 for DBpediafull
//SAMPLE_UPVOUND = 300000, LOWBOUND = 30 for WatDiv500M

View File

@ -1859,6 +1859,16 @@ Util::pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b)
return false;
}
//Return true iff two ID tuples agree on all three components
//(subject id, predicate id and object id).
bool
Util::equal(const ID_TUPLE& a, const ID_TUPLE& b)
{
	//return the comparison directly instead of if(...){return true;}return false;
	return a.subid == b.subid && a.preid == b.preid && a.objid == b.objid;
}
void
Util::empty_file(const char* _fname)
{

View File

@ -423,6 +423,7 @@ public:
static bool spo_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
static bool ops_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
static bool pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b);
static bool equal(const ID_TUPLE& a, const ID_TUPLE& b);
static std::string tmp_path;
// this are for debugging

View File

@ -4,10 +4,10 @@ all: Benchmark CppAPIExample
#all: example Benchmark CppAPIExample
Benchmark: Benchmark.o
$(CC) -o Benchmark.exe Benchmark.o -lcurl -L../lib -lclient -lpthread
$(CC) -o Benchmark.exe Benchmark.o -L../lib -lclient -lcurl -lpthread
CppAPIExample: CppAPIExample.o
$(CC) -o CppAPIExample.exe CppAPIExample.o -I../ -lcurl -L../lib -lgstoreconnector -lclient
$(CC) -o CppAPIExample.exe CppAPIExample.o -I../ -L../lib -lgstoreconnector -lclient -lcurl
Benchmark.o: Benchmark.cpp
$(CC) -c -I../ Benchmark.cpp -o Benchmark.o

View File

@ -1,6 +1,7 @@
#CC=g++
CC=g++ -std=c++11
#CC=ccache g++
all: example QueryGenerator test
example: CppAPIExample.o
$(CC) -o example CppAPIExample.o -L../lib -lgstoreconnector
@ -9,5 +10,14 @@ example: CppAPIExample.o
CppAPIExample.o: CppAPIExample.cpp
$(CC) -c -I../src/ CppAPIExample.cpp -o CppAPIExample.o
QueryGenerator: QueryGenerator.cpp
$(CC) -o QueryGenerator QueryGenerator.cpp
test: test.o
$(CC) -o test test.o -L../lib -lgstoreconnector -pthread
test.o: test.cpp
$(CC) -c -I../src/ test.cpp -o test.o -pthread
clean:
rm -rf *.o example
rm -rf *.o example QueryGenerator test

View File

@ -1,4 +1,4 @@
select ?s ?o where
select ?s ?p ?o where
{
?s ?p ?o .
}

View File

@ -1,366 +1,4 @@
**This Chapter guides you to use our API for accessing gStore. We provide socket API and HTTP api, corresponding to gserver and ghttp respectively.**
# Socket API
## Easy Examples
We provide JAVA, C++, PHP and Python API for gStore now. Please refer to example codes in `api/socket/cpp/example`, `api/socket/java/example`, `api/socket/php` and `api/socket/python`. To use C++ and Java examples to have a try, please ensure that executables have already been generated. Otherwise, just type `make APIexample` in the root directory of gStore to compile the codes, as well as API.
Next, **start up a gStore server by using `./gserver` command.** It is ok if you know a running usable gStore server and try to connect to it, but notice that **the server ip and port of server and client must be matched.**(you don't need to change any thing if using examples, just by default) Then, you need to compile the example codes in the directory gStore/api/socket. We provide a utility to do this, and you just need to type `make APIexample` in the root directory of gStore. Or you can compile the codes by yourself, in this case please go to gStore/api/socket/cpp/example/ and gStore/socket/api/java/example/, respectively.
Finally, go to the example directory and run the corresponding executables. For C++, just use `./example` command to run it. And for Java, use `make run` command or `java -cp ../lib/GstoreJavaAPI.jar:. JavaAPIExample` to run it. Both the two executables will connect to a specified gStore server and do some load or query operations. Be sure that you see the query results in the terminal where you run the examples, otherwise please go to [Frequently Asked Questions](FAQ.md) for help or report it to us.(the report approach is described in [README](../README.md))
You are advised to read the example code carefully, as well as the corresponding Makefile. This will help you to understand the API, specially if you want to write your own programs based on the API interface.
- - -
## API structure
The socket API of gStore is placed in api/socket directory in the root directory of gStore, whose contents are listed below:
- gStore/api/socket/
- cpp/ (the C++ API)
- src/ (source code of C++ API, used to build the lib/libgstoreconnector.a)
- GstoreConnector.cpp (interfaces to interact with gStore server)
- GstoreConnector.h
- Makefile (compile and build lib)
- lib/ (where the static lib lies in)
- .gitignore
- libgstoreconnector.a (only exist after compiled, you need to link this lib when you use the C++ API)
- example/ (small example program to show the basic idea of using the C++ API)
- CppAPIExample.cpp
- Makefile
- java/ (the Java API)
- src/ (source code of Java API, used to build the lib/GstoreJavaAPI.jar)
- jgsc/ (the package which you need to import when you use the Java API)
- GstoreConnector.java (interfaces to interact with gStore server)
- Makefile (compile and build lib)
- lib/
- .gitignore
- GstoreJavaAPI.jar (only exist after compiled, you need to include this JAR in your class path)
- example/ (small example program to show the basic idea of using the Java API)
- JavaAPIExample.cpp
- Makefile
- php/ (the PHP API)
- PHPAPIExxample.php (small example program to show the basic idea of using the PHP API)
- GstoreConnector.php (source code of PHP API)
- python/ (the python API)
- src/ (source code of Python API)
- GstoreConnector.py
- lib/
- example/ (small example program to show the basic idea of using the Python API)
- PythonAPIExample.py
- - -
## C++ API
#### Interface
To use the C++ API, please place the phrase `#include "GstoreConnector.h"` in your cpp code. Functions in GstoreConnector.h should be called like below:
```
// initialize the Gstore server's IP address and port.
GstoreConnector gc("127.0.0.1", 3305);
// build a new database by a RDF file.
// note that the relative path is related to gserver.
gc.build("LUBM10.db", "example/LUBM_10.n3");
// then you can execute SPARQL query on this database.
std::string sparql = "select ?x where \
{ \
?x <rdf:type> <ub:UndergraduateStudent>. \
?y <ub:name> <Course1>. \
?x <ub:takesCourse> ?y. \
?z <ub:teacherOf> ?y. \
?z <ub:name> <FullProfessor1>. \
?z <ub:worksFor> ?w. \
?w <ub:name> <Department0>. \
}";
std::string answer = gc.query(sparql);
// unload this database.
gc.unload("LUBM10.db");
// also, you can load some exist database directly and then query.
gc.load("LUBM10.db");
// query a SPARQL in current database
answer = gc.query(sparql);
```
The original declaration of these functions are as below:
```
GstoreConnector();
GstoreConnector(string _ip, unsigned short _port);
GstoreConnector(unsigned short _port);
bool load(string _db_name);
bool unload(string _db_name);
bool build(string _db_name, string _rdf_file_path);
string query(string _sparql);
```
Notice:
1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively.
2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in.
3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!)
#### Compile
You are advised to see gStore/api/socket/cpp/example/Makefile for instructions on how to compile your code with the C++ API. Generally, what you must do is compile your own code to object with header in the C++ API, and link the object with static lib in the C++ API.
Let us assume that your source code is placed in test.cpp, whose position is ${TEST}, while the gStore project position is ${GSTORE}/gStore.(if using devGstore as name instead of gStore, then the path is ${GSTORE}/devGstore) Please go to the ${TEST} directory first:
> Use `g++ -c -I${GSTORE}/gStore/api/socket/cpp/src/ test.cpp -o test.o` to compile your test.cpp into test.o, relative API header is placed in api/socket/cpp/src/.
> Use `g++ -o test test.o -L${GSTORE}/gStore/api/socket/cpp/lib/ -lgstoreconnector` to link your test.o with the libgstoreconnector.a(a static lib) in api/socket/cpp/lib/.
Then you can type `./test` to execute your own program, which uses our C++ API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like.
- - -
## Java API
#### Interface
To use the Java API, please place the phrase `import jgsc.GstoreConnector;` in your java code. Functions in GstoreConnector.java should be called like below:
```
// initialize the Gstore server's IP address and port.
GstoreConnector gc = new GstoreConnector("127.0.0.1", 3305);
// build a new database by a RDF file.
// note that the relative path is related to gserver.
gc.build("LUBM10.db", "example/LUBM_10.n3");
// then you can execute SPARQL query on this database.
String sparql = "select ?x where "
+ "{"
+ "?x <rdf:type> <ub:UndergraduateStudent>. "
+ "?y <ub:name> <Course1>. "
+ "?x <ub:takesCourse> ?y. "
+ "?z <ub:teacherOf> ?y. "
+ "?z <ub:name> <FullProfessor1>. "
+ "?z <ub:worksFor> ?w. "
+ "?w <ub:name> <Department0>. "
+ "}";
String answer = gc.query(sparql);
// unload this database.
gc.unload("LUBM10.db");
// also, you can load some exist database directly and then query.
gc.load("LUBM10.db");
// query a SPARQL in current database
answer = gc.query(sparql);
```
The original declaration of these functions are as below:
```
GstoreConnector();
GstoreConnector(string _ip, unsigned short _port);
GstoreConnector(unsigned short _port);
bool load(string _db_name);
bool unload(string _db_name);
bool build(string _db_name, string _rdf_file_path);
string query(string _sparql);
```
Notice:
1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively.
2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in.
3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!)
#### Compile
You are advised to see gStore/api/socket/java/example/Makefile for instructions on how to compile your code with the Java API. Generally, what you must do is compile your own code to object with jar file in the Java API.
Let us assume that your source code is placed in test.java, whose position is ${TEST}, while the gStore project position is ${GSTORE}/gStore.(if using devGstore as name instead of gStore, then the path is ${GSTORE}/devGstore) Please go to the ${TEST} directory first:
> Use `javac -cp ${GSTORE}/gStore/api/socket/java/lib/GstoreJavaAPI.jar test.java` to compile your test.java into test.class with the GstoreJavaAPI.jar(a jar package used in Java) in api/java/lib/.
Then you can type `java -cp ${GSTORE}/gStore/api/socket/java/lib/GstoreJavaAPI.jar:. test` to execute your own program(notice that the ":." in command cannot be neglected), which uses our Java API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like.
- - -
## PHP API
#### Interface
To use the PHP API, please place the phrase `include('GstoreConnector.php');` in your php code. Functions in
GstoreConnector.php should be called like below:
```
// initialize the Gstore server's IP address and port.
$gc = new Connector("127.0.0.1", 3305);
// build a new database by a RDF file.
// note that the relative path is related to gserver.
$gc->build("LUBM10", "example/LUBM_10.n3");
// then you can execute SPARQL query on this database.
$sparql = "select ?x where " . "{" .
"?x <rdf:type> <ub:UndergraduateStudent>. " .
"?y <ub:name> <Course1>. " .
"?x <ub:takesCourse> ?y. " .
"?z <ub:teacherOf> ?y. " .
"?z <ub:name> <FullProfessor1>. " .
"?z <ub:worksFor> ?w. " .
"?w <ub:name> <Department0>. " .
"}";
$answer = $gc->query($sparql);
//unload this database.
$gc->unload("LUBM10");
//also, you can load some exist database directly and then query.
$gc->load("LUBM10");// query a SPARQL in current database
$answer = $gc->query($sparql);
```
The original declaration of these functions are as below:
```
class Connector {
public function __construct($host, $port);
public function send($data);
public function recv();
public function build($db_name, $rdf_file_path);
public function load($db_name);
public function unload($db_name);
public function query($sparql);
public function __destruct();
}
```
Notice:
1. When using Connector(), the default value for ip and port is 127.0.0.1 and 3305, respectively.
2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in.
3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!)
#### Run
You can see gStore/api/socket/php/PHPAPIExample for instructions on how to use PHP API. PHP script doesn't need compiling. You can run PHP file directly or use it in your web project.
- - -
## Python API
#### Interface
To use the Python API, please place the phrase `from GstoreConnector import GstoreConnector` in your python code. Functions in GstoreConnector.py should be called like below:
```
# initialize the Gstore server's IP address and port.
gc = GstoreConnector('127.0.0.1', 3305)
# build a new database by a RDF file.
# note that the relative path is related to gserver.
gc.build('LUBM10', 'data/LUBM_10.n3')
# then you can execute SPARQL query on this database.
sparql = ("select ?x where "
    "{"
    "?x <rdf:type> <ub:UndergraduateStudent>. "
    "?y <ub:name> <Course1>. "
    "?x <ub:takesCourse> ?y. "
    "?z <ub:teacherOf> ?y. "
    "?z <ub:name> <FullProfessor1>. "
    "?z <ub:worksFor> ?w. "
    "?w <ub:name> <Department0>. "
    "}")
answer = gc.query(sparql)
# unload this database.
gc.unload('LUBM10')
# also, you can load some exist database directly and then query.
gc.load('LUBM10')  # query a SPARQL in current database
answer = gc.query(sparql)
```
The original declaration of these functions are as below:
```
class GstoreConnector {
def _connect(self)
def _disconnect(self)
def _send(self, msg):
def _recv(self)
def _pack(self, msg):
def _communicate(f):
def __init__(self, ip='127.0.0.1', port=3305):
@_communicate
def test(self)
@_communicate
def load(self, db_name)
@_communicate
def unload(self, db_name)
@_communicate
def build(self, db_name, rdf_file_path)
@_communicate
def drop(self, db_name)
@_communicate
def stop(self)
@_communicate
def query(self, sparql)
@_communicate
def show(self, _type=False)
}
```
Notice:
1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively.
2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in.
3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!)
#### Run
You are advised to see gStore/api/socket/python/example/PythonAPIExample for examples on how to use python API. Python file doesn't need compiling, and you can run it directly.
**This Chapter guides you to use our API for accessing gStore. We provide HTTP api(suggested) and socket api, corresponding to ghttp and gserver respectively.**
# HTTP API
@ -370,7 +8,7 @@ Compired with socket API, HTTP API is more stable and more standard, and can mai
We provide JAVA and C++ API for ghttp now. Please refer to example codes in `api/http/cpp` and `api/http/java`. To use these examples, please make sure that executables have already been generated.
Next, **start up ghttp service by using \texttt{./ghttp} command.** It is ok if you know a running usable ghttp server and try to connect to it. (you don't need to change anything if using examples, just by default). Then, for Java and C++ code, you need to compile the example codes in the directory gStore/api/http/.
Next, **start up ghttp service by using \texttt{./ghttp} command.** It is ok if you know a running usable ghttp server and try to connect to it. (you do not need to change anything if using examples, just by default). Then, for Java and C++ code, you need to compile the example codes in the directory gStore/api/http/.
Finally, go to the example directory and run the corresponding executables. All these four executables will connect to a specified ghttp server and do some load or query operations. Be sure that you see the query results in the terminal where you run the examples, otherwise please go to [Frequently Asked Questions](FAQ.md) for help or report it to us.(the report approach is described in [README](../README.md))
@ -646,3 +284,370 @@ function query($username, $password, $db_name, $sparql)
function fquery($username, $password, $db_name, $sparql, $filename)
```
---
---
# Socket API
**This API is not maintained now.**
## Easy Examples
We provide JAVA, C++, PHP and Python API for gStore now. Please refer to example codes in `api/socket/cpp/example`, `api/socket/java/example`, `api/socket/php` and `api/socket/python`. To use C++ and Java examples to have a try, please ensure that executables have already been generated. Otherwise, just type `make APIexample` in the root directory of gStore to compile the codes, as well as API.
Next, **start up a gStore server by using `./gserver` command.** It is ok if you know a running usable gStore server and try to connect to it, but notice that **the server ip and port of server and client must be matched.**(you do not need to change anything if using examples, just by default) Then, you need to compile the example codes in the directory gStore/api/socket. We provide a utility to do this, and you just need to type `make APIexample` in the root directory of gStore. Or you can compile the codes by yourself, in this case please go to gStore/api/socket/cpp/example/ and gStore/socket/api/java/example/, respectively.
Finally, go to the example directory and run the corresponding executables. For C++, just use `./example` command to run it. And for Java, use `make run` command or `java -cp ../lib/GstoreJavaAPI.jar:. JavaAPIExample` to run it. Both the two executables will connect to a specified gStore server and do some load or query operations. Be sure that you see the query results in the terminal where you run the examples, otherwise please go to [Frequently Asked Questions](FAQ.md) for help or report it to us.(the report approach is described in [README](../README.md))
You are advised to read the example code carefully, as well as the corresponding Makefile. This will help you to understand the API, specially if you want to write your own programs based on the API interface.
- - -
## API structure
The socket API of gStore is placed in api/socket directory in the root directory of gStore, whose contents are listed below:
- gStore/api/socket/
- cpp/ (the C++ API)
- src/ (source code of C++ API, used to build the lib/libgstoreconnector.a)
- GstoreConnector.cpp (interfaces to interact with gStore server)
- GstoreConnector.h
- Makefile (compile and build lib)
- lib/ (where the static lib lies in)
- .gitignore
- libgstoreconnector.a (only exist after compiled, you need to link this lib when you use the C++ API)
- example/ (small example program to show the basic idea of using the C++ API)
- CppAPIExample.cpp
- Makefile
- java/ (the Java API)
- src/ (source code of Java API, used to build the lib/GstoreJavaAPI.jar)
- jgsc/ (the package which you need to import when you use the Java API)
- GstoreConnector.java (interfaces to interact with gStore server)
- Makefile (compile and build lib)
- lib/
- .gitignore
- GstoreJavaAPI.jar (only exist after compiled, you need to include this JAR in your class path)
- example/ (small example program to show the basic idea of using the Java API)
- JavaAPIExample.cpp
- Makefile
- php/ (the PHP API)
- PHPAPIExample.php (small example program to show the basic idea of using the PHP API)
- GstoreConnector.php (source code of PHP API)
- python/ (the python API)
- src/ (source code of Python API)
- GstoreConnector.py
- lib/
- example/ (small example program to show the basic idea of using the Python API)
- PythonAPIExample.py
- - -
## C++ API
#### Interface
To use the C++ API, please place the phrase `#include "GstoreConnector.h"` in your cpp code. Functions in GstoreConnector.h should be called like below:
```
// initialize the Gstore server's IP address and port.
GstoreConnector gc("127.0.0.1", 3305);
// build a new database by a RDF file.
// note that the relative path is related to gserver.
gc.build("LUBM10.db", "example/LUBM_10.n3");
// then you can execute SPARQL query on this database.
std::string sparql = "select ?x where \
{ \
?x <rdf:type> <ub:UndergraduateStudent>. \
?y <ub:name> <Course1>. \
?x <ub:takesCourse> ?y. \
?z <ub:teacherOf> ?y. \
?z <ub:name> <FullProfessor1>. \
?z <ub:worksFor> ?w. \
?w <ub:name> <Department0>. \
}";
std::string answer = gc.query(sparql);
// unload this database.
gc.unload("LUBM10.db");
// also, you can load some exist database directly and then query.
gc.load("LUBM10.db");
// query a SPARQL in current database
answer = gc.query(sparql);
```
The original declaration of these functions are as below:
```
GstoreConnector();
GstoreConnector(string _ip, unsigned short _port);
GstoreConnector(unsigned short _port);
bool load(string _db_name);
bool unload(string _db_name);
bool build(string _db_name, string _rdf_file_path);
string query(string _sparql);
```
Notice:
1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively.
2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in.
3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!)
#### Compile
You are advised to see gStore/api/socket/cpp/example/Makefile for instructions on how to compile your code with the C++ API. Generally, what you must do is compile your own code to object with header in the C++ API, and link the object with static lib in the C++ API.
Let us assume that your source code is placed in test.cpp, whose position is ${TEST}, while the gStore project position is ${GSTORE}/gStore.(if using devGstore as name instead of gStore, then the path is ${GSTORE}/devGstore) Please go to the ${TEST} directory first:
> Use `g++ -c -I${GSTORE}/gStore/api/socket/cpp/src/ test.cpp -o test.o` to compile your test.cpp into test.o, relative API header is placed in api/socket/cpp/src/.
> Use `g++ -o test test.o -L${GSTORE}/gStore/api/socket/cpp/lib/ -lgstoreconnector` to link your test.o with the libgstoreconnector.a(a static lib) in api/socket/cpp/lib/.
Then you can type `./test` to execute your own program, which uses our C++ API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like.
- - -
## Java API
#### Interface
To use the Java API, please place the phrase `import jgsc.GstoreConnector;` in your java code. Functions in GstoreConnector.java should be called like below:
```
// initialize IP address and port of the Gstore server.
GstoreConnector gc = new GstoreConnector("127.0.0.1", 3305);
// build a new database by a RDF file.
// note that the relative path is related to gserver.
gc.build("LUBM10.db", "example/LUBM_10.n3");
// then you can execute SPARQL query on this database.
String sparql = "select ?x where "
+ "{"
+ "?x <rdf:type> <ub:UndergraduateStudent>. "
+ "?y <ub:name> <Course1>. "
+ "?x <ub:takesCourse> ?y. "
+ "?z <ub:teacherOf> ?y. "
+ "?z <ub:name> <FullProfessor1>. "
+ "?z <ub:worksFor> ?w. "
+ "?w <ub:name> <Department0>. "
+ "}";
String answer = gc.query(sparql);
// unload this database.
gc.unload("LUBM10.db");
// also, you can load some exist database directly and then query.
gc.load("LUBM10.db");
// query a SPARQL in current database
answer = gc.query(sparql);
```
The original declaration of these functions are as below:
```
GstoreConnector();
GstoreConnector(string _ip, unsigned short _port);
GstoreConnector(unsigned short _port);
bool load(string _db_name);
bool unload(string _db_name);
bool build(string _db_name, string _rdf_file_path);
string query(string _sparql);
```
Notice:
1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively.
2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in.
3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!)
#### Compile
You are advised to see gStore/api/socket/java/example/Makefile for instructions on how to compile your code with the Java API. Generally, what you must do is compile your own code to object with jar file in the Java API.
Let us assume that your source code is placed in test.java, whose position is ${TEST}, while the gStore project position is ${GSTORE}/gStore.(if using devGstore as name instead of gStore, then the path is ${GSTORE}/devGstore) Please go to the ${TEST} directory first:
> Use `javac -cp ${GSTORE}/gStore/api/socket/java/lib/GstoreJavaAPI.jar test.java` to compile your test.java into test.class with the GstoreJavaAPI.jar(a jar package used in Java) in api/java/lib/.
Then you can type `java -cp ${GSTORE}/gStore/api/socket/java/lib/GstoreJavaAPI.jar:. test` to execute your own program(notice that the ":." in command cannot be neglected), which uses our Java API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like.
- - -
## PHP API
#### Interface
To use the PHP API, please place the phrase `include('GstoreConnector.php');` in your php code. Functions in
GstoreConnector.php should be called like below:
```
// initialize IP address and port of the Gstore server.
$gc = new Connector("127.0.0.1", 3305);
// build a new database by a RDF file.
// note that the relative path is related to gserver.
$gc->build("LUBM10", "example/LUBM_10.n3");
// then you can execute SPARQL query on this database.
$sparql = "select ?x where " . "{" .
"?x <rdf:type> <ub:UndergraduateStudent>. " .
"?y <ub:name> <Course1>. " .
"?x <ub:takesCourse> ?y. " .
"?z <ub:teacherOf> ?y. " .
"?z <ub:name> <FullProfessor1>. " .
"?z <ub:worksFor> ?w. " .
"?w <ub:name> <Department0>. " .
"}";
$answer = $gc->query($sparql);
//unload this database.
$gc->unload("LUBM10");
//also, you can load some exist database directly and then query.
$gc->load("LUBM10");// query a SPARQL in current database
$answer = $gc->query($sparql);
```
The original declaration of these functions are as below:
```
class Connector {
public function __construct($host, $port);
public function send($data);
public function recv();
public function build($db_name, $rdf_file_path);
public function load($db_name);
public function unload($db_name);
public function query($sparql);
public function __destruct();
}
```
Notice:
1. When using Connector(), the default value for ip and port is 127.0.0.1 and 3305, respectively.
2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in.
3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!)
#### Run
You can see gStore/api/socket/php/PHPAPIExample for instructions on how to use PHP API. PHP script does not need compiling. You can run PHP file directly or use it in your web project.
- - -
## Python API
#### Interface
To use the Python API, please place the phrase `from GstoreConnector import GstoreConnector` in your python code. Functions in GstoreConnector.py should be called like below:
```
# initialize IP address and port of the Gstore server.
gc = GstoreConnector('127.0.0.1', 3305)
# build a new database by a RDF file.
# note that the relative path is related to gserver.
gc.build('LUBM10', 'data/LUBM_10.n3')
# then you can execute SPARQL query on this database.
sparql = ("select ?x where "
    "{"
    "?x <rdf:type> <ub:UndergraduateStudent>. "
    "?y <ub:name> <Course1>. "
    "?x <ub:takesCourse> ?y. "
    "?z <ub:teacherOf> ?y. "
    "?z <ub:name> <FullProfessor1>. "
    "?z <ub:worksFor> ?w. "
    "?w <ub:name> <Department0>. "
    "}")
answer = gc.query(sparql)
# unload this database.
gc.unload('LUBM10')
# also, you can load some exist database directly and then query.
gc.load('LUBM10')  # query a SPARQL in current database
answer = gc.query(sparql)
```
The original declaration of these functions are as below:
```
class GstoreConnector {
def _connect(self)
def _disconnect(self)
def _send(self, msg):
def _recv(self)
def _pack(self, msg):
def _communicate(f):
def __init__(self, ip='127.0.0.1', port=3305):
@_communicate
def test(self)
@_communicate
def load(self, db_name)
@_communicate
def unload(self, db_name)
@_communicate
def build(self, db_name, rdf_file_path)
@_communicate
def drop(self, db_name)
@_communicate
def stop(self)
@_communicate
def query(self, sparql)
@_communicate
def show(self, _type=False)
}
```
Notice:
1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively.
2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in.
3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!)
#### Run
You are advised to see gStore/api/socket/python/example/PythonAPIExample for examples on how to use python API. Python file does not need compiling, and you can run it directly.

23
docs/BUGS.md Normal file
View File

@ -0,0 +1,23 @@
**This file maintains details of the bugs not solved currently.**
---
#### BUG_StringIndex_pread
StringIndex::randomAcces()
StringIndex::trySequenceAccess()
when we insert a triple via ghttp, and query this triple immediately, we will find that answer is wrong.
when we run this query for several times, each time we will get a different answer.
Sometimes, we will get messy code.
With the same reason, if we use bin/gquery db to enter the gquery console, insert and query within this console, we will get similar errors.
Amazingly, if we quit the console and restart, run this query again, we will get the correct answer!
The problem appears after we replace fread in StringIndex with pread, to support conncurrent queries.
The inherent reason have not been found now.
As a result, we change it back to fread, and use a lock for the StringIndex to block concurrent reads.
This is not supposed to cause a great loss in performance, because all operations to a single disk will be executed sequentially by the disk controller.
---

View File

@ -1,3 +1,26 @@
## Sep 20, 2018
Zongyue, Qin fixes the remaining bugs in Trie, ISArray and IVArray.
In addition, he improves the performance of parallism by providing fine-grained locks to key-value indices instead of locking the whole KVstore.
In order to support applications in Wuhan University, Li, Zeng fixes many existing bugs in parallism, 'ghttp', indices, caches(string buffers are not used now) and APIs.
What is more, setup scripts are added for several wellknown Linux distributions(CentOS, Archlinux, Ubuntu) to ease the burden of installing softwares and setting system variables.
As for documents, new figures of architecture and query processing are added by Li, Zeng and Yuyan, Chen.
This will help other developers on Github to view our code and modify it as they wish.
---
## Jun 6, 2018
Xunbin, Su designs a thread pool for 'ghttp' based on the SimpleWeb framework of Boost Library.
In addition, Xunbin, Sun and imbajin(a Github user) add a docker image(only works for Docker CE 17.06.1) to gStore, which can ease the usage of this database system.
Meanwhile, Zongyue, Qin fixes several bugs in the new indices and designs caches for them.
Chaofan, Yang adds and improves APIs(Application Program Interface) of several programming languages to 'ghttp' interface.
---
## Apr 24, 2018
Multithreading is enabled by zengli in ghttp, to improve the performance of this HTTP web server.

View File

@ -24,6 +24,8 @@ libcurl-devel | needed to be installed
NOTICE:
**To help ease the burden of setting environments, several scripts are provided in [setup](../scripts/setup/) for different Linux distributions. Please select the setup script corresponding to your system and run it with root(or sudo) priviledge. (As for CentOS system, you need to install boost-devel by yourselves.)**
1. The name of some packages may be different in different platforms, just install the corresponding one in your own operation system.
2. To install readline and readline-devel, just type `dnf install readline-devel` in Redhat/CentOS/Fedora, or `apt-get install libreadline-dev` in Debian/Ubuntu. Please use corresponding commands in other systems. If you use ArchLinux, just type `pacman -S readline` to install the readline and readline-devel.(so do other packages)
@ -60,7 +62,7 @@ NOTICE:
# ./buildconf --force
# ./configure --help | grep pthreads
You have to see --enable-pthreads listed. If do not, clear the buidls with this commands:
You have to see --enable-pthreads listed. If do not, clear the builds with this commands:
# rm -rf aclocal.m4
# rm -rf autom4te.cache/

View File

@ -1,10 +1,15 @@
You are advised to read init.conf file, and modify it as you wish. (this file will configure the basic options of gStore system)
gStore is a green software, and you just need to compile it with one command. Please run
gStore is a green software, and you just need to compile it with two commands. Please run
`make`
```
sudo ./scripts/setup/setup_$(ARCH).sh
make
in the gStore root directory to compile the gStore code, link the ANTLR lib, and build executable "gbuild", "gquery", "ghttp", "gserver", "gclient", "gconsole". What is more, the api of gStore is also built now.
```
in the gStore home directory to compile the gStore code, link the ANTLR lib, and build executable "gbuild", "gquery", "ghttp", "gserver", "gclient", "gconsole".
(Please substitute the $(ARCH) with your system version, like setup_archlinux.sh, setup_centos.sh and setup_ubuntu.sh)
What is more, the api of gStore is also built now.
If you want to use API examples of gStore, please run `make APIexample` to compile example codes for both C++ API and Java API. For details of API, please visit [API](API.md) chapter.

View File

@ -1,16 +1,22 @@
## Improve The Core
- add numeric value query function. need to answer numeric range query efficiently and space consume cannot be too large
- speed up the join process and postprocessing of SPARQL using GPU or FPGA
- add a control module to heuristically select an kind of index for a SPARQL query to filter(not always vstree)
- improve the indices and support concurrent reads
- add numeric value query function. need to answer numeric range query efficiently and space consume cannot be too large
- typedef all frequently used types, to avoid inconsistence and high modify cost
- - -
## Better The Interface
## Better Interface
- write web interface for gStore, and a web page to operate on it, just like virtuoso
- the usability of ghttp(ERROR_CODE, API ...)
- improve socket interface
- docker settings
- - -
@ -18,8 +24,6 @@
- warnings remain in using Parser/(antlr)!(modify sparql.g 1.1 and regenerate). change name to avoid redefine problem(maybe error), or go to use executable to parse
- build compress module(such as key-value module and stream module), but the latter just needs one-pass read/write, which may causes the compress method to be used both in disk and memory. all operations of string in memory can be changed to operations after compress: provide compress/archive interface, compare function. there are many compress algorithms to be chosen, then how to choose? what about utf-8 encoding problem? this method can lower the consume of memory and disk, but consumes more CPU. However, the time is decided by isomorphism. Simple compress is not good, but too complicated method will consume too much time, how to balance? (merge the continuous same characters, Huffman tree)
- mmap to speedup KVstore?
- the strategy for Stream:is 85% valid? consider sampling, analyse the size of result set and decide strategy? how to support order by: sort in memory if not put in file; otherwise, partial sort in memory, then put into file, then proceed external sorting

View File

@ -1,5 +1,13 @@
**This chapter introduce the whole structure of the gStore system project.**
#### Figures
The whole architecture of gStore system is presented in [Architecture](png/系统架构图_en.png).
The thread model of 'ghttp' can be viewed in [EN](png/ghttp-thread.png) and [ZH](png/ghttp-线程.png), which shows the relationship among main process, sever thread, query thread and so on.
The flow of answering a SPARQL query is given in [SPARQL Processing](png/查询处理过程.png), and the subprocess, which only targets at the BGP(Basic Graph Pattern) processing, is drawed in [BGP Processing](png/BGP.png).
---
#### The core source codes are listed below:
- Database/ (calling other core parts to deal with requests from interface part)
@ -10,7 +18,11 @@
- Join.cpp (join the node candidates to get results)
- Join.h (class, members,, and functions definitions)
- Join.h (class, members and functions definitions)
- Strategy.cpp
- Strategy.h
- KVstore/ (a key-value store to swap between memory and disk)
@ -18,39 +30,133 @@
- KVstore.h
- heap/ (a heap of nodes whose content are in memory)
- Heap.cpp
- ISArray/
- Heap.h
- ISArray.cpp
- ISArray.h
- ISBlockManager.cpp
- ISBlockManager.h
- ISEntry.cpp
- ISEntry.h
- ISTree/
- ISTree.cpp
- ISTree.h
- heap/ (a heap of nodes whose content are in memory)
- ISHeap.cpp
- ISHeap.h
- node/ (all kinds of nodes in B+-tree)
- node/ (all kinds of nodes in B+-tree)
- Node.cpp (the base class of IntlNode and LeafNode)
- ISIntlNode.cpp
- Node.h
- ISIntlNode.h
- IntlNode.cpp (internal nodes in B+-tree)
- ISLeafNode.cpp
- IntlNode.h
- ISLeafNode.h
- LeafNode.cpp (leaf nodes in B+-tree)
- ISNode.cpp
- LeafNode.h
- ISNode.h
- storage/ (swap contents between memory and disk)
- storage/
- file.h
- ISStorage.cpp
- ISStorage.h
- IVArray/
- IVArray.cpp
- IVArray.h
- IVBlockManager.cpp
- IVBlockManager.h
- IVCacheManager.cpp
- IVCacheManger.h
- IVEntry.cpp
- IVEntry.h
- IVTree/
- IVTree.cpp
- IVTree.h
- heap/ (a heap of nodes whose content are in memory)
- Storage.cpp
- IVHeap.cpp
- Storage.h
- IVHeap.h
- node/ (all kinds of nodes in B+-tree)
- tree/ (implement all tree operations and interfaces)
- IVIntlNode.cpp
- Tree.cpp
- IVIntlNode.h
- Tree.h
- IVLeafNode.cpp
- IVLeafNode.h
- IVNode.cpp
- IVNode.h
- storage/
- IVStorage.cpp
- IVStorage.h
- SITree/
- SITree.cpp
- SITree.h
- heap/ (a heap of nodes whose content are in memory)
- SIHeap.cpp
- SIHeap.h
- node/ (all kinds of nodes in B+-tree)
- SIIntlNode.cpp
- SIIntlNode.h
- SILeafNode.cpp
- SILeafNode.h
- SINode.cpp
- SINode.h
- storage/
- SIStorage.cpp
- SIStorage.h
- Query/ (needed to answer SPARQL query)
@ -62,6 +168,10 @@
- IDList.h
- ResultFilter.cpp
- ResultFilter.h
- ResultSet.cpp (keep the result set corresponding to a query)
- ResultSet.h
@ -74,6 +184,10 @@
- Varset.h
- QueryCache.cpp
- QueryCache.h
- QueryTree.cpp
- QueryTree.h
@ -82,6 +196,10 @@
- GeneralEvaluation.h
- TempResult.cpp
- TempResult.h
- RegexExpression.h
- Signature/ (assign signatures for nodes and edges, but not for literals)
@ -170,6 +288,12 @@
- BloomFilter.h
- ClassForVlistCache.h
- VList.cpp
- VList.h
- - -
#### The interface part is listed below:
@ -192,21 +316,19 @@
- Socket.h
- Main/ (a series of applications/main-program to operate on gStore)
- client_http.hpp
- gload.cpp (import a RDF dataset)
- server_http.hpp
- gquery.cpp (query a database)
- gserver.cpp (start up the gStore server)
- gclient.cpp (connect to a gStore server and interact)
- web/
-
- - -
#### More details
To acquire a deep understanding of gStore codes, please go to [Code Detail](pdf/代码目录及概览.pdf). See [use case](pdf/Gstore2.0_useCaseDoc.pdf) to understand the design of use cases, and see [OOA](pdf/OOA_class.pdf) and [OOD](pdf/OOD_class.pdf) for OOA design and OOD design, respectively.
To acquire a deep understanding of gStore codes, please go to [Code Detail](pdf/code_overview.pdf). See [use case](pdf/Gstore2.0_useCaseDoc.pdf) to understand the design of use cases, and see [OOA](pdf/OOA_class.pdf) and [OOD](pdf/OOD_class.pdf) for OOA design and OOD design, respectively.
If you want to know the sequence of a running gStore, please view the list below:

View File

@ -2,15 +2,10 @@
**All the commands of gStore should be used in the root directory of gStore like bin/gconsole, because executables are placed in bin/, and they may use some files whose paths are indicated in the code, not absolute paths. We will ensure that all paths are absolute later by asking users to give the absolute path in their own systems to really install/configure the gStore. However, you must do as we told now to avoid errors.**
#### 0. gconsole
gconsole is the main console of gStore, which integrates with all functions to operate on gStore, as well as some system commands. Completion of command names, line editing features and access to the history list are all provided. Feel free to try it, and you may have a wonderful tour!(spaces or tabs at the beginning or end are ok, and no need to type any special characters as separators)
Just type `bin/gconsole` in the root directory of gStore to use this console, and you will find a `gstore>` prompt, which indicates that you are in native mode and can type in native commands now. There is another mode of this console, which is called remote mode. Just type `connect` in the native mode to enter the remote mode, and type `disconnect` to exit to native mode.(the console connects to a gStore server whose ip is '127.0.0.1' and port is 3305, you can specify them by typing `connect gStore_server_ip gStore_server_port`)
You can use `help` or `?` either in native mode or remote mode to see the help information, or you can type `help command_name` or `? command_name` to see the information of a given command. Notice that there are some differences between the commands in native mode and commands in remote mode. For example, system commands like `ls`, `cd` and `pwd` are provided in native mode, but not in remote mode. Also take care that not all commands contained in the help page are totally achieved, and we may change some functions of the console in the future.
What we have done is enough to bring you much convenience to use gStore, just enjoy it!
#### 0. Format of data
The RDF data should be given in N-Triple format(XML is not supported by now) and queries must be given in SPARQL 1.1 syntax.
Not all syntax in SPARQL 1.1 is parsed and answered in gStore; for example, property paths are beyond the ability of the gStore system.
Tabs, '<' and '>' are not allowed to appear in entity, literal or predicates of the data and queries.
- - -
@ -23,7 +18,6 @@ gbuild is used to build a new database from a RDF triple format file.
For example, we build a database from lubm.nt which can be found in example folder.
[bookug@localhost gStore]$ bin/gbuild lubm ./data/lubm/lubm.nt
2017年11月23日 星期四 20时58分21秒 -0.484698 seconds
gbuild...
argc: 3 DB_store:lubm RDF_data: ./data/lubm/lubm.nt
begin encode RDF from : ./data/lubm/lubm.nt ...
@ -95,7 +89,7 @@ Notice:
#### 3. ghttp
ghttp runs gStore like HTTP server with port 9000. Visit from browser with prescriptive url, then gStore will execute corresponding operation.
ghttp runs gStore like HTTP server with port 9000(You need to open this port in your environment, `iptables` tool is suggested). Visit from browser with prescriptive url, then gStore will execute corresponding operation.
type:
@ -136,6 +130,7 @@ answer = gc.user("delete_query", "root", "123456", "Jack", "lubm");
//delete user(with username: Jack, password: 2)
answer = gc.user("delete_user", "root", "123456", "Jack", "2");
```
```
db_name: the name of database, like lubm
format: html, json, txt, csv
@ -146,11 +141,21 @@ type: the type of operation that you execute on user, like: add_user, delete_use
username: the username of the user that execute the operation
password: the password of the user that execute the operation
```
`ghttp` supports concurrent read-only queries, but when queries containing updates come, the whole database will be locked.
The number of concurrent running queries is suggested to be lower than 300 on a machine with dozens of kernel threads, though we can run 13000 queries concurrently in our experiments.
To use the concurrency feature, you had better modify the system settings of 'open files' and 'maximum processes' to 65535 or larger.
Three scripts are placed in [setup](../scripts/setup/) to help you modify the settings in different Linux distributions.
**If queries containing updates are sent via `ghttp`, a `checkpoint` command must be sent and done by the `ghttp` console before we shut down the database server. Otherwise, the updates may not be synchronized to disk and will be lost if the `ghttp` server is stopped.**
- - -
#### 4. gserver
**This is not maintained now.**
gserver is a daemon. It should be launched first when accessing gStore by gclient or API. It communicates with client through socket.
[bookug@localhost gStore]$ bin/gserver -s
@ -172,13 +177,15 @@ Notice: Multiple threads are not supported by gserver. If you start up gclient i
#### 5. gclient
**This is not maintained now.**
gclient is designed as a client to send commands and receive feedbacks.
[bookug@localhost gStore]$ bin/gclient
ip=127.0.0.1 port=3305
gsql>
You can also assign gserver's ip and port.
You can also assign the ip and port of gserver.
[bookug@localhost gStore]$ bin/gclient 172.31.19.15 3307
ip=172.31.19.15 port=3307
@ -208,35 +215,17 @@ Notice:
- - -
#### 6. gconsole
**This is not maintained now.**
#### 6. test utilities
gconsole is the main console of gStore, which integrates with all functions to operate on gStore, as well as some system commands. Completion of commands name, line editing features and access to the history list are all provided. Feel free to try it, and you may have a wonderful tour!(spaces or tabs at the beginning or end is ok, and no need to type any special characters as separators)
A series of test programs is placed in the test/ folder, and we will introduce the two useful ones: gtest.cpp and full_test.sh
Just type `bin/gconsole` in the root directory of gStore to use this console, and you will find a `gstore>` prompt, which indicates that you are in native mode and can type in native commands now. There are another mode of this console, which is called remote mode. Just type `connect` in the native mode to enter the remote mode, and type `disconnect` to exit to native mode.(the console connect to a gStore server whose ip is '127.0.0.1' and port is 3305, you can specify them by type `connect gStore_server_ip gStore_server_port`)
**gtest is used to test gStore with multiple datasets and queries.**
You can use `help` or `?` either in native mode or remote mode to see the help information, or you can type `help command_name` or `? command_name` to see the information of a given command. Notice that there are some differences between the commands in native mode and commands in remote mode. For example, system commands like `ls`, `cd` and `pwd` are provided in native mode, but not in remote mode. Also take care that not all commands contained in the help page are totally achieved, and we may change some functions of the console in the future.
To use gtest utility, please type `make gtest` to compile the gtest program first. Program gtest is a test tool to generate structural logs for datasets. Please type `./gtest --help` in the working directory for details.
**Please change paths in the test/gtest.cpp if needed.**
You should place the datasets and queries in this way:
DIR/WatDiv/database/*.nt
DIR/WatDiv/query/*.sql
Notice that DIR is the root directory where you place all datasets waiting to be used by gtest. And WatDiv is a class of datasets, as well as lubm. Inside WatDiv (or lubm, etc.), please place all datasets (named with .nt) in a database/ folder, and place all queries (corresponding to datasets, named with .sql) in a query/ folder.
Then you can run the gtest program with specified parameters, and the output will be sorted into three logs in gStore root directory: load.log/(for database loading time and size), time.log/(for query time) and result.log/(for all query results, not the entire output strings, but the information to record the selected two database systems matched or not).
All logs produced by this program are in TSV format(separated with '\t'), you can load them into Calc/Excel/Gnumeric directly. Notice that time unit is ms, and space unit is kb.
**full_test.sh is used to compare the performance of gStore and other database systems on multiple datasets and queries.**
To use the full_test.sh utility, please download the database system which you want to test and compare, and set the exact position of database systems and datasets in this script. The name strategy should be the same as the requirements of gtest, as well as the logs strategy.
Only gStore and Jena are tested and compared in this script, but it is easy to add other database systems, if you would like to spend some time on reading this script. You may go to [test report](pdf/gstore测试报告.pdf) or [Frequently Asked Questions](FAQ.md) for help if you encounter a problem.
What we have done is enough to bring you much convenience to use gStore, just enjoy it!
- - -
@ -301,3 +290,33 @@ After starting ghttp, type `bin/gshow ip port` to check loaded database.
Content-Length--->[4]
database: lubm
---
#### 11. test utilities
A series of test programs is placed in the scripts/ folder, and we will introduce the two useful ones: gtest.cpp and full_test.sh
**gtest is used to test gStore with multiple datasets and queries.**
To use gtest utility, please type `make gtest` to compile the gtest program first. Program gtest is a test tool to generate structural logs for datasets. Please type `./gtest --help` in the working directory for details.
**Please change paths in the test/gtest.cpp if needed.**
You should place the datasets and queries in this way:
DIR/WatDiv/database/*.nt
DIR/WatDiv/query/*.sql
Notice that DIR is the root directory where you place all datasets waiting to be used by gtest. And WatDiv is a class of datasets, as well as lubm. Inside WatDiv (or lubm, etc.), please place all datasets (named with .nt) in a database/ folder, and place all queries (corresponding to datasets, named with .sql) in a query/ folder.
Then you can run the gtest program with specified parameters, and the output will be sorted into three logs in gStore root directory: load.log/(for database loading time and size), time.log/(for query time) and result.log/(for all query results, not the entire output strings, but the information to record the selected two database systems matched or not).
All logs produced by this program are in TSV format(separated with '\t'), you can load them into Calc/Excel/Gnumeric directly. Notice that time unit is ms, and space unit is kb.
**full_test.sh is used to compare the performance of gStore and other database systems on multiple datasets and queries.**
To use the full_test.sh utility, please download the database system which you want to test and compare, and set the exact position of database systems and datasets in this script. The name strategy should be the same as the requirements of gtest, as well as the logs strategy.
Only gStore and Jena are tested and compared in this script, but it is easy to add other database systems, if you would like to spend some time on reading this script. You may go to [test report](pdf/gstore测试报告.pdf) or [Frequently Asked Questions](FAQ.md) for help if you encounter a problem.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -1,364 +0,0 @@
% This file was created with JabRef 2.10.
% Encoding: UTF8
@String { cacm = {Commun. ACM} }
@String { cloud13 = {Proc. IEEE 6th Int. Conf. on Cloud Computing} }
@String { compsur = {ACM Comput. Surv.} }
@String { EDBT12 = {Proc. 15th Int. Conf. on Extending Database Technology} }
@String { ESWC08 = {Proc. 5th European Semantic Web Conf.} }
@String { ESWC14 = {Proc. 11th Extended Semantic Web Conf.} }
@String { ICDE11 = {Proc. 27th Int. Conf. on Data Engineering} }
@String { icde13 = {Proc. 29th Int. Conf. on Data Engineering} }
@String { icde14 = {Proc. 30th Int. Conf. on Data Engineering} }
@String { icdew13 = {Proc. Workshops of 29th Int. Conf. on Data Engineering} }
@String { ieeetkde = {IEEE Trans. Knowl. and Data Eng.} }
@String { iswc11 = {Proc. 10th Int. Semantic Web Conf.} }
@String { iswc14 = {Proc. 13th Int. Semantic Web Conf.} }
@String { jwebs = {J. Web Semantics} }
@String { pvldb = {Proc. VLDB Endowment} }
@String { sc95 = {Proc. ACM/IEEE Conf. on Supercomputing} }
@String { siamjc = {SIAM J. on Comput.} }
@String { sigmod = {Proc. ACM SIGMOD Int. Conf. on Management of Data} }
@String { ssdbm12 = {Proc. 24th Int. Conf. on Scientific and Statistical Database Management} }
@String { tods = {ACM Trans. Database Syst.} }
@String { vldb06 = {Proc. 32nd Int. Conf. on Very Large Data Bases} }
@String { vldbj = {VLDB J.} }
@String { www10 = {Proc. 19th Int. World Wide Web Conf.} }
@String { WWW10 = {Proc. 19th Int. World Wide Web Conf.} }
@String { www12 = {Proc. 21st Int. World Wide Web Conf.} }
@String { www12cv = {Proc. 21st Int. World Wide Web Conf. (Companion Volume)} }
@String { www14cv = {Proc. 23rd Int. World Wide Web Conf. (Companion Volume)} }
@InProceedings{DBLP:conf/semweb/BroekstraKH02,
Title = {Sesame: {A} Generic Architecture for Storing and Querying {RDF} and
{RDF} Schema},
Author = {Jeen Broekstra and
Arjohn Kampman and
Frank van Harmelen},
Booktitle = {The Semantic Web - {ISWC} 2002, First International Semantic Web Conference},
Year = {2002},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/conf/semweb/BroekstraKH02},
Crossref = {DBLP:conf/semweb/2002},
Doi = {10.1007/3-540-48005-6_7},
Timestamp = {Thu, 30 Jun 2011 15:59:43 +0200},
Url = {http://dx.doi.org/10.1007/3-540-48005-6_7}
}
@InProceedings{DBLP:conf/sigir/Deppisch86,
Title = {S-Tree: {A} Dynamic Balanced Signature Index for Office Retrieval},
Author = {Uwe Deppisch},
Booktitle = {SIGIR'86, Proceedings of the 9th Annual International {ACM} {SIGIR}
Conference on Research and Development in Information Retrieval},
Year = {1986},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/conf/sigir/Deppisch86},
Crossref = {DBLP:conf/sigir/86},
Doi = {10.1145/253168.253189},
Timestamp = {Thu, 13 Sep 2012 13:14:47 +0200},
Url = {http://doi.acm.org/10.1145/253168.253189}
}
@Article{DBLP:swstore,
Title = {{SW-Store:} a vertically partitioned {DBMS} for Semantic Web data management},
Author = {Daniel J. Abadi and Adam Marcus and Samuel Madden and Kate Hollenbach},
Journal = VLDBJ,
Year = {2009},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Date-modified = {2015-03-30 13:11:35 +0000},
Ee = {http://dx.doi.org/10.1007/s00778-008-0125-y}
}
@InProceedings{DBLP:WatDiv,
Title = {Diversified Stress Testing of {RDF} Data Management Systems},
Author = {G{\"{u}}nes Alu{\c{c}} and Olaf Hartig and M. Tamer {\"{O}}zsu and Khuzaima Daudjee},
Booktitle = ISWC14,
Year = {2014},
Bdsk-url-1 = {http://dx.doi.org/10.1007/978-3-319-11964-9_13},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/conf/semweb/AlucHOD14},
Date-modified = {2015-02-25 16:52:34 +0000},
Timestamp = {Fri, 10 Oct 2014 14:01:06 +0200}
}
@Article{CiteSeerX:SystemR,
Title = {{System R:} Relational Approach to Database Management},
Author = {M. M. Astrahan and M. W. Blasgen and D. D. Chamberlin and K. P. Eswaran and J. N. Gray and P. P. Griffiths and W. F. King and R. A. Lorie and J. W. Mehl and G. R. Putzolu and I. L. Traiger and B. W. Wade and V. Watson},
Journal = {ACM Transactions on Database Systems},
Year = {1976},
}
@InProceedings{DBLP:conf/sigmod/Atre15,
Title = {\emph{Left Bit Right}: {For SPARQL Join Queries with OPTIONAL Patterns
(Left-outer-joins)}},
Author = {Medha Atre},
Booktitle = SIGMOD,
Year = {2015},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/conf/sigmod/Atre15},
Doi = {10.1145/2723372.2746483},
Timestamp = {Thu, 28 May 2015 08:59:02 +0200},
Url = {http://doi.acm.org/10.1145/2723372.2746483}
}
@Article{Berners2001THE,
Title = {THE SEMANTIC WEB.},
Author = {Berners-Lee, Tim and Hendler, James and Lassila, Ora},
Journal = {Scientific American},
Year = {2001},
}
@Article{Berners2001The,
Title = {The Semantic Web: A New Form of Web Content That is Meaningful to Computers Will Unleash a Revolution of New Possibilities},
Author = {Berners-Lee, Tim and Hendler, James and Lassila, Ora and Berners-Lee, Tim},
Journal = {Scientific American},
Year = {2001},
}
@Book{cormen1990introduction,
Title = {Introduction to algorithms},
Author = {Cormen, Thomas H and Leiserson, Charles E and Rivest, Ronald L},
Publisher = {MIT press},
Year = {1990}
}
@InProceedings{Decker2003,
Title = {The Semantic Web: Semantics for Data on the Web},
Author = {Decker, Stefan and Kashyap, Vipul},
Booktitle = Proceedings # {2003} # VLDB # Conference,
Year = {2003},
}
@InProceedings{Decker2003The,
Title = {The semantic web: semantics for data on the web},
Author = {Decker, Stefan and Kashyap, Vipul},
Booktitle = {International Conference on Very Large Data Bases},
Year = {2003},
}
@Article{DBLP:journals/rsa/DyerG00,
Title = {The Complexity of Counting Graph Homomorphisms},
Author = {Martin E. Dyer and
Catherine S. Greenhill},
Journal = {Random Struct. Algorithms},
Year = {2000},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/journals/rsa/DyerG00},
Timestamp = {Sat, 14 Apr 2012 16:35:22 +0200},
Url = {http://onlinelibrary.wiley.com/doi/10.1002/1098-2418(200010/12)17:3/4<260::AID-RSA5>3.0.CO;2-W/abstract}
}
@InProceedings{DBLP:conf/icde/FenderM11,
Title = {A New, Highly Efficient, and Easy to Implement Top-down Join Enumeration
Algorithm},
Author = {Pit Fender and
Guido Moerkotte},
Booktitle = ICDE11,
Year = {2011},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/conf/icde/FenderM11},
Doi = {10.1109/ICDE.2011.5767901},
Timestamp = {Sat, 09 Aug 2014 14:37:16 +0200},
Url = {http://dx.doi.org/10.1109/ICDE.2011.5767901}
}
@InProceedings{Vijay2016Querying,
Title = {Querying RDF Data Using A Multigraph-based Approach},
Author = {Vijay Ingalalli and Dino Ienco and Pascal Poncelet and Serena Villata},
Booktitle = {EDBT},
Year = {2016}
}
@Article{Klyne2004Resource,
Title = {Resource Description Framework (RDF): Concepts and Abstract Syntax},
Author = {Klyne and Graham and Carroll and Jeremy, J and McBride and Brian},
Journal = {World Wide Web Consortium Recommendation},
Year = {2004}
}
@Article{Kopecky2007Semantic,
Title = {Semantic Annotations for WSDL and XML Schema},
Author = {Kopecky, J. and Vitvar, T. and Bournez, C. and Farrell, J.},
Journal = {IEEE Internet Computing},
Year = {2007},
}
@Article{Martens2006Expressiveness,
Title = {Expressiveness and complexity of XML Schema},
Author = {Martens, Wim and Neven, Frank and Schwentick, Thomas and Bex, Geert Jan},
Journal = {Acm Transactions on Database Systems},
Year = {2006},
}
@Article{DBLP:rdf3x,
Title = {{RDF-3X}: a {RISC}-style engine for {RDF}},
Author = {Thomas Neumann and Gerhard Weikum},
Journal = PVLDB,
Year = {2008},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Date-added = {2015-03-30 01:16:48 +0000},
Date-modified = {2015-03-30 01:16:48 +0000},
Ee = {http://www.vldb.org/pvldb/1/1453927.pdf}
}
@Article{DBLP:journals/tods/PerezAG09,
Title = {Semantics and complexity of {SPARQL}},
Author = {Jorge P{\'{e}}rez and Marcelo Arenas and Claudio Gutierrez},
Journal = TODS,
Year = {2009},
Date-added = {2015-03-30 01:16:48 +0000},
Date-modified = {2015-03-30 01:16:48 +0000}
}
@Article{DBLP:journals/vldb/PengZO0Z16,
Title = {Processing {SPARQL} queries over distributed {RDF} graphs},
Author = {Peng Peng and
Lei Zou and
M. Tamer {\"{O}}zsu and
Lei Chen and
Dongyan Zhao},
Journal = {{VLDB} J.},
Year = {2016},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/journals/vldb/PengZO0Z16},
Doi = {10.1007/s00778-015-0415-0},
Timestamp = {Mon, 21 Mar 2016 17:08:54 +0100},
Url = {http://dx.doi.org/10.1007/s00778-015-0415-0}
}
@Article{Prud2008SPARQL,
Title = {SPARQL Query Language for RDF},
Author = {Prud'Hommeaux, Eric and Seaborne, Andy},
Journal = {W3C Recommendation},
Year = {2008}
}
@InProceedings{DBLP:FedBench,
Title = {{FedBench: A} Benchmark Suite for Federated Semantic Data Query Processing},
Author = {Michael Schmidt and
Olaf G{\"{o}}rlitz and
Peter Haase and
G{\"{u}}nter Ladwig and
Andreas Schwarte and
Thanh Tran},
Booktitle = ISWC11,
Year = {2011},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/conf/semweb/SchmidtGHLST11},
Doi = {10.1007/978-3-642-25073-6_37},
Timestamp = {Wed, 19 Oct 2011 14:01:40 +0200},
Url = {http://dx.doi.org/10.1007/978-3-642-25073-6_37}
}
@InProceedings{DBLP:FedX,
Title = {{FedX}: Optimization Techniques for Federated Query Processing on Linked
Data},
Author = {Andreas Schwarte and
Peter Haase and
Katja Hose and
Ralf Schenkel and
Michael Schmidt},
Booktitle = ISWC11,
Year = {2011},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/conf/semweb/SchwarteHHSS11},
Doi = {10.1007/978-3-642-25073-6_38},
Timestamp = {Wed, 19 Oct 2011 14:01:40 +0200},
Url = {http://dx.doi.org/10.1007/978-3-642-25073-6_38}
}
@Article{VLDB08:QuickSI,
Title = {Taming verification hardness: an efficient algorithm for testing subgraph isomorphism},
Author = {Haichuan Shang and Ying Zhang and Xuemin Lin and Jeffrey Xu Yu},
Journal = PVLDB,
Year = {2008},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Date-added = {2015-03-30 01:16:48 +0000},
Date-modified = {2015-03-30 01:16:48 +0000},
Ee = {http://www.vldb.org/pvldb/1/1453899.pdf}
}
@InProceedings{SIGMOD13:Trinity,
Title = {Trinity: a distributed graph engine on a memory cloud},
Author = {Bin Shao and Haixun Wang and Yatao Li},
Booktitle = SIGMOD,
Year = {2013},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Date-added = {2015-03-30 01:16:48 +0000},
Date-modified = {2015-03-30 01:16:48 +0000},
Ee = {http://doi.acm.org/10.1145/2463676.2467799}
}
@Article{Swick1999Resource,
Title = {Resource Description Framework (RDF) Model and Syntax Specification},
Author = {Swick, Ralph R.},
Journal = {World Wide Web Consortium Recommendation},
Year = {1999}
}
@Article{DBLP:journals/pvldb/ZengYWSW13,
Title = {A Distributed Graph Engine for Web Scale {RDF} Data},
Author = {Kai Zeng and
Jiacheng Yang and
Haixun Wang and
Bin Shao and
Zhongyuan Wang},
Journal = {{PVLDB}},
Year = {2013},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/journals/pvldb/ZengYWSW13},
Timestamp = {Wed, 04 Sep 2013 08:33:42 +0200},
Url = {http://www.vldb.org/pvldb/vol6/p265-zeng.pdf}
}
@Article{VLDB13:Trinity,
Title = {A Distributed Graph Engine for Web Scale {RDF} Data},
Author = {Kai Zeng and Jiacheng Yang and Haixun Wang and Bin Shao and Zhongyuan Wang},
Journal = PVLDB,
Year = {2013},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Date-added = {2015-03-30 01:16:48 +0000},
Date-modified = {2015-03-30 01:16:48 +0000},
Ee = {http://www.vldb.org/pvldb/vol6/p265-zeng.pdf}
}
@Article{Zou:2013fk,
Title = {{gStore}: A Graph-based {SPARQL} Query Engine},
Author = {Zou, Lei and {\"O}zsu, M. Tamer and Chen, Lei and Shen, Xuchuan and Huang, Ruizhe and Zhao, Dongyan},
Journal = VLDBJ,
Year = {2014},
Date-added = {2015-03-30 01:16:48 +0000},
Date-modified = {2015-03-30 01:16:48 +0000},
Keywords = {RDF, Graph Database}
}
@Article{VLDB11:gStore,
Title = {Answering pattern match queries in large graph databases via graph embedding},
Author = {Lei Zou and M. Tamer {\"O}zsu and Lei Chen and Xuchuan Shen and Ruizhe Huang and Dongyan Zhao},
Journal = VLDBJ,
Year = {2013},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Date-added = {2015-03-30 01:16:48 +0000},
Date-modified = {2015-03-30 01:16:48 +0000},
Ee = {http://dx.doi.org/10.1007/s00778-011-0238-6}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

BIN
docs/jpg/A11-loadRDF.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

BIN
docs/jpg/A12-insertRDF.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

BIN
docs/jpg/A13-deleteRDF.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

BIN
docs/jpg/bsbm_10000.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

BIN
docs/jpg/bsbm_100000.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 82 KiB

BIN
docs/jpg/dbpedia2014.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

BIN
docs/jpg/lubm_5000.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 83 KiB

BIN
docs/jpg/watdiv_200.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 92 KiB

BIN
docs/jpg/watdiv_300M.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

BIN
docs/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 186 KiB

BIN
docs/png/BGP.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

BIN
docs/visio/BGP.vsdx Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More