diff --git a/Database/Database.cpp b/Database/Database.cpp index 9bc4380..d80103f 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -2,7 +2,7 @@ # Filename: Database.cpp # Author: Bookug Lobert # Mail: 1181955272@qq.com -# Last Modified: 2016-09-11 15:27 +# Last Modified: 2018-09-05 14:20 # Description: originally written by liyouhuan, modified by zengli and chenjiaqi =============================================================================*/ @@ -576,13 +576,15 @@ Database::setPreMap() void Database::setStringBuffer() { - //TODO: assign according to memory manager + //BETTER: assign according to memory manager //BETTER?maybe different size for entity and literal, maybe different offset should be used this->entity_buffer_size = (this->limitID_entity<50000000) ? this->limitID_entity : 50000000; this->literal_buffer_size = (this->limitID_literal<50000000) ? this->limitID_literal : 50000000; this->entity_buffer = new Buffer(this->entity_buffer_size); this->literal_buffer = new Buffer(this->literal_buffer_size); + //DEBUG: insert/delete we should update the size of buffer if adding new string + //WARN: after delete and insert, IDs may be not continuous, then the string buffer will cause errors! 
TYPE_ENTITY_LITERAL_ID valid = 0, i; string str; for (i = 0; i < this->entity_buffer_size; ++i) @@ -700,13 +702,15 @@ Database::load() id2literal_thread.join(); #endif - //TODO+BETTER: if we set string buffer using string index instead of B+Tree, then we can + //BETTER: if we set string buffer using string index instead of B+Tree, then we can //avoid to load id2entity and id2literal in ONLY_READ mode //generate the string buffer for entity and literal, no need for predicate //NOTICE:the total string size should not exceed 20G, assume that most strings length < 500 //too many empty between entity and literal, so divide them - this->setStringBuffer(); + + //this->setStringBuffer(); + //NOTICE: we should build string buffer from kvstore, not string index //Because when searching in string index, it will first check if in buffer(but the buffer is being built) @@ -726,7 +730,7 @@ Database::load() //vstree_thread.join(); #endif //load cache of sub2values and obj2values - //this->load_cache(); + this->load_cache(); //warm up always as finishing build(), to utilize the system buffer //this->warmUp(); @@ -748,7 +752,7 @@ Database::load() cout << "finish load" << endl; - //TODO: for only-read application(like endpoint), 3 id2values trees can be closed now + //BETTER: for only-read application(like endpoint), 3 id2values trees can be closed now //and we should load all trees on only READ mode //HELP: just for checking infos(like kvstore) @@ -767,7 +771,7 @@ Database::load_cache() { // get important pre ID // a pre whose degree is more than 50% of max pre degree is important pre -/* cout << "get important pre ID" << endl; + cout << "get important pre ID" << endl; this->get_important_preID(); cout << "total preID num is " << pre_num << endl; cout << "important pre ID is: "; @@ -776,7 +780,7 @@ Database::load_cache() cout << endl; this->load_candidate_pre2values(); this->load_important_sub2values(); - this->load_important_obj2values();*/ + this->load_important_obj2values(); } 
void @@ -843,7 +847,7 @@ Database::get_candidate_preID() _size = this->kvstore->getPreListSize(i); - if (!VList::isLongList(_size)) continue; // only long list need to be stored in cache + if (!VList::isLongList(_size) || _size >= max_total_size) continue; // only long list need to be stored in cache _value = pre2num[i]; if (_value == 0) continue; @@ -888,56 +892,34 @@ Database::get_candidate_preID() void Database::build_CacheOfPre2values() { -/* cout << "now add cache of preID2values..." << endl; - priority_queue , CmpByMod<2000> > temp_queue; + cout << "now add cache of preID2values..." << endl; while (!candidate_preID.empty()) { - temp_queue.push(candidate_preID.top()); + this->kvstore->AddIntoPreCache(candidate_preID.top().key); candidate_preID.pop(); } - while (!temp_queue.empty()) - { - //cout << "add key " << important_objID.top().key << " size: " << important_objID.top().size << endl; - this->kvstore->AddIntoPreCache(temp_queue.top().key); - temp_queue.pop(); - }*/ } void Database::build_CacheOfObj2values() { -/* cout << "now add cache of objID2values..." << endl; - // sort key according to their mod by 2000 - priority_queue , CmpByMod<2000> > temp_queue; + cout << "now add cache of objID2values..." << endl; while (!important_objID.empty()) { - temp_queue.push(important_objID.top()); + this->kvstore->AddIntoObjCache(important_objID.top().key); important_objID.pop(); } - while (!temp_queue.empty()) - { - //cout << "add key " << important_objID.top().key << " size: " << important_objID.top().size << endl; - this->kvstore->AddIntoObjCache(temp_queue.top().key); - temp_queue.pop(); - }*/ } void Database::build_CacheOfSub2values() { -/* cout << "now add cache of subID2values..." << endl; - priority_queue , CmpByMod<2000> > temp_queue; + cout << "now add cache of subID2values..." 
<< endl; while (!important_subID.empty()) { - temp_queue.push(important_subID.top()); + this->kvstore->AddIntoSubCache(important_subID.top().key); important_subID.pop(); } - while (!temp_queue.empty()) - { - //cout << "add key " << important_objID.top().key << " size: " << important_objID.top().size << endl; - this->kvstore->AddIntoSubCache(temp_queue.top().key); - temp_queue.pop(); - }*/ } void @@ -956,7 +938,7 @@ Database::get_important_subID() unsigned _size = 0; if (this->kvstore->getEntityByID(i) == invalid) continue; _size = this->kvstore->getSubListSize(i); - if (!VList::isLongList(_size)) continue; // only long list need to be stored in cache + if (!VList::isLongList(_size) || _size >= max_total_size) continue; // only long list need to be stored in cache for(unsigned j = 0; j < important_preID.size(); ++j) { @@ -1019,7 +1001,7 @@ Database::get_important_objID() if (_tmp == invalid) continue; _size = this->kvstore->getObjListSize(i); - if (!VList::isLongList(_size)) continue; // only long list need to be stored in cache + if (!VList::isLongList(_size) || _size >= max_total_size) continue; // only long list need to be stored in cache for(unsigned j = 0; j < important_preID.size(); ++j) { @@ -1247,7 +1229,6 @@ Database::unload() delete this->literal_buffer; this->literal_buffer = NULL; - //TODO: fflush the database file //this->vstree->saveTree(); //delete this->vstree; //this->vstree = NULL; @@ -1283,10 +1264,7 @@ bool Database::save() this->saveDBInfoFile(); this->saveIDinfo(); - //TODO: fsync or using sync in Util - //should sync every file modified - //TODO: add flush for string index - //this->stringindex->flush(); + this->stringindex->flush(); this->clear_update_log(); cerr<<"database checkpoint: "<getName()<pre_num; } +VSTree* +Database::getVSTree() +{ + return this->vstree; +} + +KVstore* +Database::getKVstore() +{ + return this->kvstore; +} + +StringIndex* +Database::getStringIndex() +{ + return this->stringindex; +} + +QueryCache* 
+Database::getQueryCache() +{ + return this->query_cache; +} + +TYPE_TRIPLE_NUM* +Database::getpre2num() +{ + return this->pre2num; +} + +TYPE_ENTITY_LITERAL_ID& +Database::getlimitID_literal() +{ + return this->limitID_literal; +} + +TYPE_ENTITY_LITERAL_ID& +Database::getlimitID_entity() +{ + return this->limitID_entity; +} + +TYPE_PREDICATE_ID& +Database::getlimitID_predicate() +{ + return this->limitID_predicate; +} + +mutex& +Database::get_query_parse_lock() +{ + return this->query_parse_lock; +} + int Database::query(const string _query, ResultSet& _result_set, FILE* _fp) { @@ -1381,23 +1413,24 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) { return -101; } - cout<<"read lock acquired"<stringindex; - tmpsi.emptyBuffer(); - general_evaluation.setStringIndexPointer(&tmpsi); + //StringIndex tmpsi = *this->stringindex; + //tmpsi.emptyBuffer(); + //general_evaluation.setStringIndexPointer(&tmpsi); - //TODO: withdraw this lock, and allow for multiple doQuery() to run in parallism - //we need to add lock in QueryCache's operations - this->debug_lock.lock(); + // this->debug_lock.lock(); bool query_ret = general_evaluation.doQuery(); if(!query_ret) { success_num = -101; } - this->debug_lock.unlock(); + // this->debug_lock.unlock(); + long tv_bfget = Util::get_cur_time(); + //NOTICE: this lock lock ensures that StringIndex is visited sequentially + this->getFinalResult_lock.lock(); if (trie == NULL) { trie = new Trie; @@ -1408,9 +1441,8 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) } trie->LoadDictionary(); } - - long tv_bfget = Util::get_cur_time(); general_evaluation.getFinalResult(_result_set); + this->getFinalResult_lock.unlock(); long tv_afget = Util::get_cur_time(); cout << "after getFinalResult, used " << (tv_afget - tv_bfget) << "ms." 
<< endl; @@ -1418,7 +1450,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) need_output_answer = true; //general_evaluation.setNeedOutputAnswer(); - tmpsi.clear(); + //tmpsi.clear(); pthread_rwlock_unlock(&(this->update_lock)); } //Update @@ -1434,6 +1466,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) cout<<"unable to write lock"<Compress(update_triple[i], Trie::QUERYMODE); + } success_num = remove(update_triple, update_triple_num); } if (general_evaluation.getQueryTree().getUpdateType() == QueryTree::Insert_Clause || general_evaluation.getQueryTree().getUpdateType() == QueryTree::Modify_Clause) { general_evaluation.prepareUpdateTriple(general_evaluation.getQueryTree().getInsertPatterns(), update_triple, update_triple_num); + for(int i = 0; i < update_triple_num; i++) + { + update_triple[i] = trie->Compress(update_triple[i], Trie::QUERYMODE); + } success_num = insert(update_triple, update_triple_num); } } @@ -1511,8 +1552,12 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) general_evaluation.releaseResult(); delete[] update_triple; - this->query_cache->clear(); - cout<<"QueryCache cleared"< 0) + { + this->query_cache->clear(); + cout<<"QueryCache cleared"<update_lock)); } @@ -1626,6 +1671,12 @@ Database::build(const string& _rdf_file) cout << "after build, used " << (tv_build_end - tv_build_begin) << "ms." << endl; cout << "finish build VS-Tree." 
<< endl; + cout << "finish sub2id pre2id obj2id" << endl; + cout << "tripleNum is " << this->triples_num << endl; + cout << "entityNum is " << this->entity_num << endl; + cout << "preNum is " << this->pre_num << endl; + cout << "literalNum is " << this->literal_num << endl; + //this->vstree->saveTree(); //delete this->vstree; //this->vstree = NULL; @@ -2076,6 +2127,19 @@ Database::build_s2xx(ID_TUPLE* _p_id_tuples) __gnu_parallel::sort(_p_id_tuples, _p_id_tuples + this->triples_num, Util::spo_cmp_idtuple); #endif //qsort(_p_id_tuples, this->triples_num, sizeof(int*), Util::_spo_cmp); + + //remove duplicates from the id tables + int j = 1; + for(int i = 1; i < this->triples_num; ++i) + { + if(!Util::equal(_p_id_tuples[i], _p_id_tuples[i-1])) + { + _p_id_tuples[j] = _p_id_tuples[i]; + ++j; + } + } + this->triples_num = j; + this->kvstore->build_subID2values(_p_id_tuples, this->triples_num, this->entity_num); //save all entity_signature into binary file @@ -2608,12 +2672,6 @@ Database::sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file) //delete _entity_bitset[i]; //} //delete[] _entity_bitset; - - cout << "finish sub2id pre2id obj2id" << endl; - cout << "tripleNum is " << this->triples_num << endl; - cout << "entityNum is " << this->entity_num << endl; - cout << "preNum is " << this->pre_num << endl; - cout << "literalNum is " << this->literal_num << endl; //{ //stringstream _ss; @@ -2666,10 +2724,10 @@ Database::insertTriple(const TripleWithObjType& _triple, vector* _vert (this->kvstore)->setEntityByID(_sub_id, _triple.subject); //update the string buffer - if (_sub_id < this->entity_buffer_size) - { - this->entity_buffer->set(_sub_id, _triple.subject); - } + //if (_sub_id < this->entity_buffer_size) + //{ + //this->entity_buffer->set(_sub_id, _triple.subject); + //} if (_vertices != NULL) _vertices->push_back(_sub_id); @@ -2710,10 +2768,10 @@ Database::insertTriple(const TripleWithObjType& _triple, vector* _vert (this->kvstore)->setEntityByID(_obj_id, 
_triple.object); //update the string buffer - if (_obj_id < this->entity_buffer_size) - { - this->entity_buffer->set(_obj_id, _triple.object); - } + //if (_obj_id < this->entity_buffer_size) + //{ + //this->entity_buffer->set(_obj_id, _triple.object); + //} if (_vertices != NULL) _vertices->push_back(_obj_id); @@ -2733,11 +2791,11 @@ Database::insertTriple(const TripleWithObjType& _triple, vector* _vert (this->kvstore)->setLiteralByID(_obj_id, _triple.object); //update the string buffer - TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID; - if (tid < this->literal_buffer_size) - { - this->literal_buffer->set(tid, _triple.object); - } + //TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID; + //if (tid < this->literal_buffer_size) + //{ + //this->literal_buffer->set(tid, _triple.object); + //} if (_vertices != NULL) _vertices->push_back(_obj_id); @@ -2850,10 +2908,10 @@ Database::removeTriple(const TripleWithObjType& _triple, vector* _vert this->freeEntityID(_sub_id); this->sub_num--; //update the string buffer - if (_sub_id < this->entity_buffer_size) - { - this->entity_buffer->del(_sub_id); - } + //if (_sub_id < this->entity_buffer_size) + //{ + //this->entity_buffer->del(_sub_id); + //} if (_vertices != NULL) _vertices->push_back(_sub_id); } @@ -2872,10 +2930,10 @@ Database::removeTriple(const TripleWithObjType& _triple, vector* _vert this->kvstore->subIDByEntity(_triple.object); this->freeEntityID(_obj_id); //update the string buffer - if (_obj_id < this->entity_buffer_size) - { - this->entity_buffer->del(_obj_id); - } + //if (_obj_id < this->entity_buffer_size) + //{ + //this->entity_buffer->del(_obj_id); + //} if (_vertices != NULL) _vertices->push_back(_obj_id); } @@ -2889,11 +2947,11 @@ Database::removeTriple(const TripleWithObjType& _triple, vector* _vert this->kvstore->subIDByLiteral(_triple.object); this->freeLiteralID(_obj_id); //update the string buffer - TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID; - if (tid < 
this->literal_buffer_size) - { - this->literal_buffer->del(tid); - } + //TYPE_ENTITY_LITERAL_ID tid = _obj_id - Util::LITERAL_FIRST_ID; + //if (tid < this->literal_buffer_size) + //{ + //this->literal_buffer->del(tid); + //} if (_vertices != NULL) _vertices->push_back(_obj_id); } @@ -3078,7 +3136,7 @@ Database::remove(std::string _rdf_file, bool _is_restore) //triple_num -= parse_triple_num; } - //TODO:better to free this just after id_tuples are ok + //BETTER: free this just after id_tuples are ok //(only when using group insertion/deletion) //or reduce the array size delete[] triple_array; @@ -3171,17 +3229,19 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, { is_new_sub = true; subid = this->allocEntityID(); +#ifdef DEBUG cout << "this is a new subject: " << sub << " " << subid << endl; +#endif this->sub_num++; this->kvstore->setIDByEntity(sub, subid); this->kvstore->setEntityByID(subid, sub); new_entity.insert(subid); //add info and update buffer vertices.push_back(subid); - if (subid < this->entity_buffer_size) - { - this->entity_buffer->set(subid, sub); - } + //if (subid < this->entity_buffer_size) + //{ + //this->entity_buffer->set(subid, sub); + //} } string pre = _triples[i].getPredicate(); @@ -3204,17 +3264,19 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, { is_new_obj = true; objid = this->allocEntityID(); +#ifdef DEBUG cout << "this is a new object: " << obj << " " << objid << endl; +#endif //this->obj_num++; this->kvstore->setIDByEntity(obj, objid); this->kvstore->setEntityByID(objid, obj); new_entity.insert(objid); //add info and update vertices.push_back(objid); - if (objid < this->entity_buffer_size) - { - this->entity_buffer->set(objid, obj); - } + //if (objid < this->entity_buffer_size) + //{ + //this->entity_buffer->set(objid, obj); + //} } } else //isObjLiteral @@ -3229,11 +3291,11 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, 
this->kvstore->setLiteralByID(objid, obj); //add info and update vertices.push_back(objid); - int tid = objid - Util::LITERAL_FIRST_ID; - if (tid < this->literal_buffer_size) - { - this->literal_buffer->set(tid, obj); - } + //int tid = objid - Util::LITERAL_FIRST_ID; + //if (tid < this->literal_buffer_size) + //{ + //this->literal_buffer->set(tid, obj); + //} } } @@ -3805,10 +3867,10 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, this->sub_num--; //add info and update buffer vertices.push_back(_sub_id); - if (_sub_id < this->entity_buffer_size) - { - this->entity_buffer->del(_sub_id); - } + //if (_sub_id < this->entity_buffer_size) + //{ + //this->entity_buffer->del(_sub_id); + //} } else { @@ -3895,15 +3957,15 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, this->freeEntityID(_obj_id); //add info and update buffer vertices.push_back(_obj_id); - if (_obj_id < this->entity_buffer_size) - { - this->entity_buffer->del(_obj_id); - } + //if (_obj_id < this->entity_buffer_size) + //{ + //this->entity_buffer->del(_obj_id); + //} } else { tmpset.reset(); - this->calculateEntityBitSet(_obj_id, tmpset); + //this->calculateEntityBitSet(_obj_id, tmpset); //this->vstree->replaceEntry(_obj_id, tmpset); } } @@ -3918,11 +3980,11 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, this->freeLiteralID(_obj_id); //add info and update buffer vertices.push_back(_obj_id); - int tid = _obj_id - Util::LITERAL_FIRST_ID; - if (tid < this->literal_buffer_size) - { - this->literal_buffer->del(tid); - } + //int tid = _obj_id - Util::LITERAL_FIRST_ID; + //if (tid < this->literal_buffer_size) + //{ + //this->literal_buffer->del(tid); + //} } } } @@ -4036,7 +4098,6 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, return valid_num; } -//TODO: check and improve the backup program bool Database::backup() { diff --git a/Database/Database.h b/Database/Database.h 
index 54d0092..cca19d1 100644 --- a/Database/Database.h +++ b/Database/Database.h @@ -87,6 +87,16 @@ public: //id tuples file string getIDTuplesFile(); + VSTree* getVSTree(); + KVstore* getKVstore(); + StringIndex* getStringIndex(); + QueryCache* getQueryCache(); + TYPE_TRIPLE_NUM* getpre2num(); + TYPE_ENTITY_LITERAL_ID& getlimitID_literal(); + TYPE_ENTITY_LITERAL_ID& getlimitID_entity(); + TYPE_PREDICATE_ID& getlimitID_predicate(); + mutex& get_query_parse_lock(); + private: string name; string store_path; @@ -107,6 +117,8 @@ private: pthread_rwlock_t update_lock; //just for debug a block of code mutex debug_lock; + // for getFinalResult + mutex getFinalResult_lock; VSTree* vstree; KVstore* kvstore; diff --git a/KVstore/ISArray/ISArray.cpp b/KVstore/ISArray/ISArray.cpp index c3d32c9..2f8ea7e 100644 --- a/KVstore/ISArray/ISArray.cpp +++ b/KVstore/ISArray/ISArray.cpp @@ -285,17 +285,20 @@ ISArray::UpdateTime(unsigned _key) bool ISArray::search(unsigned _key, char *&_str, unsigned &_len) { +// this->AccessLock.lock(); // printf("%s search %d: \n", filename.c_str(), _key); if (_key >= CurEntryNum ||!array[_key].isUsed()) { _str = NULL; _len = 0; +// this->AccessLock.unlock(); return false; } // try to read in main memory if (array[_key].inCache()) { UpdateTime(_key); +// this->AccessLock.unlock(); return array[_key].getBstr(_str, _len); } // printf(" need to read disk "); @@ -305,6 +308,7 @@ ISArray::search(unsigned _key, char *&_str, unsigned &_len) // printf("stored in block %d, ", store); if (!BM->ReadValue(store, _str, _len)) { +// this->AccessLock.unlock(); return false; } @@ -314,15 +318,17 @@ ISArray::search(unsigned _key, char *&_str, unsigned &_len) _str = debug; // printf("str = %s, len = %d\n", _str, _len); - +// this->AccessLock.unlock(); return true; } bool ISArray::insert(unsigned _key, char *_str, unsigned _len) { +// this->AccessLock.lock(); if (_key < CurEntryNum && array[_key].isUsed()) { +// this->AccessLock.unlock(); return false; } @@ -330,6 
+336,7 @@ ISArray::insert(unsigned _key, char *_str, unsigned _len) { cout << _key << ' ' << MAX_KEY_NUM << endl; cout << "ISArray insert error: Key is bigger than MAX_KEY_NUM" << endl; +// this->AccessLock.unlock(); return false; } @@ -350,6 +357,7 @@ ISArray::insert(unsigned _key, char *_str, unsigned _len) if (newp == NULL) { cout << "ISArray insert error: main memory full" << endl; +// this->AccessLock.unlock(); return false; } else @@ -369,14 +377,17 @@ ISArray::insert(unsigned _key, char *_str, unsigned _len) AddInCache(_key, _str, _len); array[_key].setUsedFlag(true); array[_key].setDirtyFlag(true); +// this->AccessLock.unlock(); return true; } bool ISArray::remove(unsigned _key) { +// this->AccessLock.lock(); if (_key >= CurEntryNum || !array[_key].isUsed()) { +// this->AccessLock.unlock(); return false; } @@ -391,6 +402,8 @@ ISArray::remove(unsigned _key) if (array[_key].inCache()) { + RemoveFromLRUQueue(_key); + char *str = NULL; unsigned len = 0; array[_key].getBstr(str, len, false); @@ -400,6 +413,7 @@ ISArray::remove(unsigned _key) array[_key].release(); +// this->AccessLock.unlock(); return true; } @@ -407,14 +421,18 @@ ISArray::remove(unsigned _key) bool ISArray::modify(unsigned _key, char *_str, unsigned _len) { +// this->AccessLock.lock(); if (_key >= CurEntryNum ||!array[_key].isUsed()) { +// this->AccessLock.unlock(); return false; } array[_key].setDirtyFlag(true); if (array[_key].inCache()) { + RemoveFromLRUQueue(_key); + char* str = NULL; unsigned len = 0; array[_key].getBstr(str, len, false); @@ -431,8 +449,41 @@ ISArray::modify(unsigned _key, char *_str, unsigned _len) BM->FreeBlocks(store); AddInCache(_key, _str, _len); +// this->AccessLock.unlock(); return true; } +void +ISArray::RemoveFromLRUQueue(unsigned _key) +{ + if (!array[_key].inCache()) + return; + int prevID = array[_key].getPrev(); + int nextID = array[_key].getNext(); + if (prevID == -1) + cache_head->setNext(nextID); + else + array[prevID].setNext(nextID); + + //cout << "next 
ID: " << nextID << endl; + if (nextID != -1) + array[nextID].setPrev(prevID); // since array[_key] is not tail, nextp will not be NULL + else + cache_tail_id = prevID; + + array[_key].setCacheFlag(false); + array[_key].setPrev(-1); + array[_key].setNext(-1); + +/* UpdateTime(_key); + unsigned PrevID = array[_key].getPrev(); + cache_tail_id = PrevID; + if (PrevID == -1) + cache_head->setNext(-1); + else + array[PrevID].setNext(-1); +*/ + return; +} diff --git a/KVstore/ISArray/ISArray.h b/KVstore/ISArray/ISArray.h index 27f0a58..4ce7549 100644 --- a/KVstore/ISArray/ISArray.h +++ b/KVstore/ISArray/ISArray.h @@ -46,9 +46,12 @@ private: bool AddInCache(unsigned _key, char *_str, unsigned _len); bool SwapOut(); bool UpdateTime(unsigned _key); + void RemoveFromLRUQueue(unsigned _key); bool PreLoad(); + mutex AccessLock; + public: ISArray(); ISArray(string _dir_path, string _filename, string mode, unsigned long long buffer_size, unsigned _key_num = 0); diff --git a/KVstore/IVArray/IVArray.cpp b/KVstore/IVArray/IVArray.cpp index 7d60088..b2c919f 100644 --- a/KVstore/IVArray/IVArray.cpp +++ b/KVstore/IVArray/IVArray.cpp @@ -51,6 +51,7 @@ IVArray::IVArray(string _dir_path, string _filename, string mode, unsigned long //index_time_map.clear(); //time_index_map.clear(); MAX_CACHE_SIZE = buffer_size; +// MAX_CACHE_SIZE = 10 * (1 << 30); cache_head = new IVEntry; cache_tail_id = -1; @@ -130,34 +131,6 @@ IVArray::IVArray(string _dir_path, string _filename, string mode, unsigned long // cout << _filename << " Done." 
<< endl; } -bool -IVArray::PreLoad() -{ - if (array == NULL) - return false; - - for(unsigned i = 0; i < CurEntryNum; i++) - { - if (!array[i].isUsed()) - continue; - - unsigned store = array[i].getStore(); - char *str = NULL; - unsigned len = 0; - - if (!BM->ReadValue(store, str, len)) - return false; - if (CurCacheSize + len > (MAX_CACHE_SIZE >> 1)) - break; - - AddInCache(i, str, len); - - delete [] str; - } - - return true; -} - bool IVArray::save() { @@ -237,33 +210,7 @@ IVArray::SwapOut() } array[targetID].release(); array[targetID].setCacheFlag(false); -/* if (time_index_map.empty()) - { - return false; - } - multimap ::iterator it = time_index_map.begin(); - - unsigned key = it->second; - char *str = NULL; - unsigned len = 0; - array[key].getBstr(str, len, false); - - if (array[key].isDirty() && array[key].inCache()) - { - unsigned store = BM->WriteValue(str, len); - array[key].setStore(store); - } - - CurCacheSize -= len; - - array[key].release(); - array[key].setCacheFlag(false); - - //array[key].setTime(0); - //index_time_map.erase(key); - time_index_map.erase(it); -*/ return true; } @@ -275,6 +222,8 @@ IVArray::AddInCache(unsigned _key, char *_str, unsigned _len) { return false; } + +// this->CacheLock.lock(); // ensure there is enough room in main memory while (CurCacheSize + _len > MAX_CACHE_SIZE) { @@ -285,9 +234,6 @@ IVArray::AddInCache(unsigned _key, char *_str, unsigned _len) } } - CurCacheSize += _len; - array[_key].setBstr(_str, _len); - array[_key].setCacheFlag(true); if (cache_tail_id == -1) cache_head->setNext(_key); @@ -298,21 +244,26 @@ IVArray::AddInCache(unsigned _key, char *_str, unsigned _len) array[_key].setNext(-1); cache_tail_id = _key; - //modify maps -// long time = Util::get_cur_time(); -// array[_key].setTime(time); -// time_index_map.insert(make_pair(time, _key)); + CurCacheSize += _len; + array[_key].setBstr(_str, _len); + array[_key].setCacheFlag(true); +// this->CacheLock.unlock(); return true; } //Update last used time of 
array[_key] bool -IVArray::UpdateTime(unsigned _key) +IVArray::UpdateTime(unsigned _key, bool HasLock) { + if (array[_key].isPined()) // the cache pined should not be swaped out + return true; + if (_key == (unsigned) cache_tail_id)// already most recent return true; +// if (!HasLock) +// this->CacheLock.lock(); // cout << "UpdateTime: " << _key << endl; int prevID = array[_key].getPrev(); int nextID = array[_key].getNext(); @@ -328,115 +279,55 @@ IVArray::UpdateTime(unsigned _key) array[_key].setNext(-1); array[cache_tail_id].setNext(_key); cache_tail_id = _key; - /* - //map ::iterator it; - unsigned oldtime; - if ((oldtime = array[_key].getTime()) == 0) - { - return false; - } - //unsigned oldtime = it->second; - long time = Util::get_cur_time(); - array[_key].setTime(time); - //it->second = time; - -// pair < multimap::iterator, multimap::iterator > ret; -// ret = time_index_map.equal_range(oldtime); - - multimap ::iterator p = time_index_map.lower_bound(oldtime); - //for(p = ret.first; p != ret.second; p++) - for(p; p->first == oldtime; p++) - { - if (p->second == _key) - break; - } - - //if (p == ret.second) - if (p->first != oldtime) - { - return false; - } - time_index_map.erase(p); - time_index_map.insert(make_pair(time, _key)); - */ +// if (!HasLock) +// this->CacheLock.unlock(); return true; } bool IVArray::search(unsigned _key, char *&_str, unsigned &_len) { + this->CacheLock.lock(); //printf("%s search %d: ", filename.c_str(), _key); if (_key >= CurEntryNum ||!array[_key].isUsed()) { -// cout << "IVArray " << filename << " Search Error: Key " << _key << " is not available." 
<< endl; _str = NULL; _len = 0; + this->CacheLock.unlock(); return false; } // try to read in main memory if (array[_key].inCache()) { UpdateTime(_key); - return array[_key].getBstr(_str, _len); + bool ret = array[_key].getBstr(_str, _len); + this->CacheLock.unlock(); + return ret; } -// printf(" need to read disk "); // read in disk unsigned store = array[_key].getStore(); -// cout << "store: " << store << endl; -// printf("stored in block %d, ", store); if (!BM->ReadValue(store, _str, _len)) { + this->CacheLock.unlock(); return false; } - // try to add the entry into cache -/* if (VList::isLongList(_len) && _len + CurCacheSize <= IVArray::MAX_CACHE_SIZE) + if(!VList::isLongList(_len)) { - array[_key].setBstr(_str, _len); - array[_key].setCacheFlag(true); - - CurCacheSize += _len; - }*/ - if (!VList::isLongList(_len)) - { - AddInCache(_key, _str, _len); - char *debug = new char [_len]; - memcpy(debug, _str, _len); - _str = debug; - } -// printf(" value is %s, length: %d\n", _str, _len); +// if (array[_key].Lock.try_lock()) +// { +// if (array[_key].inCache()) +// return true; + AddInCache(_key, _str, _len); + char *debug = new char [_len]; + memcpy(debug, _str, _len); + _str = debug; + // array[_key].Lock.unlock(); - // also read values near it so that we can take advantage of spatial locality -/* unsigned start = (_key / SEG_LEN) * SEG_LEN; - unsigned end = start + SEG_LEN; - for(unsigned i = start; i < end; i++) - { - unsigned store = array[i].getStore(); - if (i == _key) - { - if (!BM->ReadValue(store, _str, _len)) - return false; - //if (!VList::isLongList(_len)) - AddInCache(_key, _str, _len); - //else - if (VList::isLongList(_len)) - array[_key].setLongListFlag(true); - } - else if (!array[i].isLongList() && array[i].isUsed() && !array[i].inCache()) - { - char *temp_str; - unsigned temp_len; - if (!BM->ReadValue(store, temp_str, temp_len)) - continue; - if (!VList::isLongList(temp_len)) - AddInCache(i, temp_str, temp_len); - else - 
array[_key].setLongListFlag(true); - - delete [] temp_str; - } - }*/ +// } + } + this->CacheLock.unlock(); return true; } @@ -523,6 +414,8 @@ IVArray::remove(unsigned _key) if (array[_key].inCache()) { RemoveFromLRUQueue(_key); + if(array[_key].isPined()) + array[_key].setCachePinFlag(false); char *str = NULL; unsigned len = 0; @@ -531,6 +424,9 @@ IVArray::remove(unsigned _key) array[_key].setCacheFlag(false); } + if (array[_key].isPined()) + array[_key].setCachePinFlag(false); + array[_key].release(); return true; @@ -550,26 +446,13 @@ IVArray::modify(unsigned _key, char *_str, unsigned _len) if (array[_key].inCache()) { RemoveFromLRUQueue(_key); + if(array[_key].isPined()) + array[_key].setCachePinFlag(false); char* str = NULL; unsigned len = 0; array[_key].getBstr(str, len, false); -/* if (!VList::isLongList(_len)) - { - CurCacheSize -= len; - CurCacheSize += _len; - array[_key].setBstr(_str, _len); - } - else - { - CurCacheSize -= len; - array[_key].release(); - array[_key].setCacheFlag(false); - unsigned store = BM->WriteValue(_str, _len); - array[_key].setStore(store); - } -*/ array[_key].release(); CurCacheSize -= len; AddInCache(_key, _str, _len); @@ -578,15 +461,6 @@ IVArray::modify(unsigned _key, char *_str, unsigned _len) { unsigned store = array[_key].getStore(); BM->FreeBlocks(store); - /*if (VList::isLongList(_len)) - { - unsigned store = BM->WriteValue(_str, _len); - array[_key].setStore(store); - } - else - { - AddInCache(_key, _str, _len); - }*/ AddInCache(_key, _str, _len); } @@ -594,12 +468,47 @@ IVArray::modify(unsigned _key, char *_str, unsigned _len) } +//Pin an entry in cache and never swap out +void +IVArray::PinCache(unsigned _key) +{ + //printf("%s search %d: ", filename.c_str(), _key); + if (_key >= CurEntryNum ||!array[_key].isUsed()) + { + return; + } + // try to read in main memory + if (array[_key].inCache()) + { + RemoveFromLRUQueue(_key); + + array[_key].setCachePinFlag(true); + + return; + } + // read in disk + unsigned store = 
array[_key].getStore(); + char *_str = NULL; + unsigned _len = 0; + if (!BM->ReadValue(store, _str, _len)) + { + return; + } + + array[_key].setBstr(_str, _len); + array[_key].setCacheFlag(true); + array[_key].setCachePinFlag(true); + + return; +} + void IVArray::RemoveFromLRUQueue(unsigned _key) { - if (!array[_key].inCache()) + if (!array[_key].inCache() || array[_key].isPined()) return; + //this->CacheLock.lock(); int prevID = array[_key].getPrev(); int nextID = array[_key].getNext(); @@ -625,7 +534,6 @@ IVArray::RemoveFromLRUQueue(unsigned _key) else array[PrevID].setNext(-1);*/ + //this->CacheLock.unlock(); return; } - - diff --git a/KVstore/IVArray/IVArray.h b/KVstore/IVArray/IVArray.h index de4a36f..8932183 100644 --- a/KVstore/IVArray/IVArray.h +++ b/KVstore/IVArray/IVArray.h @@ -7,6 +7,7 @@ * a Key-Value Index for ID-Value pair in form of Array * =======================================================================*/ +#include "../../Util/Util.h" #include "IVEntry.h" #include "IVBlockManager.h" @@ -45,10 +46,11 @@ private: bool AddInCache(unsigned _key, char *_str, unsigned _len); bool SwapOut(); - bool UpdateTime(unsigned _key); + bool UpdateTime(unsigned _key, bool HasLock = false); - bool PreLoad(); - void RemoveFromLRUQueue(unsigned int); + void RemoveFromLRUQueue(unsigned _key); + + mutex CacheLock; public: IVArray(); @@ -60,4 +62,5 @@ public: bool remove(unsigned _key); bool insert(unsigned _key, char *_str, unsigned _len); bool save(); + void PinCache(unsigned _key); }; diff --git a/KVstore/IVArray/IVEntry.cpp b/KVstore/IVArray/IVEntry.cpp index 9c392af..7c429ee 100644 --- a/KVstore/IVArray/IVEntry.cpp +++ b/KVstore/IVArray/IVEntry.cpp @@ -18,6 +18,7 @@ IVEntry::IVEntry() usedFlag = false; dirtyFlag = true; cacheFlag = false; + CachePinFlag = false; prevID = nextID = -1; } @@ -113,6 +114,18 @@ IVEntry::inCache() const return cacheFlag; } +void +IVEntry::setCachePinFlag(bool _flag) +{ + CachePinFlag = _flag; +} + +bool +IVEntry::isPined() +{ + 
return CachePinFlag; +} + void IVEntry::release() { diff --git a/KVstore/IVArray/IVEntry.h b/KVstore/IVArray/IVEntry.h index f90df8f..0e48cd6 100644 --- a/KVstore/IVArray/IVEntry.h +++ b/KVstore/IVArray/IVEntry.h @@ -16,6 +16,7 @@ class IVEntry bool usedFlag; // mark if the entry is used bool dirtyFlag; bool cacheFlag; + bool CachePinFlag; unsigned store; //index of block where value is stored // pointer to id for LRU list int prevID; @@ -42,6 +43,9 @@ public: void setCacheFlag(bool _flag); bool inCache() const; + void setCachePinFlag(bool _flag); + bool isPined(); + void release(); void Copy(const IVEntry& _entry); diff --git a/KVstore/KVstore.cpp b/KVstore/KVstore.cpp index 9acd682..3c10ef2 100644 --- a/KVstore/KVstore.cpp +++ b/KVstore/KVstore.cpp @@ -2738,22 +2738,29 @@ KVstore::isEntity(TYPE_ENTITY_LITERAL_ID id) return id < Util::LITERAL_FIRST_ID; } -/*void +void KVstore::AddIntoPreCache(TYPE_PREDICATE_ID _pre_id) { - this->preID2values->AddIntoCache(_pre_id); + this->preID2values->PinCache(_pre_id); } void KVstore::AddIntoSubCache(TYPE_ENTITY_LITERAL_ID _entity_id) { - this->subID2values->AddIntoCache(_entity_id); + this->subID2values->PinCache(_entity_id); } void KVstore::AddIntoObjCache(TYPE_ENTITY_LITERAL_ID _entity_literal_id) { - this->objID2values->AddIntoCache(_entity_literal_id); -}*/ + if (Util::is_literal_ele(_entity_literal_id)) + { + TYPE_ENTITY_LITERAL_ID _literal_id = _entity_literal_id + - Util::LITERAL_FIRST_ID; + + objID2values_literal->PinCache(_literal_id); + } + this->objID2values->PinCache(_entity_literal_id); +} unsigned KVstore::getSubListSize(TYPE_ENTITY_LITERAL_ID _sub_id) diff --git a/KVstore/SITree/SITree.cpp b/KVstore/SITree/SITree.cpp index 9405df4..4b7a532 100644 --- a/KVstore/SITree/SITree.cpp +++ b/KVstore/SITree/SITree.cpp @@ -86,6 +86,7 @@ SITree::getHeight() const void SITree::setHeight(unsigned _h) { + this->height = _h; } @@ -107,10 +108,12 @@ SITree::prepare(SINode* _np) bool SITree::search(const char* _str, unsigned 
_len, unsigned* _val) { + this->AccessLock.lock(); if (_str == NULL || _len == 0) { printf("error in SITree-search: empty string\n"); //*_val = -1; + this->AccessLock.unlock(); return false; } //this->CopyToTransfer(_str, _len, 1); @@ -123,26 +126,31 @@ SITree::search(const char* _str, unsigned _len, unsigned* _val) if (ret == NULL || store == -1) //tree is empty or not found { //bstr.clear(); + this->AccessLock.unlock(); return false; } const Bstr* tmp = ret->getKey(store); if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found { + this->AccessLock.unlock(); return false; } *_val = ret->getValue(store); this->TSM->request(request); //bstr.clear(); + this->AccessLock.unlock(); return true; } bool SITree::insert(char* _str, unsigned _len, unsigned _val) { + this->AccessLock.lock(); if (_str == NULL || _len == 0) { printf("error in SITree-insert: empty string\n"); + this->AccessLock.unlock(); return false; } //this->CopyToTransfer(_str, _len, 1); @@ -253,16 +261,18 @@ SITree::insert(char* _str, unsigned _len, unsigned _val) this->TSM->request(request); //bstr.clear(); //NOTICE: must be cleared! 
- + this->AccessLock.unlock(); return !ifexist; //QUERY(which case:return false) } bool SITree::modify(const char* _str, unsigned _len, unsigned _val) { + this->AccessLock.lock(); if (_str == NULL || _len == 0) { printf("error in SITree-modify: empty string\n"); + this->AccessLock.unlock(); return false; } //this->CopyToTransfer(_str, _len, 1); @@ -275,11 +285,13 @@ SITree::modify(const char* _str, unsigned _len, unsigned _val) if (ret == NULL || store == -1) //tree is empty or not found { //bstr.clear(); + this->AccessLock.unlock(); return false; } const Bstr* tmp = ret->getKey(store); if (Util::compare(_str, _len, tmp->getStr(), tmp->getLen()) != 0) //tree is empty or not found { + this->AccessLock.unlock(); return false; } @@ -287,7 +299,7 @@ SITree::modify(const char* _str, unsigned _len, unsigned _val) ret->setDirty(); this->TSM->request(request); //bstr.clear(); - + this->AccessLock.unlock(); return true; } @@ -341,9 +353,11 @@ SITree::find(unsigned _len, const char* _str, int* store) const bool SITree::remove(const char* _str, unsigned _len) { + this->AccessLock.lock(); if (_str == NULL || _len == 0) { printf("error in SITree-remove: empty string\n"); + this->AccessLock.unlock(); return false; } //this->CopyToTransfer(_str, _len, 1); @@ -352,8 +366,10 @@ SITree::remove(const char* _str, unsigned _len) //const Bstr* _key = &transfer[1]; SINode* ret; if (this->root == NULL) //tree is empty + { + this->AccessLock.unlock(); return false; - + } SINode* p = this->root; SINode* q; int i, j; @@ -424,20 +440,27 @@ SITree::remove(const char* _str, unsigned _len) this->TSM->request(request); //bstr.clear(); - + this->AccessLock.unlock(); return flag; //i == j, not found } bool SITree::save() //save the whole tree to disk { + this->AccessLock.lock(); #ifdef DEBUG_KVSTORE printf("now to save tree!\n"); #endif if (TSM->writeTree(this->root)) + { + this->AccessLock.unlock(); return true; + } else + { + this->AccessLock.unlock(); return false; + } } void diff --git 
a/KVstore/SITree/SITree.h b/KVstore/SITree/SITree.h index 3f5d924..b511f38 100644 --- a/KVstore/SITree/SITree.h +++ b/KVstore/SITree/SITree.h @@ -57,18 +57,20 @@ private: long long request; void prepare(SINode* _np); -public: - SITree(); //always need to initial transfer - SITree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size); + std::mutex AccessLock; + unsigned getHeight() const; void setHeight(unsigned _h); SINode* getRoot() const; + SINode* find(const Bstr* _key, int* store, bool ifmodify); + SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify); +public: + SITree(); //always need to initial transfer + SITree(std::string _storepath, std::string _filename, std::string _mode, unsigned long long _buffer_size); //insert, search, remove, set bool search(const char* _str, unsigned _len, unsigned* _val); bool insert(char* _str, unsigned _len, unsigned _val); bool modify(const char* _str, unsigned _len, unsigned _val); - SINode* find(const Bstr* _key, int* store, bool ifmodify); - SINode* find(const char* _key, unsigned _len, int* store, bool ifmodify); bool remove(const char* _str, unsigned _len); bool save(); ~SITree(); diff --git a/Main/gbuild.cpp b/Main/gbuild.cpp index bca5c16..076a3be 100644 --- a/Main/gbuild.cpp +++ b/Main/gbuild.cpp @@ -38,7 +38,7 @@ main(int argc, char * argv[]) string _db_path = string(argv[1]); int len = _db_path.length(); - if(_db_path.substr(len-3, 3) == ".db") + if(_db_path.length() > 3 && _db_path.substr(len-3, 3) == ".db") { cout<<"your database can not end with .db"< HttpServer; typedef SimpleWeb::Client HttpClient; +#define THREAD_NUM 30 #define MAX_DATABASE_NUM 100 #define MAX_USER_NUM 1000 #define ROOT_USERNAME "root" @@ -81,6 +82,8 @@ bool user_handler(const HttpServer& server, const shared_ptr& response, const shared_ptr& request); bool check_handler(const HttpServer& server, const shared_ptr& response, const shared_ptr& request); + +void query_thread(string 
db_name, string format, string db_query, const shared_ptr& response, const shared_ptr& request); //============================================================================= //TODO: use locak to protect logs when running in multithreading environment @@ -271,6 +274,227 @@ string UrlDecode(string& SRC) return (ret); } +class Task +{ +public: + string db_name; + string format; + string db_query; + const shared_ptr response; + const shared_ptr request; + Task(string name, string ft, string query, const shared_ptr& res, const shared_ptr& req); + ~Task(); + void run(); +}; +Task::Task(string name, string ft, string query, const shared_ptr& res, const shared_ptr& req):response(res),request(req) +{ + db_name = name; + format = ft; + db_query = query; +} +Task::~Task() +{ + +} +void Task::run() +{ + query_thread(db_name, format, db_query, response, request); +} + +class Thread +{ +public: + thread TD; + int ID; + static int threadnum; + Task* task; + Thread(); + ~Thread(); + int GetThreadID(); + void assign(Task* t); + void run(); + void start(); + friend bool operator==(Thread t1, Thread t2); + friend bool operator!=(Thread t1, Thread t2); +}; + +list busythreads; +vector freethreads; +mutex busy_mutex; +mutex free_mutex; +mutex task_mutex; + +void BackToFree(Thread *t) +{ + busy_mutex.lock(); + busythreads.erase(find(busythreads.begin(), busythreads.end(), t)); + busy_mutex.unlock(); + + free_mutex.lock(); + freethreads.push_back(t); + free_mutex.unlock(); +} + +int Thread::threadnum = 0; + +Thread::Thread() +{ + threadnum++; + ID = threadnum; +} +Thread::~Thread() +{ + +} +int Thread::GetThreadID() +{ + return ID; +} +void Thread::assign(Task* t) +{ + task = t; +} +void Thread::run() +{ + cout << "Thread:" << ID << " run\n"; + task->run(); + delete task; + BackToFree(this); +} +void Thread::start() +{ + TD = thread(&Thread::run, this); + TD.detach(); +} +bool operator==(Thread t1, Thread t2) +{ + return t1.ID == t2.ID; +} +bool operator!=(Thread t1, Thread t2) +{ + 
return !(t1.ID == t2.ID); +} + +class ThreadPool +{ +public: + int ThreadNum; + bool isclose; + thread ThreadsManage; + queue tasklines; + ThreadPool(); + ThreadPool(int t); + ~ThreadPool(); + void create(); + void SetThreadNum(int t); + int GetThreadNum(); + void AddTask(Task* t); + void start(); + void close(); +}; +ThreadPool::ThreadPool() +{ + isclose = false; + ThreadNum = 10; + busythreads.clear(); + freethreads.clear(); + for (int i = 0; i < ThreadNum; i++) + { + Thread *p = new Thread(); + freethreads.push_back(p); + } +} +ThreadPool::ThreadPool(int t) +{ + isclose = false; + ThreadNum = t; + busythreads.clear(); + freethreads.clear(); + for (int i = 0; i < t; i++) + { + Thread *p = new Thread(); + freethreads.push_back(p); + } +} +ThreadPool::~ThreadPool() +{ + for (vector::iterator i = freethreads.begin(); i != freethreads.end(); i++) + delete *i; +} +void ThreadPool::create() +{ + ThreadsManage = thread(&ThreadPool::start, this); + ThreadsManage.detach(); +} +void ThreadPool::SetThreadNum(int t) +{ + ThreadNum = t; +} +int ThreadPool::GetThreadNum() +{ + return ThreadNum; +} +void ThreadPool::AddTask(Task* t) +{ + task_mutex.lock(); + tasklines.push(t); + task_mutex.unlock(); +} +void ThreadPool::start() +{ + while (true) + { + if (isclose == true) + { + busy_mutex.lock(); + if (busythreads.size() != 0) + { + busy_mutex.unlock(); + continue; + } + busy_mutex.unlock(); + break; + } + + free_mutex.lock(); + if (freethreads.size() == 0) + { + free_mutex.unlock(); + continue; + } + free_mutex.unlock(); + + task_mutex.lock(); + if (tasklines.size() == 0) + { + task_mutex.unlock(); + continue; + } + + Task *job = tasklines.front(); + tasklines.pop(); + task_mutex.unlock(); + + free_mutex.lock(); + Thread *t = freethreads.back(); + freethreads.pop_back(); + t->assign(job); + free_mutex.unlock(); + + busy_mutex.lock(); + busythreads.push_back(t); + busy_mutex.unlock(); + + t->start(); + } +} +void ThreadPool::close() +{ + isclose = true; +} + +ThreadPool 
pool(THREAD_NUM); + int main(int argc, char *argv[]) { Util util; @@ -448,7 +672,7 @@ int initialize(int argc, char *argv[]) //scheduler = start_thread(func_scheduler); #endif - + pool.create(); //pthread_rwlock_init(&database_load_lock, NULL); #ifndef SPARQL_ENDPOINT @@ -604,12 +828,10 @@ int initialize(int argc, char *argv[]) // } // }; - //TODO: use db_name if multiple databases server.resource["^/%3[F|f]operation%3[D|d]checkpoint%26db_name%3[D|d](.*)$"]["GET"]=[&server](shared_ptr response, shared_ptr request) { checkpoint_handler(server, response, request); }; - //TODO: use db_name if multiple databases server.resource["^/?operation=checkpoint&db_name=(.*)$"]["GET"]=[&server](shared_ptr response, shared_ptr request) { checkpoint_handler(server, response, request); @@ -1560,9 +1782,8 @@ bool query_handler0(const HttpServer& server, const shared_ptr& response, const shared_ptr& request) @@ -1616,8 +1837,10 @@ bool query_handler1(const HttpServer& server, const shared_ptrsecond; //doQuery(format, db_query, server, response, request); query_num++; - thread t(&query_thread, db_name, format, db_query, response, request); - t.detach(); + Task* task = new Task(db_name, format, db_query, response, request); + pool.AddTask(task); + //thread t(&query_thread, db_name, format, db_query, response, request); + //t.detach(); } //void query_handler(const shared_ptr& response, const shared_ptr& request) diff --git a/Query/GeneralEvaluation.cpp b/Query/GeneralEvaluation.cpp index 2ab7de9..b4eefe5 100644 --- a/Query/GeneralEvaluation.cpp +++ b/Query/GeneralEvaluation.cpp @@ -1078,7 +1078,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result) if (!ret_result.checkUseStream()) { - // cout << "flag2" << endl; + cout << "flag2" << endl; for (unsigned i = 0; i < ret_result.ansNum; i++) { ret_result.answer[i] = new string [ret_result.select_var_num]; @@ -1095,7 +1095,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result) //ret_result.answer[i][j] = 
trie->Uncompress(ret_result.answer[i][j], ret_result.answer[i][j].length()); } } - else //TODO add Uncompress + else { // ret_result.answer[i][j] = trie->Uncompress(result0.result[i].str[k - id_cols], //result0.result[i].str[k - id_cols].length()); @@ -1117,7 +1117,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result) } else { - // cout << "flag3" << endl; + cout << "flag3" << endl; for (unsigned i = 0; i < ret_result.ansNum; i++) for (int j = 0; j < ret_result.select_var_num; j++) { @@ -1145,7 +1145,7 @@ void GeneralEvaluation::getFinalResult(ResultSet &ret_result) ret_result.resetStream(); } } - // TODO: is this part need uncompression? + else if (this->query_tree.getQueryForm() == QueryTree::Ask_Query) { // cout << "flag4" << endl; diff --git a/Query/QueryCache.cpp b/Query/QueryCache.cpp index 4334c9c..49b3a9e 100755 --- a/Query/QueryCache.cpp +++ b/Query/QueryCache.cpp @@ -95,6 +95,8 @@ bool QueryCache::getMinimalRepresentation(const Patterns &triple_pattern, Patter bool QueryCache::tryCaching(const Patterns &triple_pattern, const TempResult &temp_result, int eva_time) { + lock_guard (this->query_cache_lock); //when quit this scope the lock will be released + Patterns minimal_repre; map minimal_mapping; @@ -176,8 +178,12 @@ bool QueryCache::tryCaching(const Patterns &triple_pattern, const TempResult &te return true; } +//NOTICE: in this function we also modify some contents, so we must use mutex instead of rwlock bool QueryCache::checkCached(const Patterns &triple_pattern, const Varset &varset, TempResult &temp_result) { + //this->query_cache_lock.lock(); + lock_guard (this->query_cache_lock); //when quit this scope the lock will be released + Patterns minimal_repre; map minimal_mapping; diff --git a/Query/QueryCache.h b/Query/QueryCache.h index c9449d6..01a5172 100755 --- a/Query/QueryCache.h +++ b/Query/QueryCache.h @@ -23,6 +23,8 @@ class QueryCache const long long ITEM_MEMORY_LIMIT = 1000000LL; const long long TOTAL_MEMORY_LIMIT = 100000000LL; 
+ mutex query_cache_lock; + int time_now; long long total_memory_used; diff --git a/README.md b/README.md index 71be0e6..9ef75d2 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Gstore System(also called gStore) is a graph database engine for managing large graph-structured data, which is open-source and targets at Linux operation systems. The whole project is written in C++, with the help of some libraries such as readline, antlr, and so on. Only source tarballs are provided currently, which means you have to compile the source code if you want to use our system. -**The formal help document is in [English(EN)](docs/help/gStore_help.pdf) and [中文(CN)](docs/help/gStore_help_CN.pdf).** +**The formal help document is in [English(EN)](docs/help/gStore_help.pdf) and [中文(ZH)](docs/help/gStore_help_CN.pdf).** **The formal experiment result is in [Experiment](docs/test/formal_experiment.pdf).** @@ -14,14 +14,22 @@ Gstore System(also called gStore) is a graph database engine for managing large ### Compile from Source This system is really user-friendly and you can pick it up in several minutes. Remember to check your platform where you want to run this system by viewing [System Requirements](docs/DEMAND.md). After all are verified, please get this project's source code. There are several ways to do this: +- (suggested)type `git clone https://github.com/Caesar11/gStore.git` in your terminal or use git GUI to acquire it + - download the zip from this repository and extract it - fork this repository in your github account -- type `git clone https://github.com/Caesar11/gStore.git` in your terminal or use git GUI to acquire it - Then you need to compile the project, just type `make` in the gStore root directory, then all executables will be generated. +The first strategy is suggested to get the source code because you can easily acquire the updates of the code by typing `git pull` in the home directory of gStore repository. 
+In addition, you can directly check the version of the code by typing `git log` to see the commit logs. +If you want to use code from other branches instead of master branch, like 'dev' branch, then: + +- clone the master branch and type `git checkout dev` in your terminal + +- clone the dev branch directly by typing `git clone -b dev` + ### Deploy via Docker You can easily deploy gStore via Docker. We provide both of Dockerfile and docker image. Please see our [Docker Deployment Doc(EN)](docs/DOCKER_DEPLOY_EN.md) or [Docker部署文档(中文)](docs/DOCKER_DEPLOY_CN.md) for details. @@ -42,7 +50,7 @@ If you want to understand the details of the gStore system, or you want to try s - [API Explanation](docs/API.md): guide you to develop applications based on our API -- [Project Structure](docs/STRUCT.md): show the whole structure and sequence of this project +- [Project Structure](docs/STRUCT.md): show the whole structure and process of this project - [Related Essays](docs/ESSAY.md): contain essays and publications related with gStore @@ -56,7 +64,7 @@ If you want to understand the details of the gStore system, or you want to try s We have written a series of short essays addressing recurring challenges in using gStore to realize applications, which are placed in [Recipe Book](docs/TIPS.md). -You are welcome to report any advice or errors in the github Issues part of this repository, if not requiring in-time reply. However, if you want to urgent on us to deal with your reports, please email to to submit your suggestions and report bugs to us by emailing to . A full list of our whole team is in [Mailing List](docs/MAIL.md). +You are welcome to report any advice or errors in the github Issues part of this repository, if not requiring in-time reply. However, if you want to urgent on us to deal with your reports, please email to to submit your suggestions and report bugs to us by emailing to . A full list of our whole team is in [Mailing List](docs/MAIL.md). 
There are some restrictions when you use the current gStore project, you can see them on [Limit Description](docs/LIMIT.md). diff --git a/Server/server_http.hpp b/Server/server_http.hpp index b9d24e1..b7ba3e7 100644 --- a/Server/server_http.hpp +++ b/Server/server_http.hpp @@ -138,7 +138,7 @@ namespace SimpleWeb { /// Timeout on request handling. Defaults to 5 seconds. size_t timeout_request=5; /// Timeout on content handling. Defaults to 300 seconds. - size_t timeout_content=300; + size_t timeout_content=3600; /// IPv4 address in dotted decimal form or IPv6 address in hexadecimal notation. /// If empty, the address will be any address. std::string address; diff --git a/Server/web/index.html b/Server/web/index.html index 3fbbd60..7c31b99 100644 --- a/Server/web/index.html +++ b/Server/web/index.html @@ -25,7 +25,7 @@
  • - +
    diff --git a/StringIndex/StringIndex.cpp b/StringIndex/StringIndex.cpp index f36a944..341d7ac 100644 --- a/StringIndex/StringIndex.cpp +++ b/StringIndex/StringIndex.cpp @@ -91,20 +91,33 @@ bool StringIndexFile::randomAccess(unsigned id, string *str, bool real) long offset = (*this->index_table)[id].offset; unsigned length = (*this->index_table)[id].length; + //if(id == 9) + //{ + //cout<<"check: "<value_file, offset, SEEK_SET); - //fread(this->buffer, sizeof(char), length, this->value_file); - pread(fileno(value_file), this->buffer, sizeof(char)*length, offset); + //DEBUG: here a bug exists if we use pread instead of fread, the details are in BUG_StringIndex_pread of docs/BUGS.md + fseek(this->value_file, offset, SEEK_SET); + fread(this->buffer, sizeof(char), length, this->value_file); + //pread(fileno(value_file), this->buffer, sizeof(char)*length, offset); this->buffer[length] = '\0'; *str = string(this->buffer); + //if(id == 9) + //{ + //cout<<"check: "<<*str<Uncompress(*str, str->length());//Uncompresss } + //if(id == 9) + //{ + //cout<<"check: "<<*str<type == Predicate) cout << "Predicate StringIndex "; - long current_offset = 0; + //long current_offset = 0; if ((max_end - min_begin) / 800000L < (long)this->request.size()) { cout << "sequence access." 
<< endl; @@ -147,8 +160,8 @@ void StringIndexFile::trySequenceAccess(bool real) char *block = new char[MAX_BLOCK_SIZE]; long current_block_begin = min_begin; - //fseek(this->value_file, current_block_begin, SEEK_SET); - current_offset = current_block_begin; + fseek(this->value_file, current_block_begin, SEEK_SET); + //current_offset = current_block_begin; while (current_block_begin < max_end) { @@ -157,14 +170,14 @@ void StringIndexFile::trySequenceAccess(bool real) if (current_block_end <= this->request[pos].offset) { current_block_begin = this->request[pos].offset; - //fseek(this->value_file, current_block_begin, SEEK_SET); - current_offset = current_block_begin; + fseek(this->value_file, current_block_begin, SEEK_SET); + //current_offset = current_block_begin; current_block_end = min(current_block_begin + MAX_BLOCK_SIZE, max_end); } - //fread(block, sizeof(char), current_block_end - current_block_begin, this->value_file); - pread(fileno(this->value_file), block, sizeof(char)*(current_block_end-current_block_begin), current_offset); - current_offset += sizeof(char)*(current_block_end-current_block_begin); + fread(block, sizeof(char), current_block_end - current_block_begin, this->value_file); + //pread(fileno(this->value_file), block, sizeof(char)*(current_block_end-current_block_begin), current_offset); + //current_offset += sizeof(char)*(current_block_end-current_block_begin); while (pos < (int)this->request.size()) { @@ -279,6 +292,13 @@ void StringIndexFile::change(unsigned id, KVstore &kv_store) fseek(this->value_file, (*this->index_table)[id].offset, SEEK_SET); fwrite(str.c_str(), sizeof(char), (*this->index_table)[id].length, this->value_file); + //if(id == 9) + //{ + //cout<<"check in change():9 "<Uncompress(*str, str->length()); - return true; - } + //if(searchBuffer(id, str)) + //{ + //cout << "FLAG2" << endl; + //*str = trie->Uncompress(*str, str->length()); + //return true; + //} if (id < Util::LITERAL_FIRST_ID) { @@ -374,11 +394,16 @@ void 
StringIndex::addRequest(unsigned id, std::string *str, bool is_entity_or_li { if (is_entity_or_literal) { - if(searchBuffer(id, str)) - { -// *str = trie->Uncompress(*str) - return; - } + //if(id == 9) + //{ + //cout<<"to search 9 in string buffer"<Uncompress(*str) + //cout<<"found in string buffer"<entity.addRequest(id, str); else diff --git a/StringIndex/StringIndex.h b/StringIndex/StringIndex.h index 06f9c49..914486c 100644 --- a/StringIndex/StringIndex.h +++ b/StringIndex/StringIndex.h @@ -147,6 +147,11 @@ class StringIndex predicate.clear(); } + void flush() + { + //nothing to do here + } + void emptyBuffer() { entity.emptyBuffer(); diff --git a/Trie/Trie.cpp b/Trie/Trie.cpp index 35ea62c..7a7633e 100644 --- a/Trie/Trie.cpp +++ b/Trie/Trie.cpp @@ -204,6 +204,9 @@ Trie::WriteDownNode(TrieNode *_node, ofstream& _fout, string _str) TripleWithObjType Trie::Compress(const TripleWithObjType &_in_triple, int MODE) { + //use this to forbid the trie + return _in_triple; + int lowbound = (MODE == BUILDMODE) ? Trie::LOWBOUND : 0; string _in_sub = _in_triple.getSubject(); @@ -236,6 +239,9 @@ Trie::Compress(const TripleWithObjType &_in_triple, int MODE) string Trie::Compress(string _str) { + //use this to forbid the trie + return _str; + int lowbound = 0; if (Util::isLiteral(_str)) @@ -258,8 +264,9 @@ Trie::LoadDictionary() ifstream _fin(this->store_path.c_str()); if (!_fin) { - cout << "Trie::LoadDictionary: Fail to open " << store_path - << endl; + //cout << "Trie::LoadDictionary: Fail to open " << store_path + //<< " but it doesn't matter if you are building a database." 
+ //<< endl; return false; } @@ -293,6 +300,9 @@ Trie::LoadDictionary() string Trie::Uncompress(const char *_str, const int len) { + //use this to forbid the trie + return string(_str); + if (len == 0) return ""; @@ -360,6 +370,9 @@ Trie::Uncompress(const char *_str, const int len) string Trie::Uncompress(const string &_str, const int len) { + //use this to forbid the trie + return _str; + return Uncompress(_str.data(), len); } @@ -385,7 +398,7 @@ Trie::LoadTrie(string dictionary_path) ifstream fin(store_path.c_str()); if (!fin) { - cout << "Trie::LoadTrie: Fail to open " << store_path << endl; + //cout << "Trie::LoadTrie: Fail to open " << store_path << endl; return false; } diff --git a/Trie/Trie.h b/Trie/Trie.h index 40d4d96..cd56584 100644 --- a/Trie/Trie.h +++ b/Trie/Trie.h @@ -17,8 +17,11 @@ using namespace std; class Trie { - static const int SAMPLE_UPBOUND = 1000000; - static const int LOWBOUND = 100;//this param should change with data sets + //static const int SAMPLE_UPBOUND = 1000000; + //static const int LOWBOUND = 100;//this param should change with data sets + //NOTICE: set SAMPLE_UPBOUND to a tiny number to unuse the Trie structure + static const int SAMPLE_UPBOUND = 1; + static const int LOWBOUND = 1;//this param should change with data sets //SAMPLE_UPBOUND = 1000000, LOWBOUND = 100 for LUBM500M //SAMPLE_UPBOUND = 100000, LOWBOUND = 20 for DBpediafull //SAMPLE_UPVOUND = 300000, LOWBOUND = 30 for WatDiv500M diff --git a/Util/Util.cpp b/Util/Util.cpp index 6e65a41..82c8425 100644 --- a/Util/Util.cpp +++ b/Util/Util.cpp @@ -1859,6 +1859,16 @@ Util::pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b) return false; } +bool +Util::equal(const ID_TUPLE& a, const ID_TUPLE& b) +{ + if(a.subid == b.subid && a.preid == b.preid && a.objid == b.objid) + { + return true; + } + return false; +} + void Util::empty_file(const char* _fname) { diff --git a/Util/Util.h b/Util/Util.h index 6a8f85d..38d3ade 100644 --- a/Util/Util.h +++ b/Util/Util.h @@ -423,6 +423,7 
@@ public: static bool spo_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b); static bool ops_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b); static bool pso_cmp_idtuple(const ID_TUPLE& a, const ID_TUPLE& b); + static bool equal(const ID_TUPLE& a, const ID_TUPLE& b); static std::string tmp_path; // this are for debugging diff --git a/api/http/cpp/example/Makefile b/api/http/cpp/example/Makefile index 77a8c88..e4b1f34 100644 --- a/api/http/cpp/example/Makefile +++ b/api/http/cpp/example/Makefile @@ -4,10 +4,10 @@ all: Benchmark CppAPIExample #all: example Benchmark CppAPIExample Benchmark: Benchmark.o - $(CC) -o Benchmark.exe Benchmark.o -lcurl -L../lib -lclient -lpthread + $(CC) -o Benchmark.exe Benchmark.o -L../lib -lclient -lcurl -lpthread CppAPIExample: CppAPIExample.o - $(CC) -o CppAPIExample.exe CppAPIExample.o -I../ -lcurl -L../lib -lgstoreconnector -lclient + $(CC) -o CppAPIExample.exe CppAPIExample.o -I../ -L../lib -lgstoreconnector -lclient -lcurl Benchmark.o: Benchmark.cpp $(CC) -c -I../ Benchmark.cpp -o Benchmark.o diff --git a/api/socket/cpp/example/Makefile b/api/socket/cpp/example/Makefile index 0e91d6d..fe6ab96 100644 --- a/api/socket/cpp/example/Makefile +++ b/api/socket/cpp/example/Makefile @@ -1,6 +1,7 @@ #CC=g++ CC=g++ -std=c++11 #CC=ccache g++ +all: example QueryGenerator test example: CppAPIExample.o $(CC) -o example CppAPIExample.o -L../lib -lgstoreconnector @@ -9,5 +10,14 @@ example: CppAPIExample.o CppAPIExample.o: CppAPIExample.cpp $(CC) -c -I../src/ CppAPIExample.cpp -o CppAPIExample.o +QueryGenerator: QueryGenerator.cpp + $(CC) -o QueryGenerator QueryGenerator.cpp + +test: test.o + $(CC) -o test test.o -L../lib -lgstoreconnector -pthread + +test.o: test.cpp + $(CC) -c -I../src/ test.cpp -o test.o -pthread + clean: - rm -rf *.o example + rm -rf *.o example QueryGenerator test diff --git a/data/all.sql b/data/all.sql index 5c07960..4689bab 100644 --- a/data/all.sql +++ b/data/all.sql @@ -1,4 +1,4 @@ -select ?s ?o where +select ?s ?p ?o 
where { ?s ?p ?o . } diff --git a/docs/API.md b/docs/API.md index de5dcb0..82a9f00 100644 --- a/docs/API.md +++ b/docs/API.md @@ -1,366 +1,4 @@ -**This Chapter guides you to use our API for accessing gStore. We provide socket API and HTTP api, corresponding to gserver and ghttp respectively.** - -# Socket API - -## Easy Examples - -We provide JAVA, C++, PHP and Python API for gStore now. Please refer to example codes in `api/socket/cpp/example`, `api/socket/java/example`, `api/socket/php` and `api/socket/python`. To use C++ and Java examples to have a try, please ensure that executables have already been generated. Otherwise, just type `make APIexample` in the root directory of gStore to compile the codes, as well as API. - -Next, **start up a gStore server by using `./gserver` command.** It is ok if you know a running usable gStore server and try to connect to it, but notice that **the server ip and port of server and client must be matched.**(you don't need to change any thing if using examples, just by default) Then, you need to compile the example codes in the directory gStore/api/socket. We provide a utility to do this, and you just need to type `make APIexample` in the root directory of gStore. Or you can compile the codes by yourself, in this case please go to gStore/api/socket/cpp/example/ and gStore/socket/api/java/example/, respectively. - -Finally, go to the example directory and run the corresponding executables. For C++, just use `./example` command to run it. And for Java, use `make run` command or `java -cp ../lib/GstoreJavaAPI.jar:. JavaAPIExample` to run it. Both the two executables will connect to a specified gStore server and do some load or query operations. 
Be sure that you see the query results in the terminal where you run the examples, otherwise please go to [Frequently Asked Questions](FAQ.md) for help or report it to us.(the report approach is described in [README](../README.md)) - -You are advised to read the example code carefully, as well as the corresponding Makefile. This will help you to understand the API, specially if you want to write your own programs based on the API interface. - -- - - - -## API structure - -The socket API of gStore is placed in api/socket directory in the root directory of gStore, whose contents are listed below: - -- gStore/api/socket/ - - - cpp/ (the C++ API) - - - src/ (source code of C++ API, used to build the lib/libgstoreconnector.a) - - - GstoreConnector.cpp (interfaces to interact with gStore server) - - - GstoreConnector.h - - - Makefile (compile and build lib) - - - lib/ (where the static lib lies in) - - - .gitignore - - - libgstoreconnector.a (only exist after compiled, you need to link this lib when you use the C++ API) - - - example/ (small example program to show the basic idea of using the C++ API) - - - CppAPIExample.cpp - - - Makefile - - - java/ (the Java API) - - - src/ (source code of Java API, used to build the lib/GstoreJavaAPI.jar) - - - jgsc/ (the package which you need to import when you use the Java API) - - - GstoreConnector.java (interfaces to interact with gStore server) - - - Makefile (compile and build lib) - - - lib/ - - - .gitignore - - - GstoreJavaAPI.jar (only exist after compiled, you need to include this JAR in your class path) - - - example/ (small example program to show the basic idea of using the Java API) - - - JavaAPIExample.cpp - - - Makefile - - - php/ (the PHP API) - - - PHPAPIExxample.php (small example program to show the basic idea of using the PHP API) - - - GstoreConnector.php (source code of PHP API) - - - python/ (the python API) - - - src/ (source code of Python API) - - - GstoreConnector.py - - - lib/ - - - example/ (small 
example program to show the basic idea of using the Python API) - - - PythonAPIExample.py - - -- - - - -## C++ API - -#### Interface - -To use the C++ API, please place the phrase `#include "GstoreConnector.h"` in your cpp code. Functions in GstoreConnector.h should be called like below: - -``` -// initialize the Gstore server's IP address and port. -GstoreConnector gc("127.0.0.1", 3305); - -// build a new database by a RDF file. -// note that the relative path is related to gserver. -gc.build("LUBM10.db", "example/LUBM_10.n3"); - -// then you can execute SPARQL query on this database. -std::string sparql = "select ?x where \ - { \ - ?x . \ - ?y . \ - ?x ?y. \ - ?z ?y. \ - ?z . \ - ?z ?w. \ - ?w . \ - }"; -std::string answer = gc.query(sparql); - -// unload this database. -gc.unload("LUBM10.db"); - -// also, you can load some exist database directly and then query. -gc.load("LUBM10.db"); - -// query a SPARQL in current database -answer = gc.query(sparql); -``` - -The original declaration of these functions are as below: - -``` -GstoreConnector(); -GstoreConnector(string _ip, unsigned short _port); -GstoreConnector(unsigned short _port); -bool load(string _db_name); -bool unload(string _db_name); -bool build(string _db_name, string _rdf_file_path); -string query(string _sparql); -``` - -Notice: - -1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively. - -2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in. - -3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!) - -#### Compile - -You are advised to see gStore/api/socket/cpp/example/Makefile for instructions on how to compile your code with the C++ API. Generally, what you must do is compile your own code to object with header in the C++ API, and link the object with static lib in the C++ API. 
- -Let us assume that your source code is placed in test.cpp, whose position is ${TEST}, while the gStore project position is ${GSTORE}/gStore.(if using devGstore as name instead of gStore, then the path is ${GSTORE}/devGstore) Please go to the ${TEST} directory first: - -> Use `g++ -c -I${GSTORE}/gStore/api/socket/cpp/src/ test.cpp -o test.o` to compile your test.cpp into test.o, relative API header is placed in api/socket/cpp/src/. - -> Use `g++ -o test test.o -L${GSTORE}/gStore/api/socket/cpp/lib/ -lgstoreconnector` to link your test.o with the libgstoreconnector.a(a static lib) in api/socket/cpplib/. - -Then you can type `./test` to execute your own program, which uses our C++ API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like. - -- - - - -## Java API - -#### Interface - -To use the Java API, please place the phrase `import jgsc.GstoreConnector;` in your java code. Functions in GstoreConnector.java should be called like below: - -``` -// initialize the Gstore server's IP address and port. -GstoreConnector gc = new GstoreConnector("127.0.0.1", 3305); - -// build a new database by a RDF file. -// note that the relative path is related to gserver. -gc.build("LUBM10.db", "example/LUBM_10.n3"); - -// then you can execute SPARQL query on this database. -String sparql = "select ?x where " - + "{" - + "?x . " - + "?y . " - + "?x ?y. " - + "?z ?y. " - + "?z . " - + "?z ?w. " - + "?w . " - + "}"; -String answer = gc.query(sparql); - -// unload this database. -gc.unload("LUBM10.db"); - -// also, you can load some exist database directly and then query. 
-gc.load("LUBM10.db"); - -// query a SPARQL in current database -answer = gc.query(sparql); -``` - -The original declaration of these functions are as below: - -``` -GstoreConnector(); -GstoreConnector(string _ip, unsigned short _port); -GstoreConnector(unsigned short _port); -bool load(string _db_name); -bool unload(string _db_name); -bool build(string _db_name, string _rdf_file_path); -string query(string _sparql); -``` - -Notice: - -1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively. - -2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in. - -3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!) - -#### Compile - -You are advised to see gStore/api/socket/java/example/Makefile for instructions on how to compile your code with the Java API. Generally, what you must do is compile your own code to object with jar file in the Java API. - -Let us assume that your source code is placed in test.java, whose position is ${TEST}, while the gStore project position is ${GSTORE}/gStore.(if using devGstore as name instead of gStore, then the path is ${GSTORE}/devGstore) Please go to the ${TEST} directory first: - -> Use `javac -cp ${GSTORE}/gStore/api/socket/java/lib/GstoreJavaAPI.jar test.java` to compile your test.java into test.class with the GstoreJavaAPI.jar(a jar package used in Java) in api/java/lib/. - -Then you can type `java -cp ${GSTORE}/gStore/api/socket/java/lib/GstoreJavaAPI.jar:. test` to execute your own program(notice that the ":." in command cannot be neglected), which uses our Java API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like. - - -- - - - -## PHP API - -#### Interface - -To use the PHP API, please place the phrase `include('GstoreConnector,php');` in your php code. 
Functions in -GstoreConnector.php should be called like below: - -``` -// initialize the Gstore server's IP address and port. -$gc = new Connector("127.0.0.1", 3305); -// build a new database by a RDF file. -// note that the relative path is related to gserver. -$gc->build("LUBM10", "example/LUBM_10.n3"); -// then you can execute SPARQL query on this database. -$sparql = "select ?x where " + "{" + - "?x . " + - "?y . " + - "?x ?y. " + - "?z ?y. " + - "?z . " + - "?z ?w. " + - "?w . " + - "}"; -$answer = gc->query($sparql); -//unload this database. -$gc->unload("LUBM10"); -//also, you can load some exist database directly and then query. -$gc->load("LUBM10");// query a SPARQL in current database -$answer = gc->query(sparql); -``` - -The original declaration of these functions are as below: - -``` -class Connector { - public function __construct($host, $port); - public function send($data); - public function recv(); - public function build($db_name, $rdf_file_path); - public function load($db_name); - public function unload($db_name); - public function query($sparql); - public function __destruct(); -} - -``` - -Notice: - -1. When using Connector(), the default value for ip and port is 127.0.0.1 and 3305, respectively. - -2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in. - -3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!) - -#### Run - -You can see gStore/api/socket/php/PHPAPIExample for instructions on how to use PHP API. PHP script doesn't need compiling. You can run PHP file directly or use it in your web project. - -- - - - -## Python API - -#### Interface - -To use the Python API, please place the phrase `from GstoreConnector import GstoreConnector` in your python code. Functions in GstoreConnector.py should be called like below: - -``` -// initialize the Gstore server's IP address and port. 
-gc = GstoreConnector('127.0.0.1', 3305) -// build a new database by a RDF file. -// note that the relative path is related to gserver. -gc.build('LUBM10', 'data/LUBM_10.n3') -// then you can execute SPARQL query on this database. -$sparql = "select ?x where " + "{" + - "?x . " + - "?y . " + - "?x ?y. " + - "?z ?y. " + - "?z . " + - - "?z ?w. " + - "?w . " + - "}"; -answer = gc.query(sparql) -//unload this database. -gc.unload('LUBM10') -//also, you can load some exist database directly and then query. -gc.load('LUBM10')// query a SPARQL in current database -answer = gc.query(sparql) -``` - -The original declaration of these functions are as below: - -``` -class GstoreConnector { - def _connect(self) - def _disconnect(self) - def _send(self, msg): - def _recv(self) - def _pack(self, msg): - def _communicate(f): - def __init__(self, ip='127.0.0.1', port=3305): - @_communicate - def test(self) - @_communicate - def load(self, db_name) - @_communicate - def unload(self, db_name) - @_communicate - def build(self, db_name, rdf_file_path) - @_communicate - def drop(self, db_name) - @_communicate - def stop(self) - @_communicate - def query(self, sparql) - @_communicate - def show(self, _type=False) -} - -``` - -Notice: - -1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively. - -2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in. - -3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!) - -#### Run - -You are advised to see gStore/api/socket/python/example/PythonAPIExample for examples on how to use python API. Python file doesn't need compiling, and you can run it directly. - +**This Chapter guides you to use our API for accessing gStore. 
We provide HTTP api(suggested) and socket api, corresponding to ghttp and gserver respectively.** # HTTP API @@ -370,7 +8,7 @@ Compired with socket API, HTTP API is more stable and more standard, and can mai We provide JAVA and C++ API for ghttp now. Please refer to example codes in `api/http/cpp` and `api/http/java`. To use these examples, please make sure that executables have already been generated. -Next, **start up ghttp service by using \texttt{./ghttp} command.** It is ok if you know a running usable ghttp server and try to connect to it. (you don't need to change anything if using examples, just by default). Then, for Java and C++ code, you need to compile the example codes in the directory gStore/api/http/. +Next, **start up ghttp service by using \texttt{./ghttp} command.** It is ok if you know a running usable ghttp server and try to connect to it. (you do not need to change anything if using examples, just by default). Then, for Java and C++ code, you need to compile the example codes in the directory gStore/api/http/. Finally, go to the example directory and run the corresponding executables. All these four executables will connect to a specified ghttp server and do some load or query operations. Be sure that you see the query results in the terminal where you run the examples, otherwise please go to [Frequently Asked Questions](FAQ.md) for help or report it to us.(the report approach is described in [README](../README.md)) @@ -646,3 +284,370 @@ function query($username, $password, $db_name, $sparql) function fquery($username, $password, $db_name, $sparql, $filename) ``` + +--- +--- + +# Socket API + +**This API is not maintained now.** + +## Easy Examples + +We provide JAVA, C++, PHP and Python API for gStore now. Please refer to example codes in `api/socket/cpp/example`, `api/socket/java/example`, `api/socket/php` and `api/socket/python`. To use C++ and Java examples to have a try, please ensure that executables have already been generated. 
Otherwise, just type `make APIexample` in the root directory of gStore to compile the codes, as well as API. + +Next, **start up a gStore server by using `./gserver` command.** It is ok if you know a running usable gStore server and try to connect to it, but notice that **the server ip and port of server and client must be matched.**(you do not need to change any thing if using examples, just by default) Then, you need to compile the example codes in the directory gStore/api/socket. We provide a utility to do this, and you just need to type `make APIexample` in the root directory of gStore. Or you can compile the codes by yourself, in this case please go to gStore/api/socket/cpp/example/ and gStore/socket/api/java/example/, respectively. + +Finally, go to the example directory and run the corresponding executables. For C++, just use `./example` command to run it. And for Java, use `make run` command or `java -cp ../lib/GstoreJavaAPI.jar:. JavaAPIExample` to run it. Both the two executables will connect to a specified gStore server and do some load or query operations. Be sure that you see the query results in the terminal where you run the examples, otherwise please go to [Frequently Asked Questions](FAQ.md) for help or report it to us.(the report approach is described in [README](../README.md)) + +You are advised to read the example code carefully, as well as the corresponding Makefile. This will help you to understand the API, specially if you want to write your own programs based on the API interface. 
+ +- - - + +## API structure + +The socket API of gStore is placed in api/socket directory in the root directory of gStore, whose contents are listed below: + +- gStore/api/socket/ + + - cpp/ (the C++ API) + + - src/ (source code of C++ API, used to build the lib/libgstoreconnector.a) + + - GstoreConnector.cpp (interfaces to interact with gStore server) + + - GstoreConnector.h + + - Makefile (compile and build lib) + + - lib/ (where the static lib lies in) + + - .gitignore + + - libgstoreconnector.a (only exist after compiled, you need to link this lib when you use the C++ API) + + - example/ (small example program to show the basic idea of using the C++ API) + + - CppAPIExample.cpp + + - Makefile + + - java/ (the Java API) + + - src/ (source code of Java API, used to build the lib/GstoreJavaAPI.jar) + + - jgsc/ (the package which you need to import when you use the Java API) + + - GstoreConnector.java (interfaces to interact with gStore server) + + - Makefile (compile and build lib) + + - lib/ + + - .gitignore + + - GstoreJavaAPI.jar (only exist after compiled, you need to include this JAR in your class path) + + - example/ (small example program to show the basic idea of using the Java API) + + - JavaAPIExample.cpp + + - Makefile + + - php/ (the PHP API) + + - PHPAPIExxample.php (small example program to show the basic idea of using the PHP API) + + - GstoreConnector.php (source code of PHP API) + + - python/ (the python API) + + - src/ (source code of Python API) + + - GstoreConnector.py + + - lib/ + + - example/ (small example program to show the basic idea of using the Python API) + + - PythonAPIExample.py + + +- - - + +## C++ API + +#### Interface + +To use the C++ API, please place the phrase `#include "GstoreConnector.h"` in your cpp code. Functions in GstoreConnector.h should be called like below: + +``` +// initialize the Gstore server's IP address and port. +GstoreConnector gc("127.0.0.1", 3305); + +// build a new database by a RDF file. 
+// note that the relative path is related to gserver. +gc.build("LUBM10.db", "example/LUBM_10.n3"); + +// then you can execute SPARQL query on this database. +std::string sparql = "select ?x where \ + { \ + ?x . \ + ?y . \ + ?x ?y. \ + ?z ?y. \ + ?z . \ + ?z ?w. \ + ?w . \ + }"; +std::string answer = gc.query(sparql); + +// unload this database. +gc.unload("LUBM10.db"); + +// also, you can load some exist database directly and then query. +gc.load("LUBM10.db"); + +// query a SPARQL in current database +answer = gc.query(sparql); +``` + +The original declaration of these functions are as below: + +``` +GstoreConnector(); +GstoreConnector(string _ip, unsigned short _port); +GstoreConnector(unsigned short _port); +bool load(string _db_name); +bool unload(string _db_name); +bool build(string _db_name, string _rdf_file_path); +string query(string _sparql); +``` + +Notice: + +1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively. + +2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in. + +3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!) + +#### Compile + +You are advised to see gStore/api/socket/cpp/example/Makefile for instructions on how to compile your code with the C++ API. Generally, what you must do is compile your own code to object with header in the C++ API, and link the object with static lib in the C++ API. + +Let us assume that your source code is placed in test.cpp, whose position is ${TEST}, while the gStore project position is ${GSTORE}/gStore.(if using devGstore as name instead of gStore, then the path is ${GSTORE}/devGstore) Please go to the ${TEST} directory first: + +> Use `g++ -c -I${GSTORE}/gStore/api/socket/cpp/src/ test.cpp -o test.o` to compile your test.cpp into test.o, relative API header is placed in api/socket/cpp/src/. 
+ +> Use `g++ -o test test.o -L${GSTORE}/gStore/api/socket/cpp/lib/ -lgstoreconnector` to link your test.o with the libgstoreconnector.a(a static lib) in api/socket/cpplib/. + +Then you can type `./test` to execute your own program, which uses our C++ API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like. + +- - - + +## Java API + +#### Interface + +To use the Java API, please place the phrase `import jgsc.GstoreConnector;` in your java code. Functions in GstoreConnector.java should be called like below: + +``` +// initialize IP address and port of the Gstore server. +GstoreConnector gc = new GstoreConnector("127.0.0.1", 3305); + +// build a new database by a RDF file. +// note that the relative path is related to gserver. +gc.build("LUBM10.db", "example/LUBM_10.n3"); + +// then you can execute SPARQL query on this database. +String sparql = "select ?x where " + + "{" + + "?x . " + + "?y . " + + "?x ?y. " + + "?z ?y. " + + "?z . " + + "?z ?w. " + + "?w . " + + "}"; +String answer = gc.query(sparql); + +// unload this database. +gc.unload("LUBM10.db"); + +// also, you can load some exist database directly and then query. +gc.load("LUBM10.db"); + +// query a SPARQL in current database +answer = gc.query(sparql); +``` + +The original declaration of these functions are as below: + +``` +GstoreConnector(); +GstoreConnector(string _ip, unsigned short _port); +GstoreConnector(unsigned short _port); +bool load(string _db_name); +bool unload(string _db_name); +bool build(string _db_name, string _rdf_file_path); +string query(string _sparql); +``` + +Notice: + +1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively. + +2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in. + +3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!) 
+ +#### Compile + +You are advised to see gStore/api/socket/java/example/Makefile for instructions on how to compile your code with the Java API. Generally, what you must do is compile your own code to object with jar file in the Java API. + +Let us assume that your source code is placed in test.java, whose position is ${TEST}, while the gStore project position is ${GSTORE}/gStore.(if using devGstore as name instead of gStore, then the path is ${GSTORE}/devGstore) Please go to the ${TEST} directory first: + +> Use `javac -cp ${GSTORE}/gStore/api/socket/java/lib/GstoreJavaAPI.jar test.java` to compile your test.java into test.class with the GstoreJavaAPI.jar(a jar package used in Java) in api/java/lib/. + +Then you can type `java -cp ${GSTORE}/gStore/api/socket/java/lib/GstoreJavaAPI.jar:. test` to execute your own program(notice that the ":." in command cannot be neglected), which uses our Java API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like. + + +- - - + +## PHP API + +#### Interface + +To use the PHP API, please place the phrase `include('GstoreConnector,php');` in your php code. Functions in +GstoreConnector.php should be called like below: + +``` +// initialize IP address and port of the Gstore server. +$gc = new Connector("127.0.0.1", 3305); +// build a new database by a RDF file. +// note that the relative path is related to gserver. +$gc->build("LUBM10", "example/LUBM_10.n3"); +// then you can execute SPARQL query on this database. +$sparql = "select ?x where " + "{" + + "?x . " + + "?y . " + + "?x ?y. " + + "?z ?y. " + + "?z . " + + "?z ?w. " + + "?w . " + + "}"; +$answer = gc->query($sparql); +//unload this database. +$gc->unload("LUBM10"); +//also, you can load some exist database directly and then query. 
+$gc->load("LUBM10");// query a SPARQL in current database +$answer = gc->query(sparql); +``` + +The original declaration of these functions are as below: + +``` +class Connector { + public function __construct($host, $port); + public function send($data); + public function recv(); + public function build($db_name, $rdf_file_path); + public function load($db_name); + public function unload($db_name); + public function query($sparql); + public function __destruct(); +} + +``` + +Notice: + +1. When using Connector(), the default value for ip and port is 127.0.0.1 and 3305, respectively. + +2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in. + +3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!) + +#### Run + +You can see gStore/api/socket/php/PHPAPIExample for instructions on how to use PHP API. PHP script does not need compiling. You can run PHP file directly or use it in your web project. + +- - - + +## Python API + +#### Interface + +To use the Python API, please place the phrase `from GstoreConnector import GstoreConnector` in your python code. Functions in GstoreConnector.py should be called like below: + +``` +// initialize IP address and port of the Gstore server. +gc = GstoreConnector('127.0.0.1', 3305) +// build a new database by a RDF file. +// note that the relative path is related to gserver. +gc.build('LUBM10', 'data/LUBM_10.n3') +// then you can execute SPARQL query on this database. +$sparql = "select ?x where " + "{" + + "?x . " + + "?y . " + + "?x ?y. " + + "?z ?y. " + + "?z . " + + + "?z ?w. " + + "?w . " + + "}"; +answer = gc.query(sparql) +//unload this database. +gc.unload('LUBM10') +//also, you can load some exist database directly and then query. 
+gc.load('LUBM10')// query a SPARQL in current database +answer = gc.query(sparql) +``` + +The original declaration of these functions are as below: + +``` +class GstoreConnector { + def _connect(self) + def _disconnect(self) + def _send(self, msg): + def _recv(self) + def _pack(self, msg): + def _communicate(f): + def __init__(self, ip='127.0.0.1', port=3305): + @_communicate + def test(self) + @_communicate + def load(self, db_name) + @_communicate + def unload(self, db_name) + @_communicate + def build(self, db_name, rdf_file_path) + @_communicate + def drop(self, db_name) + @_communicate + def stop(self) + @_communicate + def query(self, sparql) + @_communicate + def show(self, _type=False) +} + +``` + +Notice: + +1. When using GstoreConnector(), the default value for ip and port is 127.0.0.1 and 3305, respectively. + +2. When using build(), the rdf_file_path(the second parameter) should be related to the position where gserver lies in. + +3. Please remember to unload the database you have loaded, otherwise things may go wrong.(the errors may not be reported!) + +#### Run + +You are advised to see gStore/api/socket/python/example/PythonAPIExample for examples on how to use python API. Python file does not need compiling, and you can run it directly. + diff --git a/docs/BUGS.md b/docs/BUGS.md new file mode 100644 index 0000000..9a95e54 --- /dev/null +++ b/docs/BUGS.md @@ -0,0 +1,23 @@ +**This file maintains details of the bugs not solved currently.** + +--- + +#### BUG_StringIndex_pread + +StringIndex::randomAcces() + +StringIndex::trySequenceAccess() + +when we insert a triple via ghttp, and query this triple immediately, we will find that answer is wrong. +when we run this query for several times, each time we will get a different answer. +Sometimes, we will get messy code. +With the same reason, if we use bin/gquery db to enter the gquery console, insert and query within this console, we will get similar errors. 
+Amazingly, if we quit the console and restart, run this query again, we will get the correct answer! + +The problem appears after we replace fread in StringIndex with pread, to support concurrent queries. +The root cause has not been found yet. +As a result, we change it back to fread, and use a lock for the StringIndex to block concurrent reads. +This is not supposed to cause a great loss in performance, because all operations to a single disk will be executed sequentially by the disk controller. + +--- + diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 5ff2ba0..11da6b2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,3 +1,26 @@ +## Sep 20, 2018 + +Zongyue, Qin fixes the remaining bugs in Trie, ISArray and IVArray. +In addition, he improves the performance of parallelism by providing fine-grained locks to key-value indices instead of locking the whole KVstore. + +In order to support applications in Wuhan University, Li, Zeng fixes many existing bugs in parallelism, 'ghttp', indices, caches(string buffers are not used now) and APIs. +What is more, setup scripts are added for several well-known Linux distributions(CentOS, Archlinux, Ubuntu) to ease the burden of installing software and setting system variables. + +As for documents, new figures of architecture and query processing are added by Li, Zeng and Yuyan, Chen. +This will help other developers on Github to view our code and modify it as they wish. + +--- + +## Jun 6, 2018 + +Xunbin, Su designs a thread pool for 'ghttp' based on the SimpleWeb framework of Boost Library. +In addition, Xunbin, Sun and imbajin(a Github user) add a docker image(only works for Docker CE 17.06.1) to gStore, which can ease the usage of this database system. + +Meanwhile, Zongyue, Qin fixes several bugs in the new indices and designs caches for them. +Chaofan, Yang adds and improves APIs(Application Program Interface) of several programming languages to 'ghttp' interface. 
+ +--- + ## Apr 24, 2018 Multithreading is enabled by zengli in ghttp, to improve the performance of this HTTP web server. diff --git a/docs/DEMAND.md b/docs/DEMAND.md index bdebb5a..196a194 100644 --- a/docs/DEMAND.md +++ b/docs/DEMAND.md @@ -24,6 +24,8 @@ libcurl-devel | needed to be installed NOTICE: +**To help ease the burden of setting environments, several scripts are provided in [setup](../scripts/setup/) for different Linux distributions. Please select the setup script corresponding to your system and run it with root(or sudo) privilege. (As for CentOS system, you need to install boost-devel yourself.)** + 1. The name of some packages may be different in different platforms, just install the corresponding one in your own operation system. 2. To install readline and readline-devel, just type `dnf install readline-devel` in Redhat/CentOS/Fedora, or `apt-get install libreadline-dev` in Debian/Ubuntu. Please use corresponding commands in other systems. If you use ArchLinux, just type `pacman -S readline` to install the readline and readline-devel.(so do other packages) @@ -60,7 +62,7 @@ NOTICE: # ./buildconf --force # ./configure --help | grep pthreads -You have to see --enable-pthreads listed. If do not, clear the buidls with this commands: +You should see --enable-pthreads listed. If you do not, clear the builds with these commands: # rm -rf aclocal.m4 # rm -rf autom4te.cache/ diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 2d60e72..8b4782c 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -1,10 +1,15 @@ You are advised to read init.conf file, and modify it as you wish. (this file will configure the basic options of gStore system) -gStore is a green software, and you just need to compile it with one command. Please run +gStore is a green software, and you just need to compile it with two commands. 
Please run -`make` +``` +sudo ./scripts/setup/setup_$(ARCH).sh +make -in the gStore root directory to compile the gStore code, link the ANTLR lib, and build executable "gbuild", "gquery", "ghttp", "gserver", "gclient", "gconsole". What is more, the api of gStore is also built now. +``` +in the gStore home directory to compile the gStore code, link the ANTLR lib, and build executable "gbuild", "gquery", "ghttp", "gserver", "gclient", "gconsole". +(Please substitute the $(ARCH) with your system version, like setup_archlinux.sh, setup_centos.sh and setup_ubuntu.sh) +What is more, the api of gStore is also built now. If you want to use API examples of gStore, please run `make APIexample` to compile example codes for both C++ API and Java API. For details of API, please visit [API](API.md) chapter. diff --git a/docs/PLAN.md b/docs/PLAN.md index 4bbc62e..95439d8 100644 --- a/docs/PLAN.md +++ b/docs/PLAN.md @@ -1,16 +1,22 @@ ## Improve The Core -- add numeric value query function. need to answer numeric range query efficiently and space consume cannot be too large +- speed up the join process and postprocessing of SPARQL using GPU or FPGA -- add a control module to heuristically select an kind of index for a SPARQL query to filter(not always vstree) +- improve the indices and support concurrent reads + +- add numeric value query function. need to answer numeric range query efficiently and space consume cannot be too large - typedef all frequently used types, to avoid inconsistence and high modify cost - - - -## Better The Interface +## Better Interface -- write web interface for gStore, and a web page to operate on it, just like virtuoso +- the usability of ghttp(ERROR_CODE, API ...) + +- improve socket interface + +- docker settings - - - @@ -18,8 +24,6 @@ - warnings remain in using Parser/(antlr)!(modify sparql.g 1.1 and regenerate). 
change name to avoid redefine problem(maybe error), or go to use executable to parse -- build compress module(such as key-value module and stream module), but the latter just needs one-pass read/write, which may causes the compress method to be used both in disk and memory. all operations of string in memory can be changed to operations after compress: provide compress/archive interface, compare function. there are many compress algorithms to be chosen, then how to choose? what about utf-8 encoding problem? this method can lower the consume of memory and disk, but consumes more CPU. However, the time is decided by isomorphism. Simple compress is not good, but too complicated method will consume too much time, how to balance? (merge the continuous same characters, Huffman tree) - - mmap to speedup KVstore? - the strategy for Stream:is 85% valid? consider sampling, analyse the size of result set and decide strategy? how to support order by: sort in memory if not put in file; otherwise, partial sort in memory, then put into file, then proceed external sorting diff --git a/docs/STRUCT.md b/docs/STRUCT.md index 26dd3e8..721cac3 100644 --- a/docs/STRUCT.md +++ b/docs/STRUCT.md @@ -1,5 +1,13 @@ **This chapter introduce the whole structure of the gStore system project.** +#### Figures + +The whole architecture of gStore system is presented in [Architecture](png/系统架构图_en.png). +The thread model of 'ghttp' can be viewed in [EN](png/ghttp-thread.png) and [ZH](png/ghttp-线程.png), which shows the relationship among main process, server thread, query thread and so on. +The flow of answering a SPARQL query is given in [SPARQL Processing](png/查询处理过程.png), and the subprocess, which only targets the BGP(Basic Graph Pattern) processing, is drawn in [BGP Processing](png/BGP.png). 
+ +--- + #### The core source codes are listed below: - Database/ (calling other core parts to deal with requests from interface part) @@ -10,7 +18,11 @@ - Join.cpp (join the node candidates to get results) - - Join.h (class, members,, and functions definitions) + - Join.h (class, members and functions definitions) + + - Strategy.cpp + + - Strategy.h - KVstore/ (a key-value store to swap between memory and disk) @@ -18,39 +30,133 @@ - KVstore.h - - heap/ (a heap of nodes whose content are in memory) - - - Heap.cpp + - ISArray/ - - Heap.h + - ISArray.cpp + + - ISArray.h + + - ISBlockManager.cpp + + - ISBlockManager.h + + - ISEntry.cpp + + - ISEntry.h + + - ISTree/ + + - ISTree.cpp + + - ISTree.h + + - heap/ (a heap of nodes whose content are in memory) + + - ISHeap.cpp + + - ISHeap.h - - node/ (all kinds of nodes in B+-tree) + - node/ (all kinds of nodes in B+-tree) - - Node.cpp (the base class of IntlNode and LeafNode) + - ISIntlNode.cpp - - Node.h + - ISIntlNode.h - - IntlNode.cpp (internal nodes in B+-tree) + - ISLeafNode.cpp - - IntlNode.h + - ISLeafNode.h - - LeafNode.cpp (leaf nodes in B+-tree) + - ISNode.cpp - - LeafNode.h + - ISNode.h - - storage/ (swap contents between memory and disk) + - storage/ - - file.h + - ISStorage.cpp + + - ISStorage.h + + - IVArray/ + + - IVArray.cpp + + - IVArray.h + + - IVBlockManager.cpp + + - IVBlockManager.h + + - IVCacheManager.cpp + + - IVCacheManger.h + + - IVEntry.cpp + + - IVEntry.h + + - IVTree/ + + - IVTree.cpp + + - IVTree.h + + - heap/ (a heap of nodes whose content are in memory) - - Storage.cpp + - IVHeap.cpp - - Storage.h + - IVHeap.h + + - node/ (all kinds of nodes in B+-tree) - - tree/ (implement all tree operations and interfaces) + - IVIntlNode.cpp - - Tree.cpp + - IVIntlNode.h - - Tree.h + - IVLeafNode.cpp + + - IVLeafNode.h + + - IVNode.cpp + + - IVNode.h + + - storage/ + + - IVStorage.cpp + + - IVStorage.h + + - SITree/ + + - SITree.cpp + + - SITree.h + + - heap/ (a heap of nodes whose content are in 
memory) + + - SIHeap.cpp + + - SIHeap.h + + - node/ (all kinds of nodes in B+-tree) + + - SIIntlNode.cpp + + - SIIntlNode.h + + - SILeafNode.cpp + + - SILeafNode.h + + - SINode.cpp + + - SINode.h + + - storage/ + + - SIStorage.cpp + + - SIStorage.h - Query/ (needed to answer SPARQL query) @@ -62,6 +168,10 @@ - IDList.h + - ResultFilter.cpp + + - ResultFilter.h + - ResultSet.cpp (keep the result set corresponding to a query) - ResultSet.h @@ -74,6 +184,10 @@ - Varset.h + - QueryCache.cpp + + - QueryCache.h + - QueryTree.cpp - QueryTree.h @@ -82,6 +196,10 @@ - GeneralEvaluation.h + - TempResult.cpp + + - TempResult.h + - RegexExpression.h - Signature/ (assign signatures for nodes and edges, but not for literals) @@ -170,6 +288,12 @@ - BloomFilter.h + - ClassForVlistCache.h + + - VList.cpp + + - VList.h + - - - #### The interface part is listed below: @@ -192,21 +316,19 @@ - Socket.h -- Main/ (a series of applications/main-program to operate on gStore) + - client_http.hpp - - gload.cpp (import a RDF dataset) + - server_http.hpp - - gquery.cpp (query a database) - - - gserver.cpp (start up the gStore server) - - - gclient.cpp (connect to a gStore server and interact) +- web/ + + - - - - #### More details -To acquire a deep understanding of gStore codes, please go to [Code Detail](pdf/代码目录及概览.pdf). See [use case](pdf/Gstore2.0_useCaseDoc.pdf) to understand the design of use cases, and see [OOA](pdf/OOA_class.pdf) and [OOD](pdf/OOD_class.pdf) for OOA design and OOD design, respectively. +To acquire a deep understanding of gStore codes, please go to [Code Detail](pdf/code_overview.pdf). See [use case](pdf/Gstore2.0_useCaseDoc.pdf) to understand the design of use cases, and see [OOA](pdf/OOA_class.pdf) and [OOD](pdf/OOD_class.pdf) for OOA design and OOD design, respectively. 
If you want to know the sequence of a running gStore, please view the list below: diff --git a/docs/USAGE.md b/docs/USAGE.md index 5b6f8c7..4e9522f 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -2,15 +2,10 @@ **All the commands of gStore should be used in the root directory of gStore like bin/gconsole, because executables are placed in bin/, and they may use some files whose paths are indicated in the code, not absolute paths. We will ensure that all paths are absolute later by asking users to give the absolute path in their own systems to really install/configure the gStore. However, you must do as we told now to avoid errors.** -#### 0. gconsole - -gconsole is the main console of gStore, which integrates with all functions to operate on gStore, as well as some system commands. Completion of commands name, line editing features and access to the history list are all provided. Feel free to try it, and you may have a wonderful tour!(spaces or tabs at the beginning or end is ok, and no need to type any special characters as separators) - -Just type `bin/gconsole` in the root directory of gStore to use this console, and you will find a `gstore>` prompt, which indicates that you are in native mode and can type in native commands now. There are another mode of this console, which is called remote mode. Just type `connect` in the native mode to enter the remote mode, and type `disconnect` to exit to native mode.(the console connect to a gStore server whose ip is '127.0.0.1' and port is 3305, you can specify them by type `connect gStore_server_ip gStore_server_port`) - -You can use `help` or `?` either in native mode or remote mode to see the help information, or you can type `help command_name` or `? command_name` to see the information of a given command. Notice that there are some differences between the commands in native mode and commands in remote mode. For example, system commands like `ls`, `cd` and `pwd` are provided in native mode, but not in remote mode. 
Also take care that not all commands contained in the help page are totally achieved, and we may change some functions of the console in the future. - -What we have done is enough to bring you much convenience to use gStore, just enjoy it! +#### 0. Format of data +The RDF data should be given in N-Triples format(XML is not supported for now) and queries must be given in SPARQL 1.1 syntax. +Not all SPARQL 1.1 syntax is parsed and answered in gStore; for example, property paths are beyond the ability of the gStore system. +Tabs, '<' and '>' are not allowed to appear in entities, literals or predicates of the data and queries. - - - @@ -23,7 +18,6 @@ gbuild is used to build a new database from a RDF triple format file. For example, we build a database from lubm.nt which can be found in example folder. [bookug@localhost gStore]$ bin/gbuild lubm ./data/lubm/lubm.nt - 2017年11月23日 星期四 20时58分21秒 -0.484698 seconds gbuild... argc: 3 DB_store:lubm RDF_data: ./data/lubm/lubm.nt begin encode RDF from : ./data/lubm/lubm.nt ... @@ -95,7 +89,7 @@ Notice: #### 3. ghttp -ghttp runs gStore like HTTP server with port 9000. Visit from browser with prescriptive url, then gStore will execute corresponding operation. +ghttp runs gStore as an HTTP server on port 9000 (you need to open this port in your environment; the `iptables` tool is suggested). Visit from browser with prescriptive url, then gStore will execute corresponding operation. 
type: @@ -136,6 +130,7 @@ answer = gc.user("delete_query", "root", "123456", "Jack", "lubm"); //delete user(with username: Jack, password: 2) answer = gc.user("delete_user", "root", "123456", "Jack", "2"); ``` + ``` db_name: the name of database, like lubm format: html, json, txt, csv @@ -146,11 +141,21 @@ type: the type of operation that you execute on user, like: add_user, delete_use username: the username of the user that execute the operation password: the password of the user that execute the operation ``` + +`ghttp` supports concurrent read-only queries, but when queries containing updates come, the whole database will be locked. +The number of concurrent running queries is suggested to be lower than 300 on a machine with dozens of kernel threads, though we can run 13000 queries concurrently in our experiments. +To use the concurrency feature, you had better modify the system settings of 'open files' and 'maximum processes' to 65535 or larger. +Three scripts are placed in [setup](../scripts/setup/) to help you modify the settings in different Linux distributions. + +**If queries containing updates are sent via `ghttp`, a `checkpoint` command must be sent and done by the `ghttp` console before we shut down the database server. Otherwise, the updates may not be synchronized to disk and will be lost if the `ghttp` server is stopped.** + - - - #### 4. gserver +**This is not maintained now.** + gserver is a daemon. It should be launched first when accessing gStore by gclient or API. It communicates with client through socket. [bookug@localhost gStore]$ bin/gserver -s @@ -172,13 +177,15 @@ Notice: Multiple threads are not supported by gserver. If you start up gclient i #### 5. gclient +**This is not maintained now.** + gclient is designed as a client to send commands and receive feedbacks. [bookug@localhost gStore]$ bin/gclient ip=127.0.0.1 port=3305 gsql> -You can also assign gserver's ip and port. +You can also assign the ip and port of gserver. 
[bookug@localhost gStore]$ bin/gclient 172.31.19.15 3307 ip=172.31.19.15 port=3307 @@ -208,35 +215,17 @@ Notice: - - - +#### 6. gconsole +**This is not maintained now.** -#### 6. test utilities +gconsole is the main console of gStore, which integrates with all functions to operate on gStore, as well as some system commands. Completion of commands name, line editing features and access to the history list are all provided. Feel free to try it, and you may have a wonderful tour!(spaces or tabs at the beginning or end is ok, and no need to type any special characters as separators) -A series of test program are placed in the test/ folder, and we will introduce the two useful ones: gtest.cpp and full_test.sh +Just type `bin/gconsole` in the root directory of gStore to use this console, and you will find a `gstore>` prompt, which indicates that you are in native mode and can type in native commands now. There are another mode of this console, which is called remote mode. Just type `connect` in the native mode to enter the remote mode, and type `disconnect` to exit to native mode.(the console connect to a gStore server whose ip is '127.0.0.1' and port is 3305, you can specify them by type `connect gStore_server_ip gStore_server_port`) -**gtest is used to test gStore with multiple datasets and queries.** +You can use `help` or `?` either in native mode or remote mode to see the help information, or you can type `help command_name` or `? command_name` to see the information of a given command. Notice that there are some differences between the commands in native mode and commands in remote mode. For example, system commands like `ls`, `cd` and `pwd` are provided in native mode, but not in remote mode. Also take care that not all commands contained in the help page are totally achieved, and we may change some functions of the console in the future. -To use gtest utility, please type `make gtest` to compile the gtest program first. 
Program gtest is a test tool to generate structural logs for datasets. Please type `./gtest --help` in the working directory for details. - -**Please change paths in the test/gtest.cpp if needed.** - -You should place the datasets and queries in this way: - - DIR/WatDiv/database/*.nt - - DIR/WatDiv/query/*.sql - -Notice that DIR is the root directory where you place all datasets waiting to be used by gtest. And WatDiv is a class of datasets, as well as lubm. Inside WatDiv(or lubm, etc. please place all datasets(named with .nt) in a database/ folder, and place all queries(corresponding to datasets, named with .sql) in a query folder. - -Then you can run the gtest program with specified parameters, and the output will be sorted into three logs in gStore root directory: load.log/(for database loading time and size), time.log/(for query time) and result.log/(for all query results, not the entire output strings, but the information to record the selected two database systems matched or not). - -All logs produced by this program are in TSV format(separated with '\t'), you can load them into Calc/Excel/Gnumeric directly. Notice that time unit is ms, and space unit is kb. - -**full_test.sh is used to compare the performance of gStore and other database systems on multiple datasets and queries.** - -To use full_test.sh utility, please download the database system which you want to tats and compare, and set the exact position of database systems and datasets in this script. The name strategy should be the same as the requirements of gtest, as well as the logs strategy. - -Only gStore and Jena are tested and compared in this script, but it is easy to add other database systems, if you would like to spend some time on reading this script. You may go to [test report](pdf/gstore测试报告.pdf) or [Frequently Asked Questions](FAQ.md) for help if you encounter a problem. +What we have done is enough to bring you much convenience to use gStore, just enjoy it! 
- - - @@ -301,3 +290,33 @@ After starting ghttp, type `bin/gshow ip port` to check loaded database. Content-Length--->[4] database: lubm +--- + +#### 11. test utilities + +A series of test programs are placed in the scripts/ folder, and we will introduce the two useful ones: gtest.cpp and full_test.sh + +**gtest is used to test gStore with multiple datasets and queries.** + +To use gtest utility, please type `make gtest` to compile the gtest program first. Program gtest is a test tool to generate structural logs for datasets. Please type `./gtest --help` in the working directory for details. + +**Please change paths in the test/gtest.cpp if needed.** + +You should place the datasets and queries in this way: + + DIR/WatDiv/database/*.nt + + DIR/WatDiv/query/*.sql + +Notice that DIR is the root directory where you place all datasets waiting to be used by gtest. And WatDiv is a class of datasets, as well as lubm. Inside WatDiv(or lubm, etc.), please place all datasets(named with .nt) in a database/ folder, and place all queries(corresponding to datasets, named with .sql) in a query folder. + +Then you can run the gtest program with specified parameters, and the output will be sorted into three logs in gStore root directory: load.log/(for database loading time and size), time.log/(for query time) and result.log/(for all query results, not the entire output strings, but the information to record the selected two database systems matched or not). + +All logs produced by this program are in TSV format(separated with '\t'), you can load them into Calc/Excel/Gnumeric directly. Notice that time unit is ms, and space unit is kb. + +**full_test.sh is used to compare the performance of gStore and other database systems on multiple datasets and queries.** + +To use full_test.sh utility, please download the database system which you want to test and compare, and set the exact position of database systems and datasets in this script. 
The name strategy should be the same as the requirements of gtest, as well as the logs strategy. + +Only gStore and Jena are tested and compared in this script, but it is easy to add other database systems, if you would like to spend some time on reading this script. You may go to [test report](pdf/gstore测试报告.pdf) or [Frequently Asked Questions](FAQ.md) for help if you encounter a problem. + diff --git a/docs/docx/02-Gstore2.0_useCaseDoc.docx b/docs/docx/02-Gstore2.0_useCaseDoc.docx new file mode 100644 index 0000000..9960924 Binary files /dev/null and b/docs/docx/02-Gstore2.0_useCaseDoc.docx differ diff --git a/docs/docx/05-gStore2.0-usage_eng.docx b/docs/docx/05-gStore2.0-usage_eng.docx new file mode 100644 index 0000000..35950b6 Binary files /dev/null and b/docs/docx/05-gStore2.0-usage_eng.docx differ diff --git a/docs/docx/Gstore2.0软件需求说明.docx b/docs/docx/Gstore2.0软件需求说明.docx new file mode 100644 index 0000000..b43af34 Binary files /dev/null and b/docs/docx/Gstore2.0软件需求说明.docx differ diff --git a/docs/help/gStore_help.pdf b/docs/help/gStore_help.pdf index 8673245..9dd6047 100644 Binary files a/docs/help/gStore_help.pdf and b/docs/help/gStore_help.pdf differ diff --git a/docs/help/gStore_help.tex b/docs/help/gStore_help.tex deleted file mode 100644 index 32d1539..0000000 --- a/docs/help/gStore_help.tex +++ /dev/null @@ -1,2600 +0,0 @@ -\documentclass[titlepage, a4paper, 12pt]{article} - -%\usepackage{ctex} -\usepackage{lmodern} -\usepackage{ifxetex,ifluatex} -\usepackage{fixltx2e} -\usepackage{amsmath} -\usepackage{txfonts} -\usepackage{amssymb} -\usepackage{times} -\usepackage{graphicx} -\usepackage{epsfig,tabularx,amssymb,amsmath,subfigure,multirow} -%\usepackage{algorithmic} -\usepackage[linesnumbered,ruled,noend]{algorithm2e} -\usepackage[noend]{algorithmic} -\usepackage{multirow} -\usepackage{graphicx,floatrow} -\usepackage{listings} -\usepackage{threeparttable} -%\usepackage{tikz} -\usepackage[T1]{fontenc} -\usepackage{pgfplots} -\usepackage{filecontents} 
-\usepackage{comment} - -\lstset{% - alsolanguage=Java, - %language={[ISO]C++}, %language为,还有{[Visual]C++} - %alsolanguage=[ANSI]C, %可以添加很多个alsolanguage,如alsolanguage=matlab,alsolanguage=VHDL等 - %alsolanguage= tcl, - alsolanguage= XML, - tabsize=4, % - frame=shadowbox, %把代码用带有阴影的框圈起来 - commentstyle=\color{red!50!green!50!blue!50},%浅灰色的注释 - rulesepcolor=\color{red!20!green!20!blue!20},%代码块边框为淡青色 - keywordstyle=\color{blue!90}\bfseries, %代码关键字的颜色为蓝色,粗体 - showstringspaces=false,%不显示代码字符串中间的空格标记 - stringstyle=\ttfamily, % 代码字符串的特殊格式 - keepspaces=true, % - breakindent=22pt, % - numbers=left,%左侧显示行号 往左靠,还可以为right,或none,即不加行号 - stepnumber=1,%若设置为2,则显示行号为1,3,5,即stepnumber为公差,默认stepnumber=1 - %numberstyle=\tiny, %行号字体用小号 - numberstyle={\color[RGB]{0,192,192}\tiny} ,%设置行号的大小,大小有tiny,scriptsize,footnotesize,small,normalsize,large等 - numbersep=8pt, %设置行号与代码的距离,默认是5pt - basicstyle=\footnotesize, % 这句设置代码的大小 - showspaces=false, % - flexiblecolumns=true, % - breaklines=true, %对过长的代码自动换行 - breakautoindent=true,% - breakindent=4em, % - % escapebegin=\begin{CJK*}{GBK}{hei},escapeend=\end{CJK*}, - aboveskip=1em, %代码块边框 - tabsize=2, - showstringspaces=false, %不显示字符串中的空格 - backgroundcolor=\color[RGB]{245,245,244}, %代码背景色 - %backgroundcolor=\color[rgb]{0.91,0.91,0.91} %添加背景色 - escapeinside=``, %在``里显示中文 - %% added by http://bbs.ctex.org/viewthread.php?tid=53451 - fontadjust, - captionpos=t, - framextopmargin=2pt,framexbottommargin=2pt,abovecaptionskip=-3pt,belowcaptionskip=3pt, - xleftmargin=4em,xrightmargin=4em, % 设定listing左右的空白 - texcl=true, - % 设定中文冲突,断行,列模式,数学环境输入,listing数字的样式 - extendedchars=false,columns=flexible,mathescape=true - % numbersep=-1em -} - -\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex - \usepackage[T1]{fontenc} - \usepackage[utf8]{inputenc} -\else % if luatex or xelatex - \ifxetex - \usepackage{mathspec} - \usepackage{xltxtra,xunicode} - \else - \usepackage{fontspec} - \fi - \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} - \newcommand{\euro}{�} -\fi 
- -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} -% use microtype if available -\IfFileExists{microtype.sty}{% -\usepackage{microtype} -\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts -}{} -\usepackage{longtable,booktabs} -\ifxetex - \usepackage[setpagesize=false, % page size defined by xetex - unicode=false, % unicode breaks when used with xetex - xetex]{hyperref} -\else - \usepackage[unicode=true]{hyperref} -\fi -\hypersetup{breaklinks=true, - bookmarks=true, - pdfauthor={}, - pdftitle={Gstore System}, - colorlinks=true, - citecolor=blue, - urlcolor=blue, - linkcolor=magenta, - pdfborder={0 0 0}} -\urlstyle{same} % don't use monospace font for urls -%\setlength{\parskip}{6pt plus 2pt minus 1pt} -\setlength{\emergencystretch}{3em} % prevent overfull lines -\setcounter{secnumdepth}{0} -\setlength{\parindent}{0pt} -%\setlength{\parindent}{2em} -\addtolength{\parskip}{3pt} -\linespread{1.3} - -\begin{document} -\title{\includegraphics[scale=0.3, bb=0 0 385 567]{logo.png} \\ - The handbook of gStore System测试} -%\author{Bookug Lobert\footnote{EECS of Peking University, zengli-bookug@pku.edu.cn}\\[2ex]} -\author{Edited by gStore team \footnote{The mailing list is given in Chapter 13.}} -\date{\today} -%\begin{figure}[b] -% \centering -%  \includegraphics[scale=0.3,bb=0 0 385 567]{../logo.png} - %\caption{Some description about the picture} -% \label{logo} -%\end{figure} -\maketitle - -\hyperdef{}{MathJaxux5fSVGux5fHidden}{} - -\hyperdef{}{wmd-preview}{} - -\setcounter{tocdepth}{4} -\tableofcontents -\clearpage - -\section{Preface} -The RDF (\emph{R}esource \emph{D}escription \emph{F}ramework) is a family of specifications proposed by W3C for modeling Web objects as part of developing the semantic web. In RDF model, each Web object is modeled as a uniquely named \emph{resource} and denoted by a URI (\emph{U}niform \emph{R}esource \emph{I}dentifier). 
RDF also uses URIs to name the properties of resources and the relationships between resources as well as the two ends of the link (this is usually referred to as a ``triple''). Hence, an RDF dataset can be represented as a directed, labeled graph where resources are vertices, and triples are -edges with property or relationship names as edge labels. For more details, please go to \href{https://www.w3.org/RDF/}{RDF Introduction}\\ - -To retrieve and manipulate an RDF graph, W3C also proposes a structured query language, SPARQL (\emph{S}imple \emph{P}rotocol \emph{A}nd \emph{R}DF \emph{Q}uery \emph{L}anguage), to access RDF repository. SPARQL contains capabilities for querying required and optional graph patterns along with their conjunctions and disjunctions. SPARQL also supports aggregation, subqueries, negation, creating values by expressions, extensible value testing, and constraining queries by source RDF graph. Similar to RDF graphs, a SPARQL query can also be modeled as a graph, which is a query graph with some variables. Then, evaluating a SPARQL query is equivalent to finding subgraph (homomorphism) matches of a query graph over an RDF graph. You can have a better understanding of SPARQL at \href{https://www.w3.org/TR/sparql11-query/}{SPARQL Introduction}.\\ - -Although there are some RDF data management systems (like Jena, Virtuoso, Sesame) that store the RDF data in relational systems, few existing systems exploit the native graph pattern -matching semantics of SPARQL. \textbf{Here, we implement a graph-based RDF triple store named gStore, which is a joint research project by Peking University, University of Waterloo and Hong Kong University of Science and Technology. 
The system is developed and maintained by the database group in Institute of Computer Science and Technology, Peking University, China.} A detailed description of gStore can be found at our papers {[}Zou et al., VLDB 11{]} and {[}Zou et al., VLDB Journal 14{]} in the \hyperref[chapter09]{Publication} chapter. This HELP document includes system installment, usage, API, use cases and FAQ. gStore is a open-source project in github under the BSD license. You are welcome to use gStore, report bugs or suggestions, or join us to make gStore better. It is also allowed for you to build all kinds of applications based on gStore, while respecting our work.\\ - - - -\textbf{Please make sure that you have read \hyperref[chapter18]{Legal Issues} before using gStore.} - -\clearpage - -\part{Start} - -\hyperdef{}{chapter00}{\subsection{Chapter 00: A Quick Tour}\label{chapter00}} -Gstore System(also called gStore) is a graph database engine for managing large graph-structured data, which is open-source and targets at Linux operation systems. The whole project is written in C++, with the help of some libraries such as readline, antlr, and so on. Only source tarballs are provided currently, which means you have to compile the source code if you want to use our system. - -\hyperdef{}{getting-started}{\subsubsection{Getting -Started}\label{getting-started}} - -This system is really user-friendly and you can pick it up in several minutes. Remember to check your platform where you want to run this system by viewing \hyperref[chapter01]{System Requirements}. After all are verified, please get this project's source code. 
There are several ways to do this: - -\begin{itemize} -\item - download the zip from this repository and extract it -\item - fork this repository in your github account -\item - type \texttt{git\ clone\ git@github.com:Caesar11/gStore.git} in your - terminal or use git GUI to acquire it -\end{itemize} - -Then you need to compile the project, just type \texttt{make} in the gStore root directory, and all executables will be ok. To run gStore, please type \texttt{bin/gbuild\ database\_name\ dataset\_path} to build a database named by yourself. And you can use \texttt{bin/gquery\ database\_name} command to query a existing database. What is more, \texttt{bin/gconsole} is a wonderful tool designed for you, providing all operations you need to use gStore. -Notice that all commands should be typed in the root directory of gStore. - -\emph{A detailed description can be found at Chapter 04 -\hyperref[chapter04]{How to use} in this document.} - -\hyperdef{}{advanced-help}{\subsubsection{Advanced -Help}\label{advanced-help}} - -If you want to understand the details of the gStore system, or you want to try some advanced operations(for example, using the API, server/client), please see the chapters below. 
- -\begin{itemize} -\item - \hyperref[chapter02]{Basic Introduction}: introduce the theory and features of gStore -\item - \hyperref[chapter03]{Install Guide}: instructions on how to install this system -\item - \hyperref[chapter04]{How To Use}: detailed information about using the gStore system -\item - \hyperref[chapter05]{Socket API Explanation}: guide you to develop applications based on our Socket API -\item - \hyperref[chapter06]{HTTP API Explanation}: guide you to develop applications based on our HTTP API -\item - \hyperref[chapter08]{Project Structure}: show the whole structure and sequence of this project -\item - \hyperref[chapter09]{Publications}: contain essays and publications - related with gStore -\item - \hyperref[chapter10]{Update Logs}: keep the logs of the system updates -\item - \hyperref[chapter15]{Test Result}: present the test results of a series of experiments -\end{itemize} - -\hyperdef{}{other-business}{\subsubsection{Other Business}\label{other-business}} - -We have written a series of short essays addressing recurring challenges in using gStore to realize applications, which are placed in -\hyperref[chapter12]{Recipe Book}. - -You are welcome to report any advice or errors in the github Issues part of this repository, if not requiring in-time reply. However, if you want to urgent on us to deal with your reports, please email to to submit your suggestions and report bugs to us by emailing to . A full list of our whole team is in \hyperref[chapter13]{Contributors}. - -There are some restrictions when you use the current gStore project, you can see them on \hyperref[chapter10]{Limitations}. - -Sometimes you may find some strange phenomena(but not wrong case), or something hard to understand/solve(don't know how to do next), then do not hesitate to visit the \hyperref[chapter11]{Frequently Asked Questions} page. - -Graph database engine is a new area and we are still trying to go further. 
Things we plan to do next are listed in the \hyperref[chapter16]{Future Plan} chapter, and we hope more and more people will support or even -join us. You can support in many ways: - -\begin{itemize} -\item - watch/star our project -\item - fork this repository and submit pull requests to us -\item - download and use this system, report bugs or suggestions -\item - \ldots{} -\end{itemize} - -People who inspire us or contribute to this project will be listed in the \hyperref[chapter17]{Thanks List} chapter. - -\clearpage - -\hyperdef{}{chapter01}{\subsection{Chapter 01: System Requirements}\label{chapter01}} - -\emph{We have tested on linux server with CentOS 6.2 x86\_64 and CentOS 6.6 x86\_64. The version of GCC should be 4.4.7 or later.} - -\begin{longtable}[c]{@{}ll@{}} -\toprule -Item & Requirement\tabularnewline -\midrule -\endhead -operating system & Linux, such as CentOS, Ubuntu and so on\tabularnewline -architecture & x86\_64\tabularnewline -disk size & according to size of dataset\tabularnewline -memory size & according to size of dataset\tabularnewline -glibc & version \textgreater{}= 2.14\tabularnewline -gcc & version \textgreater{}= 4.4.7\tabularnewline -g++ & version \textgreater{}= 4.4.7\tabularnewline -make & need to be installed\tabularnewline -boost & version \textgreater{}= 1.54\tabularnewline -readline & need to be installed\tabularnewline -readline-devel & need to be installed\tabularnewline -openjdk & needed if using Java api\tabularnewline -openjdk-devel & needed if using Java api\tabularnewline -realpath & needed if using gconsole\tabularnewline -ccache & optional, used to speed up the compilation\tabularnewline -\bottomrule -\caption{software requirement} -\end{longtable} - -NOTICE: - -\begin{enumerate} -\item - The name of some packages may be different in different platforms, just install the corresponding one in your own operating system. 
-\item - To install readline and readline-devel, just type \texttt{dnf\ install\ readline-devel} in Redhat/CentOS/Fedora, or \texttt{apt-get\ install\ libreadline-dev} in Debian/Ubuntu. Please use corresponding commands in other systems. If you use ArchLinux, just type \texttt{pacman\ -S\ readline} to install the readline and readline-devel.(so do other packages) -\item - You do not have to install realpath to use gStore, but if you want to use the gconsole for its convenience, please do so by using \texttt{dnf\ install\ realpath} or \texttt{apt-get\ install\ realpath}. -\item - Our programs use regEx functions, which are provided by GNU/Linux by default. -\item - ANTLR3.4 is used in gStore to produce lexer and parser code for SPARQL query. However, you do not need to install the corresponding antlr libraries because we have merged the libantlr3.4 in our system. -\item - When you type \texttt{make} in the root directory of the gStore project, the Java api will also be compiled. You can modify the makefile if you do not have JDK in your system. However, you are advised to install openjdk-devel in your Linux system. -\item - To install ccache, you need to add epel repository if using CentOS, while in Ubuntu you can directly install it by 'apt-get install ccache' command. If you can not install ccache(or maybe you do not want to), please modify the makefile(just change the CC variable to g++). -\item If you need to use the HTTP server in gStore, then Boost Library(like boost-devel, including boost headers for developing) must be installed and the version should not be less than 1.54. Remember to check the makefile for your installed path of Boost. -\item - Any other questions, please go to \hyperref[chapter11]{FAQ} page. 
-\end{enumerate} - -\clearpage - -\hyperdef{}{chapter02}{\subsection{Chapter 02: Basic Introduction}\label{chapter02}} - -\textit{The first essay to come up with gStore system is -\href{run:../pdf/gStoreVLDBJ.pdf}{gStore\_VLDBJ}, and you can find related publications in -\hyperref[chapter09]{Publications}.} - -\hyperdef{}{what-is-gstore}{\subsubsection{What Is -gStore}\label{what-is-gstore}} - -gStore is a graph-based RDF data management system(or what is commonly called a ``triple store'') that maintains the graph structure of the original \href{http://www.w3.org/TR/rdf11-concepts/}{RDF} data. Its data model is a labeled, directed multi edge graph, where each vertex corresponds to a subject or an object. - -We represent a given \href{http://www.w3.org/TR/sparql11-overview/}{SPARQL} query by a query graph Q. Query processing involves finding subgraph matches of Q over the RDF graph G, instead of joining tables in relational data management system. gStore incorporates an index over the RDF graph (called VS-tree) to speed up query processing. VS-tree is a height balanced tree with a number of associated pruning techniques to speed up subgraph matching. - -\textbf{The gStore project is supported by the National Science Foundation of China (NSFC), Natural Sciences and Engineering Research Council (NSERC) of Canada, and Hong Kong RGC.} - -\hyperdef{}{why-gstore}{\subsubsection{Why gStore}\label{why-gstore}} - -After a series of tests, we analyse and keep the results in \hyperref[chapter15]{Test Results}. gStore runs faster to answer complicated queries(for example, contain circles) than other database systems. For simple queries, both gStore and other database systems work -well. - -In addition, now is the big data era and more and more structured data is coming, while the original relational database systems(or database systems based on relational tables) cannot deal with them efficiently. 
In contrast, gStore can utilize the features of graph data structures, and improve the performance. - -What is more, gStore is a highly extensible project. Many new ideas of graph database have been proposed, and most of them can be used in gStore. For example, our group is also designing a distributed gstore system, which is expected to be released at the end of 2016. - -\hyperdef{}{open-source}{\subsubsection{Open Source}\label{open-source}} - -The gStore source code is available as open-source code under the BSD license. You are welcome to use gStore, report bugs or suggestions, or join us to make gStore better. It is also allowed for you to build all kinds of applications based on gStore, while respecting our work. - -\clearpage - -\hyperdef{}{chapter03}{\subsection{Chapter 03: Install Guide}\label{chapter03}} - -You are advised to read init.conf file, and modify it as you wish. (this file will configure the basic options of gStore system) - -gStore is a green software, and you just need to compile it with one command. Please run \texttt{make} in the gStore root directory to compile the gStore code, link the ANTLR lib, and build executable ``gbuild'', ``gquery'', ``gserver'', ``gclient'', ``gconsole''. What is more, the api of gStore is also built now. - -If you want to use API examples of gStore, please run \texttt{make\ APIexample} to compile example codes for both C++ API and Java API. For details of API, please visit \hyperref[chapter05]{API} chapter. - -Use \texttt{make\ clean} command to clean all objects, executables, and use \texttt{make\ dist} command to clean all objects, executables, libs, datasets, databases, debug logs, temp/text files in the gStore root directory. - -You are free to modify the source code of gStore and create your own project while respecting our work, and type \texttt{make\ tarball} command to compress all useful files into a .tar.gz file, which is easy to carry. 
- -Type \texttt{make\ gtest} to compile the gtest program if you want to use this test utility. You can see the \hyperref[chapter04]{HOW TO USE} for details of gtest program. - -\clearpage - -\hyperdef{}{chapter04}{\subsection{Chapter 04: How To Use}\label{chapter04}} - -\textit{gStore currently includes five executables and others.} - -\textbf{All the commands of gStore should be used in the root directory of gStore like bin/gconsole, because executables are placed in bin/, and they may use some files whose paths are indicated in the code, not absolute paths. We will ensure that all paths are absolute later by asking users to give the absolute path in their own systems to really install/configure the gStore. However, you must do as we told now to avoid errors.} - -\hyperdef{}{0-gconsole}{\paragraph{0. gconsole}\label{0-gconsole}} - -gconsole is the main console of gStore, which integrates with all functions to operate on gStore, as well as some system commands. Completion of commands name, line editing features and access to the history list are all provided. Feel free to try it, and you may have a wonderful tour!(spaces or tabs at the beginning or end is ok, and no need to type any special characters as separators) - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gconsole -Gstore Console(gconsole), an interactive shell based utility to communicate with -gStore repositories. -usage: start-gconsole [OPTION] --h,--help print this help --s,--source source the SPARQL script -For bug reports and suggestions, see https://github.com/Caesar11/gStore - -notice that commands are a little different between native mode and remote mode! -now is in native mode, please type your commands. -please do not use any separators in the end. - -gstore>help - -gstore>help drop -drop Drop a database according to the given path. - -gstore>connect 127.0.0.1 3305 -now is in remote mode, please type your commands. - -server>disconnect -now is in native mode, please type your commands. 
- -gstore>build lubm_10 ./data/LUBM_10.n3 -... -import RDF file to database done. - -gstore>unload - -gstore>load lubm_10 -... -database loaded successfully! - -gstore>show -lubm_10 - -gstore>query ./data/LUBM_q0.sql -... -final result is : -?x - - - - - - - - - - - - - - - - -gstore>query "select distinct ?x ?y where { ?x - . -?x ?y . ?y . }" -final result is : -?x ?y -[empty result] - -gstore>unload - -gstore>quit -\end{verbatim} - -Just type \texttt{bin/gconsole} in the root directory of gStore to use this console, and you will find a \texttt{gstore\textgreater{}} prompt, which indicates that you are in native mode and can type in native commands now. There are another mode of this console, which is called remote mode. Just type \texttt{connect} in the native mode to enter the remote mode, and type \texttt{disconnect} to exit to native mode.(the console connect to a gStore server whose ip is `127.0.0.1' and port is 3305, you can specify them by type \texttt{connect\ gStore\_server\_ip\ gStore\_server\_port}) - -You can use \texttt{help} or \texttt{?} either in native mode or remote mode to see the help information, or you can type \texttt{help\ command\_name} or \texttt{?\ command\_name} to see the information of a given command. Notice that there are some differences between the commands in native mode and commands in remote mode. For example, system commands like \texttt{ls}, \texttt{cd} and \texttt{pwd} are provided in native mode, but not in remote mode. Also take care that not all commands contained in the help page are totally achieved, and we may change some functions of the console in the future. - -What we have done is enough to bring you much convenience to use gStore, just enjoy it! - -\hyperdef{}{1-gbuild}{\paragraph{1. gbuild}\label{1-gbuild}} - -gbuild is used to build a new database from a RDF triple format file. 
- -\texttt{bin/gbuild\ db\_name\ rdf\_triple\_file\_path} - -For example, we build a database from LUBM\_10.n3 which can be found in -example folder. - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gbuild LUBM10 ./data/LUBM_10.n3 -gbuild... -argc: 3 DB_store:LUBM10 RDF_data: ./data/LUBM_10.n3 -begin encode RDF from : ./data/LUBM_10.n3 ... -\end{verbatim} - -\hyperdef{}{2-gquery}{\paragraph{2. gquery}\label{2-gquery}} - -gquery is used to query an existing database with files containing -SPARQL queries.(each file contains exactly one SPARQL query) - -Type \texttt{bin/gquery\ db\_name\ query\_file} to execute the SPARQL -query retrieved from query\_file in the database named db\_name. - -Use \texttt{bin/gquery\ -\/-help} for detailed information of gquery -usage. - -To enter the gquery console, type \texttt{bin/gquery\ db\_name}. The -program shows a command prompt(``gsql\textgreater{}''), and you can type -in a command here. Use \texttt{help} to see basic information of all -commands, while \texttt{help\ command\_t} shows details of a specified -command. - -Type \texttt{quit} to leave the gquery console. - -For \texttt{sparql} command, input a file path which contains a single -SPARQL query. (\emph{answer redirecting to file is supported}) - -When the program finishes answering the query, it shows the command prompt -again. - -\emph{gStore2.0 only supports simple ``select'' queries(not for -predicates) now.} - -We also take LUBM\_10.n3 as an example. - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gquery LUBM10 -gquery... -argc: 2 DB_store:LUBM10/ -loadTree... -LRUCache initial... -LRUCache initial finish -finish loadCache -finish loadEntityID2FileLineMap -open KVstore -finish load -finish loading -Type `help` for information of all commands -Type `help command_t` for detail of command_t -gsql>sparql ./data/LUBM_q0.sql -... ... -Total time used: 4ms. 
-final result is : - - - - - - - - - - - - - - - -\end{verbatim} - -Notice: - -\begin{itemize} -\item - ``{[}empty result{]}'' will be printed if no answer, and there is an - empty line after all results. -\item - readline lib is used, so you can use arrow key in your keyboard to see - command history, and use and arrow key to move and modify your entire - command. -\item - path completion is supported for utility. (not built-in command - completion) -\end{itemize} - -\hyperdef{}{3-ghttp}{\paragraph{3. ghttp}\label{3-ghttp}} - -ghttp is a daemon. It should be launched first when accessing gStore by HTTP protocol. It uses port 9000. - -Just type \texttt{bin/ghttp} to start server. After the server is started, you can access it by visit the url in a browser or use the Restful API in your program. You can press Ctrl-C to stop the server. (Multiple connections are supported in HTTP server) - -\begin{verbatim} -[bookug@localhost gStore]$ bin/ghttp -the current settings are as below: -key : value ------------------------------------------------------------ -BackupTime : 2000 # 4 am (GMT+8) -buffer_maxium : 100 -db_home : . -db_suffix : .db -debug_level : simple -gstore_mode : single -operation_logs : true -thread_maxium : 1000 - -enter initialize. -server port: 9000 database name: - -\end{verbatim} - -URL rules are listed blow: - -parameters: operation, db\_name, ds\_path, format, sparql - -NOTICE: do URL encoding before sending it to database server. - -operation: build, load, unload, query, monitor, show, checkpoint -\begin{itemize} - \item - db\_name: the name of database, like lubm - \item - format: html, json, txt, csv - \item - sparql: select ?s where { ?s ?p ?o . 
} - \item - ds\_path in the server: like /home/data/test.n3 -\end{itemize} - -Examples: - -\begin{itemize} - \item - to build a database from a dataset:\\ - http://localhost:9000/?operation=build\&db\_name=[db\_name]\&ds\_path=[ds\_path] - \item - to load a database:\\ - http://localhost:9000/?operation=load\&db\_name=[db\_name] - \item - to query a database:\\ - http://localhost:9000/?operation=query\&format=[format]\&sparql=[sparql] - \item - to unload a database:\\ - http://localhost:9000/?operation=unload\&db\_name=[db\_name] - \item - to monitor the server:\\ - http://localhost:9000/?operation=monitor - \item - to show the database used:\\ - http://localhost:9000/?operation=show - \item - to save the database currently:\\ - http://localhost:9000/?operation=checkpoint -\end{itemize} - - -\hyperdef{}{4-gserver}{\paragraph{4. gserver}\label{4-gserver}} - -gserver is a daemon. It should be launched first when accessing gStore -by gclient or API. It communicates with client through socket. - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gserver -s -Server started at port 3305 -\end{verbatim} - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gserver -t -Server stopped at port 3305 -\end{verbatim} - -You can also assign a custom port for listening. - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gserver -p 3307 -Port changed to 3307. -\end{verbatim} - -Notice: Multiple threads are not supported by gserver. If you start up -gclient in more than one terminal in the same time, gserver will go -down. - -\hyperdef{}{5-gclient}{\paragraph{5. gclient}\label{5-gclient}} - -gclient is designed as a client to send commands and receive feedbacks. 
- -\begin{verbatim} -[bookug@localhost gStore]$ bin/gclient -ip=127.0.0.1 port=3305 -gsql>help -help - print commands message -quit - quit the console normally -import - build a database for a given dataset -load - load an existen database -unload - unload an existen database -sparql - load query from the second argument -show - show the current database's name -gsql>import lubm data/LUBM_10.n3 -import RDF file to database done. -gsql>load lubm -load database done. -gsql>sparql "select ?s ?o where { ?s ?o . }" -[empty result] - -gsql>quit -\end{verbatim} - -You can also assign gserver's ip and port. - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gclient 172.31.19.15 3307 -ip=172.31.19.15 port=3307 -gsql> -\end{verbatim} - -We can use these following commands now: - -\begin{itemize} -\item - \texttt{help} shows the information of all commands -\item - \texttt{import\ db\_name\ rdf\_triple\_file\_name} build a database - from RDF triple file -\item - \texttt{load\ db\_name} load an existing database -\item - \texttt{unload\ db\_name} unload database, but will not delete it on - disk, you can load it next time -\item - \texttt{sparql\ "query\_string"} query the current database with a - SPARQL query string(quoted by ``'') -\item - \texttt{show} displays the name of the current loaded database -\end{itemize} - -Notice: - -\begin{itemize} -\item - at most one database can be loaded in the gclient console -\item - you can place ` ' or `\textbackslash{}t' between different parts of - command, but not use characters like `;' -\item - you should not place any space or tab ahead of the start of any - command -\end{itemize} - -\hyperdef{}{6-test-utilities}{\paragraph{6. 
test -utilities}\label{6-test-utilities}} - -A series of test programs are placed in the test/ folder, and we will -introduce the two useful ones: gtest.cpp and full\_test.sh - -\textbf{gtest is used to test gStore with multiple datasets and -queries.} - -To use gtest utility, please type \texttt{make\ gtest} to compile the -gtest program first. Program gtest is a test tool to generate structural -logs for datasets. Please type \texttt{./gtest\ -\/-help} in the working -directory for details. - -\textbf{Please change paths in the test/gtest.cpp if needed.} - -You should place the datasets and queries in this way: - -\begin{verbatim} -DIR/WatDiv/database/*.nt - -DIR/WatDiv/query/*.sql -\end{verbatim} - -Notice that DIR is the root directory where you place all datasets -waiting to be used by gtest. And WatDiv is a class of datasets, as well -as LUBM. Inside WatDiv(or LUBM, etc.), please place all datasets(named -with .nt) in a database/ folder, and place all queries(corresponding to -datasets, named with .sql) in a query/ folder. - -Then you can run the gtest program with specified parameters, and the -output will be sorted into three logs in gStore root directory: -load.log/(for database loading time and size), time.log/(for query time) -and result.log/(for all query results, not the entire output strings, -but the information to record the selected two database systems matched -or not). - -All logs produced by this program are in TSV format(separated with -`\textbackslash{}t'), you can load them into Calc/Excel/Gnumeric -directly. Notice that time unit is ms, and space unit is kb. - -\textbf{full\_test.sh is used to compare the performance of gStore and -other database systems on multiple datasets and queries.} - -To use full\_test.sh utility, please download the database system which -you want to test and compare, and set the exact position of database -systems and datasets in this script. 
The name strategy should be the -same as the requirements of gtest, as well as the logs strategy. - -Only gStore and Jena are tested and compared in this script, but it is -easy to add other database systems, if you would like to spend some time -on reading this script. You may go to -\href{run:../pdf/gstore���Ա���.pdf}{test -report} or \hyperref[chapter11]{Frequently Asked Questions} for help if -you encounter a problem. - -\hyperdef{}{7-gadd}{\paragraph{7. gadd}\label{7-gadd}} - -gadd is used to add triples in a file to an existing database. - -Usage: \texttt{bin/gadd db\_name rdf\_triple\_file\_path}. - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gadd lubm ./data/LUBM\_10.n3 -... -argc: 3 DB_store:lubm insert file:./data/LUBM_10.n3 -get important pre ID -... -insert rdf triples done. -inserted triples num: 99550 -\end{verbatim} - -\hyperdef{}{8-gsub}{\paragraph{8. gsub}\label{8-gsub}} - -gsub is used to remove triples from an existing database. - -Usage: \texttt{bin/gsub db\_name rdf\_triple\_file\_path}. - -\begin{verbatim} -[bookug@localhost gStore]$ bin/gsub lubm data/LUBM\_10.n3 -... -argc: 3 DB_store:lubm remove file: data/LUBM\_10.n3 -... -remove rdf triples done. -removed triples num: 99550 -\end{verbatim} - -\hyperdef{}{9-gmonitor}{\paragraph{9. gmonitor}\label{9-gmonitor}} - -After starting ghttp, go into gStore/bin/ and type \texttt{./gmonitor ip port} to check current status of gStore. - -\begin{verbatim} -[bookug@localhost bin]$ ./gmonitor 127.0.0.1 9000 -parameter: ?operation=monitor -request: http://127.0.0.1:9000/%3Foperation%3Dmonitor -null--->[HTTP/1.1 200 OK] -Content-Length--->[127] -database: lubm -triple num: 99550 -entity num: 28413 -literal num: 0 -subject num: 14569 -predicate num: 17 -connection num: 7 -\end{verbatim} - -\hyperdef{}{10-gshow}{\paragraph{10. gshow}\label{10-gshow}} - -After starting ghttp, go into gStore/bin and type \texttt{./gshow ip port} to check loaded database. 
- -\begin{verbatim} -[bookug@localhost gStore]$ ./gshow 127.0.0.1 9000 -parameter: ?operation=show -request: http://127.0.0.1:9000/%3Foperation%3Dshow -null--->[HTTP/1.1 200 OK] -Content-Length--->[4] -lubm -\end{verbatim} - -\clearpage - -\part{Advanced} - -\hyperdef{}{chapter05}{\subsection{Chapter 05: Socket API Explanation}\label{chapter05}} - -\textbf{This Chapter guides you to use socket API for accessing gStore, which can be used when the server runs gserver. We also provide HTTP API for ghttp, please see \hyperref[chapter06]{【HTTP API Explanation】}.} - -\hyperdef{}{easy-examples}{\subsubsection{Easy -Examples}\label{easy-examples}} - -We provide JAVA, C++, PHP and Python API for gStore now. Please refer to example -codes in \texttt{api/socket/cpp/example}, \texttt{api/socket/java/example}, \texttt{api/socket/php} and \texttt{api/socket/python/example}. To use the four examples to have a try, please ensure that executables have already been generated. Otherwise, for Java and C++, just type \texttt{make\ APIexample} in the root directory of gStore to compile the codes, as well as API. - -Next, \textbf{start up a gStore server by using \texttt{./gserver} -command.} It is ok if you know a running usable gStore server and try to -connect to it, but notice that \textbf{the server ip and port of server -and client must be matched.}(you don't need to change any thing if using -examples, just by default) Then, for Java and C++ code, you need to compile the example codes -in the directory gStore/api/socket/. We provide a utility to do this, and you -just need to type \texttt{make\ APIexample} in the root directory of -gStore. Or you can compile the codes by yourself, in this case please go -to gStore/api/socket/cpp/example/ and gStore/api/socket/java/example/, respectively. - -Finally, go to the example directory and run the corresponding -executables. For C++, just use \texttt{./example} command to run it. 
And -for Java, use \texttt{make\ run} command or \texttt{java\ -cp\ ../lib/GstoreJavaAPI.jar:.\ JavaAPIExample} to run -it. For PHP, use \texttt{php ./PHPAPIExample}. For python, use \texttt{python ./PythonAPIExample}. All these four executables will connect to a specified gStore server -and do some load or query operations. Be sure that you see the query -results in the terminal where you run the examples, otherwise please go -to \hyperref[chapter11]{Frequently Asked Questions} for help or report -it to us.(the report approach is described in -\hyperref[chapter00]{README}) - -You are advised to read the example code carefully, as well as the -corresponding Makefile. This will help you to understand the API, -specially if you want to write your own programs based on the API -interface. - -\hyperdef{}{api-structure}{\subsubsection{API structure}\label{api-structure}} - -The API of gStore is placed in api/socket/ directory in the root directory of -gStore, whose contents are listed below: - -\begin{itemize} -\item - gStore/api/socket/ - - \begin{itemize} - \item - cpp/ (the C++ API) - - \begin{itemize} - \item - src/ (source code of C++ API, used to build the - lib/libgstoreconnector.a) - - \begin{itemize} - \item - GstoreConnector.cpp (interfaces to interact with gStore server) - \item - GstoreConnector.h - \item - Makefile (compile and build lib) - \end{itemize} - \item - lib/ (where the static lib lies in) - - \begin{itemize} - \item - .gitignore - \item - libgstoreconnector.a (only exist after compiled, you need to - link this lib when you use the C++ API) - \end{itemize} - \item - example/ (small example program to show the basic idea of using - the C++ API) - - \begin{itemize} - \item - CppAPIExample.cpp - \item - Makefile - \end{itemize} - \end{itemize} - \item - java/ (the Java API) - - \begin{itemize} - \item - src/ (source code of Java API, used to build the - lib/GstoreJavaAPI.jar) - - \begin{itemize} - \item - jgsc/GstoreConnector.java (the package which 
you need to import when you use the Java API) - \item - Makefile (compile and build lib) - \end{itemize} - \item - lib/ - - \begin{itemize} - \item - .gitignore - \item - GstoreJavaAPI.jar (only exist after compiled, you need to - include this JAR in your class path) - \end{itemize} - \item - example/ (small example program to show the basic idea of using - the Java API) - - \begin{itemize} - \item - JavaAPIExample.cpp - \item - Makefile - \end{itemize} - \end{itemize} - - \item - php/ (the PHP API) - - \begin{itemize} - \item - GstoreConnector.php (source code of PHP API, you need to include this file when you use the PHP API) - - \item - PHPAPIExample.php (small example program to show the basic idea of using the PHP API) - \end{itemize} - - \item - python/ (the Python API) - \begin{itemize} - \item - src/ (source code of Python API) - \begin{itemize} - \item - GstoreConnector.py (the package which you need to import when you use the Python API) - \end{itemize} - \item - example/ (small example program to show the basic idea of using the Python API) - \begin{itemize} - \item - PythonAPIExample.py - \end{itemize} - \end{itemize} - - \end{itemize} -\end{itemize} - -\hyperdef{}{c-api}{\subsubsection{C++ API}\label{c-api}} - -\hyperdef{}{interface}{\paragraph{Interface}\label{interface}} - -To use the C++ API, please place the phrase -\texttt{\#include\ "GstoreConnector.h"} in your cpp code. Functions in -GstoreConnector.h should be called like below: - -\begin{verbatim} -// initialize the Gstore server's IP address and port. -GstoreConnector gc("127.0.0.1", 3305); -// build a new database by a RDF file. -// note that the relative path is related to gserver. -gc.build("LUBM10", "example/LUBM_10.n3"); -// then you can execute SPARQL query on this database. -std::string sparql = "select ?x where \ -{\ -?x . \ -?y . \ -?x ?y. \ -?z ?y. \ -?z . \ -?z ?w. \ -?w . \ -}"; -std::string answer = gc.query(sparql); -// unload this database. 
-gc.unload("LUBM10"); -// also, you can load some exist database directly and then query. -gc.load("LUBM10"); -// query a SPARQL in current database -answer = gc.query(sparql); -\end{verbatim} - -The original declaration of these functions are as below: - -\begin{verbatim} -GstoreConnector(); -GstoreConnector(string _ip, unsigned short _port); -GstoreConnector(unsigned short _port); -bool load(string _db_name); -bool unload(string _db_name); -bool build(string _db_name, string _rdf_file_path); -string query(string _sparql); -\end{verbatim} - -Notice: - -\begin{enumerate} -\item - When using GstoreConnector(), the default value for ip and port is - 127.0.0.1 and 3305, respectively. -\item - When using build(), the rdf\_file\_path(the second parameter) should - be related to the position where gserver lies in. -\item - Please remember to unload the database you have loaded, otherwise - things may go wrong.(the errors may not be reported!) -\end{enumerate} - -\hyperdef{}{compile}{\paragraph{Compile}\label{compile}} - -You are advised to see gStore/api/socket/cpp/example/Makefile for instructions on how to compile your code with the C++ API. Generally, what you must do is compile your own code to object with header in the C++ API, and link the object with static lib in the C++ API. - -Let us assume that your source code is placed in test.cpp, whose position is \$\{GSTORE\}/gStore/.(if using devGstore as name instead of gStore, then the path is \$\{GSTORE\}/devGstore/ directory first: - -\begin{quote} -Use \texttt{g++\ -c\ -I\$\{GSTORE\}/gStore/api/socket/cpp/src/\ test.cpp\ -o\ test.o} to compile your test.cpp into test.o, relative API header is placed in api/socket/cpp/src/. - -Use \texttt{g++\ -o\ test\ test.o\ -L\$\{GSTORE\}/gStore/api/socket/cpp/lib/\ -lgstoreconnector} to link your test.o with the libgstoreconnector.a(a static lib) in api/socket/cpp/lib/. -\end{quote} - -Then you can type \texttt{./test} to execute your own program, which uses our C++ API. 
It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like. - -\hyperdef{}{java-api}{\subsubsection{Java API}\label{java-api}} - -\hyperdef{}{interface-1}{\paragraph{Interface}\label{interface-1}} - -To use the Java API, please place the phrase -\texttt{import\ jgsc.GstoreConnector;} in your java code. Functions in -GstoreConnector.java should be called like below: - -\begin{verbatim} -// initialize the Gstore server's IP address and port. -GstoreConnector gc = new GstoreConnector("127.0.0.1", 3305); -// build a new database by a RDF file. -// note that the relative path is related to gserver. -gc.build("LUBM10", "example/LUBM_10.n3"); -// then you can execute SPARQL query on this database. -String sparql = "select ?x where " + "{" + -"?x . " + -"?y . " + -"?x ?y. " + -"?z ?y. " + -"?z . " + -"?z ?w. " + -"?w . " + -"}"; -String answer = gc.query(sparql); -//unload this database. -gc.unload("LUBM10"); -//also, you can load some exist database directly and then query. -gc.load("LUBM10");// query a SPARQL in current database -answer = gc.query(sparql); -\end{verbatim} - -The original declaration of these functions are as below: - -\begin{verbatim} -GstoreConnector(); -GstoreConnector(string _ip, unsigned short _port); -GstoreConnector(unsigned short _port); -bool load(string _db_name); -bool unload(string _db_name); -bool build(string _db_name, string _rdf_file_path); -string query(string _sparql); -\end{verbatim} - -Notice: - -\begin{enumerate} -\item - When using GstoreConnector(), the default value for ip and port is - 127.0.0.1 and 3305, respectively. -\item - When using build(), the rdf\_file\_path(the second parameter) should - be related to the position where gserver lies in. -\item - Please remember to unload the database you have loaded, otherwise - things may go wrong.(the errors may not be reported!) 
-\end{enumerate} - -\hyperdef{}{compile-1}{\paragraph{Compile}\label{compile-1}} - -You are advised to see gStore/api/socket/java/example/Makefile for instructions on how to compile your code with the Java API. Generally, what you must do is compile your own code to object with jar file in the Java API. - -Let us assume that your source code is placed in test.java, whose position is \$\{GSTORE\}/gStore/.(if using devGstore as name instead of gStore, then the path is \$\{GSTORE\}/devGstore/ directory first: - -\begin{quote} -Use \texttt{javac\ -cp\ \$\{GSTORE\}/gStore/api/socket/java/lib/GstoreJavaAPI.jar\ test.java} to compile your test.java into test.class with the GstoreJavaAPI.jar(a jar package used in Java) in api/socket/java/lib/. -\end{quote} - -Then you can type \texttt{java\ -cp\ \$\{GSTORE\}/gStore/api/socket/java/lib/GstoreJavaAPI.jar:.\ test} to execute your own program(notice that the ``:.'' in command cannot be neglected), which uses our Java API. It is also advised for you to place relative compile commands in a Makefile, as well as other commands if you like. - -\hyperdef{}{php-api}{\subsubsection{PHP API}\label{php-api}} - - \hyperdef{}{interface-2}{\paragraph{Interface}\label{interface-2}} - - To use the PHP API, please place the phrase -\texttt{include('GstoreConnector,php');} in your php code. Functions in -GstoreConnector.php should be called like below: - - \begin{verbatim} - // initialize the Gstore server's IP address and port. - $gc = new Connector("127.0.0.1", 3305); - // build a new database by a RDF file. - // note that the relative path is related to gserver. - $gc->build("LUBM10", "example/LUBM_10.n3"); - // then you can execute SPARQL query on this database. - $sparql = "select ?x where " + "{" + - "?x . " + - "?y . " + - "?x ?y. " + - "?z ?y. " + - "?z . " + - "?z ?w. " + - "?w . " + - "}"; - $answer = gc->query($sparql); - //unload this database. 
- $gc->unload("LUBM10"); - //also, you can load some exist database directly and then query. - $gc->load("LUBM10");// query a SPARQL in current database - $answer = gc->query(sparql); - \end{verbatim} - - The original declaration of these functions are as below: - - \begin{verbatim} - class Connector { - public function __construct($host, $port); - public function send($data); - public function recv(); - public function build($db_name, $rdf_file_path); - public function load($db_name); - public function unload($db_name); - public function query($sparql); - public function __destruct(); - } - \end{verbatim} - - Notice: - - \begin{enumerate} - \item - When using Connector(), the default value for ip and port is - 127.0.0.1 and 3305, respectively. - \item - When using build(), the rdf\_file\_path(the second parameter) should - be related to the position where gserver lies in. - \item - Please remember to unload the database you have loaded, otherwise - things may go wrong.(the errors may not be reported!) - \end{enumerate} - - \hyperdef{}{run-2}{\paragraph{Run}\label{run-2}} - - You can see gStore/api/socket/php/PHPAPIExample for instructions on how to use PHP API. PHP script doesn't need compiling. You can run PHP file directly or use it in your web project. - - -\hyperdef{}{python-api}{\subsubsection{Python API}\label{python-api}} - - \hyperdef{}{interface-3}{\paragraph{Interface}\label{interface-3}} - - To use the Python API, please place the phrase \texttt{from GstoreConnector import GstoreConnector} in your python code. Functions in GstoreConnector.py should be called like below: - - \begin{verbatim} - // initialize the Gstore server's IP address and port. - gc = GstoreConnector('127.0.0.1', 3305) - // build a new database by a RDF file. - // note that the relative path is related to gserver. - gc.build('LUBM10', 'data/LUBM_10.n3') - // then you can execute SPARQL query on this database. - $sparql = "select ?x where " + "{" + - "?x . " + - "?y . " + - "?x ?y. 
" + - "?z ?y. " + - "?z . " + - - "?z ?w. " + - "?w . " + - "}"; - answer = gc.query(sparql) - //unload this database. - gc.unload('LUBM10') - //also, you can load some exist database directly and then query. - gc.load('LUBM10')// query a SPARQL in current database - answer = gc.query(sparql) - \end{verbatim} - - The original declaration of these functions are as below: - - \begin{verbatim} - class GstoreConnector { - def _connect(self) - def _disconnect(self) - def _send(self, msg): - def _recv(self) - def _pack(self, msg): - def _communicate(f): - def __init__(self, ip='127.0.0.1', port=3305): - @_communicate - def test(self) - @_communicate - def load(self, db_name) - @_communicate - def unload(self, db_name) - @_communicate - def build(self, db_name, rdf_file_path) - @_communicate - def drop(self, db_name) - @_communicate - def stop(self) - @_communicate - def query(self, sparql) - @_communicate - def show(self, _type=False) - } - \end{verbatim} - - Notice: - - \begin{enumerate} - \item - When using GstoreConnector(), the default value for ip and port is - 127.0.0.1 and 3305, respectively. - \item - When using build(), the rdf\_file\_path(the second parameter) should - be related to the position where gserver lies in. - \item - Please remember to unload the database you have loaded, otherwise - things may go wrong.(the errors may not be reported!) - \end{enumerate} - - \hyperdef{}{run-3}{\paragraph{Run}\label{run-3}} - - You are advised to see gStore/api/socket/python/example/PythonAPIExample for examples on how to use python API. Python file doesn't need compiling, and you can run it directly. - -\clearpage - - -\hyperdef{}{chapter06}{\subsection{Chapter 06: HTTP API Explanation}\label{chapter06}} - -\textbf{This chapter provides API for ghttp. Compared with socket API, HTTP API is more stable and more standard, and can maintain connection. 
Socket API cannot guarantee correct transmission, but its network transmission is faster as a result.} - -\hyperdef{}{easy-http-examples}{\subsubsection{Easy Examples}\label{easy-http-examples}} - -We provide JAVA and C++ API for ghttp now. Please see \texttt{api/http/cpp} and \texttt{api/http/java}. To use these examples, please make sure that executables have already been generated. - -Next, \textbf{start up ghttp service by using \texttt{./ghttp} command.} It is ok if you know a running usable ghttp server and try to connect to it. (you don't need to change anything if using -examples, just by default) Then, for Java and C++ code, you need to compile the example codes in the directory gStore/api/http/. We provide a utility to do this, and you just need to type \texttt{make\ APIexample} in the root directory of gStore. Or you can compile the codes by yourself, in this case please go to gStore/api/http/cpp/ and gStore/api/http/java/, respectively. - -Finally, go to the example directory and run the corresponding executables. All these four executables will connect to a specified ghttp server and do some load or query operations. Be sure that you see the query results in the terminal where you run the examples, otherwise please go to \hyperref[chapter11]{Frequently Asked Questions} for help or report it to us.(the report approach is described in \hyperref[chapter00]{README}) - -You are advised to read the example code carefully, as well as the corresponding Makefile. This will help you to understand the API, especially if you want to write your own programs based on the API interface. 
- -\hyperdef{}{http-api-structure}{\subsubsection{API Structure}\label{http-api-structure}} - -The HTTP API of gStore is placed in api/http/ directory in the root directory of gStore, whose contents are listed below: - -\begin{itemize} - \item - gStore/api/http/ - - \begin{itemize} - \item - cpp/ (C++ API) - - \begin{itemize} - \item - client.cpp (source code of C++ API) - \item - client.h - \item - example.cpp (example program to show the basic idea of using the C++ API) - \item - Makefile (compile) - - \end{itemize} - - \item - java/ (Java API) - \begin{itemize} - \item - src/ (source code of Java API, used to build the - lib/GstoreJavaAPI.jar) - - \begin{itemize} - \item - jgsc/GstoreConnector.java (the package which you need to import when you use the Java API) - \item - Makefile (compile and build lib) - \end{itemize} - \item - lib/ - - \begin{itemize} - \item - .gitignore - \item - GstoreJavaAPI.jar (only exist after compiled, you need to - include this JAR in your class path) - \end{itemize} - \item - example/ (small example program to show the basic idea of using - the Java API) - - \begin{itemize} - \item - JavaAPIExample.cpp - \item - Makefile - \end{itemize} - \end{itemize} - - \end{itemize} -\end{itemize} - -\hyperdef{}{http-c-api}{\subsubsection{C++ API}\label{http-c-api}} - -\hyperdef{}{http-interface}{\paragraph{Interface}\label{http-interface}} - - -To use the C++ API, please place the phrase \texttt{\#include\ "Client.h"} in your cpp code. Functions in Client.h should be called like below: - -\begin{verbatim} -CHttpClient hc; -string res; -int ret; -// build a new database by a RDF file. -ret = hc.Get("127.0.0.1:9000/build/lumb/data/LUBM_10.n3", res); -cout< . " + -"?y . " + -"?x ?y. " + -"?z ?y. " + -"?z . " + -"?z ?w. " + -"?w . " + -"}"; -String answer = gc.query(sparql); -//unload this database. -gc.unload("LUBM10"); -//also, you can load some exist database directly and then query. 
-gc.load("LUBM10");// query a SPARQL in current database -answer = gc.query(sparql); -gc.unload("LUBM10"); -\end{verbatim} - -The original declaration of these functions are as below: - -\begin{verbatim} -GstoreConnector(); -GstoreConnector(int _port); -GstoreConnector(String _ip, int _port); -boolean load(String _db_name); -boolean unload(String _db_name); -boolean build(String _db_name, String _rdf_file_path); -boolean drop(String _db_name); -String query(String _sparql); -String show(); -String show(boolean _type); -\end{verbatim} - -\clearpage - - -\hyperdef{}{chapter07}{\subsection{Chapter 07: Use gStore in Web}\label{chapter07}} - - \textbf{This Chapter provides a specific example on how to use our API in a web project.} - - \hyperdef{}{example}{\subsubsection{Example}\label{example}} - - Now you have the basic idea on how to use our APIs to connect gStore. Yet you might be still a little confused. Here we provide a simple demo to show you what to do explicitly. - - Let's say, you need to use gStore in a web project. PHP is a popular general-purpose scripting language that is especially suited to web development. So, using our PHP API can meet your requirements. Here is what we implement: http://59.108.48.18/Gstore/form.php. - - First, get your web server ready so it can run PHP files. We won't give detailed instructions on this step here. You can easily google it according to your web server(for example, Apache or Nginx, etc.) - - Next, go to your web document root(usually in /var/www/html or apache/htdocs, you can check it in config file), and create a folder named "Gstore". Then copy the GstoreConnector.php file into it. Create a "PHPAPI.php" file. 
Edit it like below: - - \begin{verbatim} - load($dbname); - $query = new Connector($host, $port); - $result = $query->query($sparql); - switch ($format) { - case 1: - $array = explode("<", $result); - $html = '"; - for ($i = 1; $i < count($array); $i++) { - $href = str_replace(">", "", $array[$i]); - $html.= ''; - } - $html.= '
    ' . - $array[0] . "
    ' . - $href . '
    '; - echo $html; - exit; - - case 2: - $filename = 'result.txt'; - header("Content-Type: application/octet-stream"); - header('Content-Disposition: attachment; - filename="' . $filename . '"'); - echo $result; - exit; - - case 3: - $filename = 'result.csv'; - header("Content-Type: application/octet-stream"); - header('Content-Disposition: attachment; - filename="' . $filename . '"'); - $array = explode("<", $result); - echo $array[0]; - for ($i = 1; $i < count($array); $i++) { - $href = str_replace(">", "", $array[$i]); - echo $href; - } - exit; - } - ?> - \end{verbatim} - - This PHP file gets three parameters from a web page: the database name, the SPARQL query and the output format. Then it uses our PHP API to connect to gStore and run the query. Finally, the "switch" part gives the output. - - After that, we need a web page to collect that information (database name, SPARQL query and output format). We create an HTML file and use a form to do it, as shown below: - \begin{verbatim} -
    -
    -

    Gstore SPARQL Query Editor

    -

    -
    -
      -
    • - -
      - - -
      -
    • - -
    • - -
      - -
      -
    • - -
    • - -
      - -
      - -
    • - - -
    • -
    -
    - \end{verbatim} - - As you can see in the code, we use a element to get the databasename, and for sparql, - - -
  • - -
  • - -
    - -
    -
  • - -
  • - -
    - -
    - -
  • - - -
  • - - -\end{verbatim} - -你可以在代码中看到,我们用元素得到数据库名,得到sparql,