From 68ba02cd370f6c1a384e6d91c07b1acac35ff33f Mon Sep 17 00:00:00 2001 From: bookug Date: Wed, 19 Sep 2018 01:10:10 +0800 Subject: [PATCH] feat: lock query cache; add files to record unsolved bugs; getFinalResult is locked in Database::query(); --- Database/Database.cpp | 20 +++++++----- Query/QueryCache.cpp | 6 ++++ Query/QueryCache.h | 2 ++ StringIndex/StringIndex.cpp | 64 ++++++++++++++++++------------------- StringIndex/StringIndex.h | 2 +- docs/BUGS.md | 23 +++++++++++++ 6 files changed, 76 insertions(+), 41 deletions(-) create mode 100644 docs/BUGS.md diff --git a/Database/Database.cpp b/Database/Database.cpp index 94065d7..1c081d9 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -1359,12 +1359,12 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) { return -101; } - cout<<"read lock acquired"<stringindex; - tmpsi.emptyBuffer(); - general_evaluation.setStringIndexPointer(&tmpsi); + //StringIndex tmpsi = *this->stringindex; + //tmpsi.emptyBuffer(); + //general_evaluation.setStringIndexPointer(&tmpsi); // this->debug_lock.lock(); bool query_ret = general_evaluation.doQuery(); @@ -1375,6 +1375,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) // this->debug_lock.unlock(); long tv_bfget = Util::get_cur_time(); + //NOTICE: this lock lock ensures that StringIndex is visited sequentially this->getFinalResult_lock.lock(); if (trie == NULL) { @@ -1386,7 +1387,6 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) } trie->LoadDictionary(); } - general_evaluation.getFinalResult(_result_set); this->getFinalResult_lock.unlock(); long tv_afget = Util::get_cur_time(); @@ -1396,7 +1396,7 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp) need_output_answer = true; //general_evaluation.setNeedOutputAnswer(); - tmpsi.clear(); + //tmpsi.clear(); pthread_rwlock_unlock(&(this->update_lock)); } //Update @@ -1412,6 +1412,7 @@ Database::query(const string _query, 
ResultSet& _result_set, FILE* _fp) cout<<"unable to write lock"<allocEntityID(); +#ifdef DEBUG cout << "this is a new subject: " << sub << " " << subid << endl; +#endif this->sub_num++; this->kvstore->setIDByEntity(sub, subid); this->kvstore->setEntityByID(subid, sub); @@ -3194,7 +3197,9 @@ Database::insert(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, { is_new_obj = true; objid = this->allocEntityID(); +#ifdef DEBUG cout << "this is a new object: " << obj << " " << objid << endl; +#endif //this->obj_num++; this->kvstore->setIDByEntity(obj, objid); this->kvstore->setEntityByID(objid, obj); @@ -4026,7 +4031,6 @@ Database::remove(const TripleWithObjType* _triples, TYPE_TRIPLE_NUM _triple_num, return valid_num; } -//TODO: check and improve the backup program bool Database::backup() { diff --git a/Query/QueryCache.cpp b/Query/QueryCache.cpp index 4334c9c..49b3a9e 100755 --- a/Query/QueryCache.cpp +++ b/Query/QueryCache.cpp @@ -95,6 +95,8 @@ bool QueryCache::getMinimalRepresentation(const Patterns &triple_pattern, Patter bool QueryCache::tryCaching(const Patterns &triple_pattern, const TempResult &temp_result, int eva_time) { + lock_guard (this->query_cache_lock); //when quit this scope the lock will be released + Patterns minimal_repre; map minimal_mapping; @@ -176,8 +178,12 @@ bool QueryCache::tryCaching(const Patterns &triple_pattern, const TempResult &te return true; } +//NOTICE: in this function we also modify some contents, so we must use mutex instead of rwlock bool QueryCache::checkCached(const Patterns &triple_pattern, const Varset &varset, TempResult &temp_result) { + //this->query_cache_lock.lock(); + lock_guard (this->query_cache_lock); //when quit this scope the lock will be released + Patterns minimal_repre; map minimal_mapping; diff --git a/Query/QueryCache.h b/Query/QueryCache.h index c9449d6..01a5172 100755 --- a/Query/QueryCache.h +++ b/Query/QueryCache.h @@ -23,6 +23,8 @@ class QueryCache const long long ITEM_MEMORY_LIMIT = 
1000000LL; const long long TOTAL_MEMORY_LIMIT = 100000000LL; + mutex query_cache_lock; + int time_now; long long total_memory_used; diff --git a/StringIndex/StringIndex.cpp b/StringIndex/StringIndex.cpp index 771243b..341d7ac 100644 --- a/StringIndex/StringIndex.cpp +++ b/StringIndex/StringIndex.cpp @@ -91,33 +91,33 @@ bool StringIndexFile::randomAccess(unsigned id, string *str, bool real) long offset = (*this->index_table)[id].offset; unsigned length = (*this->index_table)[id].length; - if(id == 9) - { - cout<<"check: "<value_file, offset, SEEK_SET); fread(this->buffer, sizeof(char), length, this->value_file); //pread(fileno(value_file), this->buffer, sizeof(char)*length, offset); this->buffer[length] = '\0'; *str = string(this->buffer); - if(id == 9) - { - cout<<"check: "<<*str<Uncompress(*str, str->length());//Uncompresss } - if(id == 9) - { - cout<<"check: "<<*str<type == Predicate) cout << "Predicate StringIndex "; - long current_offset = 0; + //long current_offset = 0; if ((max_end - min_begin) / 800000L < (long)this->request.size()) { cout << "sequence access." 
<< endl; @@ -160,8 +160,8 @@ void StringIndexFile::trySequenceAccess(bool real) char *block = new char[MAX_BLOCK_SIZE]; long current_block_begin = min_begin; - //fseek(this->value_file, current_block_begin, SEEK_SET); - current_offset = current_block_begin; + fseek(this->value_file, current_block_begin, SEEK_SET); + //current_offset = current_block_begin; while (current_block_begin < max_end) { @@ -170,14 +170,14 @@ void StringIndexFile::trySequenceAccess(bool real) if (current_block_end <= this->request[pos].offset) { current_block_begin = this->request[pos].offset; - //fseek(this->value_file, current_block_begin, SEEK_SET); - current_offset = current_block_begin; + fseek(this->value_file, current_block_begin, SEEK_SET); + //current_offset = current_block_begin; current_block_end = min(current_block_begin + MAX_BLOCK_SIZE, max_end); } - //fread(block, sizeof(char), current_block_end - current_block_begin, this->value_file); - pread(fileno(this->value_file), block, sizeof(char)*(current_block_end-current_block_begin), current_offset); - current_offset += sizeof(char)*(current_block_end-current_block_begin); + fread(block, sizeof(char), current_block_end - current_block_begin, this->value_file); + //pread(fileno(this->value_file), block, sizeof(char)*(current_block_end-current_block_begin), current_offset); + //current_offset += sizeof(char)*(current_block_end-current_block_begin); while (pos < (int)this->request.size()) { @@ -292,13 +292,13 @@ void StringIndexFile::change(unsigned id, KVstore &kv_store) fseek(this->value_file, (*this->index_table)[id].offset, SEEK_SET); fwrite(str.c_str(), sizeof(char), (*this->index_table)[id].length, this->value_file); - if(id == 9) - { - cout<<"check in change():9 "<Uncompress(*str) diff --git a/StringIndex/StringIndex.h b/StringIndex/StringIndex.h index 3265661..914486c 100644 --- a/StringIndex/StringIndex.h +++ b/StringIndex/StringIndex.h @@ -149,7 +149,7 @@ class StringIndex void flush() { - //TODO: flush updates to disk to 
avoid missing + //nothing to do here } void emptyBuffer() diff --git a/docs/BUGS.md b/docs/BUGS.md new file mode 100644 index 0000000..9a95e54 --- /dev/null +++ b/docs/BUGS.md @@ -0,0 +1,23 @@ +**This file maintains details of the bugs that are currently unsolved.** + +--- + +#### BUG_StringIndex_pread + +StringIndex::randomAccess() + +StringIndex::trySequenceAccess() + +When we insert a triple via ghttp and query this triple immediately, we find that the answer is wrong. +When we run this query several times, we get a different answer each time. +Sometimes, we get garbled text. +For the same reason, if we use bin/gquery db to enter the gquery console, then insert and query within this console, we get similar errors. +Amazingly, if we quit the console, restart it, and run this query again, we get the correct answer! + +The problem appeared after we replaced fread in StringIndex with pread, to support concurrent queries. +The root cause has not been found yet. +As a result, we changed it back to fread, and use a lock for the StringIndex to block concurrent reads. +This is not supposed to cause a great loss in performance, because all operations to a single disk will be executed sequentially by the disk controller. + +--- +