From da5a84b23ae7c822390346847cfa472b5c8271ac Mon Sep 17 00:00:00 2001 From: qzxx <1181955272@qq.com> Date: Mon, 19 Oct 2015 16:48:02 +0800 Subject: [PATCH] fix log bug;better the gquery console;add test author:zengli --- Database/Database.cpp | 17 ++ Database/Database.h | 7 +- KVstore/Makefile | 4 +- KVstore/storage/Storage.cpp | 3 + KVstore/tree/Tree.cpp | 2 + Makefile | 11 +- Query/BasicQuery.cpp | 1 + Query/ResultSet.cpp | 10 +- README.md | 28 --- main/gquery.cpp | 65 +++++-- main/gtest.cpp | 331 ++++++++++++++++++++++++++++++++++++ 11 files changed, 429 insertions(+), 50 deletions(-) create mode 100644 main/gtest.cpp diff --git a/Database/Database.cpp b/Database/Database.cpp index f8ad1b4..2158b3b 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -36,11 +36,28 @@ Database::Database(std::string _name){ } } +void Database::release(FILE* fp0) +{ + fprintf(fp0, "begin to delete DB!\n"); + fflush(fp0); + delete this->vstree; + fprintf(fp0, "ok to delete vstree!\n"); + fflush(fp0); + delete this->kvstore; + fprintf(fp0, "ok to delete kvstore!\n"); + fflush(fp0); + fclose(fp_debug); + fp_debug = NULL; //debug: when multiple databases + fprintf(fp0, "ok to delete DB!\n"); + fflush(fp0); +} + Database::~Database() { delete this->vstree; delete this->kvstore; fclose(fp_debug); + fp_debug = NULL; //debug: when multiple databases } bool Database::load() diff --git a/Database/Database.h b/Database/Database.h index c218546..7802523 100644 --- a/Database/Database.h +++ b/Database/Database.h @@ -32,10 +32,10 @@ class Database{ public: - static const bool debug_1 = false; - static const bool debug_2 = false; + static const bool debug_1 = true; + static const bool debug_2 = true; static const bool only_sub2idpre2id = true; - static const bool debug_vstree = false; + static const bool debug_vstree = true; static const int internal = 100*1000; static FILE* fp_debug; static void log(std::string _str); @@ -56,6 +56,7 @@ public: static const int STRING_MODE = 1; 
static const int ID_MODE = 2; Database(std::string _name); + void release(FILE* fp0); ~Database(); bool load(); diff --git a/KVstore/Makefile b/KVstore/Makefile index 8588e10..935f7aa 100644 --- a/KVstore/Makefile +++ b/KVstore/Makefile @@ -8,7 +8,7 @@ all: $(obj) $(objdir)Tree.o: tree/Tree.cpp $(CC) $(CFLAGS) tree/Tree.cpp -o $(objdir)Tree.o $(objdir)Storage.o: storage/Storage.cpp - $(CC) $(CFLAGS) storage/Storage.cpp -o $(objdir)Storage.o + $(CC) $(CFLAGS) storage/Storage.cpp -o $(objdir)Storage.o -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE $(objdir)Node.o: node/Node.cpp $(CC) $(CFLAGS) node/Node.cpp -o $(objdir)Node.o $(objdir)IntlNode.o: node/IntlNode.cpp @@ -24,7 +24,7 @@ $(objdir)Hash.o: hash/Hash.cpp $(objdir)Heap.o: heap/Heap.cpp $(CC) $(CFLAGS) heap/Heap.cpp -o $(objdir)Heap.o $(objdir)RangeValue.o: rangevalue/RangeValue.cpp - $(CC) $(CFLAGS) rangevalue/RangeValue.cpp -o $(objdir)RangeValue.o + $(CC) $(CFLAGS) rangevalue/RangeValue.cpp -o $(objdir)RangeValue.o -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE .PHONY: clean clean: #nothing to do now diff --git a/KVstore/storage/Storage.cpp b/KVstore/storage/Storage.cpp index 1263c19..0da6c4c 100644 --- a/KVstore/storage/Storage.cpp +++ b/KVstore/storage/Storage.cpp @@ -610,6 +610,7 @@ Storage::handler(unsigned _needmem) //>0 Storage::~Storage() { //release heap and freelist... 
+ printf("now to release the knstore!\n"); BlockInfo* bp = this->freelist; BlockInfo* next; while(bp != NULL) @@ -618,7 +619,9 @@ Storage::~Storage() delete bp; bp = next; } + printf("already empty the freelist!\n"); delete this->minheap; + printf("already empty the buffer heap!\n"); fclose(this->treefp); #ifdef DEBUG fclose(Util::logsfp); diff --git a/KVstore/tree/Tree.cpp b/KVstore/tree/Tree.cpp index 2f7d9ac..24df3c1 100644 --- a/KVstore/tree/Tree.cpp +++ b/KVstore/tree/Tree.cpp @@ -495,6 +495,7 @@ Tree::range_query(const TBstr* _key1, const TBstr* _key2) bool Tree::save() //save the whole tree to disk { + printf("now to save tree!\n"); if(TSM->writeTree(this->root)) return true; else @@ -520,6 +521,7 @@ Tree::~Tree() { //delete VALUES; delete TSM; + printf("already empty the buffer, now to delete all nodes in tree!\n"); //recursively delete each Node release(root); } diff --git a/Makefile b/Makefile index 7ff0512..8ee573b 100755 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ objfile=$(objdir)Bstr.o $(objdir)Database.o $(objdir)KVstore.o $(objdir)SPARQLqu inc=-I./tools/libantlr3c-3.4/ -I./tools/libantlr3c-3.4/include -all: lib_antlr btree gload gquery gserver gclient +all: lib_antlr btree gload gquery gserver gclient gtest btree: $(MAKE) -C KVstore @@ -22,13 +22,15 @@ gload: $(objdir)gload.o $(objfile) $(CC) -g -o gload $(objdir)gload.o $(objfile) lib/libantlr.a gquery: $(objdir)gquery.o $(objfile) - $(CC) -g -o gquery $(objdir)gquery.o $(objfile) lib/libantlr.a + $(CC) -lreadline -ltermcap -g -o gquery $(objdir)gquery.o $(objfile) lib/libantlr.a gserver: $(objdir)gserver.o $(objfile) $(CC) -g -o gserver $(objdir)gserver.o $(objfile) lib/libantlr.a gclient: $(objdir)gclient.o $(objfile) $(CC) -g -o gclient $(objdir)gclient.o $(objfile) lib/libantlr.a +gtest: $(objdir)gtest.o $(objfile) + $(CC) -g -o gtest $(objdir)gtest.o $(objfile) lib/libantlr.a $(objdir)gload.o: main/gload.cpp $(CC) $(CFLAGS) main/gload.cpp $(inc) -L./lib lib/libantlr.a -o $(objdir)gload.o 
@@ -41,6 +43,8 @@ $(objdir)gserver.o: main/gserver.cpp $(objdir)gclient.o: main/gclient.cpp $(CC) $(CFLAGS) main/gclient.cpp $(inc) -o $(objdir)gclient.o +$(objdir)gtest.o: main/gtest.cpp + $(CC) $(CFLAGS) main/gtest.cpp $(inc) -o $(objdir)gtest.o $(objdir)Bstr.o: Bstr/Bstr.cpp Bstr/Bstr.h $(CC) $(CFLAGS) Bstr/Bstr.cpp $(inc) -o $(objdir)Bstr.o @@ -109,5 +113,6 @@ lib_antlr: clean: $(MAKE) -C KVstore clean - rm -rf gload gquery gserver gclient $(objdir)/*.o + rm -rf gload gquery gtest gserver gclient $(objdir)/*.o lib/libantlr.a + rm -rf *.log *.nt tools/libantlr3c-3.4 diff --git a/Query/BasicQuery.cpp b/Query/BasicQuery.cpp index e8073e5..6766066 100644 --- a/Query/BasicQuery.cpp +++ b/Query/BasicQuery.cpp @@ -314,6 +314,7 @@ void BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const std::vectorvar_name[i] << ", " << i << " " << this->var_str2id[this->var_name[i]] << "]\t"; } cout << endl; + cout << "before new IDList!" << endl; //just for debug this->candidate_list = new IDList[this->graph_var_num]; diff --git a/Query/ResultSet.cpp b/Query/ResultSet.cpp index ed04bd6..be2d6a7 100644 --- a/Query/ResultSet.cpp +++ b/Query/ResultSet.cpp @@ -46,7 +46,7 @@ string ResultSet::to_str() { if(this->ansNum == 0) { - return "[empty result]"; + return "[empty result]\n"; } @@ -63,15 +63,17 @@ string ResultSet::to_str() for(int i = 0; i < this->ansNum; i ++) { + //printf("to_str: well!\n"); //just for debug! _buf << this->answer[i][0]; for(int j = 1; j < this->select_var_num; j ++) { - //_buf << "\t" << this->answer[i][j]; - _buf << " " << this->answer[i][j]; + //there may be ' ' in spo, but no '\t' + _buf << "\t" << this->answer[i][j]; + //_buf << " " << this->answer[i][j]; } _buf << "\n"; } - + //printf("to_str: ends!\n"); //just for debug! return _buf.str(); } diff --git a/README.md b/README.md index ea213c9..1bec306 100644 --- a/README.md +++ b/README.md @@ -135,32 +135,4 @@ Note that each command ends with ";". ####5. 
API We provide JAVA and C++ API for accessing gStore now. Please refer to example codes in `api/cpp/example` and `api/java/example`. -### System Update Log -####Oct 15, 2015. -waiting to add - -####Sep 25, 2015. - -We implement the version of BTree, and replace the old one. - -After testing on DBpedia, LUBM, and WatDiv benchmark, we conclude that the new BTree performs more efficient than -the old version. For the same triple file, the new version spends shorter time on executing gload command. - -Besides, the new version can handle the long literal objects ocasion efficiently, while triples whose object's length exceeds 4096 bytes result in frequent inefficent split operations on the old version BTree. - -####Feb 2, 2015 - -We modify the RDF parser and SPARQL parser. - -Under the new RDF parser, we also redesign the encode strategy, which reduces RDF file scanning times. - -Now we can parse the standard SPARQL v1.1 grammar correctly, and can support basic graph pattern(BGP) SPARQL queries written by this standard grammar. - -####Dec 11, 2014. - -We add API for C/CPP and JAVA. - -####Nov 20, 2014. - -We share our gStore2.0 code as an open-source project under BSD license on github. diff --git a/main/gquery.cpp b/main/gquery.cpp index 570a2ba..e4e0702 100644 --- a/main/gquery.cpp +++ b/main/gquery.cpp @@ -4,13 +4,20 @@ * Created on: 2014-7-31 * Author: liyouhuan */ -#include +#include +#include +#include +#include +#include +#include #include "../Database/Database.h" #include "../util/util.h" using namespace std; -std::string getQueryFromFile(const char* _file_path) +//WARN:cannot support soft links! +std::string +getQueryFromFile(const char* _file_path) { char buf[10000]; std::string query_file; @@ -36,10 +43,12 @@ std::string getQueryFromFile(const char* _file_path) /* * [0]./gquery [1]db_folder_path */ -int main(int argc, char * argv[]) +int +main(int argc, char * argv[]) { cout << "gquery..." 
<< endl; - if(argc < 2){ + if(argc < 2) + { cerr << "error: lack of DB_store to be queried" << endl; return 0; } @@ -99,17 +108,52 @@ int main(int argc, char * argv[]) } // read query file path from terminal. + // BETTER: sighandler ctrl+C/D/Z + char *buf, prompt[] = "gsql>"; + //const int commands_num = 3; + char commands[][20] = {"help", "quit", "sparql"}; + printf("Type `help` for information of all commands\n"); + rl_bind_key('\t', rl_complete); while(true) { - cout << "please input query file path:" << endl; - + //cout << "please input query file path:" << endl; + buf = readline(prompt); + if(buf == NULL) + continue; + else + add_history(buf); + if(strcmp(buf, "help") == 0) + { + //print commands message + printf("help - print commands message\n"); + printf("quit - quit the console normally\n"); + printf("sparql - load query from the second argument\n"); + continue; + } + else if(strcmp(buf, "quit") == 0) + break; + else if(strncmp(buf, "sparql", 6) != 0) + { + printf("unknown commands\n"); + continue; + } std::string query_file; - cin >> query_file; + //cin >> query_file; - string query = getQueryFromFile(query_file.c_str()); + //string query = getQueryFromFile(query_file.c_str()); + //BETTER:build a parser for this console + char* p = buf + strlen(buf) - 1; + while(*p == ' ' || *p == '\t') //set the end of path + p--; + *(p+1) = '\0'; + p = buf + 6; + while(*p == ' ' || *p == '\t') //acquire the start of path + p++; + string query = getQueryFromFile(p); if (query.empty()) { + free(buf); continue; } @@ -121,8 +165,9 @@ int main(int argc, char * argv[]) _db.query(query, _rs); //test... 
-// std::string answer_file = query_file+".out"; -// util::save_to_file(answer_file.c_str(), _rs.to_str()); + //std::string answer_file = query_file+".out"; + //util::save_to_file(answer_file.c_str(), _rs.to_str()); + free(buf); } return 0; 
diff --git a/main/gtest.cpp b/main/gtest.cpp
new file mode 100644
index 0000000..4c8e158
--- /dev/null
+++ b/main/gtest.cpp
@@ -0,0 +1,350 @@
+/*=============================================================================
+# Filename: gtest.cpp
+# Author: syzz
+# Mail: 1181955272@qq.com
+# Last Modified: 2015-09-02 00:04
+# Description: load index once and query, there are several ways to use this program:
+1. ./gtest                     test all datasets and corresponding queries
+2. ./gtest --help              simplified as -h, will print the help message
+3. ./gtest -f DS_PATH          load/test a specified dataset, with all corresponding queries
+4. ./gtest -d FD_PATH          load/test a dataset folder(like WatDiv/), with all corresponding queries
+5. ./gtest -q DB_PATH q1 q2... test a loaded database with given queries(no limit to db and query)
+=============================================================================*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <string>
+#include <fstream>
+#include <sstream>
+#include "../Database/Database.h"
+#include "../util/util.h"
+
+using namespace std;
+
+#define NUM 4
+
+char line1[] = "--------------------------------------------------";
+char line2[] = "##################################################";
+string path = "/media/wip/common/data/";
+string db[NUM] = {"WatDiv/", "LUBM/", "DBpedia/", "Yago/"};
+
+//wrap s in single quotes so that the shell takes it as one literal word;
+//an embedded ' is written as '\'' (names with & " $ ` or spaces stay intact)
+string
+shell_quote(const string& s)
+{
+	string out = "'";
+	for(size_t i = 0; i < s.size(); ++i)
+	{
+		if(s[i] == '\'')
+			out += "'\\''";
+		else
+			out += s[i];
+	}
+	out += "'";
+	return out;
+}
+
+//not achieved, so do not use
+void
+build_logs()
+{
+	system("rm -rf result.log/");
+	system("rm -rf time.log/");
+	system("mkdir result.log");
+	system("mkdir time.log");
+	string cmd;
+	for(int i = 0; i < NUM; ++i)
+	{
+		cmd = string("mkdir ") + shell_quote("result.log/" + db[i]);
+		system(cmd.c_str());
+		cmd = string("mkdir ") + shell_quote("time.log/" + db[i]);
+		system(cmd.c_str());
+	}
+}
+
+bool	//true only if p ends with the suffix p0; case sensitive
+judge(const char* p, const char* p0)
+{
+	size_t len = strlen(p), len0 = strlen(p0);
+	//a name shorter than the suffix can not end with it (e.g. "nt" vs ".nt")
+	if(len < len0)
+		return false;
+	return strcmp(p + (len - len0), p0) == 0;
+}
+
+//read the whole query file; returns "" if it can not be opened or has no lines
+string
+getQueryFromFile(const char* path)
+{
+	ifstream fin(path);
+	if(!fin)
+	{
+		printf("can not open %s\n", path);
+		return "";
+	}
+	//getline on a std::string has no length limit, and testing the read itself
+	//(instead of eof()) ends the loop on any stream error
+	string line;
+	stringstream ss;
+	while(getline(fin, line))
+	{
+		ss << line << "\n";
+	}
+	fin.close();
+	return ss.str();
+}
+
+void
+help()
+{
+	printf(
+		"/*=============================================================================\n"
+		"# Filename: gtest.cpp\n"
+		"# Author: syzz\n"
+		"# Mail: 1181955272@qq.com\n"
+		"# Description: load index once and query, there are several ways to use this program:\n"
+		"1. ./gtest                     test all datasets and corresponding queries\n"
+		"2. ./gtest --help              simplified as -h, will print the help message\n"
+		"3. ./gtest -f DS_PATH          load/test a specified dataset, with all corresponding queries\n"
+		"4. ./gtest -d FD_PATH          load/test a dataset folder(like WatDiv/), with all corresponding queries\n"
+		"5. ./gtest -q DB_PATH q1 q2... test a loaded database with given queries(no limit to db and query)\n"
+		"=============================================================================*/\n"
+	);
+}
+
+//log files opened in main: fp0 = run.log, fp1 = time.log, fp2 = result.log
+FILE *fp0 = NULL, *fp1 = NULL, *fp2 = NULL;
+
+void	//single dataset, single query
+runSS(Database* _db, string qf)	//given a query file
+{
+	string query = getQueryFromFile(qf.c_str());
+	if(query.empty())
+	{
+		fprintf(fp0, "this query is empty:\n%s\n", qf.c_str());
+		fflush(fp0);
+		return;
+	}
+	fprintf(fp0, "%s\n", qf.c_str());
+	fflush(fp0);
+	ResultSet _rs;
+	long tv_begin, tv_final;
+	tv_begin = util::get_cur_time();
+	_db->query(query, _rs);
+	tv_final = util::get_cur_time();
+	//print to logs: elapsed time goes to fp1, the answers go to fp2
+	fprintf(fp1, "%s\n%ld\n%s\n", qf.c_str(), tv_final - tv_begin, line1);	//time is standardized as ms
+	fprintf(fp2, "%s\n%s%s\n", qf.c_str(), _rs.to_str().c_str(), line1);
+	fflush(fp0);
+	fflush(fp1);
+	fflush(fp2);
+}
+
+void	//single dataset, multi queries
+runSM(Database* _db, string qd)	//given a query directory, end with /
+{
+	DIR* dp2 = opendir(qd.c_str());
+	if(dp2 == NULL)	//a missing folder must not reach readdir()
+	{
+		fprintf(fp0, "can not open query folder: %s\n", qd.c_str());
+		fflush(fp0);
+		return;
+	}
+	struct dirent* p2;
+	//finish all queries, only files ending with .sql are run
+	while((p2 = readdir(dp2)) != NULL)
+	{
+		if(!judge(p2->d_name, ".sql"))
+			continue;
+		string file = qd + string(p2->d_name);
+		runSS(_db, file);
+	}
+	fprintf(fp0, "finish all queries!\n");
+	fflush(fp0);
+	closedir(dp2);
+	fprintf(fp0, "ok to close dp2!\n");
+}
+
+void	//multi datasets, multi queries
+runMM(string df)	//given dataset folder, end with /
+{
+	string s1 = df + string("database/");
+	string s2 = df + string("query/");
+	DIR* dp1 = opendir(s1.c_str());
+	if(dp1 == NULL)
+	{
+		fprintf(fp0, "can not open dataset folder: %s\n", s1.c_str());
+		fflush(fp0);
+		return;
+	}
+	struct dirent* p1;
+	string cmd;
+	while((p1 = readdir(dp1)) != NULL)
+	{
+		if(!judge(p1->d_name, ".nt"))
+			continue;
+		string db_name = string(p1->d_name);
+		//ask the kernel to drop its caches before each dataset (the write fails silently without root)
+		system("echo 3 > /proc/sys/vm/drop_caches");
+		//build the database index first
+		string db_folder = s1 + db_name;
+		fprintf(fp0, "%s\n", db_folder.c_str());
+		//NOTICE: every file name is shell-quoted so that names with &(etc...) will work ok!
+		cmd = string("./gload ") + shell_quote(db_name) + " " + shell_quote(db_folder) + " > " + shell_quote("gload_" + db_name + ".log");
+		fprintf(fp0, "%s\n", cmd.c_str());
+		system(cmd.c_str());
+		fprintf(fp0, "ok to load database!\n");
+		//then load the index
+		{
+			Database _db(db_name);
+			if(_db.load())
+			{
+				fprintf(fp0, "ok to load index!\n");
+				//print to logs
+				fprintf(fp1, "%s\n%s\n", db_name.c_str(), line1);
+				fprintf(fp2, "%s\n%s\n", db_name.c_str(), line1);
+
+				runSM(&_db, s2);
+
+				fflush(fp0);
+				fprintf(fp1, "%s\n", line2);
+				fprintf(fp2, "%s\n", line2);
+			}
+			else
+			{
+				fprintf(fp0, "fail to load index of %s\n", db_name.c_str());
+			}
+			//NOTICE: ~Database runs at the end of this scope, before the folder is removed;
+			//do not call release() as well, it deletes the same members (double free)
+		}
+		cmd = string("rm -rf -- ") + shell_quote(db_name);
+		system(cmd.c_str());
+		fprintf(fp0, "ok to remove %s\n", db_name.c_str());
+		fflush(fp0);
+		fflush(fp1);
+		fflush(fp2);
+	}
+	closedir(dp1);
+}
+
+void
+clean()
+{
+	//it is ok to check like this here!
+	//in other situations, notice that fp is not null after closed!
+	FILE** logs[3] = {&fp0, &fp1, &fp2};
+	for(int i = 0; i < 3; ++i)
+	{
+		if(*logs[i] != NULL)
+		{
+			fclose(*logs[i]);
+			*logs[i] = NULL;
+		}
+	}
+}
+
+void
+EXIT(int ret)
+{
+	clean();
+	exit(ret);
+}
+
+//open a log file with mode "w+"; quit the program if it can not be opened
+FILE*
+open_log(const char* name)
+{
+	FILE* fp = fopen(name, "w+");
+	if(fp == NULL)
+	{
+		printf("open error!\n");
+		EXIT(1);
+	}
+	return fp;
+}
+
+int
+main(int argc, const char* argv[])
+{
+	//BETTER: enable user to assign the files for log
+	fp0 = open_log("run.log");
+	//build_logs();
+	fp1 = open_log("time.log");
+	fp2 = open_log("result.log");
+
+	switch(argc)
+	{
+	case 1:
+	{
+		help();
+		printf("this command will do a complete test for Gstore, do you want to continue?YN\n");
+		char c;
+		int tries = 0;
+		//scanf returns EOF (which is non-zero) when stdin ends, so test for exactly 1 item read
+		while(scanf("%c", &c) == 1)
+		{
+			if(c == 'Y' || c == 'y')
+			{
+				for(int j = 0; j < NUM; ++j)
+				{
+					runMM(path + db[j]);
+				}
+				break;
+			}
+			else if(c == 'N' || c == 'n')
+				EXIT(0);
+			tries++;
+			if(tries > 10)	//try no more than 10 times
+				EXIT(1);
+		}
+		break;
+	}
+	case 2:
+		if(strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
+		{
+			help();
+			EXIT(0);
+		}
+		else
+			EXIT(1);
+		break;	//not reached: both branches exit
+	case 3:
+		if(strcmp(argv[1], "-f") == 0)	//this file must be in db[]/database/, end with ".nt"
+		{
+			if(!judge(argv[2], ".nt"))
+			{
+				printf("the dataset file must end with .nt\n");
+				EXIT(1);
+			}
+			//the query folder is ../query/ relative to the folder holding the dataset
+			string ds = string(argv[2]);
+			size_t pos = ds.rfind('/');
+			string qdir = (pos == string::npos ? string("") : ds.substr(0, pos + 1)) + "../query/";
+			fprintf(fp0, "query folder: %s\n", qdir.c_str());
+			//TODO: gload the dataset and build a new function!
+			printf("not achieved!\n");
+		}
+		else if(strcmp(argv[1], "-d") == 0)
+		{
+			runMM(string(argv[2]) + "/");
+		}
+		else
+			EXIT(1);
+		break;
+	default:	//> 3
+		if(strcmp(argv[1], "-q") == 0)
+		{
+			//TODO: user must gload first?
+			printf("not achieved!\n");
+		}
+		else
+			EXIT(1);
+		break;
+	}
+
+	clean();
+	return 0;
+}