diff --git a/Database/Database.cpp b/Database/Database.cpp index 6954fa4..8d3ca52 100644 --- a/Database/Database.cpp +++ b/Database/Database.cpp @@ -1513,7 +1513,7 @@ Database::get_query_parse_lock() } int -Database::query(const string _query, ResultSet& _result_set, FILE* _fp, bool update_flag) +Database::query(const string _query, ResultSet& _result_set, FILE* _fp, bool update_flag, bool export_flag) { string dictionary_store_path = this->store_path + "/dictionary.dc"; @@ -1555,6 +1555,11 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp, bool upd //general_evaluation.setStringIndexPointer(&tmpsi); // this->debug_lock.lock(); + if(export_flag) + { + general_evaluation.fp = _fp; + general_evaluation.export_flag = export_flag; + } bool query_ret = general_evaluation.doQuery(); if(!query_ret) { @@ -1705,16 +1710,19 @@ Database::query(const string _query, ResultSet& _result_set, FILE* _fp, bool upd long tv_final = Util::get_cur_time(); cout << "Total time used: " << (tv_final - tv_begin) << "ms." 
<< endl; //if (general_evaluation.needOutputAnswer()) - if (need_output_answer) + if(!export_flag) { - long long ans_num = max((long long)_result_set.ansNum - _result_set.output_offset, 0LL); - if (_result_set.output_limit != -1) - ans_num = min(ans_num, (long long)_result_set.output_limit); - cout << "There has answer: " << ans_num << endl; - cout << "final result is : " << endl; - _result_set.output(_fp); - fprintf(_fp, "\n"); - fflush(_fp); //to empty the output buffer in C (fflush(stdin) not work in GCC) + if (need_output_answer) + { + long long ans_num = max((long long)_result_set.ansNum - _result_set.output_offset, 0LL); + if (_result_set.output_limit != -1) + ans_num = min(ans_num, (long long)_result_set.output_limit); + cout << "There has answer: " << ans_num << endl; + cout << "final result is : " << endl; + _result_set.output(_fp); + fprintf(_fp, "\n"); + fflush(_fp); //to empty the output buffer in C (fflush(stdin) not work in GCC) + } } #ifdef DEBUG diff --git a/Database/Database.h b/Database/Database.h index b6d0fe8..b5eac41 100644 --- a/Database/Database.h +++ b/Database/Database.h @@ -51,7 +51,7 @@ public: bool load(); bool unload(); void clear(); - int query(const string _query, ResultSet& _result_set, FILE* _fp = stdout, bool update_flag = true); + int query(const string _query, ResultSet& _result_set, FILE* _fp = stdout, bool update_flag = true, bool export_flag = false); //1. 
if subject of _triple doesn't exist, //then assign a new subid, and insert a new SigEntry diff --git a/Database/Strategy.cpp b/Database/Strategy.cpp index c78fa5a..2ccc9c1 100644 --- a/Database/Strategy.cpp +++ b/Database/Strategy.cpp @@ -15,6 +15,8 @@ Strategy::Strategy() this->method = 0; this->kvstore = NULL; this->vstree = NULL; + this->fp = NULL; + this->export_flag = false; //this->prepare_handler(); } @@ -31,7 +33,8 @@ Strategy::Strategy(KVstore* _kvstore, VSTree* _vstree, TYPE_TRIPLE_NUM* _pre2num this->limitID_predicate = _limitID_predicate; this->limitID_literal = _limitID_literal; this->limitID_entity = _limitID_entity; - + this->fp = NULL; + this->export_flag = false; //this->prepare_handler(); } @@ -83,7 +86,10 @@ Strategy::handle(SPARQLquery& _query) } else if ((*iter)->getTripleNum() == 1 && pre_varNum == 1) { - this->method = 4; + if(this->export_flag) + this->method = 6; + else + this->method = 4; } if (this->method < 0 && pre_varNum == 0 && (*iter)->getTripleNum() == 1) //only one triple and no predicates @@ -138,11 +144,22 @@ Strategy::handle(SPARQLquery& _query) case 5: this->handler5(*iter, result_list); break; + case 6: + this->handler6(*iter, result_list); + break; default: cout << "not support this method" << endl; } - cout << "BasicQuery -- Final result size: " << (*iter)->getResultList().size() << endl; + if(this->method == 6) + { + cout << "BasicQuery -- Final result size: " << (*iter)->getResultList()[0][0] << endl; + (*iter)->getResultList().clear(); + } + else + { + cout << "BasicQuery -- Final result size: " << (*iter)->getResultList().size() << endl; + } } #else cout << "this BasicQuery use original query strategy" << endl; @@ -904,3 +921,54 @@ Strategy::handler5(BasicQuery* _bq, vector& _result_list) delete[] id_list; } +void +Strategy::handler6(BasicQuery* _bq, vector& _result_list) +{ + cout << "Special Case:select * and write to stream" << endl; + int varNum = _bq->getVarNum(); + //all variables(not including pre vars) + int 
total_num = _bq->getTotalVarNum(); + int pre_varNum = _bq->getPreVarNum(); + int selected_pre_var_num = _bq->getSelectedPreVarNum(); + int selected_var_num = _bq->getSelectVarNum(); + Triple triple = _bq->getTriple(0); + int pvpos = _bq->getSelectedPreVarPosition(triple.predicate); + + unsigned* id_list = NULL; + unsigned id_list_len = 0; + _result_list.clear(); + + int svpos = _bq->getSelectedVarPosition(triple.subject); + int ovpos = _bq->getSelectedVarPosition(triple.object); + cout<<"subject: "<limitID_predicate; ++i) + { + TYPE_PREDICATE_ID pid = i; + string p = this->kvstore->getPredicateByID(pid); + string pre = Util::node2string(p.c_str()); + this->kvstore->getsubIDobjIDlistBypreID(pid, id_list, id_list_len); + for (unsigned j = 0; j < id_list_len; j += 2) + { + string s = this->kvstore->getEntityByID(id_list[j]); + string sub = Util::node2string(s.c_str()); + string o; + if(id_list[j + 1] >= Util::LITERAL_FIRST_ID) + o = this->kvstore->getLiteralByID(id_list[j + 1]); + else + o = this->kvstore->getEntityByID(id_list[j + 1]); + string obj = Util::node2string(o.c_str()); + string record = sub + "\t" + pre + "\t" + obj + ".\n"; + fprintf(this->fp, "%s", record.c_str()); + rsize[0] += 1; + } + delete[] id_list; + } + id_list = NULL; + _result_list.push_back(rsize); +} diff --git a/Database/Strategy.h b/Database/Strategy.h index 2d8828e..df80160 100644 --- a/Database/Strategy.h +++ b/Database/Strategy.h @@ -21,6 +21,9 @@ class Strategy { public: + FILE* fp; + bool export_flag; + Strategy(); // if there exists a variable with limited matches in the query, then skip the filter of other // variables as soon as possible @@ -51,6 +54,7 @@ private: void handler3(BasicQuery*, vector&); void handler4(BasicQuery*, vector&); void handler5(BasicQuery*, vector&); + void handler6(BasicQuery*, vector&); //QueryHandler *dispatch; //void prepare_handler(); }; diff --git a/Main/gexport.cpp b/Main/gexport.cpp new file mode 100644 index 0000000..8a4a600 --- /dev/null +++ 
b/Main/gexport.cpp
@@ -0,0 +1,111 @@
+/*=============================================================================
+# Filename: gexport.cpp
+# Author: suxunbin
+# Last Modified: 2019-07-23 15:37
+# Description: export a database to get .nt file
+=============================================================================*/
+
+#include "../Database/Database.h"
+#include "../Util/Util.h"
+
+using namespace std;
+
+// Export every triple of a built database to an N-Triples (.nt) file.
+// Usage: gexport db_name [output_dir]
+//   db_name    - name of an already built database; must not end with ".db"
+//   output_dir - optional directory for db_name.nt (created when missing);
+//                defaults to the current directory
+// Returns 0 after usage errors (as before) and 1 when loading or writing fails.
+int
+main(int argc, char * argv[])
+{
+    Util util;
+
+    if (argc == 1)
+    {
+        cout << "You need to input the database name that you want to export." << endl;
+        return 0;
+    }
+    // More than two arguments used to fall through with an empty database name.
+    if (argc > 3)
+    {
+        cout << "The number of parameters is not correct." << endl;
+        return 0;
+    }
+
+    string db_name = argv[1];
+    if (db_name.length() > 3 && db_name.compare(db_name.length() - 3, 3, ".db") == 0)
+    {
+        cout << "The database name can not end with .db" << endl;
+        return 0;
+    }
+
+    // Build the output path; an empty output_dir means the current directory
+    // (indexing the last character of an empty string is undefined).
+    string filepath;
+    if (argc == 3)
+    {
+        filepath = argv[2];
+        if (!filepath.empty())
+        {
+            if (filepath[filepath.length() - 1] != '/')
+                filepath = filepath + "/";
+            if (!boost::filesystem::exists(filepath))
+                boost::filesystem::create_directories(filepath);
+        }
+    }
+    filepath = filepath + db_name + ".nt";
+
+    cout << "gexport..." << endl;
+
+    // load() is declared bool in Database.h; assumed false means failure - TODO confirm.
+    Database system_db("system");
+    if (!system_db.load())
+    {
+        cout << "Failed to load the system database." << endl;
+        return 1;
+    }
+
+    // NOTE(review): the triple pattern below has no predicate between subject and
+    // object; it may have been lost when this patch was extracted - confirm.
+    string sparql = "ASK WHERE{<" + db_name + "> \"already_built\".}";
+    ResultSet ask_rs;
+    system_db.query(sparql, ask_rs, stdout);
+    // NOTE(review): assumes an ASK query always yields one answer row - confirm.
+    if (ask_rs.answer[0][0] == "false")
+    {
+        cout << "The database does not exist." << endl;
+        return 0;
+    }
+
+    cout << "start exporting the database......" << endl;
+    Database _db(db_name);
+    if (!_db.load())
+    {
+        cout << "Failed to load the database." << endl;
+        return 1;
+    }
+    cout << "finish loading" << endl;
+
+    // Open the output before querying: with export_flag set, the query streams
+    // triples straight to this FILE*, so it must not be NULL.
+    FILE* ofp = fopen(filepath.c_str(), "w");
+    if (ofp == NULL)
+    {
+        cout << "Failed to open the output file: " << filepath << endl;
+        return 1;
+    }
+
+    sparql = "select * where{?x ?y ?z.}";
+    ResultSet _rs;
+    _db.query(sparql, _rs, ofp, true, true);
+    // fclose flushes the stream; a non-zero result means buffered data was lost.
+    if (fclose(ofp) != 0)
+    {
+        cout << "Failed to write the output file: " << filepath << endl;
+        return 1;
+    }
+    cout << "finish exporting the database." << endl;
+
+    return 0;
+}
diff --git a/Main/ginit.cpp b/Main/ginit.cpp
index 3a60988..277f148 100644
--- a/Main/ginit.cpp
+++ b/Main/ginit.cpp
@@ -13,10 +13,28 @@ using namespace std;
 
 int main(int argc, char * argv[])
 {
+    string op;
     if(argc > 1)
     {
-        if(boost::filesystem::exists("system.db"))
-            return 0;
+        op = argv[1];
+        if(op == "-make")
+        {
+            if(boost::filesystem::exists("system.db"))
+                return 0;
+        }
+        else if(op == "-d")
+        {
+            if(argc == 2)
+            {
+                cout << "You need to input at least one database name." << endl;
+                return 0;
+            }
+        }
+        else
+        {
+            cout << "The initialization option is not correct." << endl;
+            return 0;
+        }
     }
 
     //build system.db
@@ -47,7 +65,22 @@ int main(int argc, char * argv[])
     _db = new Database(_db_path);
     _db->load();
     string time = Util::get_date_time();
-    string sparql = "INSERT DATA { \"" + time + "\".}";
+    string sparql = "INSERT DATA { \"" + time + "\".";
+    if(argc > 1)
+    {
+        op = argv[1];
+        if(op == "-d")
+        {
+            for(int i=2; i \"already_built\".";
+                sparql = sparql + "<" + db_name + "> .";
+                sparql = sparql + "<" + db_name + "> \"" + time + "\".";
+            }
+        }
+    }
+    sparql = sparql + "}";
     ResultSet _rs;
     FILE* ofp = stdout;
     string msg;
diff --git a/Main/gmonitor.cpp b/Main/gmonitor.cpp
index 303b901..9e5fd3b 100644
--- a/Main/gmonitor.cpp
+++ b/Main/gmonitor.cpp
@@ -1,75 +1,84 @@
-#include
-#include
-#include
-#include
+/*=============================================================================
+# Filename: gmonitor.cpp
+# Author: suxunbin
+# Mail: suxunbin@pku.edu.cn
+# Last Modified: 2019-07-26 16:00
+# Description: used to show information of the database
+=============================================================================*/ + +#include "../Util/Util.h" +#include "../Database/Database.h" using namespace std; -const string UrlEncode(const string& s) +int main(int argc, char * argv[]) { - string ret; - unsigned char *ptr = (unsigned char *)s.c_str(); - ret.reserve(s.length()); + Util util; + string db_name; + if (argc == 1) + { + cout << "You need to input the database name that you want to show." << endl; + return 0; + } + else if (argc == 2) + { + db_name = argv[1]; + int len = db_name.length(); + if (db_name.length() > 3 && db_name.substr(len - 3, 3) == ".db") + { + cout << "The database name can not end with .db" << endl; + return 0; + } + } + else + { + cout << "The number of parameters is not correct." << endl; + return 0; + } - for(int i=0;i \"already_built\".}"; + ResultSet ask_rs; + FILE* ask_ofp = stdout; + int ret = system_db.query(sparql, ask_rs, ask_ofp); + if (ask_rs.answer[0][0] == "false") + { + cout << "The database does not exist." << endl; + return 0; + } + + cout << "start loading the database......" 
<< endl; + Database _db(db_name); + _db.load(); + cout << "finish loading" << endl; + + sparql = "select ?p ?o where{<" + db_name + "> ?p ?o.}"; + ResultSet _rs; + FILE* ofp = stdout; + ret = system_db.query(sparql, _rs, ofp); + string creator; + string built_time; + for (int i = 0; i < _rs.ansNum; i++) { - if((int(ptr[i])==42) || (int(ptr[i])==45) || (int(ptr[i])==46) || (int(ptr[i])==47) || (int(ptr[i])==58) ||(int(ptr[i])==95)) - ret += ptr[i]; - else if((int(ptr[i])>=48) && (int(ptr[i])<=57)) - ret += ptr[i]; - else if((int(ptr[i])>=65) && (int(ptr[i])<=90)) - ret += ptr[i]; - else if((int(ptr[i])>=97) && (int(ptr[i])<=122)) - ret += ptr[i]; - else if(int(ptr[i])==32) - ret += '+'; - else - { - char buf[5]; - memset(buf,0,5); - snprintf(buf,5,"%%%X",ptr[i]); - ret.append(buf); - } + string p = _rs.answer[i][0]; + string o = _rs.answer[i][1]; + if(p == "") + creator = o.substr(1,o.length()-2); + else if(p == "") + built_time = o; } - return ret; -} -size_t OnWriteData(void* buffer, size_t size, size_t nmemb, void* lpVoid) -{ - string* str = dynamic_cast((string *)lpVoid); - if( NULL == str || NULL == buffer ) - return -1; - char* pData = (char*)buffer; - str->append(pData, size * nmemb); - return nmemb; -} -int -main(int argc, char * argv[]) -{ - cout << "argc: " << argc << endl; - cout << "ip: " << argv[1] << endl; - cout << "port: " << argv[2] << endl; - cout << "db_name: " << argv[3] << endl; + unsigned triple_num = _db.getTripleNum(); + unsigned entity_num = _db.getEntityNum(); + unsigned literal_num = _db.getLiteralNum(); + unsigned subject_num = _db.getSubNum(); + unsigned predicate_num = _db.getPreNum(); - string serverIP = string(argv[1]); - string serverPort = string(argv[2]); - string db_name = string(argv[3]); - - const string strUrl = "http://"+serverIP+":"+serverPort+"/?operation=monitor&db_name="+db_name; - string res; - res.clear(); - CURLcode ret; - CURL* curl = curl_easy_init(); - if(NULL == curl) - return CURLE_FAILED_INIT; - 
curl_easy_setopt(curl, CURLOPT_URL, UrlEncode(strUrl).c_str()); - curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&res); - curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); - curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3); - curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3); - ret = curl_easy_perform(curl); - curl_easy_cleanup(curl); - cout<< res < -#include -#include -#include +/*============================================================================= +# Filename: gshow.cpp +# Author: suxunbin +# Mail: suxunbin@pku.edu.cn +# Last Modified: 2019-07-25 17:00 +# Description: used to show all the databases that have already been built +=============================================================================*/ + +#include "../Util/Util.h" +#include "../Database/Database.h" using namespace std; -const string UrlEncode(const string& s) -{ - string ret; - unsigned char *ptr = (unsigned char *)s.c_str(); - ret.reserve(s.length()); +struct DBInfo{ + public: + string db_name; + string creator; + string built_time; + DBInfo(){ + } + DBInfo(string _db_name){ + db_name = _db_name; + } + ~DBInfo(){ + } +}; - for(int i=0;i=48) && (int(ptr[i])<=57)) - ret += ptr[i]; - else if((int(ptr[i])>=65) && (int(ptr[i])<=90)) - ret += ptr[i]; - else if((int(ptr[i])>=97) && (int(ptr[i])<=122)) - ret += ptr[i]; - else if(int(ptr[i])==32) - ret += '+'; - else + databases[i+1].db_name = _rs.answer[i][0]; + string sparql1 = "select ?p ?o where{" + _rs.answer[i][0] + " ?p ?o.}"; + ResultSet _rs1; + FILE* ofp1 = stdout; + int ret1 = system_db.query(sparql1, _rs1, ofp1); + for (int j = 0; j < _rs1.ansNum; j++) { - char buf[5]; - memset(buf,0,5); - snprintf(buf,5,"%%%X",ptr[i]); - ret.append(buf); + string p = _rs1.answer[j][0]; + string o = _rs1.answer[j][1]; + if(p == "") + databases[i+1].creator = o; + else if(p == "") + databases[i+1].built_time = o; } } - return ret; -} -size_t 
OnWriteData(void* buffer, size_t size, size_t nmemb, void* lpVoid) -{ - string* str = dynamic_cast((string *)lpVoid); - if( NULL == str || NULL == buffer ) - return -1; - char* pData = (char*)buffer; - str->append(pData, size * nmemb); - return nmemb; -} -int -main(int argc, char * argv[]) -{ - cout << "argc: " << argc << endl; - cout << "ip: " << argv[1] << endl; - cout << "port: " << argv[2] << endl; - - string serverIP = string(argv[1]); - string serverPort = string(argv[2]); - const string strUrl = "http://"+serverIP+":"+serverPort+"/?operation=show"; - string res; - res.clear(); - CURLcode ret; - CURL* curl = curl_easy_init(); - if(NULL == curl) - return CURLE_FAILED_INIT; - curl_easy_setopt(curl, CURLOPT_URL, UrlEncode(strUrl).c_str()); - curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&res); - curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); - curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3); - curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3); - ret = curl_easy_perform(curl); - curl_easy_cleanup(curl); - cout<< "database: "<< res <") + databases[0].creator = o; + else if(p == "") + databases[0].built_time = o; + } + cout<<"\n========================================\n"; + for (int i = 0; i < _rs.ansNum+1; i++) + { + string output = "database: " + Getstr(databases[i].db_name) + "\ncreator: " + Getstr(databases[i].creator) + + "\nbuilt_time: " + databases[i].built_time + "\n========================================\n"; + cout<export_flag) + { + this->strategy.fp = this->fp; + this->strategy.export_flag = this->export_flag; + } this->strategy.handle(sparql_query); long tv_handle = Util::get_cur_time(); printf("after Handle, used %ld ms.\n", tv_handle - tv_fillcand); diff --git a/Query/GeneralEvaluation.h b/Query/GeneralEvaluation.h index 3a1c6f3..03a8ae4 100644 --- a/Query/GeneralEvaluation.h +++ b/Query/GeneralEvaluation.h @@ -42,9 +42,13 @@ class 
GeneralEvaluation TYPE_ENTITY_LITERAL_ID limitID_literal; TYPE_ENTITY_LITERAL_ID limitID_entity; + public: + FILE* fp; + bool export_flag; + public: GeneralEvaluation(VSTree *_vstree, KVstore *_kvstore, StringIndex *_stringindex, QueryCache *_query_cache, TYPE_TRIPLE_NUM *_pre2num,TYPE_TRIPLE_NUM *_pre2sub, TYPE_TRIPLE_NUM *_pre2obj, TYPE_PREDICATE_ID _limitID_predicate, TYPE_ENTITY_LITERAL_ID _limitID_literal, TYPE_ENTITY_LITERAL_ID _limitID_entity): - vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), query_cache(_query_cache), pre2num(_pre2num), pre2sub(_pre2sub), pre2obj(_pre2obj), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), limitID_entity(_limitID_entity), temp_result(NULL) + vstree(_vstree), kvstore(_kvstore), stringindex(_stringindex), query_cache(_query_cache), pre2num(_pre2num), pre2sub(_pre2sub), pre2obj(_pre2obj), limitID_predicate(_limitID_predicate), limitID_literal(_limitID_literal), limitID_entity(_limitID_entity), temp_result(NULL), fp(NULL), export_flag(false) {} bool parseQuery(const std::string &_query); diff --git a/makefile b/makefile index a777329..a743009 100644 --- a/makefile +++ b/makefile @@ -113,7 +113,7 @@ inc = -I./tools/libantlr3c-3.4/ -I./tools/libantlr3c-3.4/include #gtest -TARGET = $(exedir)gbuild $(exedir)gserver $(exedir)gserver_backup_scheduler $(exedir)gclient $(exedir)gquery $(exedir)gconsole $(api_java) $(exedir)gadd $(exedir)gsub $(exedir)ghttp $(exedir)gmonitor $(exedir)gshow $(exedir)shutdown $(exedir)ginit $(exedir)gdrop $(testdir)update_test $(testdir)dataset_test +TARGET = $(exedir)gexport $(exedir)gbuild $(exedir)gserver $(exedir)gserver_backup_scheduler $(exedir)gclient $(exedir)gquery $(exedir)gconsole $(api_java) $(exedir)gadd $(exedir)gsub $(exedir)ghttp $(exedir)gmonitor $(exedir)gshow $(exedir)shutdown $(exedir)ginit $(exedir)gdrop $(testdir)update_test $(testdir)dataset_test all: $(TARGET) @echo "Compilation ends successfully!" 
@@ -126,6 +126,9 @@ all: $(TARGET) #NOTICE:not include g*.o in objfile due to multiple definitions of main() +$(exedir)gexport: $(lib_antlr) $(objdir)gexport.o $(objfile) + $(CC) $(EXEFLAG) -o $(exedir)gexport $(objdir)gexport.o $(objfile) $(library) $(openmp) + $(exedir)gdrop: $(lib_antlr) $(objdir)gdrop.o $(objfile) $(CC) $(EXEFLAG) -o $(exedir)gdrop $(objdir)gdrop.o $(objfile) $(library) $(openmp) @@ -172,6 +175,9 @@ $(testdir)dataset_test: $(lib_antlr) $(objdir)dataset_test.o $(objfile) #objects in Main/ begin +$(objdir)gexport.o: Main/gexport.cpp Database/Database.h Util/Util.h $(lib_antlr) + $(CC) $(CFLAGS) Main/gexport.cpp $(inc) -o $(objdir)gexport.o $(openmp) + $(objdir)gdrop.o: Main/gdrop.cpp Database/Database.h Util/Util.h $(lib_antlr) $(CC) $(CFLAGS) Main/gdrop.cpp $(inc) -o $(objdir)gdrop.o $(openmp) diff --git a/scripts/init.sh b/scripts/init.sh index 605e2cc..d97373b 100755 --- a/scripts/init.sh +++ b/scripts/init.sh @@ -3,5 +3,5 @@ #set -v #initialize system.db -"bin/ginit" "--make" >& /dev/null +"bin/ginit" "-make" >& /dev/null