fix log bug;better the gquery console;add test

author:zengli
This commit is contained in:
qzxx 2015-10-19 16:48:02 +08:00
parent 7e497c57d9
commit da5a84b23a
11 changed files with 429 additions and 50 deletions

View File

@ -36,11 +36,28 @@ Database::Database(std::string _name){
}
}
//Release the resources owned by this Database, writing progress messages to fp0.
//fp0: an open, writable log stream (it is written to unconditionally).
//The member pointers are reset after deletion so that a later call to
//~Database() (or a second call to release()) does not free them twice.
void Database::release(FILE* fp0)
{
    fprintf(fp0, "begin to delete DB!\n");
    fflush(fp0);

    delete this->vstree;
    this->vstree = NULL;    //deleting NULL later is a no-op
    fprintf(fp0, "ok to delete vstree!\n");
    fflush(fp0);

    delete this->kvstore;
    this->kvstore = NULL;
    fprintf(fp0, "ok to delete kvstore!\n");
    fflush(fp0);

    //fp_debug is static: another Database may already have closed it,
    //and fclose(NULL) is undefined behavior
    if(fp_debug != NULL)
    {
        fclose(fp_debug);
        fp_debug = NULL; //debug: when multiple databases
    }
    fprintf(fp0, "ok to delete DB!\n");
    fflush(fp0);
}
//Destroy the database: free the vstree and kvstore objects and close the
//shared debug log if it is still open.
Database::~Database()
{
    delete this->vstree;
    delete this->kvstore;
    //fp_debug is static and shared by all Database objects; once one of them
    //has closed it the pointer is NULL, and fclose(NULL) is undefined behavior
    if(fp_debug != NULL)
    {
        fclose(fp_debug);
        fp_debug = NULL; //debug: when multiple databases
    }
}
bool Database::load()

View File

@ -32,10 +32,10 @@ class Database{
public:
static const bool debug_1 = false;
static const bool debug_2 = false;
static const bool debug_1 = true;
static const bool debug_2 = true;
static const bool only_sub2idpre2id = true;
static const bool debug_vstree = false;
static const bool debug_vstree = true;
static const int internal = 100*1000;
static FILE* fp_debug;
static void log(std::string _str);
@ -56,6 +56,7 @@ public:
static const int STRING_MODE = 1;
static const int ID_MODE = 2;
Database(std::string _name);
void release(FILE* fp0);
~Database();
bool load();

View File

@ -8,7 +8,7 @@ all: $(obj)
$(objdir)Tree.o: tree/Tree.cpp
$(CC) $(CFLAGS) tree/Tree.cpp -o $(objdir)Tree.o
$(objdir)Storage.o: storage/Storage.cpp
$(CC) $(CFLAGS) storage/Storage.cpp -o $(objdir)Storage.o
$(CC) $(CFLAGS) storage/Storage.cpp -o $(objdir)Storage.o -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
$(objdir)Node.o: node/Node.cpp
$(CC) $(CFLAGS) node/Node.cpp -o $(objdir)Node.o
$(objdir)IntlNode.o: node/IntlNode.cpp
@ -24,7 +24,7 @@ $(objdir)Hash.o: hash/Hash.cpp
$(objdir)Heap.o: heap/Heap.cpp
$(CC) $(CFLAGS) heap/Heap.cpp -o $(objdir)Heap.o
$(objdir)RangeValue.o: rangevalue/RangeValue.cpp
$(CC) $(CFLAGS) rangevalue/RangeValue.cpp -o $(objdir)RangeValue.o
$(CC) $(CFLAGS) rangevalue/RangeValue.cpp -o $(objdir)RangeValue.o -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
.PHONY: clean
clean:
#nothing to do now

View File

@ -610,6 +610,7 @@ Storage::handler(unsigned _needmem) //>0
Storage::~Storage()
{
//release heap and freelist...
printf("now to release the knstore!\n");
BlockInfo* bp = this->freelist;
BlockInfo* next;
while(bp != NULL)
@ -618,7 +619,9 @@ Storage::~Storage()
delete bp;
bp = next;
}
printf("already empty the freelist!\n");
delete this->minheap;
printf("already empty the buffer heap!\n");
fclose(this->treefp);
#ifdef DEBUG
fclose(Util::logsfp);

View File

@ -495,6 +495,7 @@ Tree::range_query(const TBstr* _key1, const TBstr* _key2)
bool
Tree::save() //save the whole tree to disk
{
printf("now to save tree!\n");
if(TSM->writeTree(this->root))
return true;
else
@ -520,6 +521,7 @@ Tree::~Tree()
{
//delete VALUES;
delete TSM;
printf("already empty the buffer, now to delete all nodes in tree!\n");
//recursively delete each Node
release(root);
}

View File

@ -14,7 +14,7 @@ objfile=$(objdir)Bstr.o $(objdir)Database.o $(objdir)KVstore.o $(objdir)SPARQLqu
inc=-I./tools/libantlr3c-3.4/ -I./tools/libantlr3c-3.4/include
all: lib_antlr btree gload gquery gserver gclient
all: lib_antlr btree gload gquery gserver gclient gtest
btree:
$(MAKE) -C KVstore
@ -22,13 +22,15 @@ gload: $(objdir)gload.o $(objfile)
$(CC) -g -o gload $(objdir)gload.o $(objfile) lib/libantlr.a
gquery: $(objdir)gquery.o $(objfile)
$(CC) -g -o gquery $(objdir)gquery.o $(objfile) lib/libantlr.a
$(CC) -lreadline -ltermcap -g -o gquery $(objdir)gquery.o $(objfile) lib/libantlr.a
gserver: $(objdir)gserver.o $(objfile)
$(CC) -g -o gserver $(objdir)gserver.o $(objfile) lib/libantlr.a
gclient: $(objdir)gclient.o $(objfile)
$(CC) -g -o gclient $(objdir)gclient.o $(objfile) lib/libantlr.a
gtest: $(objdir)gtest.o $(objfile)
$(CC) -g -o gtest $(objdir)gtest.o $(objfile) lib/libantlr.a
$(objdir)gload.o: main/gload.cpp
$(CC) $(CFLAGS) main/gload.cpp $(inc) -L./lib lib/libantlr.a -o $(objdir)gload.o
@ -41,6 +43,8 @@ $(objdir)gserver.o: main/gserver.cpp
$(objdir)gclient.o: main/gclient.cpp
$(CC) $(CFLAGS) main/gclient.cpp $(inc) -o $(objdir)gclient.o
$(objdir)gtest.o: main/gtest.cpp
$(CC) $(CFLAGS) main/gtest.cpp $(inc) -o $(objdir)gtest.o
$(objdir)Bstr.o: Bstr/Bstr.cpp Bstr/Bstr.h
$(CC) $(CFLAGS) Bstr/Bstr.cpp $(inc) -o $(objdir)Bstr.o
@ -109,5 +113,6 @@ lib_antlr:
clean:
$(MAKE) -C KVstore clean
rm -rf gload gquery gserver gclient $(objdir)/*.o
rm -rf gload gquery gtest gserver gclient $(objdir)/*.o lib/libantlr.a
rm -rf *.log *.nt tools/libantlr3c-3.4

View File

@ -314,6 +314,7 @@ void BasicQuery::encodeBasicQuery(KVstore* _p_kvstore, const std::vector<std::st
cout << "[" << this->var_name[i] << ", " << i << " " << this->var_str2id[this->var_name[i]] << "]\t";
}
cout << endl;
cout << "before new IDList!" << endl; //just for debug
this->candidate_list = new IDList[this->graph_var_num];

View File

@ -46,7 +46,7 @@ string ResultSet::to_str()
{
if(this->ansNum == 0)
{
return "[empty result]";
return "[empty result]\n";
}
@ -63,15 +63,17 @@ string ResultSet::to_str()
for(int i = 0; i < this->ansNum; i ++)
{
//printf("to_str: well!\n"); //just for debug!
_buf << this->answer[i][0];
for(int j = 1; j < this->select_var_num; j ++)
{
//_buf << "\t" << this->answer[i][j];
_buf << " " << this->answer[i][j];
//there may be ' ' in spo, but no '\t'
_buf << "\t" << this->answer[i][j];
//_buf << " " << this->answer[i][j];
}
_buf << "\n";
}
//printf("to_str: ends!\n"); //just for debug!
return _buf.str();
}

View File

@ -135,32 +135,4 @@ Note that each command ends with ";".
####5. API
We provide JAVA and C++ API for accessing gStore now. Please refer to example codes in `api/cpp/example` and `api/java/example`.
### System Update Log
####Oct 15, 2015.
waiting to add
####Sep 25, 2015.
We implemented a new version of BTree to replace the old one.
After testing on the DBpedia, LUBM, and WatDiv benchmarks, we conclude that the new BTree performs more efficiently than
the old version. For the same triple file, the new version spends less time executing the gload command.
Besides, the new version handles long literal objects efficiently, whereas on the old BTree, triples whose object length exceeds 4096 bytes caused frequent, inefficient split operations.
####Feb 2, 2015
We modify the RDF parser and SPARQL parser.
Under the new RDF parser, we also redesign the encode strategy, which reduces RDF file scanning times.
Now we can parse the standard SPARQL v1.1 grammar correctly, and can support basic graph pattern(BGP) SPARQL queries written by this standard grammar.
####Dec 11, 2014.
We add API for C/CPP and JAVA.
####Nov 20, 2014.
We share our gStore2.0 code as an open-source project under BSD license on github.

View File

@ -4,13 +4,20 @@
* Created on: 2014-7-31
* Author: liyouhuan
*/
#include<iostream>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <readline/readline.h>
#include <readline/history.h>
#include "../Database/Database.h"
#include "../util/util.h"
using namespace std;
std::string getQueryFromFile(const char* _file_path)
//WARN:cannot support soft links!
std::string
getQueryFromFile(const char* _file_path)
{
char buf[10000];
std::string query_file;
@ -36,10 +43,12 @@ std::string getQueryFromFile(const char* _file_path)
/*
* [0]./gquery [1]db_folder_path
*/
int main(int argc, char * argv[])
int
main(int argc, char * argv[])
{
cout << "gquery..." << endl;
if(argc < 2){
if(argc < 2)
{
cerr << "error: lack of DB_store to be queried" << endl;
return 0;
}
@ -99,17 +108,52 @@ int main(int argc, char * argv[])
}
// read query file path from terminal.
// BETTER: sighandler ctrl+C/D/Z
char *buf, prompt[] = "gsql>";
//const int commands_num = 3;
char commands[][20] = {"help", "quit", "sparql"};
printf("Type `help` for information of all commands\n");
rl_bind_key('\t', rl_complete);
while(true)
{
cout << "please input query file path:" << endl;
//cout << "please input query file path:" << endl;
buf = readline(prompt);
if(buf == NULL)
continue;
else
add_history(buf);
if(strcmp(buf, "help") == 0)
{
//print commands message
printf("help - print commands message\n");
printf("quit - quit the console normally\n");
printf("sparql - load query from the second argument\n");
continue;
}
else if(strcmp(buf, "quit") == 0)
break;
else if(strncmp(buf, "sparql", 6) != 0)
{
printf("unknown commands\n");
continue;
}
std::string query_file;
cin >> query_file;
//cin >> query_file;
string query = getQueryFromFile(query_file.c_str());
//string query = getQueryFromFile(query_file.c_str());
//BETTER:build a parser for this console
char* p = buf + strlen(buf) - 1;
while(*p == ' ' || *p == '\t') //set the end of path
p--;
*(p+1) = '\0';
p = buf + 6;
while(*p == ' ' || *p == '\t') //acquire the start of path
p++;
string query = getQueryFromFile(p);
if (query.empty())
{
free(buf);
continue;
}
@ -121,8 +165,9 @@ int main(int argc, char * argv[])
_db.query(query, _rs);
//test...
// std::string answer_file = query_file+".out";
// util::save_to_file(answer_file.c_str(), _rs.to_str());
//std::string answer_file = query_file+".out";
//util::save_to_file(answer_file.c_str(), _rs.to_str());
free(buf);
}
return 0;

331
main/gtest.cpp Normal file
View File

@ -0,0 +1,331 @@
/*=============================================================================
# Filename: gtest.cpp
# Author: syzz
# Mail: 1181955272@qq.com
# Last Modified: 2015-09-02 00:04
# Description: load index once and query, there are several ways to use this program:
1. ./gtest test all datasets and corresponding queries
2. ./gtest --help simplified as -h, will print the help message
3. ./gtest -f DS_PATH load/test a specified dataset, with all corresponding queries
4. ./gtest -d FD_PATH load/test a dataset folder(like WatDiv/), with all corresponding queries
5. ./gtest -q DB_PATH q1 q2... test a loaded database with given queries(no limit to db and query)
=============================================================================*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
//#include <unistd.h>
#include <string>
#include <sys/time.h>
#include "../Database/Database.h"
#include "../util/util.h"
using namespace std;
//number of entries in db[]
#define NUM 4
//separator written to the logs after each entry (query or dataset name)
char line1[] = "--------------------------------------------------";
//separator written to the logs after all queries of one dataset are finished
char line2[] = "##################################################";
//base directory that is prepended to each db[] entry for the complete test
string path = "/media/wip/common/data/";
//dataset folders (relative to path), each ending with '/'
string db[NUM] = {"WatDiv/", "LUBM/", "DBpedia/", "Yago/"};
//not achieved, so do not use
//Recreate the result.log/ and time.log/ directories from scratch, with one
//sub-directory per entry of db[] inside each of them.
void
build_logs()
{
    const char* setup[] = {
        "rm -rf result.log/",
        "rm -rf time.log/",
        "mkdir result.log",
        "mkdir time.log"
    };
    for(unsigned k = 0; k < sizeof(setup) / sizeof(setup[0]); ++k)
        system(setup[k]);

    for(int idx = 0; idx < NUM; ++idx)
    {
        //quote the name in case of special characters like &
        std::string quoted = "\"" + db[idx] + "\"";

        std::string mk_result = "mkdir result.log/" + quoted;
        system(mk_result.c_str());

        std::string mk_time = "mkdir time.log/" + quoted;
        system(mk_time.c_str());
    }
}
//Check whether the string p ends with the suffix p0 (case sensitive).
//p, p0: NUL-terminated strings.
//Returns true iff the last strlen(p0) characters of p equal p0; a string
//shorter than the suffix never matches (so "nt" does not match ".nt").
bool
judge(const char* p, const char* p0)
{
    size_t len = strlen(p), len0 = strlen(p0);
    if(len < len0)
        return false;
    return memcmp(p + (len - len0), p0, len0) == 0;
}
//Read the whole text file at path and return its content, each line
//terminated by '\n'.
//Returns "" (after printing a message) when the file can not be opened, and
//also when the file contains no lines at all.
//Lines are read into a std::string, so there is no limit on line length: a
//fixed-size buffer would set failbit on a long line and never reach eof.
std::string
getQueryFromFile(const char* path)
{
    std::ifstream fin(path);
    if(!fin)
    {
        printf("can not open %s\n", path);
        return "";
    }

    std::stringstream ss;
    std::string line;
    //loop on the read itself: stops on eof as well as on any stream error
    while(std::getline(fin, line))
    {
        ss << line << "\n";
    }
    fin.close();
    return ss.str();
}
//Print the usage message of gtest (the same text as the header of this file)
//to stdout.
void
help()
{
//the message is one string literal, continued over several lines with a trailing backslash
printf("\
/*=============================================================================\n\
# Filename: gtest.cpp\n\
# Author: syzz\n\
# Mail: 1181955272@qq.com\n\
# Description: load index once and query, there are several ways to use this program:\n\
1. ./gtest test all datasets and corresponding queries\n\
2. ./gtest --help simplified as -h, will print the help message\n\
3. ./gtest -f DS_PATH load/test a specified dataset, with all corresponding queries\n\
4. ./gtest -d FD_PATH load/test a dataset folder(like WatDiv/), with all corresponding queries\n\
5. ./gtest -q DB_PATH q1 q2... test a loaded database with given queries(no limit to db and query)\n\
=============================================================================*/\n\
");
}
//log files, opened in main(): fp0 is run.log (progress messages),
//fp1 is time.log (query times), fp2 is result.log (query results)
FILE *fp0 = NULL, *fp1 = NULL, *fp2 = NULL;
//single dataset, single query
//Run the query stored in file qf against the loaded database _db.
//The file name goes to fp0, the elapsed time to fp1 and the result set to
//fp2; a query that reads as empty is only reported in fp0.
void
runSS(Database* _db, string qf)
{
    const char* query_path = qf.c_str();
    string query = getQueryFromFile(query_path);

    if(query.empty())
    {
        fprintf(fp0, "this query is empty:\n%s\n", query_path);
        fflush(fp0);
        return;
    }

    fprintf(fp0, "%s\n", query_path);
    fflush(fp0);

    ResultSet _rs;
    long started = util::get_cur_time();
    _db->query(query, _rs);
    long finished = util::get_cur_time();
    long elapsed = finished - started;  //time is standarded as ms

    //print to logs
    fprintf(fp1, "%s\n%ld\n%s\n", query_path, elapsed, line1);
    fprintf(fp2, "%s\n%s%s\n", query_path, _rs.to_str().c_str(), line1);

    fflush(fp0);
    fflush(fp1);
    fflush(fp2);
}
//single dataset, multi queries
//Run every query file (name ending with ".sql") found in directory qd
//against the loaded database _db.
//qd: query directory, must end with '/' because file names are appended to it.
//If qd can not be opened, the failure is written to fp0 and nothing is run.
void
runSM(Database* _db, string qd)
{
    DIR* dp2 = opendir(qd.c_str());
    if(dp2 == NULL)
    {
        //readdir(NULL) would crash, so report and give up on this directory
        fprintf(fp0, "can not open directory %s\n", qd.c_str());
        fflush(fp0);
        return;
    }

    struct dirent* p2;
    //finish all queries
    while((p2 = readdir(dp2)) != NULL)
    {
        if(!judge(p2->d_name, ".sql"))
            continue;
        string file = qd + string(p2->d_name);
        runSS(_db, file);
    }
    fprintf(fp0, "finish all queries!\n");
    fflush(fp0);

    closedir(dp2);
    fprintf(fp0, "ok to close dp2!\n");
    fflush(fp0);
}
//multi datasets, multi queries
//For every dataset file (name ending with ".nt") in <df>database/: build its
//index with ./gload, load it, run all queries of <df>query/ on it, then
//remove the built database directory again.
//df: dataset folder, must end with '/'.
//If <df>database/ can not be opened, the failure is written to fp0 and
//nothing is run.
void
runMM(string df)
{
    string s1 = df + string("database/");
    string s2 = df + string("query/");

    DIR* dp1 = opendir(s1.c_str());
    if(dp1 == NULL)
    {
        //readdir(NULL) would crash, so report and give up on this folder
        fprintf(fp0, "can not open directory %s\n", s1.c_str());
        fflush(fp0);
        return;
    }

    struct dirent* p1;
    while((p1 = readdir(dp1)) != NULL)
    {
        if(!judge(p1->d_name, ".nt"))
            continue;
        //keep a private copy of the name for the whole iteration
        string name(p1->d_name);

        system("echo 3 > /proc/sys/vm/drop_caches");

        //build the database index first
        string db_folder = s1 + name;
        fprintf(fp0, "%s\n", db_folder.c_str());
        //NOTICE:\" is used to ensure file name with &(etc...) will work ok!
        string cmd = string("./gload \"") + name + "\" \"" + db_folder + "\" > \"gload_" + name + ".log\"";
        fprintf(fp0, "%s\n", cmd.c_str());
        system(cmd.c_str());
        fprintf(fp0, "ok to load database!\n");

        //then load the index
        Database* _db = new Database(name);
        if(_db->load())
        {
            fprintf(fp0, "ok to load index!\n");

            //print to logs
            fprintf(fp1, "%s\n%s\n", name.c_str(), line1);
            fprintf(fp2, "%s\n%s\n", name.c_str(), line1);
            runSM(_db, s2);
            fflush(fp0);
            fprintf(fp1, "%s\n", line2);
            fprintf(fp2, "%s\n", line2);
        }
        else
        {
            //do not query a database whose index could not be loaded
            fprintf(fp0, "fail to load index of %s\n", name.c_str());
        }

        //NOTICE:may double free due to ~Database
        //to remove one when finished, should clean firstly like ~Database()
        delete _db;//_db->release(fp0);

        cmd = string("rm -rf \"") + name + string("\"");
        system(cmd.c_str());
        fprintf(fp0, "ok to remove %s\n", name.c_str());
        fflush(fp0);
        fflush(fp1);
        fflush(fp2);
    }
    closedir(dp1);
}
//Close every log file that is still open and reset its handle to NULL.
void
clean()
{
    //it is ok to check for NULL here because each handle is reset right after
    //being closed; fclose() itself does not reset the pointer
    FILE** logs[] = {&fp0, &fp1, &fp2};
    for(unsigned k = 0; k < sizeof(logs) / sizeof(logs[0]); ++k)
    {
        FILE*& handle = *logs[k];
        if(handle == NULL)
            continue;
        fclose(handle);
        handle = NULL;
    }
}
//Close the log files through clean(), then terminate the process with
//status ret. This function does not return.
void
EXIT(int ret)
{
    clean(); exit(ret);
}
int
main(int argc, const char* argv[])
{
//BETTER: enable user to assign the files for log
int i, j;
fp0 = fopen("run.log", "w+");
if(fp0 == NULL)
{
printf("open error!\n");
EXIT(1);
}
//build_logs();
fp1 = fopen("time.log", "w+");
if(fp1 == NULL)
{
printf("open error!\n");
EXIT(1);
}
fp2 = fopen("result.log", "w+");
if(fp2 == NULL)
{
printf("open error!\n");
EXIT(1);
}
switch(argc)
{
case 1:
help();
printf("this command will do a complete test for Gstore, do you want to continue?YN\n");
char c;
i = 0;
while(scanf("%c", &c))
{
if(c == 'Y' || c == 'y')
{
for(j = 0; j < 4; ++j)
{
runMM(path+db[j]);
}
break;
}
else if(c == 'N' || c == 'n')
EXIT(0);
i++;
if(i > 10) //try no more than 10 times
EXIT(1);
}
break;
case 2:
if(strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
{
help();
EXIT(0);
}
else
EXIT(1);
case 3:
if(strcmp(argv[1], "-f") == 0) //this file must be in db[]/database/, end with ".nt"
{
//TODO:use judge to check if is .nt file
int k = strlen(argv[2]) - 1;
string db = string(argv[2]);
while(k >= 0)
{
if(db[k] == '/')
{
break;
}
}
string s = db.substr(0, k+1) + "../query/";
//DIR* dp = opendir(s.c_str());
//TODO: gload the dataset and build a new function!
printf("not achieved!\n");
}
else if(strcmp(argv[1], "-d") == 0)
{
runMM(string(argv[2]) + "/");
}
else
EXIT(1);
break;
default: //> 3
if(strcmp(argv[1], "-q") == 0)
{
//TODO: user must gload first?
printf("not achieved!\n");
}
else
EXIT(1);
break;
}
clean();
return 0;
}