not use git-lfs for data;add missing files in data/ and test/

2016-03-11 15:50:11 +08:00 · 2016-03-11 15:50:11 +08:00 · 6224d132b3
parent 8a1aaae8ef
commit 6224d132b3
22 changed files with 100558 additions and 3 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -12,5 +12,3 @@
 *.wav filter=lfs diff=lfs merge=lfs -text
 *.mp4 filter=lfs diff=lfs merge=lfs -text
 *.rmvb filter=lfs diff=lfs merge=lfs -text
-*.nt filter=lfs diff=lfs merge=lfs -text
-*.n3 filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@ -14,7 +14,7 @@ This system is really user-friendly and you can pick it up in several minutes. R

 Then you need to compile the project, just type `make` in the gStore root directory, and all executables will be ok. To run gStore, please type `./gload database_name dataset_path` to build a database named by yourself. And you can use `./gquery database_name` command to query a existing database.

-**Notice: Some files(*.jpg, *.docx, *.pdf) in the docs/ folder are stored using Git Large File Storage, you need to install [git-lfs](https://git-lfs.github.com/) to clone/download if you want to include them in the project. Do not care it if you do not want to see them.**
+**Notice: Some files (*.jpg, *.docx, *.pdf) in the docs/ folder, and the *.tar.gz files in the test/ folder, are stored using Git Large File Storage. You need to install [git-lfs](https://git-lfs.github.com/) before cloning/downloading if you want to include them in the project. You can ignore this if you do not need those files.**

 - - -

--- a/data/LUBM_10.n3
+++ b/data/LUBM_10.n3
--- a/data/LUBM_q0.sql
+++ b/data/LUBM_q0.sql
@ -0,0 +1,4 @@
+select ?x where
+{
+	?x	<ub:name>	<FullProfessor0>.
+}
--- a/data/LUBM_q1.sql
+++ b/data/LUBM_q1.sql
@ -0,0 +1,9 @@
+select ?x where
+{
+?x	<rdf:type>	<ub:GraduateStudent>.
+?y	<rdf:type>	<ub:University>.
+?z	<rdf:type>	<ub:Department>.
+?x	<ub:memberOf>	?z.
+?z	<ub:subOrganizationOf>	?y.
+?x	<ub:undergraduateDegreeFrom>	?y.
+}
--- a/data/LUBM_q2.sql
+++ b/data/LUBM_q2.sql
@ -0,0 +1,5 @@
+select ?x where
+{
+?x	<rdf:type>	<ub:Course>.
+?x	<ub:name>	?y.
+}
--- a/data/ex1.sql
+++ b/data/ex1.sql
@ -0,0 +1,10 @@
+select ?x where 
+{ 
+	?x    <rdf:type>    <ub:UndergraduateStudent>. 
+	?y    <ub:name> <Course1>. 
+	?x    <ub:takesCourse>  ?y. 
+	?z    <ub:teacherOf>    ?y. 
+	?z    <ub:name> <FullProfessor1>. 
+	?z    <ub:worksFor>    ?w. 
+	?w    <ub:name>    <Department0>. 
+}
--- a/data/ex2.sql
+++ b/data/ex2.sql
@ -0,0 +1,4 @@
+select ?x where 
+{ 
+	?x    <rdf:type>    <ub:UndergraduateStudent>. 
+}
--- a/test/Gstore.cpp
+++ b/test/Gstore.cpp
@ -0,0 +1,28 @@
+/*
+ * Gstore.cpp
+ *
+ *  Created on: 2014-7-1
+ *      Author: liyouhuan
+ */
+
+#include "../Util/Util.h"
+#include "../Database/Database.h"
+
+using namespace std;
+
+int main()
+{
+#ifdef DEBUG
+	Util util;
+#endif
+	//candidate datasets on the test machine; only one of them is built below
+	string yago2_path("/media/wip/common/data/yago2_triple");
+	string yago_path("/media/wip/common/data/yago_triple");
+	string yago_1000000_path("/media/wip/common/data/yago_1000000");
+	string yago_example_path("/media/wip/common/data/yago_example");
+
+	//build the database "db_test" from the selected dataset
+	string dataset(yago_1000000_path);
+	Database test_db("db_test");
+	test_db.build(dataset);
+
+	return 0;
+}
+
--- a/test/build_signature.cpp
+++ b/test/build_signature.cpp
@ -0,0 +1,33 @@
+/*
+ * build_signature.cpp
+ *
+ *  Created on: 2014-7-23
+ *      Author: liyouhuan
+ */
+
+#include "../Util/Util.h"
+#include "../Database/Database.h"
+
+using namespace std;
+
+int main()
+{
+#ifdef DEBUG
+	Util util;
+#endif
+	cout << "build signature" << endl;
+
+	//candidate datasets on the test machine; only one of them is built below
+	string yago2_path("/media/wip/common/data/yago2_triple");
+	string yago_path("/media/wip/common/data/yago_triple");
+	string yago_1000000_path("/media/wip/common/data/yago_1000000");
+	string yago_10000_path("/media/wip/common/data/yago_10000");
+	string yago_example_path("/media/wip/common/data/yago_example");
+
+	//build "db_test" from the selected dataset, then run the signature test
+	string dataset(yago_10000_path);
+	Database test_db("db_test");
+	Util::log("RDF:" + dataset);
+	test_db.build(dataset);
+	Util::log("finish build");
+	test_db.test_build_sig();
+
+	return 0;
+}
+
--- a/test/dbme_test.tar.gz
+++ b/test/dbme_test.tar.gz
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23fb97aaed35be9cb1ff0252060e8e5682a26b2e6cc868ab13cd15dad8cb0435
+size 3356
--- a/test/dbms_test_xlsx.tar.gz
+++ b/test/dbms_test_xlsx.tar.gz
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64602e2554b848b113e52e5f9e818c289b148d10c2b143db0e21bbe65bbd3f00
+size 56022
--- a/test/encode_test.cpp
+++ b/test/encode_test.cpp
@ -0,0 +1,33 @@
+/*
+ * encode_test.cpp
+ *
+ *  Created on: 2014-7-16
+ *      Author: liyouhuan
+ */
+
+#include "../Util/Util.h"
+#include "../Database/Database.h"
+
+using namespace std;
+
+int main()
+{
+#ifdef DEBUG
+	Util util;
+#endif
+	cout << "encode_test" << endl;
+
+	//candidate datasets on the test machine; only one of them is built below
+	string yago2_path("/media/wip/common/data/yago2_triple");
+	string yago_path("/media/wip/common/data/yago_triple");
+	string yago_1000000_path("/media/wip/common/data/yago_1000000");
+	string yago_10000_path("/media/wip/common/data/yago_10000");
+	string yago_example_path("/media/wip/common/data/yago_example");
+
+	//build "db_test" from the selected dataset, then run the database self test
+	string dataset(yago2_path);
+	Database test_db("db_test");
+	Util::log("RDF:" + dataset);
+	test_db.build(dataset);
+	Util::log("finish build");
+	test_db.test();
+
+	return 0;
+}
+
--- a/test/format_question.txt
+++ b/test/format_question.txt
@ -0,0 +1,19 @@
+=================================================
+Sesame: 
+load lubm_10.nt
+Malformed document: Not a valid (absolute) URI: University0 [line 2]
+load bsbm_100.nt
+Malformed document: '7683.53' was not recognised, and could not be verified, with datatype http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/USD [line 9059]
+load dbpedia2014.nt
+Malformed document: '304.0' was not recognised, and could not be verified, with datatype http://dbpedia.org/datatype/second [line 93]
+load dblp_uniq.nt
+Malformed document: Element type “http:”must be followed by either attribute specifications, “>” or “/>”. [line1, column 8]
+=================================================
+Jena:
+load yago2.db.fix
+[line: 680, col: 8 ] Illegal character in IRI (codepoint 0x5E, '^'): <0.0#m[^]...>
+load yagoFacts.nt
+[line: 3, col: 1 ] Expected BNode or IRI: Got: [DIRECTIVE:base]
+load dblp_uniq.nt
+[line: 1715764, col: 144] Bad character in IRI (space): <http://www.ifi.unizh.ch/dbtg/IDEE/team.html#Dirk[space]...>
+
--- a/test/full_test.sh
+++ b/test/full_test.sh
@ -0,0 +1,344 @@
+#! /bin/env bash
+
+#in some system, maybe /usr/bin/ instead of /bin/
+#according executables to deal with dbms
+#NOTICE: require that virtuoso/sesame/jena is installed and gstore is maked!
+
+line1=--------------------------------------------------
+line2=##################################################
+path=/media/wip/common/data/
+#db0=${path}WatDiv/
+#db1=${path}LUBM/
+#db2=${path}DBpedia/
+#db3=${path}BSBM/
+#db=($db0 $db1 $db2 $db3)	#db[4]=db4
+db=(WatDiv/ LUBM/ BSBM/ DBpedia/)
+#BETTER: add yago2/yago3, dblp...add more queries
+length1=${#db[*]}		#or @ instead of *
+
+#BETTER: let user indicate the executable directory
+gstore=/home/zengli/zengli/Gstore/
+virtuoso=/home/zengli/virtuoso/bin/
+sesame=/home/zengli/sesame/bin/
+jena=/home/zengli/jena/bin/
+#NOTICE: maybe oldGstore and newGstore
+#dbms_path=($gstore $jena $sesame $virtuoso)
+dbms_path=($gstore $jena)
+dbms_name=(gstore jena)
+#dbms_name=(gstore jena sesame virtuoso)
+length2=${#dbms_path[*]}		#or @ instead of *
+
+#for each db, compare, pass db and query as parameter 
+#firstly load database, then query with unique program
+#output format:	in each dbms, time.log/ result.log/
+#use each dataset name as subfolder like lubm_10.nt/ in result.log/
+#and time.log/lubm_10.nt.log, and for each query corresponding
+#to a dataset: result.log/lubm_10.nt/q1.sql.log
+#Finally, in the directory where this script is placed in, also 
+#build result.log/ and time.log/
+#result.log/lubm_10.nt.tsv time.log/lubm_10.nt.tsv size.log.tsv
+
+#below is old:
+#time log should be used in excel, and compare result log:
+#diff or grep -vFf file1 file2
+#better to compare line by line using awk
+
+log1=result.log/
+log2=time.log/
+log3=load.log/	
+
+#start from empty garbage/, result.log/, time.log/ and load.log/ folders
+#in the directory this script is launched from
+home=$(pwd)
+for fresh_dir in garbage/ ${log1} ${log2} ${log3}
+do
+	target=${home}/${fresh_dir}
+	#drop whatever a previous run left behind, then recreate the folder
+	[ -d ${target} ] && rm -rf ${target}
+	mkdir ${target}
+done
+
+#clean logs in each dbms
+function initial()
+{
+	#recreate the three log folders (result, time, load) relative to the
+	#current working directory, discarding any previous content
+	local log_dir
+	for log_dir in $log1 $log2 $log3
+	do
+		[ -d $log_dir ] && rm -rf $log_dir
+		mkdir $log_dir
+	done
+}
+
+#size.tsv:the size after loaded		time.tsv:time used to load
+tsv3=${home}/${log3}time.tsv
+tsv4=${home}/${log3}size.tsv
+dsnum=0
+for i in `seq $length1`
+do
+	i=`expr $i - 1`
+	for tmpdb in `ls ${path}/${db[i]}/database/*.nt`
+	do
+		dsnum=`expr $dsnum + 1`
+		if [ $dsnum -ne 1 ]
+		then
+			sleep 60	#for other processes
+			echo 3 > /proc/sys/vm/drop_caches
+		fi
+		cntdb="${tmpdb##*/}"
+		echo "$tmpdb"	#in case of special characters like &
+		tsv1=${home}/${log1}/${cntdb}.tsv	#compare result
+		tsv2=${home}/${log2}/${cntdb}.tsv	#compare time
+		echo $tsv1
+		echo $tsv2
+		#load this database into each dbms
+		for j in `seq $length2`
+		do
+			j=`expr $j - 1`
+			cd ${dbms_path[j]}
+			#build logs structure
+			echo "build logs structure!"
+			if [ $dsnum -eq 1 ]
+			then
+				initial
+			fi
+			mkdir ${log1}/${cntdb}	#pass the cntdb if using function
+			#touch ${log2}/${cntdb}.log
+			#if [ ${dbms_name[j]}x = ${gstore}x ]	#add a x in case of empty 
+			if [ ${j} -eq 0 ]	#otherwise will unary error
+			then
+				echo "this is for gstore!"
+				./gload $cntdb $tmpdb > load.txt
+				#awk '{if($1=="after" && $2=="build," && $3=="used"){split($4, a,"m");print "time:\t"a[1]}}'  load.txt > load_${cntdb}.log
+				awk '{if($1=="after" && $2=="build," && $3=="used"){split($4, a,"m");print "'$cntdb'""\t"a[1]}}'  load.txt >> ${log3}/time.log
+				#elif [ ${dbms[j]}x = ${virtuoso}x ]
+				#elif [ ${dbms[j]}x = ${sesame}x ]
+				#elif [ ${dbms[j]}x = ${jena}x ]
+			elif [ ${j} -eq 1 ]
+			then
+				echo "this is for jena!"
+				./tdbloader --loc "$cntdb" "$tmpdb" > load.txt 2>&1
+				#awk '{if(NR==1){s=$1}else{t=$1}}END{split(s,a,":");split(t,b,":");ans=0+(b[1]-a[1])*3600+(b[2]-a[2])*60+(b[3]-a[3]);printf("%s\t%d\n", "time:", ans*1000);}' load.txt > load_${cntdb}.log
+				awk '{if(NR==1){s=$1}else{t=$1}}END{split(s,a,":");split(t,b,":");ans=0+(b[1]-a[1])*3600+(b[2]-a[2])*60+(b[3]-a[3]);printf("%s\t%d\n", "'$cntdb'", ans*1000);}' load.txt >> ${log3}/time.log
+				#cat load.txt >> "load_${cntdb}.log"
+			elif [ ${j} -eq 2 ]
+			then
+				echo "this is for sesame!"
+				#TODO
+			elif [ ${j} -eq 3 ]
+			then
+				echo "this is for virtuoso!"
+				#TODO
+			fi
+			#ls -l sums the actual size, unit is k
+			echo "now to sum the database size!"
+			#ls -lR "$cntdb" | awk 'BEGIN{sum=0}{if($1=="total"){sum=sum+$2}}END{print "size:\t"sum}' >> load_${cntdb}.log
+			ls -lR "$cntdb" | awk 'BEGIN{sum=0}{if($1=="total"){sum=sum+$2}}END{print "'$cntdb'""\t"sum}' >> ${log3}/size.log
+
+			timelog=${log2}/${cntdb}.log
+			touch $timelog
+			for query in `ls ${path}/${db[i]}/query/*.sql`
+			do
+				echo $query
+				#build logs structure
+				anslog=${log1}/${cntdb}/${query##*/}.log
+				touch $anslog	#needed because the result maybe empty
+				#run $query on the current dbms (selected by index j) and split
+				#its raw output (ans.txt) into the time log and the answer log
+				if [ ${j} -eq 0 ]
+				then
+					echo "this is for gstore!"
+					./gquery "$cntdb" $query > ans.txt
+					#"Total time used: <n>ms ..." -> "<query>\t<n>"
+					awk -F ':' 'BEGIN{query="'$query'"}{if($1=="Total time used"){split($2, a, "m");split(a[1],b," ");}}END{print query"\t"b[1]}' ans.txt >> $timelog
+					#grep "Total time used:" ans.txt | grep -o "[0-9]*ms" >> ${log2}/${cntdb}.log
+					#keep the lines between "final result is :" and the next blank line;
+					#the brackets are escaped so that only the literal "[empty result]"
+					#marker is skipped (unescaped they form a character class that
+					#drops every line containing one of those letters)
+					awk -F ':' 'BEGIN{flag=0}{if(flag==1 && $0 ~/^$/){flag=2}if(flag==1 && !($0 ~/\[empty result\]/)){print $0}if($1=="final result is "){flag=1}}' ans.txt > $anslog
+					#awk 'BEGIN{flag=0}{if(flag==1){print $0}if($1 ~/^final$/){flag=1}}' ans.txt > ${log1}/${cntdb}/${query}.log
+				elif [ ${j} -eq 1 ]
+				then
+					echo "this is for jena!"
+					#NOTICE: for program output in stderr(no cache), deal like this
+					./tdbquery --repeat 2,1 --time --results TSV --loc "$cntdb" --query $query > ans.txt 2>&1 
+					#NOTICE: redirect in awk, and jena 
+					#use old var to remove duplicates
+					awk 'BEGIN{old=""}{if(NR>1){if($1 ~/Time:/ && $3 ~/sec/){time=$2*1000;print "'$query'""\t"time >> "'$timelog'"}else if(!($0 ~/^$/) && $0 != old){print $0 >> "'$anslog'";old=$0}}}'	ans.txt
+				elif [ ${j} -eq 2 ]
+				then
+					echo "this is for sesame!"
+					#TODO
+				elif [ ${j} -eq 3 ]
+				then
+					echo "this is for virtuoso!"
+					#TODO
+				fi
+				#sort according to the path order
+				echo "now to sort anslog!"
+				mv $anslog ${anslog}.bak
+				#use own defined select-sort function
+				#this function can also delete duplicates
+				#BETTER: must use external-sorting when too large
+			#	awk -F '\t' '{for(i=1;i<=NF;++i)arr[NR]=$0}
+			#	END{
+			#		nr=sortArr(arr,NR,NF);
+			#		for(i=1;i<=nr;++i){print arr[i]}}
+			#	function sortArr(arr, nr, nf){
+			#		for(p=1;p<nr;++p){
+			#			min=p;
+			#			for(q=p+1;q<=nr;++q){
+			#				ret=less(arr[q],arr[min],nf);
+			#				if(ret==1){min=q}
+			#				else if(ret==0){swap(arr,q,nr);nr=nr-1;q=q-1}}
+			#			if(min!=p){swap(arr,p,min)}}
+			#		return nr}
+			#	function swap(arr,t1,t2){t=arr[t1];arr[t1]=arr[t2];arr[t2]=t}
+			#	function less(t1,t2,nf){
+			#		split(t1,s1,"\t");
+			#		split(t2,s2,"\t");
+			#		for(k=1;k<=nf;++k){
+			#			print s1[k]"\t"s2[k]
+			#			if(s1[k]<s2[k]){ return 1 }
+			#			else if(s1[k]>s2[k]) { return 2 }
+			#			else { continue; } }
+			#		return 0 }' ${anslog}.bak > ${anslog}
+				#-k1n -k2r ...
+				sort -t $'\t' -u ${anslog}.bak > ${anslog}
+				rm -f ${anslog}.bak
+			done
+			echo "now to sort timelog!"
+			mv $timelog ${timelog}.bak
+			awk -F '\t' '{print $1"\t"$2 | "sort -k1"}' ${timelog}.bak > ${timelog}
+			rm -f ${timelog}.bak
+			#remove the db when finished
+			echo "now to remove the cntdb!"
+			rm -rf "$cntdb"
+			#BETTER:remove *.txt in each dbms path
+			#rm -f *.txt
+			#compare time and construct the TSV table
+			if [ ${j} -eq 0 ]
+			then
+				echo "this is the first dbms!"
+				awk -F '\t' 'BEGIN{
+					print "Time\t""'${dbms_name[j]}'" }
+				{ num=split($1,str,"/"); print str[num]"\t"$2 }' ${timelog} > ${tsv2}
+			else
+				echo "this is not the first dbms!"
+				mv ${tsv2} ${tsv2}.bak
+				awk -F '\t' '{
+				if(NR==FNR) {
+					num=split($1,str,"/");
+					map[str[num]]=$2 }
+				else {
+					if(FNR==1) { print $0"\t""'${dbms_name[j]}'" }
+					else { print $0"\t"map[$1] }
+				}}' ${timelog} ${tsv2}.bak > ${tsv2}
+				rm -f ${tsv2}.bak
+			fi
+		done
+		#compare the result and construct the TSV table
+		echo "now to compare the results!"
+		cd ${home}
+		tvar1=`expr $length2 - 1`  
+		tvar2=`expr $length2 - 2`
+		for p in `seq 0 $tvar2`
+		do
+			tvar3=`expr $p + 1`
+			for q in `seq $tvar3 $tvar1`
+			do
+				echo $p,$q
+				>compare.txt
+				for query in `ls ${path}/${db[i]}/query/*.sql`
+				do
+					echo "compare: " $query
+					tmplog=${log1}/${cntdb}/${query##*/}.log
+					awk -F '\t' 'BEGIN{flag=0}{
+					if(NR==FNR){map[NR]=$0}
+					else if(flag==0){
+						num=split(map[FNR],str1,"\t");
+						split($0,str2,"\t");
+						for(i=1;i<=num;++i){
+							if(str1[i]!=str2[i]){
+								flag=1;break}}}}
+								END{
+								if(flag==0){print "'${query##*/}'""\tY"}
+								else{print "'${query##*/}'""\tN"}}' ${dbms_path[p]}/${tmplog} ${dbms_path[q]}/${tmplog} >> compare.txt
+					#diff ${dbms_path[p]}/${tmplog} ${dbms_path[q]}/${tmplog}
+					#if [ $? -ne 0 ]
+					#then
+					#	echo -e ${query##*/}"\tN" >> compare.txt
+					#else
+					#	echo -e ${query##*/}"\tY" >> compare.txt
+					#fi
+				done
+				echo "all queries done!"
+				name=${dbms_name[p]}_${dbms_name[q]}
+				if [ $p -eq 0 ] && [ $q -eq 1 ]
+				then
+					awk -F '\t' 'BEGIN{print "Result\t""'$name'"}{print $0}' compare.txt > ${tsv1}
+				else
+					mv ${tsv1} ${tsv1}.bak
+					awk -F '\t' '{
+					if(NR==FNR) { map[$1]=$2 }
+					else {
+						if(FNR==1) { print $0"\t""'$name'" }
+						else { print $0"\t"map[$1] }
+					}}' compare.txt ${tsv1}.bak > ${tsv1}
+					rm -f ${tsv1}.bak
+				fi
+			done
+		done
+	done
+done
+
+#build the load.log/ in home(this script):
+#merge each dbms's load.log/time.log and load.log/size.log into the
+#home tables $tsv3 (load time) and $tsv4 (database size), one column per dbms
+echo "now to build the load.log!"
+for j in `seq $length2`
+do
+	j=`expr $j - 1`
+	cd ${dbms_path[j]}
+	if [ $j -eq 0 ]
+	then
+		echo "this is the first dbms!"
+		#the first dbms creates both tables: header row plus its own column
+		awk -F '\t' 'BEGIN{print "dataset\\dbms\t""'${dbms_name[j]}'"}{print $0}' ${log3}/time.log > $tsv3
+		awk -F '\t' 'BEGIN{print "dataset\\dbms\t""'${dbms_name[j]}'"}{print $0}' ${log3}/size.log > $tsv4
+	else
+		echo "this is not the first dbms!"
+		#every other dbms appends one column, joined on the dataset name
+		mv ${tsv3} ${tsv3}.bak
+		awk -F '\t' '{
+		if(NR==FNR) { map[$1]=$2 }
+		else {
+			if(FNR==1) { print $0"\t""'${dbms_name[j]}'" }
+			else { print $0"\t"map[$1] }
+			}}' ${log3}/time.log ${tsv3}.bak > ${tsv3}
+		rm -f ${tsv3}.bak
+		mv ${tsv4} ${tsv4}.bak
+		#the size table must be joined with size.log (not time.log)
+		awk -F '\t' '{
+		if(NR==FNR) { map[$1]=$2 }
+		else {
+			if(FNR==1) { print $0"\t""'${dbms_name[j]}'" }
+			else { print $0"\t"map[$1] }
+			}}' ${log3}/size.log ${tsv4}.bak > ${tsv4}
+		rm -f ${tsv4}.bak
+	fi
+done
+
+echo "this is the end of full test!"
+echo "please visit the result.log/, time.log/ and load.log/"
+echo "you can use excel to load the .tsv files"
+
--- a/test/gtest.cpp
+++ b/test/gtest.cpp
@ -0,0 +1,332 @@
+/*=============================================================================
+# Filename: gtest.cpp
+# Author: syzz
+# Mail: 1181955272@qq.com
+# Last Modified: 2015-09-02 00:04
+# Description: load index once and query, there are several ways to use this program:
+1.  ./gtest								test all datasets and corresponding queries
+2.	./gtest --help						simplified as -h, will print the help message
+3.	./gtest -f DS_PATH					load/test a specified dataset, with all corresponding queries
+4.  ./gtest -d FD_PATH					load/test a dataset folder(like WatDiv/), with all corresponding queries
+5.  ./gtest -q DB_PATH q1 q2...			test a loaded database with given queries(no limit to db and query)
+=============================================================================*/
+
+//#include <stdio.h>
+//#include <string.h>
+//#include <stdlib.h>
+//#include <dirent.h>
+//#include <unistd.h>
+#include "../Database/Database.h"
+#include "../Util/Util.h"
+
+using namespace std;
+
+#define NUM 4
+
+char line1[] = "--------------------------------------------------";
+char line2[] = "##################################################";
+string path = "/media/wip/common/data/";
+string db[NUM] = {"WatDiv/", "LUBM/", "DBpedia/", "Yago/"};
+
+//not achieved, so do not use
+void
+build_logs()
+{
+	//recreate result.log/ and time.log/ from scratch
+	system("rm -rf result.log/");
+	system("rm -rf time.log/");
+	system("mkdir result.log");
+	system("mkdir time.log");
+
+	//one sub-folder per dataset family in both log folders
+	for(int idx = 0; idx < NUM; ++idx)
+	{
+		//quote the name in case of special characters like &
+		const string quoted = "\"" + db[idx] + "\"";
+		const string mk_result = string("mkdir result.log/") + quoted;
+		system(mk_result.c_str());
+		const string mk_time = string("mkdir time.log/") + quoted;
+		system(mk_time.c_str());
+	}
+}
+
+//Check whether the C string p ends with the suffix p0 (case sensitive).
+//Returns false when either pointer is NULL or when p is shorter than p0;
+//an empty p0 matches every p.
+bool		//if satisfy suffix; case sensitive
+judge(const char* p, const char* p0)	//both not empty
+{
+	if(p == NULL || p0 == NULL)
+		return false;
+	const size_t len = strlen(p), len0 = strlen(p0);
+	//a name shorter than the suffix can never end with it
+	//(e.g. "ql" must not be accepted for ".sql")
+	if(len < len0)
+		return false;
+	//compare the last len0 characters of p against the whole suffix
+	return strcmp(p + (len - len0), p0) == 0;
+}
+
+//Read the whole text file at path and return its content, with every line
+//terminated by "\n". Returns "" (after printing a message) when the file
+//can not be opened; an empty file also yields "".
+string
+getQueryFromFile(const char* path)
+{
+	ifstream fin(path);
+	if(!fin)
+	{
+		printf("can not open %s\n", path);
+		return "";
+	}
+	stringstream ss;
+	string line;
+	//getline on a std::string has no length limit and the loop ends on any
+	//stream failure, so neither very long lines nor the final EOF can cause
+	//an endless loop or a spurious extra line
+	while(getline(fin, line))
+	{
+		ss << line << "\n";
+	}
+	//fin is closed by its destructor
+	return ss.str();
+}
+
+//Print the usage text of gtest to stdout.
+//The text is one string literal written with backslash line continuations,
+//so the leading whitespace of the continued lines is part of the output.
+void
+help()
+{
+	printf("\
+			/*=============================================================================\n\
+# Filename: gtest.cpp\n\
+# Author: syzz\n\
+# Mail: 1181955272@qq.com\n\
+# Description: load index once and query, there are several ways to use this program:\n\
+1.  ./gtest                             test all datasets and corresponding queries\n\
+2.  ./gtest --help                      simplified as -h, will print the help message\n\
+3.  ./gtest -f DS_PATH                  load/test a specified dataset, with all corresponding queries\n\
+4.  ./gtest -d FD_PATH                  load/test a dataset folder(like WatDiv/), with all corresponding queries\n\
+5.  ./gtest -q DB_PATH q1 q2...         test a loaded database with given queries(no limit to db and query)\n\
+=============================================================================*/\n\
+			");
+}
+
+FILE *fp0 = NULL, *fp1 = NULL, *fp2 = NULL;
+
+//Run the single query stored in file qf against the loaded database _db.
+//Progress goes to fp0, the elapsed time to fp1 and the result set to fp2.
+void								//single dataset, single query
+runSS(Database* _db, string qf)		//given a query file
+{
+	const char* qf_name = qf.c_str();
+	string sparql = getQueryFromFile(qf_name);
+	if(sparql.empty())
+	{
+		//nothing to run: report it in the run log and skip this file
+		fprintf(fp0, "this query is empty:\n%s\n", qf_name);
+		fflush(fp0);
+		return;
+	}
+	fprintf(fp0, "%s\n", qf_name);
+	fflush(fp0);
+
+	//time only the query call itself
+	ResultSet _rs;
+	long tv_begin = Util::get_cur_time();
+	_db->query(sparql, _rs, stdout);
+	long tv_final = Util::get_cur_time();
+
+	//print to logs; time is standarded as ms
+	fprintf(fp1, "%s\n%ld\n%s\n", qf_name, tv_final - tv_begin, line1);
+	fprintf(fp2, "%s\n%s%s\n", qf_name, _rs.to_str().c_str(), line1);
+	fflush(fp0);
+	fflush(fp1);
+	fflush(fp2);
+}
+
+//Run every "*.sql" query file found in directory qd against the loaded
+//database _db (one runSS call per file). qd is expected to end with '/'
+//because file names are appended to it directly.
+void								//single dataset, multi queries
+runSM(Database* _db, string qd)		//given a query directory
+{
+	DIR* dp2 = opendir(qd.c_str());
+	if(dp2 == NULL)
+	{
+		//readdir on a NULL stream is undefined, so report and give up
+		fprintf(fp0, "can not open query directory: %s\n", qd.c_str());
+		fflush(fp0);
+		return;
+	}
+	struct dirent* p2;
+	//finish all queries
+	while((p2 = readdir(dp2)) != NULL)
+	{
+		if(!judge(p2->d_name, ".sql"))
+			continue;
+		string file = qd + string(p2->d_name);
+		runSS(_db, file);
+	}
+	fprintf(fp0, "finish all queries!\n");
+	fflush(fp0);
+	closedir(dp2);
+	fprintf(fp0, "ok to close dp2!\n");
+}
+
+//For every "*.nt" dataset in <df>database/: build it with ./gload, load the
+//index, run all queries of <df>query/ on it (runSM), then remove the built
+//database folder again. Progress goes to fp0, section markers to fp1/fp2.
+void					//multi datasets, multi queries
+runMM(string df)		//given dataset folder, end with /
+{
+	const string s1 = df + string("database/");
+	const string s2 = df + string("query/");
+	DIR* dp1 = opendir(s1.c_str());
+	if(dp1 == NULL)
+	{
+		//readdir on a NULL stream is undefined, so report and give up
+		fprintf(fp0, "can not open dataset directory: %s\n", s1.c_str());
+		fflush(fp0);
+		return;
+	}
+	struct dirent* p1;
+	string cmd;
+	while((p1 = readdir(dp1)) != NULL)
+	{
+		if(!judge(p1->d_name, ".nt"))
+			continue;
+		//keep an own copy of the entry name for the whole iteration
+		const string ds_name(p1->d_name);
+		system("echo 3 > /proc/sys/vm/drop_caches");
+		//build the database index first
+		string db_folder = s1 + ds_name;
+		fprintf(fp0, "%s\n", db_folder.c_str());
+		cmd = string("./gload \"") + ds_name + "\" \"" + db_folder + "\" > \"gload_" + ds_name + ".log\"";	//NOTICE:\" is used to ensure file name with &(etc...) will work ok!
+		fprintf(fp0, "%s\n", cmd.c_str());
+		system(cmd.c_str());
+		fprintf(fp0, "ok to load database!\n");
+		//then load the index
+		Database* _db = new Database(ds_name);
+		_db->load();
+		fprintf(fp0, "ok to load index!\n");
+		//print to logs
+		fprintf(fp1, "%s\n%s\n", ds_name.c_str(), line1);
+		fprintf(fp2, "%s\n%s\n", ds_name.c_str(), line1);
+
+		runSM(_db, s2);
+
+		fflush(fp0);
+		fprintf(fp1, "%s\n", line2);
+		fprintf(fp2, "%s\n", line2);
+		//NOTICE:may double free due to ~Database
+		//to remove one when finished, should clean firstly like ~Database()
+		delete _db;//_db->release(fp0);
+		cmd = string("rm -rf \"") + ds_name + string("\"");
+		system(cmd.c_str());
+		fprintf(fp0, "ok to remove %s\n", ds_name.c_str());
+		fflush(fp0);
+		fflush(fp1);
+		fflush(fp2);
+	}
+	closedir(dp1);
+}
+
+//Close the three global log streams (fp0, fp1, fp2) that are open and reset
+//them to NULL, so that calling clean() again is harmless.
+void
+clean()
+{
+	//it is ok to check like this here!
+	//in other situations, notice that fp is not null after closed!
+	FILE** handles[] = {&fp0, &fp1, &fp2};
+	const size_t count = sizeof(handles) / sizeof(handles[0]);
+	for(size_t k = 0; k < count; ++k)
+	{
+		FILE*& fp = *handles[k];
+		if(fp == NULL)
+			continue;
+		fclose(fp);
+		fp = NULL;
+	}
+}
+
+//Terminate the program with status ret, calling clean() first.
+void 
+EXIT(int ret)
+{
+	clean();	//close the log files before leaving
+	exit(ret);	//does not return
+}
+
+//Entry point of gtest. Opens run.log/time.log/result.log, then dispatches on
+//the command line:
+//  (no argument)     ask for confirmation, then test every dataset folder in db[]
+//  -h | --help       print the help text
+//  -f DS_PATH        not achieved yet
+//  -d FD_PATH        test one dataset folder
+//  -q DB_PATH q...   not achieved yet
+//Any other usage exits with status 1.
+int			
+main(int argc, const char* argv[])
+{
+#ifdef DEBUG
+	Util util;
+#endif
+	//BETTER: enable user to assign the files for log
+	fp0 = fopen("run.log", "w+");
+	if(fp0 == NULL)
+	{
+		printf("open error!\n");
+		EXIT(1);
+	}
+	//build_logs();
+	fp1 = fopen("time.log", "w+");
+	if(fp1 == NULL)
+	{
+		printf("open error!\n");
+		EXIT(1);
+	}
+	fp2 = fopen("result.log", "w+");
+	if(fp2 == NULL)
+	{
+		printf("open error!\n");
+		EXIT(1);
+	}
+
+	switch(argc)
+	{
+		case 1:
+		{
+			help();
+			printf("this command will do a complete test for Gstore, do you want to continue?YN\n");
+			char c;
+			int tries = 0;
+			for(;;)
+			{
+				//EOF or a read error gives no answer at all: stop instead of
+				//testing a character that was never read
+				if(scanf("%c", &c) != 1)
+					EXIT(1);
+				if(c == 'Y' || c == 'y')
+				{
+					//NUM is the size of db[], keep both in sync
+					for(int j = 0; j < NUM; ++j)
+					{
+						runMM(path+db[j]);
+					}
+					break;
+				}
+				else if(c == 'N' || c == 'n')
+					EXIT(0);
+				tries++;
+				if(tries > 10)				//try no more than 10 times
+					EXIT(1);
+			}
+			break;
+		}
+		case 2:
+			if(strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
+			{
+				help();
+				EXIT(0);
+			}
+			else
+				EXIT(1);
+			break;		//not reached (EXIT never returns), kept to rule out fall-through
+		case 3:
+			if(strcmp(argv[1], "-f") == 0)		//this file must be in db[]/database/, end with ".nt"
+			{
+				//TODO:use judge to check if is .nt file
+				//directory part of the dataset path (up to and including the
+				//last '/', empty when there is none), then its sibling query/
+				const string ds_file(argv[2]);
+				const size_t slash = ds_file.rfind('/');
+				const string ds_dir = (slash == string::npos) ? string("") : ds_file.substr(0, slash + 1);
+				const string query_dir = ds_dir + "../query/";
+				(void)query_dir;	//unused until the TODO below is done
+				//TODO: gload the dataset and build a new function!
+				printf("not achieved!\n");
+			}
+			else if(strcmp(argv[1], "-d") == 0)
+			{
+				runMM(string(argv[2]) + "/");
+			}
+			else
+				EXIT(1);
+			break;
+		default:		//> 3 (or 0 when no program name was passed)
+			if(argc > 1 && strcmp(argv[1], "-q") == 0)
+			{
+				//TODO: user must gload first?
+				printf("not achieved!\n");
+			}
+			else 
+				EXIT(1);
+			break;
+	}
+
+	clean();
+	return 0;
+}
+
--- a/test/hole.c
+++ b/test/hole.c
@ -0,0 +1,46 @@
+/*=============================================================================
+# Filename:		hole.c
+# Author: syzz
+# Mail: 1181955272@qq.com
+# Last Modified:	2015-05-12 15:40
+# Description: 
+=============================================================================*/
+
+#include <stdio.h>
+
+/* Create "a.dat" with a file hole: write one byte at offset 0, seek to
+ * offset 4096 * 10 and write an int there. Returns 0 on success and 1 when
+ * the file can not be opened, positioned or written. */
+int 
+main(int argc, char* argv[]) 
+{
+	int i = 0;
+	char c = 'A';
+	int ret = 0;
+	FILE* fp = fopen("a.dat", "w+b");
+	(void)argc;
+	(void)argv;
+	if(fp == NULL)
+	{
+		perror("a.dat");
+		return 1;
+	}
+	if(fwrite(&c, sizeof(char), 1, fp) != 1)
+		ret = 1;
+	//when moving to tail, always return 0(point to tail when exceeding) 
+	//when moving to head, -1 when exceeding(point to original)
+	if(fseek(fp, 4096 * 10, SEEK_SET) != 0)
+		ret = 1;
+	//file hole, different across platforms
+	//mv just change index, so not vary, cp may vary(if transfer data, not neglect hole)
+	//compress/uncompress will change the size with du -h
+	//du and ll is different: one for block usage(also neglect hole), 
+	//one for actual size(not neglect hole)
+	//what's more, even after cp or compress/uncompress, there may also be holes.
+	if(ret == 0 && fwrite(&i, sizeof(int), 1, fp) != 1)
+		ret = 1;
+	//buffered data is written on close, so its failure counts as well
+	if(fclose(fp) != 0)
+		ret = 1;
+	if(ret != 0)
+		fprintf(stderr, "failed to write a.dat\n");
+/*
+	FILE* fp = fopen("data.dat", "r+");
+	char c;
+	while((c = fgetc(fp)) != EOF)
+		printf("%c", c);
+	fclose(fp);
+	printf("\n");
+	*/
+/*
+	printf("%c%c%c", 0xE6, 0x88, 0x91);
+	printf("%c%c%c", 0xE7, 0x88, 0xB1);
+	printf("%c%c%c", 0xE5, 0x90, 0xB4);
+	printf("%c%c%c", 0xE5, 0x98, 0x89);
+	printf("%c%c%c\n", 0xE5, 0x8D, 0xBF);
+	*/
+	return ret;
+}
+
--- a/test/insert_test.cpp
+++ b/test/insert_test.cpp
@ -0,0 +1,27 @@
+/*
+ * insert_test.cpp
+ *
+ *  Created on: 2014-12-03
+ *      Author: Caesar11
+ */
+
+#include "../Util/Util.h"
+#include "../Database/Database.h"
+
+using namespace std;
+
+//Usage: insert_test DB_FOLDER RDF_FILE
+//Opens the database DB_FOLDER and passes RDF_FILE to Database::insert.
+//Returns 1 (after printing the usage) when an argument is missing.
+int main(int argc, char * argv[])
+{
+#ifdef DEBUG
+	Util util;
+#endif
+    //argv[1] and argv[2] are only valid when both arguments were given
+    if(argc < 3)
+    {
+        cerr << "usage: " << (argc > 0 ? argv[0] : "insert_test")
+             << " db_folder insert_rdf_file" << endl;
+        return 1;
+    }
+    string db_folder = string(argv[1]);
+    string insert_rdf_file = string(argv[2]);
+    Database _db(db_folder);
+
+    _db.insert(insert_rdf_file);
+
+    return 0;
+}
+
+
--- a/test/join_test.cpp
+++ b/test/join_test.cpp
@ -0,0 +1,33 @@
+/*
+ * join_test.cpp
+ *
+ *  Created on: 2014-7-30
+ *      Author: liyouhuan
+ */
+
+#include "../Util/Util.h"
+#include "../Database/Database.h"
+
+using namespace std;
+int main()
+{
+#ifdef DEBUG
+	Util util;
+#endif
+	cout << "build signature" << endl;
+
+	//candidate datasets on the test machine; only one of them is built below
+	string yago2_path("/media/wip/common/data/yago2_triple");
+	string yago_path("/media/wip/common/data/yago_triple");
+	string yago_1000000_path("/media/wip/common/data/yago_1000000");
+	string yago_10000_path("/media/wip/common/data/yago_10000");
+	string yago_example_path("/media/wip/common/data/yago_example");
+
+	//build "db_test" from the selected dataset, run the database self test,
+	//then the join test
+	string dataset(yago_10000_path);
+	Database test_db("db_test");
+	Util::log("RDF:" + dataset);
+	test_db.build(dataset);
+	test_db.test();
+	Util::log("finish build");
+	test_db.test_join();
+
+	return 0;
+}
+
--- a/test/sumline.sh
+++ b/test/sumline.sh
@ -0,0 +1,2 @@
+find . -type f -print | grep -E "\.(c(pp)?|h)$" | xargs wc -l
+
--- a/test/testParser.cpp
+++ b/test/testParser.cpp
@ -0,0 +1,44 @@
+/*=============================================================================
+# Filename: testParser.cpp
+# Author: Bookug Lobert 
+# Mail: 1181955272@qq.com
+# Last Modified: 2015-10-24 19:57
+# Description: 
+=============================================================================*/
+
+#include "../Util/Util.h"
+#include "../Parser/DBparser.h"
+#include "../Query/SPARQLquery.h"
+
+int main()
+{
+#ifdef DEBUG
+	Util util;
+#endif
+	std::cout << "start" << std::endl;
+	DBparser db_parser;
+	SPARQLquery sparql_query;
+	std::string sparql("");
+
+	//sample with two prefixes and a UNION; built but not parsed below
+	std::string union_sample(
+			"PREFIX dc10:  <http://purl.org/dc/elements/1.0/> "
+			"PREFIX dc11:  <http://purl.org/dc/elements/1.1/> "
+			"SELECT ?title "
+			"WHERE  { { ?book dc10:title  ?title } "
+			"UNION { ?book dc11:title  ?title } }");
+
+	//the query that is actually handed to the parser
+	std::string parsed_sample(
+		"PREFIX foaf: <http://xmlns.com/foaf/0.1/> "
+		"SELECT ?x1 ?x4 ?x5 WHERE {{?x1	y:hasGivenName	?x5."
+		"?x1	y:hasFamilyName	?x6."
+		"?x1	rdf:type	<wordnet_scientist_110560637>."
+		"?x1	y:bornIn	?x2."
+		"?x1	y:hasAcademicAdvisor	?x4."
+		"?x2	y:locatedIn	<Switzerland>."
+		"?x3	y:locatedIn	<Germany>."
+		"?x4	y:bornIn	?x3.}"
+		"UNION { ?book dc11:title  ?title}"
+		"} order by ?x5");
+
+	//echo the text, parse it and print the parsed query
+	std::cout << "string: " << parsed_sample << std::endl;
+	db_parser.sparqlParser(parsed_sample, sparql_query);
+	sparql_query.print(cout);
+	
+	return 0;
+}
+
--- a/test/vstree_test.cpp
+++ b/test/vstree_test.cpp
@ -0,0 +1,28 @@
+/*=============================================================================
+# Filename: vstree_test.cpp
+# Author: Bookug Lobert 
+# Mail: 1181955272@qq.com
+# Last Modified: 2015-10-24 19:57
+# Description: 
+=============================================================================*/
+
+#include "../Util/Util.h"
+#include "../Database/Database.h"
+
+using namespace std;
+
+int main()
+{
+#ifdef DEBUG
+	Util util;
+#endif
+	cout << "vstree_test" << endl;
+
+	//build the database "db_test" from the yago_example dataset
+	string yago_example_path("/media/wip/common/data/yago_example");
+	string dataset(yago_example_path);
+	Database test_db("db_test");
+	test_db.build(dataset);
+	Util::log("finish build");
+
+	return 0;
+}
+
				`@ -0,0 +1,2 @@`
				`find . -type f -print \| grep -E "\.(c(pp)?\|h)$" \| xargs wc -l`