not use git-lfs for data;add missing files in data/ and test/
This commit is contained in:
parent
8a1aaae8ef
commit
6224d132b3
|
@ -12,5 +12,3 @@
|
|||
*.wav filter=lfs diff=lfs merge=lfs -text
|
||||
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
||||
*.rmvb filter=lfs diff=lfs merge=lfs -text
|
||||
*.nt filter=lfs diff=lfs merge=lfs -text
|
||||
*.n3 filter=lfs diff=lfs merge=lfs -text
|
||||
|
|
|
@ -14,7 +14,7 @@ This system is really user-friendly and you can pick it up in several minutes. R
|
|||
|
||||
Then you need to compile the project: just type `make` in the gStore root directory, and all executables will be built. To run gStore, type `./gload database_name dataset_path` to build a database with a name of your choice. You can then use the `./gquery database_name` command to query an existing database.
|
||||
|
||||
**Notice: Some files(*.jpg, *.docx, *.pdf) in the docs/ folder are stored using Git Large File Storage, you need to install [git-lfs](https://git-lfs.github.com/) to clone/download if you want to include them in the project. Do not care it if you do not want to see them.**
|
||||
**Notice: Some files (*.jpg, *.docx, *.pdf) in the docs/ folder, and the *.tar.gz files in the test/ folder, are stored using Git Large File Storage. To include them when you clone/download the project, you need to install [git-lfs](https://git-lfs.github.com/). You can ignore this if you do not need those files.**
|
||||
|
||||
- - -
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,4 @@
|
|||
select ?x where
|
||||
{
|
||||
?x <ub:name> <FullProfessor0>.
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
select ?x where
|
||||
{
|
||||
?x <rdf:type> <ub:GraduateStudent>.
|
||||
?y <rdf:type> <ub:University>.
|
||||
?z <rdf:type> <ub:Department>.
|
||||
?x <ub:memberOf> ?z.
|
||||
?z <ub:subOrganizationOf> ?y.
|
||||
?x <ub:undergraduateDegreeFrom> ?y.
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
select ?x where
|
||||
{
|
||||
?x <rdf:type> <ub:Course>.
|
||||
?x <ub:name> ?y.
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
select ?x where
|
||||
{
|
||||
?x <rdf:type> <ub:UndergraduateStudent>.
|
||||
?y <ub:name> <Course1>.
|
||||
?x <ub:takesCourse> ?y.
|
||||
?z <ub:teacherOf> ?y.
|
||||
?z <ub:name> <FullProfessor1>.
|
||||
?z <ub:worksFor> ?w.
|
||||
?w <ub:name> <Department0>.
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
select ?x where
|
||||
{
|
||||
?x <rdf:type> <ub:UndergraduateStudent>.
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Gstore.cpp
|
||||
*
|
||||
* Created on: 2014-7-1
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
string _yago2_triple = "/media/wip/common/data/yago2_triple";
|
||||
string _yago = "/media/wip/common/data/yago_triple";
|
||||
string _yago_1000000 = "/media/wip/common/data/yago_1000000";
|
||||
string _yago_example = "/media/wip/common/data/yago_example";
|
||||
string _rdf = _yago_1000000;
|
||||
Database _db("db_test");
|
||||
_db.build(_rdf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* build_signature.cpp
|
||||
*
|
||||
* Created on: 2014-7-23
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
cout << "build signature" << endl;
|
||||
string _yago2_triple = "/media/wip/common/data/yago2_triple";
|
||||
string _yago = "/media/wip/common/data/yago_triple";
|
||||
string _yago_1000000 = "/media/wip/common/data/yago_1000000";
|
||||
string _yago_10000 = "/media/wip/common/data/yago_10000";
|
||||
string _yago_example = "/media/wip/common/data/yago_example";
|
||||
string _rdf = _yago_10000;
|
||||
Database _db("db_test");
|
||||
Util::log("RDF:"+_rdf);
|
||||
_db.build(_rdf);
|
||||
Util::log("finish build");
|
||||
_db.test_build_sig();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:23fb97aaed35be9cb1ff0252060e8e5682a26b2e6cc868ab13cd15dad8cb0435
|
||||
size 3356
|
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:64602e2554b848b113e52e5f9e818c289b148d10c2b143db0e21bbe65bbd3f00
|
||||
size 56022
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* ecode_test.cpp
|
||||
*
|
||||
* Created on: 2014-7-16
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
cout << "encode_test" << endl;
|
||||
string _yago2_triple = "/media/wip/common/data/yago2_triple";
|
||||
string _yago = "/media/wip/common/data/yago_triple";
|
||||
string _yago_1000000 = "/media/wip/common/data/yago_1000000";
|
||||
string _yago_10000 = "/media/wip/common/data/yago_10000";
|
||||
string _yago_example = "/media/wip/common/data/yago_example";
|
||||
string _rdf = _yago2_triple;
|
||||
Database _db("db_test");
|
||||
Util::log("RDF:"+_rdf);
|
||||
_db.build(_rdf);
|
||||
Util::log("finish build");
|
||||
_db.test();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
=================================================
|
||||
Sesame:
|
||||
load lubm_10.nt
|
||||
Malformed document: Not a valid (absolute) URI: University0 [line 2]
|
||||
load bsbm_100.nt
|
||||
Malformed document: '7683.53' was not recognised, and could not be verified, with datatype http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/USD [line 9059]
|
||||
load dbpedia2014.nt
|
||||
Malformed document: '304.0' was not recognised, and could not be verified, with datatype http://dbpedia.org/datatype/second [line 93]
|
||||
load dblp_uniq.nt
|
||||
Malformed document: Element type “http:”must be followed by either attribute specifications, “>” or “/>”. [line1, column 8]
|
||||
=================================================
|
||||
Jena:
|
||||
load yago2.db.fix
|
||||
[line: 680, col: 8 ] Illegal character in IRI (codepoint 0x5E, '^'): <0.0#m[^]...>
|
||||
load yagoFacts.nt
|
||||
[line: 3, col: 1 ] Expected BNode or IRI: Got: [DIRECTIVE:base]
|
||||
load dblp_uniq.nt
|
||||
[line: 1715764, col: 144] Bad character in IRI (space): <http://www.ifi.unizh.ch/dbtg/IDEE/team.html#Dirk[space]...>
|
||||
|
|
@ -0,0 +1,344 @@
|
|||
#! /bin/env bash
|
||||
|
||||
#in some system, maybe /usr/bin/ instead of /bin/
|
||||
#according executables to deal with dbms
|
||||
#NOTICE: require that virtuoso/sesame/jena is installed and gstore is maked!
|
||||
|
||||
line1=--------------------------------------------------
|
||||
line2=##################################################
|
||||
path=/media/wip/common/data/
|
||||
#db0=${path}WatDiv/
|
||||
#db1=${path}LUBM/
|
||||
#db2=${path}DBpedia/
|
||||
#db3=${path}BSBM/
|
||||
#db=($db0 $db1 $db2 $db3) #db[4]=db4
|
||||
db=(WatDiv/ LUBM/ BSBM/ DBpedia/)
|
||||
#BETTER: add yago2/yago3, dblp...add more queries
|
||||
length1=${#db[*]} #or @ instead of *
|
||||
|
||||
#BETTER: let user indicate the executable directory
|
||||
gstore=/home/zengli/zengli/Gstore/
|
||||
virtuoso=/home/zengli/virtuoso/bin/
|
||||
sesame=/home/zengli/sesame/bin/
|
||||
jena=/home/zengli/jena/bin/
|
||||
#NOTICE: maybe oldGstore and newGstore
|
||||
#dbms_path=($gstore $jena $sesame $virtuoso)
|
||||
dbms_path=($gstore $jena)
|
||||
dbms_name=(gstore jena)
|
||||
#dbms_name=(gstore jena sesame virtuoso)
|
||||
length2=${#dbms_path[*]} #or @ instead of *
|
||||
|
||||
#for each db, compare, pass db and query as parameter
|
||||
#firstly load database, then query with unique program
|
||||
#output format: in each dbms, time.log/ result.log/
|
||||
#use each dataset name as subfolder like lubm_10.nt/ in result.log/
|
||||
#and time.log/lubm_10.nt.log, and for each query corresponding
|
||||
#to a dataset: result.log/lubm_10.nt/q1.sql.log
|
||||
#Finally, in the directory where this script is placed in, also
|
||||
#build result.log/ and time.log/
|
||||
#result.log/lubm_10.nt.tsv time.log/lubm_10.nt.tsv size.log.tsv
|
||||
|
||||
#below is old:
|
||||
#time log should be used in excel, and compare result log:
|
||||
#diff or grep -vFf file1 file2
|
||||
#better to compare line by line using awk
|
||||
|
||||
log1=result.log/
|
||||
log2=time.log/
|
||||
log3=load.log/
|
||||
|
||||
#clean logs in home(this script)
|
||||
home=`pwd`
#recreate garbage/ and the three log folders from scratch, in this order
for logdir in garbage/ ${log1} ${log2} ${log3}
do
	if [ -d ${home}/${logdir} ]
	then
		rm -rf ${home}/${logdir}
	fi
	mkdir ${home}/${logdir}
done
|
||||
|
||||
#clean logs in each dbms
|
||||
#recreate result.log/, time.log/ and load.log/ (relative to the current
#directory, i.e. the dbms folder the caller has cd'ed into)
function initial()
{
	local logdir
	for logdir in $log1 $log2 $log3
	do
		if [ -d $logdir ]
		then
			rm -rf $logdir
		fi
		mkdir $logdir
	done
}
|
||||
|
||||
#size.tsv:the size after loaded time.tsv:time used to load
|
||||
tsv3=${home}/${log3}time.tsv
|
||||
tsv4=${home}/${log3}size.tsv
|
||||
dsnum=0
|
||||
for i in `seq $length1`
|
||||
do
|
||||
i=`expr $i - 1`
|
||||
for tmpdb in `ls ${path}/${db[i]}/database/*.nt`
|
||||
do
|
||||
dsnum=`expr $dsnum + 1`
|
||||
if [ $dsnum -ne 1 ]
|
||||
then
|
||||
sleep 60 #for other processes
|
||||
echo 3 > /proc/sys/vm/drop_caches
|
||||
fi
|
||||
cntdb="${tmpdb##*/}"
|
||||
echo "$tmpdb" #in case of special characters like &
|
||||
tsv1=${home}/${log1}/${cntdb}.tsv #compare result
|
||||
tsv2=${home}/${log2}/${cntdb}.tsv #compare time
|
||||
echo $tsv1
|
||||
echo $tsv2
|
||||
#load this database into each dbms
|
||||
for j in `seq $length2`
|
||||
do
|
||||
j=`expr $j - 1`
|
||||
cd ${dbms_path[j]}
|
||||
#build logs structure
|
||||
echo "build logs structure!"
|
||||
if [ $dsnum -eq 1 ]
|
||||
then
|
||||
initial
|
||||
fi
|
||||
mkdir ${log1}/${cntdb} #pass the cntdb if using function
|
||||
#touch ${log2}/${cntdb}.log
|
||||
#if [ ${dbms_name[j]}x = ${gstore}x ] #add a x in case of empty
|
||||
if [ ${j} -eq 0 ] #otherwise will unary error
|
||||
then
|
||||
echo "this is for gstore!"
|
||||
./gload $cntdb $tmpdb > load.txt
|
||||
#awk '{if($1=="after" && $2=="build," && $3=="used"){split($4, a,"m");print "time:\t"a[1]}}' load.txt > load_${cntdb}.log
|
||||
awk '{if($1=="after" && $2=="build," && $3=="used"){split($4, a,"m");print "'$cntdb'""\t"a[1]}}' load.txt >> ${log3}/time.log
|
||||
#elif [ ${dbms[j]}x = ${virtuoso}x ]
|
||||
#elif [ ${dbms[j]}x = ${sesame}x ]
|
||||
#elif [ ${dbms[j]}x = ${jena}x ]
|
||||
elif [ ${j} -eq 1 ]
|
||||
then
|
||||
echo "this is for jena!"
|
||||
./tdbloader --loc "$cntdb" "$tmpdb" > load.txt 2>&1
|
||||
#awk '{if(NR==1){s=$1}else{t=$1}}END{split(s,a,":");split(t,b,":");ans=0+(b[1]-a[1])*3600+(b[2]-a[2])*60+(b[3]-a[3]);printf("%s\t%d\n", "time:", ans*1000);}' load.txt > load_${cntdb}.log
|
||||
awk '{if(NR==1){s=$1}else{t=$1}}END{split(s,a,":");split(t,b,":");ans=0+(b[1]-a[1])*3600+(b[2]-a[2])*60+(b[3]-a[3]);printf("%s\t%d\n", "'$cntdb'", ans*1000);}' load.txt >> ${log3}/time.log
|
||||
#cat load.txt >> "load_${cntdb}.log"
|
||||
elif [ ${j} -eq 2 ]
|
||||
then
|
||||
echo "this is for sesame!"
|
||||
#TODO
|
||||
elif [ ${j} -eq 3 ]
|
||||
then
|
||||
echo "this is for virtuoso!"
|
||||
#TODO
|
||||
fi
|
||||
#ls -l sums the actual size, unit is k
|
||||
echo "now to sum the database size!"
|
||||
#ls -lR "$cntdb" | awk 'BEGIN{sum=0}{if($1=="total"){sum=sum+$2}}END{print "size:\t"sum}' >> load_${cntdb}.log
|
||||
ls -lR "$cntdb" | awk 'BEGIN{sum=0}{if($1=="total"){sum=sum+$2}}END{print "'$cntdb'""\t"sum}' >> ${log3}/size.log
|
||||
|
||||
timelog=${log2}/${cntdb}.log
|
||||
touch $timelog
|
||||
for query in `ls ${path}/${db[i]}/query/*.sql`
|
||||
do
|
||||
echo $query
|
||||
#build logs structure
|
||||
anslog=${log1}/${cntdb}/${query##*/}.log
|
||||
touch $anslog #needed because the result maybe empty
|
||||
#run $query against the current dbms (selected by index j), append its time
#to $timelog and write its result rows to $anslog
if [ ${j} -eq 0 ]
then
	echo "this is for gstore!"
	./gquery "$cntdb" $query > ans.txt
	#the number before "ms" on the "Total time used:" line is the query time
	awk -F ':' 'BEGIN{query="'$query'"}{if($1=="Total time used"){split($2, a, "m");split(a[1],b," ");}}END{print query"\t"b[1]}' ans.txt >> $timelog
	#grep "Total time used:" ans.txt | grep -o "[0-9]*ms" >> ${log2}/${cntdb}.log
	#copy the lines between "final result is " and the next blank line.
	#NOTICE: the brackets must be escaped; an unescaped [empty result] is a
	#bracket expression matching any single one of those characters, which
	#dropped almost every result line
	awk -F ':' 'BEGIN{flag=0}{if(flag==1 && $0 ~/^$/){flag=2}if(flag==1 && !($0 ~/\[empty result\]/)){print $0}if($1=="final result is "){flag=1}}' ans.txt > $anslog
	#awk 'BEGIN{flag=0}{if(flag==1){print $0}if($1 ~/^final$/){flag=1}}' ans.txt > ${log1}/${cntdb}/${query}.log
elif [ ${j} -eq 1 ]
then
	echo "this is for jena!"
	#NOTICE: for program output in stderr(no cache), deal like this
	./tdbquery --repeat 2,1 --time --results TSV --loc "$cntdb" --query $query > ans.txt 2>&1
	#NOTICE: redirect in awk, and jena
	#use old var to remove duplicates
	awk 'BEGIN{old=""}{if(NR>1){if($1 ~/Time:/ && $3 ~/sec/){time=$2*1000;print "'$query'""\t"time >> "'$timelog'"}else if(!($0 ~/^$/) && $0 != old){print $0 >> "'$anslog'";old=$0}}}' ans.txt
elif [ ${j} -eq 2 ]
then
	echo "this is for sesame!"
	#TODO
elif [ ${j} -eq 3 ]
then
	echo "this is for virtuoso!"
	#TODO
fi
|
||||
#sort according to the path order
|
||||
echo "now to sort anslog!"
|
||||
mv $anslog ${anslog}.bak
|
||||
#use own defined select-sort function
|
||||
#this function can also delete duplicates
|
||||
#BETTER: must use external-sorting when too large
|
||||
# awk -F '\t' '{for(i=1;i<=NF;++i)arr[NR]=$0}
|
||||
# END{
|
||||
# nr=sortArr(arr,NR,NF);
|
||||
# for(i=1;i<=nr;++i){print arr[i]}}
|
||||
# function sortArr(arr, nr, nf){
|
||||
# for(p=1;p<nr;++p){
|
||||
# min=p;
|
||||
# for(q=p+1;q<=nr;++q){
|
||||
# ret=less(arr[q],arr[min],nf);
|
||||
# if(ret==1){min=q}
|
||||
# else if(ret==0){swap(arr,q,nr);nr=nr-1;q=q-1}}
|
||||
# if(min!=p){swap(arr,p,min)}}
|
||||
# return nr}
|
||||
# function swap(arr,t1,t2){t=arr[t1];arr[t1]=arr[t2];arr[t2]=t}
|
||||
# function less(t1,t2,nf){
|
||||
# split(t1,s1,"\t");
|
||||
# split(t2,s2,"\t");
|
||||
# for(k=1;k<=nf;++k){
|
||||
# print s1[k]"\t"s2[k]
|
||||
# if(s1[k]<s2[k]){ return 1 }
|
||||
# else if(s1[k]>s2[k]) { return 2 }
|
||||
# else { continue; } }
|
||||
# return 0 }' ${anslog}.bak > ${anslog}
|
||||
#-k1n -k2r ...
|
||||
sort -t $'\t' -u ${anslog}.bak > ${anslog}
|
||||
rm -f ${anslog}.bak
|
||||
done
|
||||
echo "now to sort timelog!"
|
||||
mv $timelog ${timelog}.bak
|
||||
awk -F '\t' '{print $1"\t"$2 | "sort -k1"}' ${timelog}.bak > ${timelog}
|
||||
rm -f ${timelog}.bak
|
||||
#remove the db when finished
|
||||
echo "now to remove the cntdb!"
|
||||
rm -rf "$cntdb"
|
||||
#BETTER:remove *.txt in each dbms path
|
||||
#rm -f *.txt
|
||||
#compare time and construct the TSV table
|
||||
if [ ${j} -eq 0 ]
|
||||
then
|
||||
echo "this is the first dbms!"
|
||||
awk -F '\t' 'BEGIN{
|
||||
print "Time\t""'${dbms_name[j]}'" }
|
||||
{ num=split($1,str,"/"); print str[num]"\t"$2 }' ${timelog} > ${tsv2}
|
||||
else
|
||||
echo "this is not the first dbms!"
|
||||
mv ${tsv2} ${tsv2}.bak
|
||||
awk -F '\t' '{
|
||||
if(NR==FNR) {
|
||||
num=split($1,str,"/");
|
||||
map[str[num]]=$2 }
|
||||
else {
|
||||
if(FNR==1) { print $0"\t""'${dbms_name[j]}'" }
|
||||
else { print $0"\t"map[$1] }
|
||||
}}' ${timelog} ${tsv2}.bak > ${tsv2}
|
||||
rm -f ${tsv2}.bak
|
||||
fi
|
||||
done
|
||||
#compare the result and construct the TSV table
|
||||
echo "now to compare the results!"
|
||||
cd ${home}
|
||||
tvar1=`expr $length2 - 1`
|
||||
tvar2=`expr $length2 - 2`
|
||||
for p in `seq 0 $tvar2`
|
||||
do
|
||||
tvar3=`expr $p + 1`
|
||||
for q in `seq $tvar3 $tvar1`
|
||||
do
|
||||
echo $p,$q
|
||||
>compare.txt
|
||||
#for every query, compare the sorted result logs of dbms p and dbms q and
#append "<query file>\tY" (same) or "<query file>\tN" (different) to compare.txt
for query in `ls ${path}/${db[i]}/query/*.sql`
do
	echo "compare: " $query
	tmplog=${log1}/${cntdb}/${query##*/}.log
	#rows are compared position by position, over the fields of the first log.
	#FILENAME is used instead of NR==FNR so that an empty first log is not
	#confused with the second one, and the row counts n1/n2 must agree:
	#otherwise a log with extra or missing rows would still be reported as Y
	awk -F '\t' 'BEGIN{flag=0;n1=0;n2=0}{
		if(FILENAME==ARGV[1]){map[FNR]=$0;n1=FNR}
		else{
			n2=FNR;
			if(flag==0){
				num=split(map[FNR],str1,"\t");
				split($0,str2,"\t");
				for(i=1;i<=num;++i){
					if(str1[i]!=str2[i]){
						flag=1;break}}}}}
		END{
			if(n1!=n2){flag=1}
			if(flag==0){print "'${query##*/}'""\tY"}
			else{print "'${query##*/}'""\tN"}}' ${dbms_path[p]}/${tmplog} ${dbms_path[q]}/${tmplog} >> compare.txt
	#diff ${dbms_path[p]}/${tmplog} ${dbms_path[q]}/${tmplog}
	#if [ $? -ne 0 ]
	#then
	#	echo -e ${query##*/}"\tN" >> compare.txt
	#else
	#	echo -e ${query##*/}"\tY" >> compare.txt
	#fi
done
|
||||
echo "all queries done!"
|
||||
name=${dbms_name[p]}_${dbms_name[q]}
|
||||
if [ $p -eq 0 ] && [ $q -eq 1 ]
|
||||
then
|
||||
awk -F '\t' 'BEGIN{print "Result\t""'$name'"}{print $0}' compare.txt > ${tsv1}
|
||||
else
|
||||
mv ${tsv1} ${tsv1}.bak
|
||||
awk -F '\t' '{
|
||||
if(NR==FNR) { map[$1]=$2 }
|
||||
else {
|
||||
if(FNR==1) { print $0"\t""'$name'" }
|
||||
else { print $0"\t"map[$1] }
|
||||
}}' compare.txt ${tsv1}.bak > ${tsv1}
|
||||
rm -f ${tsv1}.bak
|
||||
fi
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
#build the load.log/ in home(this script)
|
||||
echo "now to build the load.log!"
|
||||
#merge every dbms's load.log/time.log and load.log/size.log into the two
#tables $tsv3 (load time) and $tsv4 (database size): one row per dataset,
#one column per dbms
for j in `seq $length2`
do
	j=`expr $j - 1`
	cd ${dbms_path[j]}
	if [ $j -eq 0 ]
	then
		echo "this is the first dbms!"
		#the first dbms creates both tables, header line included
		awk -F '\t' 'BEGIN{print "dataset\\dbms\t""'${dbms_name[j]}'"}{print $0}' ${log3}/time.log > $tsv3
		awk -F '\t' 'BEGIN{print "dataset\\dbms\t""'${dbms_name[j]}'"}{print $0}' ${log3}/size.log > $tsv4
	else
		echo "this is not the first dbms!"
		#every later dbms appends one column, looked up by dataset name
		mv ${tsv3} ${tsv3}.bak
		awk -F '\t' '{
			if(NR==FNR) { map[$1]=$2 }
			else {
				if(FNR==1) { print $0"\t""'${dbms_name[j]}'" }
				else { print $0"\t"map[$1] }
			}}' ${log3}/time.log ${tsv3}.bak > ${tsv3}
		rm -f ${tsv3}.bak
		mv ${tsv4} ${tsv4}.bak
		#NOTICE: the size table must be fed from size.log (it used to read
		#time.log, which filled size.tsv with load times)
		awk -F '\t' '{
			if(NR==FNR) { map[$1]=$2 }
			else {
				if(FNR==1) { print $0"\t""'${dbms_name[j]}'" }
				else { print $0"\t"map[$1] }
			}}' ${log3}/size.log ${tsv4}.bak > ${tsv4}
		rm -f ${tsv4}.bak
	fi
done
|
||||
|
||||
echo "this is the end of full test!"
|
||||
echo "please visit the result.log/, time.log/ and load.log/"
|
||||
echo "you can use excel to load the .tsv files"
|
||||
|
|
@ -0,0 +1,332 @@
|
|||
/*=============================================================================
|
||||
# Filename: gtest.cpp
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-09-02 00:04
|
||||
# Description: load index once and query, there are several ways to use this program:
|
||||
1. ./gtest test all datasets and corresponding queries
|
||||
2. ./gtest --help simplified as -h, will print the help message
|
||||
3. ./gtest -f DS_PATH load/test a specified dataset, with all corresponding queries
|
||||
4. ./gtest -d FD_PATH load/test a dataset folder(like WatDiv/), with all corresponding queries
|
||||
5. ./gtest -q DB_PATH q1 q2... test a loaded database with given queries(no limit to db and query)
|
||||
=============================================================================*/
|
||||
|
||||
//#include <stdio.h>
|
||||
//#include <string.h>
|
||||
//#include <stdlib.h>
|
||||
//#include <dirent.h>
|
||||
//#include <unistd.h>
|
||||
#include "../Database/Database.h"
|
||||
#include "../Util/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define NUM 4
|
||||
|
||||
char line1[] = "--------------------------------------------------";
|
||||
char line2[] = "##################################################";
|
||||
string path = "/media/wip/common/data/";
|
||||
string db[NUM] = {"WatDiv/", "LUBM/", "DBpedia/", "Yago/"};
|
||||
|
||||
//not achieved, so do not use
|
||||
void
|
||||
build_logs()
|
||||
{
|
||||
system("rm -rf result.log/");
|
||||
system("rm -rf time.log/");
|
||||
system("mkdir result.log");
|
||||
system("mkdir time.log");
|
||||
int i;
|
||||
string cmd;
|
||||
for(i = 0; i < NUM; ++i)
|
||||
{
|
||||
cmd = string("mkdir result.log/\"") + db[i] + "\""; //in case of special characters like &
|
||||
system(cmd.c_str());
|
||||
cmd = string("mkdir time.log/\"") + db[i] + "\"";
|
||||
system(cmd.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
bool //true iff p ends with the suffix p0; case sensitive
judge(const char* p, const char* p0) //both must be valid C strings
{
	const size_t len = strlen(p), len0 = strlen(p0);
	//a name shorter than the suffix can never end with it; without this
	//check "nt" would be accepted for the suffix ".nt"
	if(len0 > len)
		return false;
	//compare the last len0 characters of p with p0 (an empty p0 always matches)
	return strcmp(p + (len - len0), p0) == 0;
}
|
||||
|
||||
//Read the whole text file at `path` and return its content, every line
//terminated by '\n'.
//Returns "" when the file can not be opened (a message is printed) or when
//the file is empty.
std::string
getQueryFromFile(const char* path)
{
	std::ifstream fin(path);
	if(!fin)
	{
		printf("can not open %s\n", path);
		return "";
	}
	//std::getline has no line-length limit and the loop stops on any stream
	//failure, so an over-long line can not hang the reader and no extra blank
	//line is appended after the last one
	std::string content, line;
	while(std::getline(fin, line))
	{
		content += line;
		content += '\n';
	}
	return content;
}
|
||||
|
||||
//Print the usage banner (the same text as the header comment of this file)
//to stdout. The banner is one string literal spread over several source
//lines with backslash line continuations.
void
help()
{
	printf("\
/*=============================================================================\n\
# Filename: gtest.cpp\n\
# Author: syzz\n\
# Mail: 1181955272@qq.com\n\
# Description: load index once and query, there are several ways to use this program:\n\
1. ./gtest test all datasets and corresponding queries\n\
2. ./gtest --help simplified as -h, will print the help message\n\
3. ./gtest -f DS_PATH load/test a specified dataset, with all corresponding queries\n\
4. ./gtest -d FD_PATH load/test a dataset folder(like WatDiv/), with all corresponding queries\n\
5. ./gtest -q DB_PATH q1 q2... test a loaded database with given queries(no limit to db and query)\n\
=============================================================================*/\n\
");
}
|
||||
|
||||
FILE *fp0 = NULL, *fp1 = NULL, *fp2 = NULL;
|
||||
|
||||
void //single dataset, single query
|
||||
runSS(Database* _db, string qf) //given a query file
|
||||
{
|
||||
string query = getQueryFromFile(qf.c_str());
|
||||
if(query.empty())
|
||||
{
|
||||
fprintf(fp0, "this query is empty:\n%s\n", qf.c_str());
|
||||
fflush(fp0);
|
||||
return;
|
||||
//continue;
|
||||
}
|
||||
fprintf(fp0, "%s\n", qf.c_str());
|
||||
fflush(fp0);
|
||||
//getchar();getchar();
|
||||
ResultSet _rs;
|
||||
long tv_begin, tv_final;
|
||||
tv_begin = Util::get_cur_time();
|
||||
//fprintf(fp0, "begin to query!\n");
|
||||
//fflush(fp0);
|
||||
_db->query(query, _rs, stdout);
|
||||
//fprintf(fp0, "finish this query!\n");
|
||||
tv_final = Util::get_cur_time();
|
||||
//print to logs
|
||||
fprintf(fp1, "%s\n%ld\n%s\n", qf.c_str(), tv_final - tv_begin, line1);//time is standarded as ms
|
||||
fprintf(fp2, "%s\n%s%s\n", qf.c_str(), _rs.to_str().c_str(), line1);
|
||||
fflush(fp0);
|
||||
fflush(fp1);
|
||||
fflush(fp2);
|
||||
}
|
||||
|
||||
void //single dataset, multi queries
|
||||
runSM(Database* _db, string qd) //given a query directory
|
||||
{
|
||||
DIR* dp2;
|
||||
struct dirent* p2;
|
||||
dp2 = opendir(qd.c_str());
|
||||
//finish all queries
|
||||
while((p2 = readdir(dp2)) != NULL)
|
||||
{
|
||||
if(!judge(p2->d_name, ".sql"))
|
||||
continue;
|
||||
string file = qd + string(p2->d_name);
|
||||
runSS(_db, file);
|
||||
}
|
||||
fprintf(fp0, "finish all queries!\n");
|
||||
fflush(fp0);
|
||||
closedir(dp2);
|
||||
fprintf(fp0, "ok to close dp2!\n");
|
||||
}
|
||||
|
||||
void //multi datasets, multi queries
|
||||
runMM(string df) //given dataset folder, end with /
|
||||
{
|
||||
DIR *dp1;
|
||||
struct dirent *p1;
|
||||
string s1, s2, cmd;
|
||||
Database* _db;
|
||||
s1 = df + string("database/");
|
||||
s2 = df + string("query/");
|
||||
dp1 = opendir(s1.c_str());
|
||||
while((p1 = readdir(dp1)) != NULL)
|
||||
{
|
||||
if(!judge(p1->d_name, ".nt"))
|
||||
continue;
|
||||
system("echo 3 > /proc/sys/vm/drop_caches");
|
||||
//build the database index first
|
||||
string db_folder = s1 + string(p1->d_name);
|
||||
fprintf(fp0, "%s\n", db_folder.c_str());
|
||||
cmd = string("./gload \"") + string(p1->d_name) + "\" \"" + db_folder + "\" > \"gload_" + string(p1->d_name) + ".log\""; //NOTICE:\" is used to ensure file name with &(etc...) will work ok!
|
||||
fprintf(fp0, "%s\n", cmd.c_str());
|
||||
system(cmd.c_str());
|
||||
fprintf(fp0, "ok to load database!\n");
|
||||
//getchar();getchar();
|
||||
//then load the index
|
||||
_db = new Database(string(p1->d_name));
|
||||
_db->load();
|
||||
fprintf(fp0, "ok to load index!\n");
|
||||
//print to logs
|
||||
fprintf(fp1, "%s\n%s\n", p1->d_name, line1);
|
||||
fprintf(fp2, "%s\n%s\n", p1->d_name, line1);
|
||||
|
||||
runSM(_db, s2);
|
||||
|
||||
fflush(fp0);
|
||||
fprintf(fp1, "%s\n", line2);
|
||||
fprintf(fp2, "%s\n", line2);
|
||||
//NOTICE:may double free due to ~Database
|
||||
//to remove one when finished, should clean firstly like ~Database()
|
||||
delete _db;//_db->release(fp0);
|
||||
cmd = string("rm -rf \"") + string(p1->d_name) + string("\"");
|
||||
system(cmd.c_str());
|
||||
fprintf(fp0, "ok to remove %s\n", p1->d_name);
|
||||
fflush(fp0);
|
||||
fflush(fp1);
|
||||
fflush(fp2);
|
||||
}
|
||||
closedir(dp1);
|
||||
}
|
||||
|
||||
void
|
||||
clean()
|
||||
{
|
||||
//it is ok to check like this here!
|
||||
//in other situations, notice that fp is not null after closed!
|
||||
if(fp0 != NULL)
|
||||
{
|
||||
fclose(fp0);
|
||||
fp0 = NULL;
|
||||
}
|
||||
if(fp1 != NULL)
|
||||
{
|
||||
fclose(fp1);
|
||||
fp1 = NULL;
|
||||
}
|
||||
if(fp2 != NULL)
|
||||
{
|
||||
fclose(fp2);
|
||||
fp2 = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
EXIT(int ret)
|
||||
{
|
||||
clean();
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char* argv[])
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
//BETTER: enable user to assign the files for log
|
||||
int i, j;
|
||||
fp0 = fopen("run.log", "w+");
|
||||
if(fp0 == NULL)
|
||||
{
|
||||
printf("open error!\n");
|
||||
EXIT(1);
|
||||
}
|
||||
//build_logs();
|
||||
fp1 = fopen("time.log", "w+");
|
||||
if(fp1 == NULL)
|
||||
{
|
||||
printf("open error!\n");
|
||||
EXIT(1);
|
||||
}
|
||||
fp2 = fopen("result.log", "w+");
|
||||
if(fp2 == NULL)
|
||||
{
|
||||
printf("open error!\n");
|
||||
EXIT(1);
|
||||
}
|
||||
|
||||
switch(argc)
|
||||
{
|
||||
case 1:
|
||||
help();
|
||||
printf("this command will do a complete test for Gstore, do you want to continue?YN\n");
|
||||
char c;
|
||||
i = 0;
|
||||
while(scanf("%c", &c))
|
||||
{
|
||||
if(c == 'Y' || c == 'y')
|
||||
{
|
||||
for(j = 0; j < 4; ++j)
|
||||
{
|
||||
runMM(path+db[j]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if(c == 'N' || c == 'n')
|
||||
EXIT(0);
|
||||
i++;
|
||||
if(i > 10) //try no more than 10 times
|
||||
EXIT(1);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if(strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
|
||||
{
|
||||
help();
|
||||
EXIT(0);
|
||||
}
|
||||
else
|
||||
EXIT(1);
|
||||
case 3:
|
||||
if(strcmp(argv[1], "-f") == 0) //this file must be in db[]/database/, end with ".nt"
|
||||
{
|
||||
//TODO:use judge to check if is .nt file
|
||||
int k = strlen(argv[2]) - 1;
|
||||
string db = string(argv[2]);
|
||||
while(k >= 0)
|
||||
{
|
||||
if(db[k] == '/')
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
string s = db.substr(0, k+1) + "../query/";
|
||||
//DIR* dp = opendir(s.c_str());
|
||||
//TODO: gload the dataset and build a new function!
|
||||
printf("not achieved!\n");
|
||||
}
|
||||
else if(strcmp(argv[1], "-d") == 0)
|
||||
{
|
||||
runMM(string(argv[2]) + "/");
|
||||
}
|
||||
else
|
||||
EXIT(1);
|
||||
break;
|
||||
default: //> 3
|
||||
if(strcmp(argv[1], "-q") == 0)
|
||||
{
|
||||
//TODO: user must gload first?
|
||||
printf("not achieved!\n");
|
||||
}
|
||||
else
|
||||
EXIT(1);
|
||||
break;
|
||||
}
|
||||
|
||||
clean();
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
/*=============================================================================
|
||||
# Filename: temp.c
|
||||
# Author: syzz
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-05-12 15:40
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* Scratch program: writes one byte to a.dat, seeks to offset 4096 * 10 and
 * writes an int there, leaving an unwritten gap in between.
 * Returns 0 on success, 1 when a.dat can not be opened. */
int
main(int argc, char* argv[])
{
	int i = 0;
	char c = 'A';
	FILE* fp = fopen("a.dat", "w+b");
	if(fp == NULL)
	{
		/* fwrite/fseek/fclose on a NULL stream would crash */
		perror("a.dat");
		return 1;
	}
	fwrite(&c, sizeof(char), 1, fp);
	//author's notes on fseek:
	//when moving to tail, always return 0(point to tail when exceeding)
	//when moving to head, -1 when exceeding(point to original)
	fseek(fp, 4096 * 10, SEEK_SET);
	//file hole, different across platforms
	//mv just change index, so not vary, cp may vary(if transfer data, not neglect hole)
	//compress/uncompress will change the size with du -h
	//du and ll is different: one for block usage(also neglect hole),
	//one for actual size(not neglect hole)
	//what's more, even after cp or compress/uncompress, there may also be holes.
	fwrite(&i, sizeof(int), 1, fp);
	fclose(fp);
	/*
	FILE* fp = fopen("data.dat", "r+");
	char c;
	while((c = fgetc(fp)) != EOF)
	printf("%c", c);
	fclose(fp);
	printf("\n");
	*/
	/*
	printf("%c%c%c", 0xE6, 0x88, 0x91);
	printf("%c%c%c", 0xE7, 0x88, 0xB1);
	printf("%c%c%c", 0xE5, 0x90, 0xB4);
	printf("%c%c%c", 0xE5, 0x98, 0x89);
	printf("%c%c%c\n", 0xE5, 0x8D, 0xBF);
	*/
	return 0;
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* insert_test.cpp
|
||||
*
|
||||
* Created on: 2014-12-03
|
||||
* Author: Caesar11
|
||||
*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
string db_folder = string(argv[1]);
|
||||
string insert_rdf_file = string(argv[2]);
|
||||
Database _db(db_folder);
|
||||
|
||||
_db.insert(insert_rdf_file);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* join_test.cpp
|
||||
*
|
||||
* Created on: 2014-7-30
|
||||
* Author: liyouhuan
|
||||
*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
int main()
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
cout << "build signature" << endl;
|
||||
string _yago2_triple = "/media/wip/common/data/yago2_triple";
|
||||
string _yago = "/media/wip/common/data/yago_triple";
|
||||
string _yago_1000000 = "/media/wip/common/data/yago_1000000";
|
||||
string _yago_10000 = "/media/wip/common/data/yago_10000";
|
||||
string _yago_example = "/media/wip/common/data/yago_example";
|
||||
string _rdf = _yago_10000;
|
||||
Database _db("db_test");
|
||||
Util::log("RDF:"+_rdf);
|
||||
_db.build(_rdf);
|
||||
_db.test();
|
||||
Util::log("finish build");
|
||||
_db.test_join();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
#count the lines of every .c/.cpp/.h file below the current directory;
#-exec ... {} + hands the names to wc directly, so names containing spaces work
find . -type f \( -name '*.c' -o -name '*.cpp' -o -name '*.h' \) -exec wc -l {} +
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*=============================================================================
|
||||
# Filename: testParser.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-24 19:57
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Parser/DBparser.h"
|
||||
#include "../Query/SPARQLquery.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
std::cout<<"start"<<std::endl;
|
||||
DBparser db_parser;
|
||||
SPARQLquery sparql_query;
|
||||
std::string sparql("");
|
||||
std::string s=std::string("PREFIX dc10: <http://purl.org/dc/elements/1.0/> ")+
|
||||
"PREFIX dc11: <http://purl.org/dc/elements/1.1/> "+
|
||||
"SELECT ?title "+
|
||||
"WHERE { { ?book dc10:title ?title } "+
|
||||
"UNION { ?book dc11:title ?title } }";
|
||||
std::string s2=std::string("PREFIX foaf: <http://xmlns.com/foaf/0.1/> ")+
|
||||
"SELECT ?x1 ?x4 ?x5 WHERE {{?x1 y:hasGivenName ?x5."+
|
||||
"?x1 y:hasFamilyName ?x6."+
|
||||
"?x1 rdf:type <wordnet_scientist_110560637>."+
|
||||
"?x1 y:bornIn ?x2."+
|
||||
"?x1 y:hasAcademicAdvisor ?x4."+
|
||||
"?x2 y:locatedIn <Switzerland>."+
|
||||
"?x3 y:locatedIn <Germany>."+
|
||||
"?x4 y:bornIn ?x3.}"+
|
||||
"UNION { ?book dc11:title ?title}"+
|
||||
"} order by ?x5";
|
||||
std::cout<<"string: "<<s2<<std::endl;
|
||||
db_parser.sparqlParser(s2,sparql_query);
|
||||
sparql_query.print(cout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
/*=============================================================================
|
||||
# Filename: vstree_test.cpp
|
||||
# Author: Bookug Lobert
|
||||
# Mail: 1181955272@qq.com
|
||||
# Last Modified: 2015-10-24 19:57
|
||||
# Description:
|
||||
=============================================================================*/
|
||||
|
||||
#include "../Util/Util.h"
|
||||
#include "../Database/Database.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Util util;
|
||||
#endif
|
||||
cout << "vstree_test" << endl;
|
||||
string _yago_example = "/media/wip/common/data/yago_example";
|
||||
string _rdf = _yago_example;
|
||||
Database _db("db_test");
|
||||
_db.build(_rdf);
|
||||
Util::log("finish build");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue