1. use the new encode function encodeRDF_new() to replace the old one;

2. implement the triple-insertion part of the triple store update feature;
3. fix some bugs in LRUCache.cpp and CBtreeFunc.cpp.

author: hanshuo
This commit is contained in:
Caesar11 2015-02-02 02:53:47 -05:00
parent 6b5bf3fea5
commit 3cdffdc7b2
26 changed files with 2333 additions and 381 deletions

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,7 @@ using namespace std;
#include "../KVstore/KVstore.h"
#include "../VSTree/VSTree.h"
#include "../Parser/DBparser.h"
#include "../Parser/RDFParser.h"
#include "../util/util.h"
#include<stdio.h>
#include<sys/time.h>
@ -67,9 +68,10 @@ public:
* 3. if subject exist, update SigEntry, and update spo, ops... etc. if needed
* 4.
* */
bool insert(const Triple& _triple);
bool remove(const Triple& _triple);
bool build(const string& _rdf_file);
bool insert(const string& _insert_rdf_file);
bool remove(const string& _rdf_file);
bool build(const string& _rdf_file);
/* name of this DB*/
string getName();
@ -79,6 +81,9 @@ public:
/* root Path of this DB + signatureBFile */
string getSignatureBFile();
/* root Path of this DB + DBInfoFile */
string getDBInfoFile();
private:
string name;
bool is_active;
@ -88,25 +93,31 @@ private:
int pre_num;
int literal_num;
string rdf_prefix;
int encode_mode;
VSTree* vstree;
KVstore* kvstore;
/* metadata of this database: sub_num, pre_num, obj_num, literal_num, etc. */
string db_info_file;
/* six tuples: <sub pre obj sid pid oid> */
string six_tuples_file;
/* B means binary */
string signature_binary_file;
bool saveDBInfoFile();
bool loadDBInfoFile();
string getStorePath();
/* encode relative signature data of all Basic Graph Query, who union together into SPARQLquery */
void buildSparqlSignature(SPARQLquery & _sparql_q);
/* encode Triple into EntityBitSet */
bool encodeTriple2EntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
/* encode Triple into Subject EntityBitSet */
bool encodeTriple2SubEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
/* encode Triple into Object EntityBitSet */
bool encodeTriple2ObjEntityBitSet(EntityBitSet& _bitset, const Triple* _p_triple);
bool calculateEntityBitSet(int _sub_id, EntityBitSet & _bitset);
@ -124,10 +135,17 @@ private:
* 3. build: subID2objIDlist, <subIDpreID>2objIDlist subID2<preIDobjID>list
* 4. build: objID2subIDlist, <objIDpreID>2subIDlist objID2<preIDsubID>list
* */
//encodeRDF_new invoke new rdfParser to solve task 1 & 2 in one time scan.
bool encodeRDF(const string _rdf_file);
bool encodeRDF_new(const string _rdf_file);
int insertTriple(const TripleWithObjType& _triple);
bool removeTriple(const TripleWithObjType& _triple);
bool sub2id_pre2id_obj2id_RDFintoSignature(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max);
bool sub2id_pre2id(const string _rdf_file, int**& _p_id_tuples, int & _id_tuples_max);
bool literal2id_RDFintoSignature(const string _rdf_file, int** _p_id_tuples, int _id_tuples_max);
bool s2o_sp2o_s2po(int** _p_id_tuples, int _id_tuples_max);
bool o2s_op2s_o2ps(int** _p_id_tuples, int _id_tuples_max);
static int _spo_cmp(const void* _a, const void* _b);

View File

@ -493,9 +493,14 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
int _ikey = _pOldLeaf ->iExist(data);
if(_ikey > 0)
{
_pOldLeaf ->dupInsert(_leafdata, _ikey);
return false;
if(! _pOldLeaf ->getModify())
{
DelDisk(mfp, _pOldLeaf ->getAddrFB(), mblockQueue);
}
// set modified in dupInsert
return _pOldLeaf ->dupInsert(_leafdata, _ikey);
}
{
this ->insert_count ++;
}
@ -505,7 +510,7 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
{
DelDisk(mfp, _pOldLeaf ->getAddrFB(), mblockQueue);
}
// setmodified in insert
// set modified in insert
return _pOldLeaf ->Insert(_leafdata);
}
else
@ -517,7 +522,7 @@ bool BPlusTree::Insert(const mleafdata & _leafdata)
_pNewLeaf ->setAddrFB(_addr_newleaf);
// _key_tmp will therefore be the key of the first element of _pnewleaf
// setmodified in split;
// set modified in split;
if(! _pOldLeaf ->getModify())
{
DelDisk(mfp, _pOldLeaf ->getAddrFB(), mblockQueue);
@ -577,6 +582,7 @@ bool mLeafNode :: dupInsert(const mleafdata & _mleafdata, int _index_insert)
mleafdata & _tmp_leafdata = (this ->getElement(_index_insert));
_tmp_leafdata = _mleafdata;
this ->setModify();
return true;
}
@ -993,16 +999,16 @@ void BPlusTree :: StoreTree()
mNode * pNode;
int _i_ord = 0;
bool any = false;
//cout << "in store " << endl;
while(!pQueue.empty())
{
pNode = pQueue.front();
pQueue.pop();
// unmodify happens inside (presumably inside WriteNode -- TODO confirm)
WriteNode(pNode, fp, this ->mblockQueue);
if(pNode->getModify()){
any = true;
}
WriteNode(pNode, fp, this ->mblockQueue);
_i_ord ++;
if(_i_ord % 1600 == 0)
cout << " - _i_ord= " << _i_ord;

View File

@ -5,6 +5,7 @@
* Author: liyouhuan
*/
#include"KVstore.h"
#include"../Database/Database.h"
/* public methods: */
int KVstore::getEntityDegree(int _entity_id)
@ -35,101 +36,163 @@ int KVstore::getEntityOutDegree(int _entity_id)
* 2. remove triple
* before call this function, we were sure that this triple did not exist
*/
void KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
int KVstore::updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id)
{
/* update sp2o */
{
int* _sp2olist = NULL;
int _sp2o_len = 0;
this->getobjIDlistBysubIDpreID(_sub_id, _pre_id, _sp2olist, _sp2o_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_x(_sp2olist, _sp2o_len, _obj_id);
if(_insert){
this->setobjIDlistBysubIDpreID(_sub_id, _pre_id, _sp2olist, _sp2o_len);
}
}
//debug
// {
// stringstream _ss;
// _ss << "updateTupleslist_insert: " << _sub_id << " " << _pre_id << " " << _obj_id << endl;
// Database::log(_ss.str());
// }
/* update op2s */
{
int* _op2slist = NULL;
int _op2s_len = 0;
this->getsubIDlistByobjIDpreID(_obj_id, _pre_id, _op2slist, _op2s_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_x(_op2slist, _op2s_len, _sub_id);
if(_insert){
this->setsubIDlistByobjIDpreID(_obj_id, _pre_id, _op2slist, _op2s_len);
}
}
//debug
int updateListLen = 0;
/* update s2po */
{
int* _s2polist = NULL;
int _s2po_len = 0;
this->getpreIDobjIDlistBysubID(_sub_id, _s2polist, _s2po_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_xy(_s2polist, _s2po_len, _pre_id, _obj_id);
if(_insert){
this->setpreIDobjIDlistBysubID(_sub_id, _s2polist, _s2po_len);
}
}
/* update sp2o */
{
int* _sp2olist = NULL;
int _sp2o_len = 0;
this->getobjIDlistBysubIDpreID(_sub_id, _pre_id, _sp2olist, _sp2o_len);
/* update o2ps */
{
int* _o2pslist = NULL;
int _o2ps_len = 0;
this->getpreIDsubIDlistByobjID(_obj_id, _o2pslist, _o2ps_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_xy(_o2pslist, _o2ps_len, _pre_id, _sub_id);
if(_insert){
this->setpreIDsubIDlistByobjID(_obj_id, _o2pslist, _o2ps_len);
}
}
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_x(_sp2olist, _sp2o_len, _obj_id);
if(_insert){
this->setobjIDlistBysubIDpreID(_sub_id, _pre_id, _sp2olist, _sp2o_len);
}
/* update s2o */
{
int* _s2olist = NULL;
int _s2o_len = 0;
this->getobjIDlistBysubID(_sub_id, _s2olist, _s2o_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_x(_s2olist, _s2o_len, _obj_id);
if(_insert){
this->setobjIDlistBysubID(_sub_id, _s2olist, _s2o_len);
}
}
updateListLen += _sp2o_len;
/* update o2s */
{
int* _o2slist = NULL;
int _o2s_len = 0;
this->getsubIDlistByobjID(_obj_id, _o2slist, _o2s_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_x(_o2slist, _o2s_len, _sub_id);
if(_insert){
this->setsubIDlistByobjID(_obj_id, _o2slist, _o2s_len);
}
}
delete[] _sp2olist;
_sp2olist = NULL;
_sp2o_len = 0;
}
//debug
// Database::log("update sp2o done.");
/* update op2s */
{
int* _op2slist = NULL;
int _op2s_len = 0;
this->getsubIDlistByobjIDpreID(_obj_id, _pre_id, _op2slist, _op2s_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_x(_op2slist, _op2s_len, _sub_id);
if(_insert){
this->setsubIDlistByobjIDpreID(_obj_id, _pre_id, _op2slist, _op2s_len);
}
updateListLen += _op2s_len;
delete[] _op2slist;
_op2slist = NULL;
_op2s_len = 0;
}
//debug
// Database::log("update op2s done.");
/* update s2po */
{
int* _s2polist = NULL;
int _s2po_len = 0;
this->getpreIDobjIDlistBysubID(_sub_id, _s2polist, _s2po_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_xy(_s2polist, _s2po_len, _pre_id, _obj_id);
if(_insert){
this->setpreIDobjIDlistBysubID(_sub_id, _s2polist, _s2po_len);
}
updateListLen += _s2po_len;
delete[] _s2polist;
_s2polist = NULL;
_s2po_len = 0;
}
//debug
// Database::log("update s2po done.");
/* update o2ps */
{
int* _o2pslist = NULL;
int _o2ps_len = 0;
this->getpreIDsubIDlistByobjID(_obj_id, _o2pslist, _o2ps_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_xy(_o2pslist, _o2ps_len, _pre_id, _sub_id);
if(_insert){
this->setpreIDsubIDlistByobjID(_obj_id, _o2pslist, _o2ps_len);
}
updateListLen += _o2ps_len;
delete[] _o2pslist;
_o2pslist = NULL;
_o2ps_len = 0;
}
//debug
// Database::log("update o2ps done.");
/* update s2o */
{
int* _s2olist = NULL;
int _s2o_len = 0;
this->getobjIDlistBysubID(_sub_id, _s2olist, _s2o_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_x(_s2olist, _s2o_len, _obj_id);
if(_insert){
this->setobjIDlistBysubID(_sub_id, _s2olist, _s2o_len);
}
updateListLen += _s2o_len;
delete[] _s2olist;
_s2olist = NULL;
_s2o_len = 0;
}
//debug
// Database::log("update s2o done.");
/* update o2s */
{
int* _o2slist = NULL;
int _o2s_len = 0;
this->getsubIDlistByobjID(_obj_id, _o2slist, _o2s_len);
/* if no duplication, _insert will be true
* this->setXXX function will override the previous value */
bool _insert = this->insert_x(_o2slist, _o2s_len, _sub_id);
if(_insert){
this->setsubIDlistByobjID(_obj_id, _o2slist, _o2s_len);
}
updateListLen += _o2s_len;
delete[] _o2slist;
_o2slist = NULL;
_o2s_len = 0;
}
//debug
return updateListLen;
//debug
// Database::log("update o2s done.");
}
/* insert <_x_id, _y_id> into _xylist(keep _xylist(<x,y>) in ascending order) */
bool KVstore::insert_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id)
{
int _new_list_len = _list_len + 2;
int* _new_xylist = new int[_new_list_len];
/* if _xylist does not exist */
if(_xylist == NULL){
_new_xylist[0] = _x_id;
_new_xylist[1] = _y_id;
_list_len = _new_list_len;
return true;
}
/* check duplication */
for(int i = 0; i < _list_len; i += 2)
@ -141,12 +204,24 @@ bool KVstore::insert_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id)
}
}
int _new_list_len = _list_len + 2;
int* _new_xylist = new int[_new_list_len];
/* if _xylist does not exist */
if(_xylist == NULL){
_new_xylist[0] = _x_id;
_new_xylist[1] = _y_id;
_xylist = _new_xylist;
_list_len = _new_list_len;
return true;
}
bool _insert_head = (_xylist[0] > _x_id) || (_xylist[0] == _x_id && _xylist[1] > _y_id);
if(_insert_head)
{
_new_xylist[0] = _x_id;
_new_xylist[1] = _y_id;
memcpy(_new_xylist, _xylist, _list_len*(sizeof(int)));
memcpy(_new_xylist + 2, _xylist, _list_len*(sizeof(int)));
delete[] _xylist;
_xylist = _new_xylist;
_list_len = _new_list_len;
@ -169,17 +244,17 @@ bool KVstore::insert_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id)
_insert_xyid = _gt_previous && _lt_current;
if(_insert_xyid)
{
//insert the new pair.
_new_xylist[j] = _x_id;
_new_xylist[j+1] = _y_id;
j += 2;
}
else
{
_new_xylist[j] = _xylist[i];
_new_xylist[j+1] = _xylist[i+1];
j += 2;
i += 2;
}
//copy the ith old pair to the new list.
_new_xylist[j] = _xylist[i];
_new_xylist[j+1] = _xylist[i+1];
j += 2;
i += 2;
}
bool _insert_tail = (j == _list_len);
@ -201,6 +276,16 @@ bool KVstore::insert_xy(int*& _xylist, int& _list_len,int _x_id, int _y_id)
/* insert _x_id into _xlist(keep _xlist in ascending order) */
bool KVstore::insert_x(int*& _xlist, int& _list_len, int _x_id)
{
/* check duplication */
for(int i = 0; i < _list_len; i ++)
{
if(_xlist[i] == _x_id){
return false;
}
}
int _new_list_len = _list_len + 1;
int* _new_xlist = new int[_new_list_len];
@ -212,14 +297,6 @@ bool KVstore::insert_x(int*& _xlist, int& _list_len, int _x_id)
return true;
}
/* check duplication */
for(int i = 0; i < _list_len; i ++)
{
if(_xlist[i] == _x_id){
return false;
}
}
bool _insert_head = _x_id < _xlist[0];
if(_insert_head)
{
@ -238,18 +315,19 @@ bool KVstore::insert_x(int*& _xlist, int& _list_len, int _x_id)
while(i < _list_len)
{
_insert_xid = (_xlist[i-1] < _x_id) && (_x_id < _xlist[i]);
//insert the new element.
if(_insert_xid)
{
_new_xlist[j] = _x_id;
j++;
continue;
}
else
{
_new_xlist[j] = _xlist[i];
j++;
i++;
j ++;
}
//copy the ith old element to the new list.
_new_xlist[j] = _xlist[i];
j ++;
i ++;
}
bool _insert_tail = (j == _list_len);
if(_insert_tail)

View File

@ -29,7 +29,7 @@ public:
* 1. insert triple
* 2. remove triple
*/
void updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id);
int updateTupleslist_insert(int _sub_id, int _pre_id, int _obj_id);
void updateTupleslist_remove(int _sub_id, int _pre_id, int _obj_id);
private:

View File

@ -2,9 +2,11 @@ objdir=objs/
objfile= $(objdir)Bstr.o $(objdir)Database.o $(objdir)KVstore.o $(objdir)Btree.o \
$(objdir)CBtreeFunc.o $(objdir)SPARQLquery.o $(objdir)BasicQuery.o $(objdir)ResultSet.o \
$(objdir)SigEntry.o $(objdir)Signature.o $(objdir)Triple.o $(objdir)util.o $(objdir)VSTree.o \
$(objdir)IDList.o $(objdir)EntryBuffer.o $(objdir)LRUCache.o $(objdir)VNode.o $(objdir)Parser.o \
$(objdir)IDList.o $(objdir)EntryBuffer.o $(objdir)LRUCache.o $(objdir)VNode.o $(objdir)DBparser.o \
$(objdir)SparqlParser.o $(objdir)SparqlLexer.o $(objdir)Operation.o $(objdir)Socket.o \
$(objdir)Server.o $(objdir)Client.o
$(objdir)Server.o $(objdir)Client.o \
$(objdir)TurtleParser.o $(objdir)RDFParser.o
inc=-I./tools/libantlr3c-3.4/ -I./tools/libantlr3c-3.4/include
all: gload gquery gserver gclient
@ -20,7 +22,7 @@ gserver: $(objdir)gserver.o $(objfile)
gclient: $(objdir)gclient.o $(objfile)
g++ -o gclient $(objdir)gclient.o $(objfile) lib/libantlr.a
$(objdir)gload.o: main/gload.cpp
g++ -c main/gload.cpp $(inc) -L./lib lib/libantlr.a -o $(objdir)gload.o
@ -32,13 +34,14 @@ $(objdir)gserver.o: main/gserver.cpp
$(objdir)gclient.o: main/gclient.cpp
g++ -c main/gclient.cpp $(inc) -o $(objdir)gclient.o
$(objdir)Bstr.o: Bstr/Bstr.cpp Bstr/Bstr.h
g++ -c Bstr/Bstr.cpp $(inc) -o $(objdir)Bstr.o
$(objdir)Database.o: Database/Database.cpp Database/Database.h $(objdir)IDList.o $(objdir)ResultSet.o $(objdir)SPARQLquery.o \
$(objdir)BasicQuery.o \
$(objdir)Triple.o $(objdir)SigEntry.o $(objdir)KVstore.o $(objdir)VSTree.o $(objdir)Parser.o $(objdir)util.o
$(objdir)Triple.o $(objdir)SigEntry.o $(objdir)KVstore.o $(objdir)VSTree.o $(objdir)DBparser.o $(objdir)util.o \
$(objdir)RDFParser.o
g++ -c Database/Database.cpp $(inc) -o $(objdir)Database.o
$(objdir)KVstore.o: KVstore/KVstore.cpp KVstore/KVstore.h $(objdir)Btree.o
@ -75,13 +78,18 @@ $(objdir)LRUCache.o: VSTree/LRUCache.cpp VSTree/LRUCache.h VSTree/VNode.h
g++ -c VSTree/LRUCache.cpp $(inc) -o $(objdir)LRUCache.o
$(objdir)VNode.o: VSTree/VNode.cpp VSTree/VNode.h
g++ -c VSTree/VNode.cpp $(inc) -o $(objdir)VNode.o
$(objdir)Parser.o: Parser/DBparser.cpp Parser/DBparser.h $(objdir)SparqlParser.o $(objdir)SparqlLexer.o
g++ -c Parser/DBparser.cpp $(inc) -o $(objdir)Parser.o
$(objdir)DBparser.o: Parser/DBparser.cpp Parser/DBparser.h $(objdir)SparqlParser.o $(objdir)SparqlLexer.o $(objdir)Triple.o
g++ -c Parser/DBparser.cpp $(inc) -o $(objdir)DBparser.o
$(objdir)SparqlParser.o: Parser/SparqlParser.c Parser/SparqlParser.h
gcc -c Parser/SparqlParser.c $(inc) -o $(objdir)SparqlParser.o
$(objdir)SparqlLexer.o: Parser/SparqlLexer.c Parser/SparqlLexer.h
gcc -c Parser/SparqlLexer.c $(inc) -o $(objdir)SparqlLexer.o
$(objdir)TurtleParser.o: Parser/TurtleParser.cpp Parser/TurtleParser.h Parser/Type.h
gcc -c Parser/TurtleParser.cpp $(inc) -o $(objdir)TurtleParser.o
$(objdir)RDFParser.o: Parser/RDFParser.cpp Parser/RDFParser.h $(objdir)TurtleParser.o $(objdir)Triple.o
gcc -c Parser/RDFParser.cpp $(inc) -o $(objdir)RDFParser.o
$(objdir)Operation.o: Server/Operation.cpp Server/Operation.h
g++ -c Server/Operation.cpp $(inc) -o $(objdir)Operation.o
$(objdir)Socket.o: Server/Socket.cpp Server/Socket.h

114
Parser/RDFParser.cpp Normal file
View File

@ -0,0 +1,114 @@
#include "RDFParser.h"
/* To parse a triple file, create an RDFParser object bound to the input
 * stream and call parseFile repeatedly until all triples have been processed.
 * For example:
 * ifstream _fin(filename);
 * RDFParser _RDFParser(_fin);
 * TripleWithObjType* triple_array = new TripleWithObjType[RDFParser::TRIPLE_NUM_PER_GROUP];
 * while (true)
 * {
 * int triple_num = 0;
 * _RDFParser.parseFile(triple_array, triple_num);
 * if (triple_num == 0) break;
 * ......
 * }
 */
string RDFParser::parseFile(TripleWithObjType* _triple_array, int& _triple_num)
{
    /* Appends parsed triples to _triple_array starting at index _triple_num and
     * advances _triple_num for each one. Stops when the Turtle parser reports no
     * further triple or when _triple_num reaches TRIPLE_NUM_PER_GROUP.
     * Always returns an empty string. */
    string subj, pred, obj, obj_sub_type;
    Type::Type_ID obj_type;

    while (_triple_num < RDFParser::TRIPLE_NUM_PER_GROUP)
    {
        bool got_triple;
        try
        {
            got_triple = this->_TurtleParser.parse(subj, pred, obj, obj_type, obj_sub_type);
        }
        catch (const TurtleParser::Exception& parse_err)
        {
            /* report the problem, drop the offending line and keep going */
            cerr << parse_err.message << endl;
            this->_TurtleParser.discardLine();
            continue;
        }
        if (!got_triple)
        {
            break;
        }

        /* subject and predicate are always wrapped in angle brackets */
        subj = "<" + subj + ">";
        pred = "<" + pred + ">";

        /* 'e' marks a URI object, 'l' marks every kind of literal */
        char obj_kind = 'l';
        if (obj_type == Type::Type_URI)
        {
            obj = "<" + obj + ">";
            obj_kind = 'e';
        }
        else
        {
            obj = "\"" + obj + "\"";
            if (obj_type == Type::Type_CustomLanguage)
            {
                /* language-tagged literal: "text"@lang */
                obj += "@" + obj_sub_type;
            }
        }

        _triple_array[_triple_num++] = TripleWithObjType(subj, pred, obj, obj_kind);
    }
    return "";
}
/* To parse a string, create an RDFParser object with the no-argument constructor. If the triples use a prefix, you must also provide the prefix declaration.
 * The whole string must be processed within a single call to parseString, so make sure the number of triples in the string does not exceed RDFParser::TRIPLE_NUM_PER_GROUP.
 * For example:
 * RDFParser _RDFParser;
 * _RDFParser.parseString(prefix, ..);
 * _RDFParser.parseString(triple string 1, ..);
 * _RDFParser.parseString(triple string 2, ..);
 *
 * Triple strings 1 and 2 will share the common prefix; if you do not want this, create a new RDFParser object.
 * _RDFParser.parseString(prefix + "\n" + triple string 1 + "\n" + triple string 2, ..); is also acceptable.
 */
string RDFParser::parseString(string _str, TripleWithObjType* _triple_array, int& _triple_num)
{
    /* Feeds _str into the internal string stream and then appends the parsed
     * triples to _triple_array starting at index _triple_num, advancing
     * _triple_num for each one. Stops when the Turtle parser reports no further
     * triple or when _triple_num reaches TRIPLE_NUM_PER_GROUP.
     * Always returns an empty string. */

    /* reset the stream state flags on every call, then append the new text */
    this->_sin.clear();
    this->_sin << _str;

    string subj, pred, obj, obj_sub_type;
    Type::Type_ID obj_type;

    while (_triple_num < RDFParser::TRIPLE_NUM_PER_GROUP)
    {
        bool got_triple;
        try
        {
            got_triple = this->_TurtleParser.parse(subj, pred, obj, obj_type, obj_sub_type);
        }
        catch (const TurtleParser::Exception& parse_err)
        {
            /* report the problem, drop the offending line and keep going */
            cerr << parse_err.message << endl;
            this->_TurtleParser.discardLine();
            continue;
        }
        if (!got_triple)
        {
            break;
        }

        /* subject and predicate are always wrapped in angle brackets */
        subj = "<" + subj + ">";
        pred = "<" + pred + ">";

        /* 'e' marks a URI object, 'l' marks every kind of literal */
        char obj_kind = 'l';
        if (obj_type == Type::Type_URI)
        {
            obj = "<" + obj + ">";
            obj_kind = 'e';
        }
        else
        {
            obj = "\"" + obj + "\"";
            if (obj_type == Type::Type_CustomLanguage)
            {
                /* language-tagged literal: "text"@lang */
                obj += "@" + obj_sub_type;
            }
        }

        _triple_array[_triple_num++] = TripleWithObjType(subj, pred, obj, obj_kind);
    }
    return "";
}

30
Parser/RDFParser.h Normal file
View File

@ -0,0 +1,30 @@
#ifndef gstore_parser_RDFParser
#define gstore_parser_RDFParser
#include "TurtleParser.h"
#include "../Triple/Triple.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
using namespace std;
// Thin wrapper around TurtleParser that converts parsed triples into
// TripleWithObjType records, either from a file stream (parseFile) or from
// strings pushed into an internal string stream (parseString).
class RDFParser
{
private:
// Internal stream that parseString appends its input to.
// NOTE: must stay declared before _TurtleParser, because the default
// constructor hands _sin to _TurtleParser and members are initialized in
// declaration order.
stringstream _sin;
// Underlying Turtle parser; reads from _sin or from the stream given to
// the ifstream constructor.
TurtleParser _TurtleParser;
public:
// parseFile/parseString stop filling the output array once the triple
// counter reaches this value.
static const int TRIPLE_NUM_PER_GROUP = 10 * 1000 * 1000;
//for parseString: the parser reads from the internal string stream
RDFParser():_TurtleParser(_sin){}
//for parseFile: the parser reads from the caller-provided file stream
RDFParser(ifstream& _fin):_TurtleParser(_fin){}
// Appends parsed triples to _triple_array at index _triple_num onward and
// advances _triple_num; returns an empty string.
string parseFile(TripleWithObjType* _triple_array, int& _triple_num);
// Same as parseFile, but first appends _str to the internal string stream.
string parseString(string _str, TripleWithObjType* _triple_array, int& _triple_num);
};
#endif

761
Parser/TurtleParser.cpp Normal file
View File

@ -0,0 +1,761 @@
#include "TurtleParser.h"
#include <sstream>
//---------------------------------------------------------------------------
// RDF-3X
// (c) 2008 Thomas Neumann. Web site: http://www.mpi-inf.mpg.de/~neumann/rdf3x
//
// This work is licensed under the Creative Commons
// Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy
// of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
// or send a letter to Creative Commons, 171 Second Street, Suite 300,
// San Francisco, California, 94105, USA.
//---------------------------------------------------------------------------
using namespace std;
//---------------------------------------------------------------------------
// Constructor: initialize the message member from a std::string.
TurtleParser::Exception::Exception(const std::string& message)
   : message(message)
{}
//---------------------------------------------------------------------------
// Constructor: initialize the message member from a C string.
TurtleParser::Exception::Exception(const char* message)
   : message(message)
{}
//---------------------------------------------------------------------------
// Destructor: no explicit cleanup.
TurtleParser::Exception::~Exception()
{}
//---------------------------------------------------------------------------
// Constructor: bind the lexer to its input stream. No token is pending
// (putBack is Token_Eof), line counting starts at 1 and the read buffer
// pointers start out as null.
TurtleParser::Lexer::Lexer(istream& in)
   : in(in),
     putBack(Token_Eof),
     line(1),
     readBufferStart(0),
     readBufferEnd(0)
{}
//---------------------------------------------------------------------------
// Destructor: no explicit cleanup.
TurtleParser::Lexer::~Lexer()
{}
//---------------------------------------------------------------------------
// Refill the read buffer from the input stream and hand out its first
// character in c. Returns false when the stream is no longer good or when
// nothing more could be read.
bool TurtleParser::Lexer::doRead(char& c)
{
   while (in) {
      // Restart at the beginning of the buffer and pull in a fresh chunk
      readBufferStart = readBuffer;
      in.read(readBuffer, readBufferSize);

      const std::streamsize fetched = in.gcount();
      if (fetched == 0)
         return false;

      readBufferEnd = readBufferStart + fetched;
      if (readBufferStart < readBufferEnd) {
         c = *readBufferStart;
         ++readBufferStart;
         return true;
      }
   }
   return false;
}
//---------------------------------------------------------------------------
// Is c a token separator? Separators are whitespace (space, tab, LF, CR)
// and the punctuation characters [ ] ( ) , ; : .
static bool issep(char c)
{
   switch (c) {
      case ' ': case '\t': case '\n': case '\r':
      case '[': case ']':
      case '(': case ')':
      case ',': case ';': case ':': case '.':
         return true;
      default:
         return false;
   }
}
//---------------------------------------------------------------------------
TurtleParser::Lexer::Token TurtleParser::Lexer::lexNumber(std::string& token,char c)
// Lex a number.
// c is the first character of the number (already consumed by the caller);
// token receives the lexed text. Returns Token_Integer, Token_Decimal or
// Token_Double depending on how far the number extends, and throws Exception
// ("invalid number") for anything else.
// NOTE(review): '.' is itself a separator for issep(), so an integer part that
// is directly followed by '.' is returned as Token_Integer with the '.' pushed
// back; the dot branch below is therefore only reached when the number starts
// with '.' (optionally after a sign).
{
token.resize(0);
// The loop body runs at most once; "break" is used to jump to the error report.
while (true) {
// Sign?
if ((c=='+')||(c=='-')) {
token+=c;
if (!read(c)) break;
}
// First number block
if (c!='.') {
if ((c<'0')||(c>'9')) break;
while ((c>='0')&&(c<='9')) {
token+=c;
// End of input right after the digits: a complete integer
if (!read(c)) return Token_Integer;
}
if (issep(c)) {
// Leave the separator for the next token
unread();
return Token_Integer;
}
}
// Dot?
if (c=='.') {
token+=c;
if (!read(c)) break;
// Second number block
while ((c>='0')&&(c<='9')) {
token+=c;
if (!read(c)) return Token_Decimal;
}
if (issep(c)) {
unread();
return Token_Decimal;
}
}
// Exponent
if ((c!='e')&&(c!='E')) break;
token+=c;
if (!read(c)) break;
// Optional exponent sign
if ((c=='-')||(c=='+')) {
token+=c;
if (!read(c)) break;
}
// At least one exponent digit is required
if ((c<'0')||(c>'9')) break;
while ((c>='0')&&(c<='9')) {
token+=c;
if (!read(c)) return Token_Double;
}
if (issep(c)) {
unread();
return Token_Double;
}
break;
}
// Anything that reaches this point is malformed; report what was read so far
// followed by the offending character.
stringstream msg;
msg << "lexer error in line " << line << ": invalid number " << token << c;
throw Exception(msg.str());
}
//---------------------------------------------------------------------------
// Parse exactly len hexadecimal digits from the input and return their value.
// Throws Exception ("invalid unicode escape") if the input ends early or a
// non-hex character is encountered.
unsigned TurtleParser::Lexer::lexHexCode(unsigned len)
{
   unsigned value = 0;
   unsigned consumed = 0;

   while (consumed < len) {
      char c;
      if (!read(c))
         break;

      // Translate the character into its 4-bit value
      unsigned digit;
      if ((c >= '0') && (c <= '9'))
         digit = c - '0';
      else if ((c >= 'A') && (c <= 'F'))
         digit = c - 'A' + 10;
      else if ((c >= 'a') && (c <= 'f'))
         digit = c - 'a' + 10;
      else
         break;

      value = (value << 4) | digit;
      ++consumed;
   }

   // All requested digits were read
   if (consumed == len)
      return value;

   stringstream msg;
   msg << "lexer error in line " << line << ": invalid unicode escape";
   throw Exception(msg.str());
}
//---------------------------------------------------------------------------
// Encode a unicode code point as UTF-8.
//
// code: the code point to encode.
// Returns the encoded byte sequence:
//   - U+0001..U+007F   -> 1 byte
//   - U+0000 and U+0080..U+07FF -> 2 bytes (U+0000 deliberately keeps the
//     original two-byte form 0xC0 0x80 so the result never contains a NUL byte)
//   - U+0800..U+FFFF   -> 3 bytes
//   - U+10000..U+10FFFF -> 4 bytes (previously truncated into a wrong 3-byte
//     sequence; reachable through \UXXXXXXXX escapes)
//   - anything above U+10FFFF is not a valid code point and is encoded as the
//     replacement character U+FFFD
static std::string encodeUtf8(unsigned code)
{
   std::string result;
   if (code&&(code<0x80)) {
      result+=static_cast<char>(code);
   } else if (code<0x800) {
      result+=static_cast<char>(0xc0 | (0x1f & (code >> 6)));
      result+=static_cast<char>(0x80 | (0x3f & code));
   } else if (code<0x10000) {
      result+=static_cast<char>(0xe0 | (0x0f & (code >> 12)));
      result+=static_cast<char>(0x80 | (0x3f & (code >> 6)));
      result+=static_cast<char>(0x80 | (0x3f & code));
   } else if (code<=0x10ffff) {
      result+=static_cast<char>(0xf0 | (0x07 & (code >> 18)));
      result+=static_cast<char>(0x80 | (0x3f & (code >> 12)));
      result+=static_cast<char>(0x80 | (0x3f & (code >> 6)));
      result+=static_cast<char>(0x80 | (0x3f & code));
   } else {
      // Out of the unicode range: emit U+FFFD (EF BF BD)
      result+=static_cast<char>(0xef);
      result+=static_cast<char>(0xbf);
      result+=static_cast<char>(0xbd);
   }
   return result;
}
//---------------------------------------------------------------------------
// Lex an escape sequence; the leading backslash has already been consumed.
// The decoded character(s) are appended to token. Recognized escapes are
// \t \n \r \" \> \\ plus \uXXXX and \UXXXXXXXX. Throws Exception
// ("invalid escape sequence") on end of input or an unknown escape.
void TurtleParser::Lexer::lexEscape(std::string& token)
{
   char c;
   if (read(c)) {
      switch (c) {
         // Standard escapes
         case 't':  token+='\t'; return;
         case 'n':  token+='\n'; return;
         case 'r':  token+='\r'; return;
         case '\"': token+='\"'; return;
         case '>':  token+='>';  return;
         case '\\': token+='\\'; return;
         // Unicode sequences with 4 or 8 hex digits
         case 'u': {
            unsigned code=lexHexCode(4);
            token+=encodeUtf8(code);
            return;
         }
         case 'U': {
            unsigned code=lexHexCode(8);
            token+=encodeUtf8(code);
            return;
         }
         // Invalid escape
         default:
            break;
      }
   }

   stringstream msg;
   msg << "lexer error in line " << line << ": invalid escape sequence";
   throw Exception(msg.str());
}
//---------------------------------------------------------------------------
TurtleParser::Lexer::Token TurtleParser::Lexer::lexLongString(std::string& token)
// Lex a long string; the opening """ has already been consumed.
// The string content is appended to token (escape sequences are decoded via
// lexEscape, embedded newlines bump the line counter). Returns Token_String
// once the closing """ is found and throws Exception ("invalid string") if
// the input ends first.
{
   char c;
   while (read(c)) {
      if (c=='\"') {
         // One quote seen: it only terminates the string when two more follow
         if (!read(c)) break;
         if (c!='\"') {
            // A lone quote is part of the content. Push the character that
            // followed it back so the main loop handles it normally (it may be
            // a backslash or a newline); previously it was silently dropped.
            // unread() is used the same way after read() in lexString/lexNumber.
            token+='\"';
            unread();
            continue;
         }
         if (!read(c)) break;
         if (c!='\"') {
            // Two quotes followed by something else: same treatment
            token+="\"\"";
            unread();
            continue;
         }
         return Token_String;
      }
      if (c=='\\') {
         lexEscape(token);
      } else {
         token+=c;
         if (c=='\n') line++;
      }
   }
   // Ran out of input before the closing """
   stringstream msg;
   msg << "lexer error in line " << line << ": invalid string";
   throw Exception(msg.str());
}
//---------------------------------------------------------------------------
// Lex a string; the opening quote has already been consumed. The incoming
// value of c is not used, the parameter only serves as the read cursor.
// token receives the decoded content. Returns Token_String for an empty
// string ("") and for a terminated single-line string, and delegates to
// lexLongString when the string opens with """. Throws Exception
// ("invalid string") on end of input or on a raw newline inside the string.
TurtleParser::Lexer::Token TurtleParser::Lexer::lexString(std::string& token,char c)
{
   token.resize(0);

   // Look at the character right after the opening quote
   bool more = read(c);

   // Another quote? Either the empty string or the start of a long string
   if (more && (c == '\"')) {
      if (!read(c))
         return Token_String;
      if (c == '\"')
         return lexLongString(token);
      unread();
      return Token_String;
   }

   // Regular single-line string
   while (more) {
      if (c == '\"')
         return Token_String;

      if (c == '\\') {
         lexEscape(token);
      } else {
         token += c;
         if (c == '\n') {
            // Raw newline: push it back and report the error below
            unread();
            break;
         }
      }
      more = read(c);
   }

   // Missing first character, raw newline or premature end of input
   stringstream msg;
   msg << "lexer error in line " << line << ": invalid string";
   throw Exception(msg.str());
}
//---------------------------------------------------------------------------
// Lex a URI; the opening '<' has already been consumed. The incoming value
// of c is not used, the parameter only serves as the read cursor.
// token receives the decoded URI text. Returns Token_URI when the closing
// '>' is found. Throws Exception ("invalid URI") on end of input or on a raw
// newline inside the URI.
TurtleParser::Lexer::Token TurtleParser::Lexer::lexURI(std::string& token,char c)
{
   token.resize(0);

   bool more = read(c);
   while (more) {
      if (c == '>')
         return Token_URI;

      if (c == '\\') {
         lexEscape(token);
      } else {
         token += c;
         if (c == '\n') {
            // Raw newline: push it back and report the error below
            unread();
            break;
         }
      }
      more = read(c);
   }

   // Missing first character, raw newline or premature end of input
   stringstream msg;
   msg << "lexer error in line " << line << ": invalid URI";
   throw Exception(msg.str());
}
//---------------------------------------------------------------------------
TurtleParser::Lexer::Token TurtleParser::Lexer::next(std::string& token)
// Get the next token.
// Returns the token kind; for tokens that carry text (numbers, strings, URIs,
// names) the text is stored in token. A token previously put back is returned
// first. Returns Token_Eof when the input is exhausted and throws Exception
// on a character that cannot start a token.
{
// Do we already have one?
if (putBack!=Token_Eof) {
Token result=putBack;
token=putBackValue;
putBack=Token_Eof;
return result;
}
// Read more
char c;
while (read(c)) {
switch (c) {
// Whitespace is skipped; newlines additionally advance the line counter
case ' ': case '\t': case '\r': continue;
case '\n': line++; continue;
// Comment: skip up to the end of the line
case '#': while (read(c)) if ((c=='\n')||(c=='\r')) break; if (c=='\n') ++line; continue;
// A dot followed by a digit starts a number, otherwise it is a plain dot.
// The character after the dot is pushed back in both cases.
case '.': if (!read(c)) return Token_Dot; unread(); if ((c>='0')&&(c<='9')) return lexNumber(token,'.'); return Token_Dot;
// Single-character punctuation tokens
case ':': return Token_Colon;
case ';': return Token_Semicolon;
case ',': return Token_Comma;
case '[': return Token_LBracket;
case ']': return Token_RBracket;
case '(': return Token_LParen;
case ')': return Token_RParen;
case '@': return Token_At;
// Sign or digit: a number
case '+': case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
return lexNumber(token,c);
// "^^" introduces a datatype; a single '^' is an error
case '^':
if ((!read(c))||(c!='^')) {
stringstream msg;
msg << "lexer error in line " << line << ": '^' expected";
throw Exception(msg.str());
}
return Token_Type;
case '\"': return lexString(token,c);
case '<': return lexURI(token,c);
default:
// Names start with an ASCII letter or '_' and run up to the next separator
if (((c>='A')&&(c<='Z'))||((c>='a')&&(c<='z'))||(c=='_')) { // XXX unicode!
token=c;
while (read(c)) {
if (issep(c)) { unread(); break; }
token+=c;
}
// Keywords get their own token kinds
if (token=="a") return Token_A;
if (token=="true") return Token_True;
if (token=="false") return Token_False;
return Token_Name;
} else {
stringstream msg;
msg << "lexer error in line " << line << ": unexpected character " << c;
throw Exception(msg.str());
}
}
}
return Token_Eof;
}
//---------------------------------------------------------------------------
// Constructor: the lexer reads from the given stream; the triples read
// position and the blank node counter both start at zero.
TurtleParser::TurtleParser(istream& in)
   : lexer(in),
     triplesReader(0),
     nextBlank(0)
{}
//---------------------------------------------------------------------------
// Destructor: no explicit cleanup.
TurtleParser::~TurtleParser()
{}
//---------------------------------------------------------------------------
void TurtleParser::parseError(const string& message)
// Report an error
{
stringstream msg;
msg << "parse error in line " << lexer.getLine() << ": " << message;
throw Exception(msg.str());
}
//---------------------------------------------------------------------------
void TurtleParser::newBlankNode(std::string& node)
// Construct a new blank node
{
stringstream buffer;
buffer << "_:_" << (nextBlank++);
node=buffer.str();
}
//---------------------------------------------------------------------------
// Convert a relative URI into an absolute one by prefixing it with the base.
// The URI is left untouched when no base is set or when "://" occurs within
// its first ten characters (treated as "already absolute").
void TurtleParser::constructAbsoluteURI(std::string& uri)
{
   const bool haveBase = !base.empty();
   // XXX fix the check!
   const bool looksAbsolute = uri.find("://") < 10;

   if (haveBase && !looksAbsolute) {
      // Put the base in front
      uri = base + uri;
   }
}
//---------------------------------------------------------------------------
void TurtleParser::parseDirective()
// Parse a directive.
// Handles the directive name and body that follow an '@': "base <uri> ." and
// "prefix [name] : <uri> .". Any other directive name, or a missing element,
// is reported through parseError.
{
std::string value;
if (lexer.next(value)!=Lexer::Token_Name)
parseError("directive name expected after '@'");
if (value=="base") {
// The URI text is lexed directly into the base member
if (lexer.next(base)!=Lexer::Token_URI)
parseError("URI expected after @base");
} else if (value=="prefix") {
std::string prefixName;
Lexer::Token token=lexer.next(prefixName);
// A prefix name?
if (token==Lexer::Token_Name) {
token=lexer.next();
// No name given: the directive declares the empty prefix
} else prefixName.resize(0);
// Colon
if (token!=Lexer::Token_Colon)
parseError("':' expected after @prefix");
// URI
std::string uri;
if (lexer.next(uri)!=Lexer::Token_URI)
parseError("URI expected after @prefix");
// Register the prefix; an existing entry with the same name is overwritten
prefixes[prefixName]=uri;
} else {
parseError("unknown directive @"+value);
}
// Final dot
if (lexer.next()!=Lexer::Token_Dot)
parseError("'.' expected after directive");
}
//---------------------------------------------------------------------------
// Is the token a (generalized) name? Besides plain names this also accepts
// the keyword tokens "a", "true" and "false".
inline bool TurtleParser::isName(Lexer::Token token)
{
   switch (token) {
      case Lexer::Token_Name:
      case Lexer::Token_A:
      case Lexer::Token_True:
      case Lexer::Token_False:
         return true;
      default:
         return false;
   }
}
//---------------------------------------------------------------------------
void TurtleParser::parseQualifiedName(const string& prefix,string& name)
// Parse a qualified name
{
if (lexer.next()!=Lexer::Token_Colon)
parseError("':' expected in qualified name");
if (!prefixes.count(prefix))
parseError("unknown prefix '"+prefix+"'");
string expandedPrefix=prefixes[prefix];
Lexer::Token token=lexer.next(name);
if (isName(token)) {
name=expandedPrefix+name;
} else {
lexer.unget(token,name);
name=expandedPrefix;
}
}
//---------------------------------------------------------------------------
void TurtleParser::parseBlank(std::string& entry)
   // Parse a blank entry and store the resulting node label/URI in entry.
   // Three forms are handled:
   //   _:name      a labelled blank node
   //   [ p o ; ]   an anonymous node, optionally with a predicate-object list
   //   ( o1 o2 )   a collection, expanded into an rdf:first / rdf:rest chain
   // Triples generated while expanding the bracket and collection forms are
   // appended to the pending triples list. Raises a parse error on anything else.
{
   Lexer::Token token=lexer.next(entry);
   switch (token) {
      case Lexer::Token_Name:
         // Must be exactly "_" ':' <name>
         if ((entry!="_")||(lexer.next()!=Lexer::Token_Colon)||(!isName(lexer.next(entry))))
            parseError("blank nodes must start with '_:'");
         entry="_:"+entry;
         return;
      case Lexer::Token_LBracket:
         {
            newBlankNode(entry);
            token=lexer.next();
            if (token!=Lexer::Token_RBracket) {
               // Not the empty "[]": parse the property list with the new node as subject
               lexer.ungetIgnored(token);
               std::string predicate,object,objectSubType;
               Type::Type_ID objectType;
               parsePredicateObjectList(entry,predicate,object,objectType,objectSubType);
               // The first triple comes back through the out-parameters, queue it as well
               triples.push_back(Triple(entry,predicate,object,objectType,objectSubType));
               if (lexer.next()!=Lexer::Token_RBracket)
                  parseError("']' expected");
            }
            return;
         }
      case Lexer::Token_LParen:
         {
            // Collection: read all members up to the closing parenthesis
            vector<string> entries,entrySubTypes;
            vector<Type::Type_ID> entryTypes;
            while ((token=lexer.next())!=Lexer::Token_RParen) {
               lexer.ungetIgnored(token);
               entries.push_back(string());
               entryTypes.push_back(Type::Type_URI);
               entrySubTypes.push_back(string());
               parseObject(entries.back(),entryTypes.back(),entrySubTypes.back());
            }
            // Empty collection?
            if (entries.empty()) {
               entry="http://www.w3.org/1999/02/22-rdf-syntax-ns#nil";
               return;
            }
            // One blank node per member, terminated by rdf:nil
            vector<string> nodes;
            nodes.resize(entries.size());
            for (unsigned index=0;index<entries.size();index++)
               newBlankNode(nodes[index]);
            nodes.push_back("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil");
            // Derive the list triples
            for (unsigned index=0;index<entries.size();index++) {
               triples.push_back(Triple(nodes[index],"http://www.w3.org/1999/02/22-rdf-syntax-ns#first",entries[index],entryTypes[index],entrySubTypes[index]));
               triples.push_back(Triple(nodes[index],"http://www.w3.org/1999/02/22-rdf-syntax-ns#rest",nodes[index+1],Type::Type_URI,""));
            }
            entry=nodes.front();
            // Done; without this return control would fall into the error case below
            return;
         }
      default: parseError("invalid blank entry");
   }
}
//---------------------------------------------------------------------------
void TurtleParser::parseSubject(Lexer::Token token,std::string& subject)
   // Parse a subject. token is the already-read first token of the subject and
   // subject holds its text; on return subject holds the resolved URI or blank
   // node label. Raises a parse error for tokens that cannot start a subject.
{
   switch (token) {
      case Lexer::Token_URI:
         // URI
         constructAbsoluteURI(subject);
         return;
      case Lexer::Token_A: subject="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; return;
      case Lexer::Token_Colon:
         // Qualified name with empty prefix: put the colon back, parseQualifiedName expects it
         lexer.unget(token,subject);
         parseQualifiedName("",subject);
         return;
      case Lexer::Token_Name:
         // Qualified name
         // Blank node?
         if (subject=="_") {
            lexer.unget(token,subject);
            parseBlank(subject);
            return;
         }
         // No
         parseQualifiedName(subject,subject);
         return;
      case Lexer::Token_LBracket: case Lexer::Token_LParen:
         // Opening bracket/parenthesis: let parseBlank re-read the token
         lexer.unget(token,subject);
         parseBlank(subject);
         // Done; without this return control would fall into the error case below
         return;
      default: parseError("invalid subject");
   }
}
//---------------------------------------------------------------------------
void TurtleParser::parseObject(std::string& object,Type::Type_ID& objectType,std::string& objectSubType)
   // Parse an object. Reads the next token(s) from the lexer and reports
   //   object         the URI, blank node label or literal text
   //   objectType     the kind of value (URI or one of the literal types)
   //   objectSubType  the language tag (Type_CustomLanguage) or datatype URI
   //                  (Type_CustomType); empty for all other types
   // Raises a parse error for tokens that cannot start an object.
{
   Lexer::Token token=lexer.next(object);
   objectSubType="";
   switch (token) {
      case Lexer::Token_URI:
         // URI
         constructAbsoluteURI(object);
         objectType=Type::Type_URI;
         return;
      case Lexer::Token_Colon:
         // Qualified name with empty prefix: put the colon back, parseQualifiedName expects it
         lexer.unget(token,object);
         parseQualifiedName("",object);
         objectType=Type::Type_URI;
         return;
      case Lexer::Token_Name:
         // Qualified name
         // Blank node?
         if (object=="_") {
            lexer.unget(token,object);
            parseBlank(object);
            objectType=Type::Type_URI;
            return;
         }
         // No
         parseQualifiedName(object,object);
         objectType=Type::Type_URI;
         return;
      case Lexer::Token_LBracket: case Lexer::Token_LParen:
         // Opening bracket/parenthesis
         lexer.unget(token,object);
         parseBlank(object);
         objectType=Type::Type_URI;
         return;
      case Lexer::Token_Integer:
         // Numeric literal
         objectType=Type::Type_Integer;
         return;
      case Lexer::Token_Decimal:
         // Numeric literal
         objectType=Type::Type_Decimal;
         return;
      case Lexer::Token_Double:
         // Numeric literal
         objectType=Type::Type_Double;
         return;
      case Lexer::Token_A:
         // The keyword 'a' in object position is reported as the rdf:type URI
         object="http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
         objectType=Type::Type_URI;
         return;
      case Lexer::Token_True:
         // Boolean literal
         objectType=Type::Type_Boolean;
         return;
      case Lexer::Token_False:
         // Boolean literal
         objectType=Type::Type_Boolean;
         return;
      case Lexer::Token_String:
         // String literal, optionally followed by "@lang" or "^^datatype"
         {
            token=lexer.next();
            objectType=Type::Type_Literal;
            if (token==Lexer::Token_At) {
               if (lexer.next(objectSubType)!=Lexer::Token_Name)
                  parseError("language tag expected");
               objectType=Type::Type_CustomLanguage;
            } else if (token==Lexer::Token_Type) {
               string type;
               token=lexer.next(type);
               if (token==Lexer::Token_URI) {
                  // Already parsed
               } else if (token==Lexer::Token_Colon) {
                  // Empty prefix: the colon was just consumed, but
                  // parseQualifiedName reads it itself, so hand it back first
                  lexer.unget(token,type);
                  parseQualifiedName("",type);
               } else if (token==Lexer::Token_Name) {
                  parseQualifiedName(type,type);
               }
               // Map the well-known XML Schema datatypes onto the built-in types
               if (type=="http://www.w3.org/2001/XMLSchema#string") {
                  objectType=Type::Type_String;
               } else if (type=="http://www.w3.org/2001/XMLSchema#integer") {
                  objectType=Type::Type_Integer;
               } else if (type=="http://www.w3.org/2001/XMLSchema#decimal") {
                  objectType=Type::Type_Decimal;
               } else if (type=="http://www.w3.org/2001/XMLSchema#double") {
                  objectType=Type::Type_Double;
               } else if (type=="http://www.w3.org/2001/XMLSchema#boolean") {
                  objectType=Type::Type_Boolean;
               } else {
                  objectType=Type::Type_CustomType;
                  objectSubType=type;
               }
            } else {
               // Plain literal: the token belongs to whatever follows
               lexer.ungetIgnored(token);
            }
            return;
         }
      default: parseError("invalid object");
   }
}
//---------------------------------------------------------------------------
void TurtleParser::parsePredicateObjectList(const string& subject,string& predicate,string& object,Type::Type_ID& objectType,string& objectSubType)
// Parse a predicate object list
{
// Parse the first predicate
Lexer::Token token;
switch (token=lexer.next(predicate)) {
case Lexer::Token_URI: constructAbsoluteURI(predicate); break;
case Lexer::Token_A: predicate="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; break;
case Lexer::Token_Colon: lexer.unget(token,predicate); parseQualifiedName("",predicate); break;
case Lexer::Token_Name: if (predicate=="_") parseError("blank nodes not allowed as predicate"); parseQualifiedName(predicate,predicate); break;
default: parseError("invalid predicate");
}
// Parse the object
parseObject(object,objectType,objectSubType);
// Additional objects?
token=lexer.next();
while (token==Lexer::Token_Comma) {
string additionalObject,additionalObjectSubType;
Type::Type_ID additionalObjectType;
parseObject(additionalObject,additionalObjectType,additionalObjectSubType);
triples.push_back(Triple(subject,predicate,additionalObject,additionalObjectType,additionalObjectSubType));
token=lexer.next();
}
// Additional predicates?
while (token==Lexer::Token_Semicolon) {
// Parse the predicate
string additionalPredicate;
switch (token=lexer.next(additionalPredicate)) {
case Lexer::Token_URI: constructAbsoluteURI(additionalPredicate); break;
case Lexer::Token_A: additionalPredicate="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; break;
case Lexer::Token_Colon: lexer.unget(token,additionalPredicate); parseQualifiedName("",additionalPredicate); break;
case Lexer::Token_Name: if (additionalPredicate=="_") parseError("blank nodes not allowed as predicate"); parseQualifiedName(additionalPredicate,additionalPredicate); break;
default: lexer.unget(token,additionalPredicate); return;
}
// Parse the object
string additionalObject,additionalObjectSubType;
Type::Type_ID additionalObjectType;
parseObject(additionalObject,additionalObjectType,additionalObjectSubType);
triples.push_back(Triple(subject,additionalPredicate,additionalObject,additionalObjectType,additionalObjectSubType));
// Additional objects?
token=lexer.next();
while (token==Lexer::Token_Comma) {
parseObject(additionalObject,additionalObjectType,additionalObjectSubType);
triples.push_back(Triple(subject,additionalPredicate,additionalObject,additionalObjectType,additionalObjectSubType));
token=lexer.next();
}
}
lexer.ungetIgnored(token);
}
//---------------------------------------------------------------------------
void TurtleParser::parseTriple(Lexer::Token token,std::string& subject,std::string& predicate,std::string& object,Type::Type_ID& objectType,std::string& objectSubType)
   // Parse one statement: subject, predicate-object list and the closing '.'.
   // token is the already-read first token of the subject.
{
   parseSubject(token,subject);
   parsePredicateObjectList(subject,predicate,object,objectType,objectSubType);
   const bool terminated=(lexer.next()==Lexer::Token_Dot);
   if (!terminated)
      parseError("'.' expected after triple");
}
//---------------------------------------------------------------------------
bool TurtleParser::parse(std::string& subject,std::string& predicate,std::string& object,Type::Type_ID& objectType,std::string& objectSubType)
// Read the next triple
{
// Some triples left?
if (triplesReader<triples.size()) {
subject=triples[triplesReader].subject;
predicate=triples[triplesReader].predicate;
object=triples[triplesReader].object;
objectType=triples[triplesReader].objectType;
objectSubType=triples[triplesReader].objectSubType;
if ((++triplesReader)>=triples.size()) {
triples.clear();
triplesReader=0;
}
return true;
}
// No, check if the input is done
Lexer::Token token;
while (true) {
token=lexer.next(subject);
if (token==Lexer::Token_Eof) return false;
// A directive?
if (token==Lexer::Token_At) {
parseDirective();
continue;
} else break;
}
// No, parse a triple
parseTriple(token,subject,predicate,object,objectType,objectSubType);
return true;
}
//---------------------------------------------------------------------------

169
Parser/TurtleParser.h Normal file
View File

@ -0,0 +1,169 @@
#ifndef H_tools_rdf3xload_TurtleParser
#define H_tools_rdf3xload_TurtleParser
//---------------------------------------------------------------------------
// RDF-3X
// (c) 2008 Thomas Neumann. Web site: http://www.mpi-inf.mpg.de/~neumann/rdf3x
//
// This work is licensed under the Creative Commons
// Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy
// of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
// or send a letter to Creative Commons, 171 Second Street, Suite 300,
// San Francisco, California, 94105, USA.
//---------------------------------------------------------------------------
#include "Type.h"
#include <istream>
#include <string>
#include <map>
#include <vector>
//---------------------------------------------------------------------------
/// Parse a turtle file.
/// Reads from a std::istream and hands out one triple per call to parse().
/// Errors are reported by throwing TurtleParser::Exception.
class TurtleParser
{
public:
/// A parse error, thrown by the parser when the input is malformed
class Exception {
public:
/// The message describing the error
std::string message;
/// Constructor from a string message
Exception(const std::string& message);
/// Constructor from a C string message
Exception(const char* message);
/// Destructor
~Exception();
};
private:
/// A turtle lexer: splits the input stream into tokens
class Lexer {
public:
/// Possible tokens.
/// The order matters: unget()/ungetIgnored() only keep the token text for
/// tokens from Token_Integer onward.
enum Token { Token_Eof, Token_Dot, Token_Colon, Token_Comma, Token_Semicolon, Token_LBracket, Token_RBracket, Token_LParen, Token_RParen, Token_At, Token_Type, Token_Integer, Token_Decimal, Token_Double, Token_Name, Token_A, Token_True, Token_False, Token_String, Token_URI };
private:
/// The input
std::istream& in;
/// The putback token (a single token of lookahead)
Token putBack;
/// The putback string, i.e. the text belonging to putBack
std::string putBackValue;
/// Buffer for parsing when ignoring the value
std::string ignored;
/// The current line
unsigned line;
/// Size of the read buffer
static const unsigned readBufferSize = 1024;
/// Read buffer
char readBuffer[readBufferSize];
/// Read buffer pointers: next unread character and end of the buffered data
char* readBufferStart,*readBufferEnd;
/// Read new characters (called when the buffer is exhausted)
bool doRead(char& c);
/// Read a character; served from the buffer when possible, otherwise via doRead
bool read(char& c) { if (readBufferStart<readBufferEnd) { c=*(readBufferStart++); return true; } else return doRead(c); }
/// Unread the last character by stepping the buffer pointer back.
/// NOTE(review): presumably only valid directly after a successful read() - confirm.
void unread() { readBufferStart--; }
/// Lex a hex code
unsigned lexHexCode(unsigned len);
/// Lex an escape sequence
void lexEscape(std::string& token);
/// Lex a long string
Token lexLongString(std::string& token);
/// Lex a string
Token lexString(std::string& token,char c);
/// Lex a URI
Token lexURI(std::string& token,char c);
/// Lex a number
Token lexNumber(std::string& token,char c);
public:
/// Constructor
Lexer(std::istream& in);
/// Destructor
~Lexer();
/// The next token (including value)
Token next(std::string& value);
/// The next token (ignoring the value); the text ends up in the ignored buffer
Token next() { return next(ignored); }
/// Put a token and a string back; the string is only kept for tokens >= Token_Integer
void unget(Token t,const std::string& s) { putBack=t; if (t>=Token_Integer) putBackValue=s; }
/// Put back a token that was read with next(); its text is taken from the ignored buffer
void ungetIgnored(Token t) { putBack=t; if (t>=Token_Integer) putBackValue=ignored; }
/// Get the line
unsigned getLine() const { return line; }
/// Skip input up to and including the next '\n' (or until reading fails).
/// NOTE(review): this does not update `line`; if line counting happens where
/// next() sees '\n', reported line numbers may lag after a discard - confirm.
void discardLine()
{
char c;
while (read(c) && c != '\n');
}
};
/// A triple, used to queue statements produced while parsing
struct Triple {
/// The entries
std::string subject,predicate,object,objectSubType;
/// Type for the object
Type::Type_ID objectType;
/// Constructor
Triple(const std::string& subject,const std::string& predicate,const std::string& object,Type::Type_ID objectType,const std::string& objectSubType) : subject(subject),predicate(predicate),object(object),objectSubType(objectSubType),objectType(objectType) {}
};
/// The lexer
Lexer lexer;
/// The uri base (set by @base)
std::string base;
/// All known prefixes (set by @prefix), mapping prefix name to namespace URI
std::map<std::string,std::string> prefixes;
/// The currently available triples, waiting to be returned by parse()
std::vector<Triple> triples;
/// Reader in the triples: index of the next queued triple to return
unsigned triplesReader;
/// The next blank node id
unsigned nextBlank;
/// Is a (generalized) name token?
static inline bool isName(Lexer::Token token);
/// Convert a relative URI into an absolute one
void constructAbsoluteURI(std::string& uri);
/// Construct a new blank node
void newBlankNode(std::string& node);
/// Report an error (throws Exception)
void parseError(const std::string& message);
/// Parse a qualified name
void parseQualifiedName(const std::string& prefix,std::string& name);
/// Parse a blank entry
void parseBlank(std::string& entry);
/// Parse a subject
void parseSubject(Lexer::Token token,std::string& subject);
/// Parse an object
void parseObject(std::string& object,Type::Type_ID& objectType,std::string& objectSubType);
/// Parse a predicate object list
void parsePredicateObjectList(const std::string& subject,std::string& predicate,std::string& object,Type::Type_ID& objectType,std::string& objectSubType);
/// Parse a directive
void parseDirective();
/// Parse a new triple
void parseTriple(Lexer::Token token,std::string& subject,std::string& predicate,std::string& object,Type::Type_ID& objectType,std::string& objectSubType);
public:
/// Constructor
TurtleParser(std::istream& in);
/// Destructor
~TurtleParser();
/// Read the next triple; returns false when the input is exhausted
bool parse(std::string& subject,std::string& predicate,std::string& object,Type::Type_ID& objectType,std::string& objectSubType);
/// Skip the rest of the current input line (forwards to the lexer)
void discardLine()
{
lexer.discardLine();
}
};
//---------------------------------------------------------------------------
#endif

27
Parser/Type.h Normal file
View File

@ -0,0 +1,27 @@
#ifndef H_infra_util_Type
#define H_infra_util_Type
//---------------------------------------------------------------------------
// RDF-3X
// (c) 2009 Thomas Neumann. Web site: http://www.mpi-inf.mpg.de/~neumann/rdf3x
//
// This work is licensed under the Creative Commons
// Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy
// of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
// or send a letter to Creative Commons, 171 Second Street, Suite 300,
// San Francisco, California, 94105, USA.
//---------------------------------------------------------------------------
/// Information about the type system
class Type {
   public:
   /// Different literal types
   enum Type_ID {
      Type_URI,
      Type_Literal,
      Type_CustomLanguage,
      Type_CustomType,
      Type_String,
      Type_Integer,
      Type_Decimal,
      Type_Double,
      Type_Boolean
   };
   /// Only language-tagged and custom-typed literals carry a sub-type
   static inline bool hasSubType(Type_ID t) {
      switch (t) {
         case Type_CustomLanguage:
         case Type_CustomType:
            return true;
         default:
            return false;
      }
   }
   /// Type of the sub-type itself: a literal for language tags, a URI otherwise
   static inline Type_ID getSubTypeType(Type_ID t) {
      if (t==Type_CustomLanguage)
         return Type_Literal;
      return Type_URI;
   }
};
//---------------------------------------------------------------------------
#endif

View File

@ -53,21 +53,21 @@ string ResultSet::to_str()
std::stringstream _buf;
//debug
_buf << "There has answer: " << this->ansNum << endl;
_buf << this->var_name[0];
for(int i = 1; i < this->select_var_num; i ++)
{
_buf << "\t" << this->var_name[i];
}
_buf << "\n";
// _buf << "There has answer: " << this->ansNum << endl;
// _buf << this->var_name[0];
// for(int i = 1; i < this->select_var_num; i ++)
// {
// _buf << "\t" << this->var_name[i];
// }
// _buf << "\n";
for(int i = 0; i < this->ansNum; i ++)
{
_buf << this->answer[i][0];
for(int j = 1; j < this->select_var_num; j ++)
{
_buf << "\t" << this->answer[i][j];
//_buf << " " << this->answer[i][j];
//_buf << "\t" << this->answer[i][j];
_buf << " " << this->answer[i][j];
}
_buf << "\n";
}

View File

@ -123,9 +123,5 @@ void Client::run()
std::cerr << "disconnect server error. @Client::run" << std::endl;
continue;
}
}
}

View File

@ -13,7 +13,7 @@
#include"../Bstr/Bstr.h"
enum CommandType {CMD_CONNECT, CMD_EXIT, CMD_LOAD, CMD_UNLOAD, CMD_CREATE_DB, CMD_DELETE_DB,
CMD_IMPORT, CMD_QUERY, CMD_SHOW, CMD_OTHER}; // extend the operation command type here.
CMD_IMPORT, CMD_QUERY, CMD_SHOW, CMD_INSERT, CMD_OTHER}; // extend the operation command type here.
class Operation
{

View File

@ -149,6 +149,13 @@ void Server::listen()
}
break;
}
case CMD_INSERT:
{
string db_name = operation.getParameter(0);
string rdf_path = operation.getParameter(1);
this->insertTriple(db_name, "", rdf_path, ret_msg);
break;
}
default:
cerr << "this command is not supported by now. @Server::listen" << endl;
}
@ -219,6 +226,11 @@ bool Server::parser(std::string _raw_cmd, Operation& _ret_oprt)
_ret_oprt.setCommand(CMD_SHOW);
para_cnt = 1;
}
else if (cmd == "insert")
{
_ret_oprt.setCommand(CMD_INSERT);
para_cnt = 2;
}
else
{
return false;
@ -339,6 +351,29 @@ bool Server::importRDF(std::string _db_name, std::string _ac_name, std::string _
return flag;
}
/* Insert the triples of an RDF file into the database named _db_name.
 * Any database currently held by the server is unloaded and released first,
 * then a Database object for _db_name is created and the file is inserted.
 * _ac_name is accepted for signature parity with the other commands but is not used here.
 * _ret_msg receives a human readable status message.
 * Returns true if the insertion succeeded. */
bool Server::insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg)
{
    if (this->database != NULL)
    {
        this->database->unload();
        delete this->database;
        // do not keep a dangling pointer around in case the allocation below throws
        this->database = NULL;
    }

    this->database = new Database(_db_name);
    bool flag = this->database->insert(_rdf_path);

    if (flag)
    {
        _ret_msg = "insert triple file to database done.";
    }
    else
    {
        // this is the insert command, so report an insert failure (not an import failure)
        _ret_msg = "insert triple file to database failed.";
    }

    return flag;
}
bool Server::query(const std::string _query, std::string& _ret_msg)
{
if (this->database == NULL)

View File

@ -46,6 +46,7 @@ public:
bool unloadDatabase(std::string _db_name, std::string _ac_name, std::string& _ret_msg);
bool showDatabases(std::string _ac_name, std::string& _ret_msg);
bool importRDF(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg);
bool insertTriple(std::string _db_name, std::string _ac_name, std::string _rdf_path, std::string& _ret_msg);
bool query(const std::string _query, std::string& _ret_msg);

View File

@ -37,26 +37,27 @@ bool Socket::create()
if (!this->isValid())
{
std::cerr << "create socket failed. @Socket::create" << std::endl;
return false;
}
// TIME_WAIT - arg
int on = 1;
int setsockopt_REUSEADDR_return = setsockopt(this->sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on));
if (setsockopt_REUSEADDR_return == -1)
// BUFFER_SIZE - arg
int bufferSize = 128 * 1024;
int setsockopt_RCVBUF_return = setsockopt(this->sock, SOL_SOCKET, SO_RCVBUF,(const char*)&bufferSize, sizeof(bufferSize));
int setsockopt_SNDBUF_return = setsockopt(this->sock, SOL_SOCKET, SO_SNDBUF,(const char*)&bufferSize, sizeof(bufferSize));
if (setsockopt_REUSEADDR_return == -1 ||
setsockopt_RCVBUF_return == -1 ||
setsockopt_SNDBUF_return == -1)
{
std::cerr << "set socket options failed. @Socket::create" << std::endl;
return false;
}
// send and recv TIME_OUT -arg
// struct timeval timeout = {3,0};
// int setsockopt_SNDTIMEO_return = setsockopt(this->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(struct timeval));
// int setsockopt_RCVTIMEO_return = setsockopt(this->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(struct timeval));
// if (setsockopt_SNDTIMEO_return == -1 || setsockopt_RCVTIMEO_return == -1)
// {
// return false;
// }
return true;
}

View File

@ -7,7 +7,7 @@
#include "Triple.h"
Triple::Triple(const string _s, const string _p, const string _o)
Triple::Triple(const string& _s, const string& _p, const string& _o)
{
this->subject = _s;
this->predicate = _p;
@ -91,3 +91,51 @@ const string Triple::toString()const{
this->predicate+"\t"+
this->object+".";
}
/* for TripleWithObjType */
/* default constructor: empty triple, object type left as a blank character */
TripleWithObjType::TripleWithObjType():Triple(), object_type(' ')
{
}
/* build a triple from its three parts plus the type tag of the object */
TripleWithObjType::TripleWithObjType(const string& _s, const string& _p, const string& _o, const char& _o_type):Triple(_s, _p, _o), object_type(_o_type)
{
}
/* copy constructor: copies the base triple and the object type tag */
TripleWithObjType::TripleWithObjType(const TripleWithObjType& _triple_with_objtype):Triple(_triple_with_objtype), object_type(_triple_with_objtype.object_type)
{
}
/* assignment: the base part is assigned first, then the object type tag */
TripleWithObjType& TripleWithObjType::operator=(const TripleWithObjType& _triple_with_objtype)
{
    const Triple& base_part = _triple_with_objtype;
    Triple::operator=(base_part);
    object_type = _triple_with_objtype.object_type;
    return *this;
}
void TripleWithObjType::setObjType(const char &_o_type)
{
this->object_type = _o_type;
}
bool TripleWithObjType::isObjEntity()const
{
return this->object_type == TripleWithObjType::ENTITY;
}
bool TripleWithObjType::isObjLiteral()const
{
return this->object_type == TripleWithObjType::LITERA;
}
/* tab separated rendering: subject, predicate, object, object type tag, then a closing '.' */
const string TripleWithObjType::toString()const
{
    string text(this->subject);
    text += "\t";
    text += this->predicate;
    text += "\t";
    text += this->object;
    text += "\t";
    text += this->object_type;
    text += ".";
    return text;
}

View File

@ -24,7 +24,7 @@ public:
* tuples separated by '\t'
* */
Triple(string _line);
Triple(const string _s, const string _p, const string _o);
Triple(const string& _s, const string& _p, const string& _o);
Triple(const Triple& _triple);
Triple& operator=(const Triple& _triple);
@ -39,5 +39,26 @@ public:
const string toString()const;
};
/* A Triple that additionally records what kind of value its object is.
 * The kind is stored as a single character tag, see ENTITY and LITERA. */
class TripleWithObjType : public Triple
{
public:
/* type tag of the object: ENTITY, LITERA, or ' ' when not set */
char object_type;
/* tag value marking the object as an entity */
static const char ENTITY = 'e';
/* tag value marking the object as a literal */
static const char LITERA = 'l';
/* default constructor, object type is set to ' ' */
TripleWithObjType();
/* construct from subject, predicate, object and an optional object type tag */
TripleWithObjType(const string& _s, const string& _p, const string& _o, const char& _o_type = ' ');
/* copy constructor */
TripleWithObjType(const TripleWithObjType& _triple_with_objtype);
/* assignment */
TripleWithObjType& operator=(const TripleWithObjType& _triple_with_objtype);
/* set the object type tag */
void setObjType(const char &_o_type);
/* is the object tagged as ENTITY? */
bool isObjEntity()const;
/* is the object tagged as LITERA? */
bool isObjLiteral()const;
/* tab separated text form including the object type tag */
const string toString()const;
};
#endif /* TRIPLE_H_ */

View File

@ -80,6 +80,16 @@ bool LRUCache::loadCache(string _filePath)
int pos = LRUCache::DEFAULT_NUM + this->size;
this->setElem(pos, nodePtr->getFileLine(), nodePtr);
//debug
{
if (_tmp_cycle_count != nodePtr->getFileLine())
{
stringstream _ss;
_ss << "error file line: " << _tmp_cycle_count << " " << nodePtr->getFileLine() << " " << nodePtr->getChildNum() << endl;
Database::log(_ss.str());
}
}
_tmp_cycle_count ++;
}
@ -256,13 +266,6 @@ void LRUCache:: freeElem(int _pos)
/* set the memory of the _pos element in cache */
void LRUCache:: setElem(int _pos, int _key, VNode* _value)
{
//debug
{
if (_pos < 2 || _pos >= this->capacity)
{
cout << "!!! _pos=" << _pos << endl;
}
}
this->key2pos[_key] = _pos;
this->keys[_pos] = _key;
this->values[_pos] = _value;
@ -282,7 +285,7 @@ void LRUCache:: setElem(int _pos, int _key, VNode* _value)
bool LRUCache::writeOut(int _pos, int _fileLine)
{
VNode* nodePtr = this->values[_pos];
FILE* filePtr = fopen(this->dataFilePath.c_str(),"a+b");
FILE* filePtr = fopen(this->dataFilePath.c_str(),"r+b");
if (nodePtr == NULL)
{
@ -368,7 +371,7 @@ bool LRUCache::readIn(int _pos, int _fileLine)
/* write out all the elements to hard disk. */
bool LRUCache::flush()
{
FILE* filePtr = fopen(this->dataFilePath.c_str(),"a+b");
FILE* filePtr = fopen(this->dataFilePath.c_str(),"r+b");
if (filePtr == NULL)
{
@ -386,19 +389,14 @@ bool LRUCache::flush()
int line = this->keys[i];
//debug
// {
// stringstream _ss;
// if (nodePtr->getFileLine() != line)
// {
// _ss << "line error!!!" << endl;
// }
// if (line == 0)
// {
// _ss << "at save tree, node 0 bitset:" << endl;
// _ss << nodePtr->to_str() << endl;
// Database::log(_ss.str());
// }
// }
{
if (nodePtr->getFileLine() != line)
{
stringstream _ss;
_ss << "line error at !!!" << line << " " << nodePtr->getFileLine() << endl;
Database::log(_ss.str());
}
}
if (nodePtr == NULL)
{

View File

@ -254,17 +254,6 @@ void VNode::refreshAncestorSignature(LRUCache& _nodeBuffer)
int rank = this->getIndexInFatherNode(_nodeBuffer);
if (fatherNodePtr->getChildEntry(rank).getEntitySig() != this->entry.getEntitySig())
{
//debug
// {
// if (fatherNodePtr->getFileLine()==0 && fatherNodePtr->getChildFileLine(rank)==153)
// {
// Database::log("0->153 refreshAncestorSignature");
// stringstream _ss;
// _ss << "node " << this->getFileLine() << " entry:" << endl;
// _ss << Signature::BitSet2str(this->entry.getEntitySig().entityBitSet) << endl;
// Database::log(_ss.str());
// }
// }
fatherNodePtr->setChildEntry(rank, this->entry);
fatherNodePtr->refreshAncestorSignature(_nodeBuffer);
}
@ -308,6 +297,19 @@ bool VNode::retrieveEntry(vector<SigEntry>& _entry_vec, const EntitySig _filter_
return false;
}
/* for debug: sanity check of the file line numbers stored in this node.
 * fails if the node's own file line is negative, or if the node is not a leaf
 * and one of its children has a negative file line. */
bool VNode::checkState()
{
    if (this->getFileLine() < 0)
    {
        return false;
    }

    for (int idx = 0; idx < this->child_num; idx++)
    {
        // child file lines are only checked on non-leaf nodes
        if (this->isLeaf())
        {
            continue;
        }
        if (this->getChildFileLine(idx) < 0)
        {
            return false;
        }
    }

    return true;
}
std::string VNode::to_str()
{
std::stringstream _ss;

View File

@ -54,6 +54,9 @@ public:
/* only used by leaf Node */
bool retrieveEntry(std::vector<SigEntry>& _entry_vec, const EntitySig _filter_sig, LRUCache& _nodeBuffer);
/* for debug */
bool checkState();
std::string to_str();
private:

View File

@ -66,6 +66,34 @@ VNode* VSTree::getNode(int _line)
void VSTree::retrieve(SPARQLquery& _query)
{
Database::log("IN retrieve");
//debug
// {
// VNode* temp_ptr = this->getLeafNodeByEntityID(473738);
// stringstream _ss;
//
// for (int i=0;i<temp_ptr->getChildNum();i++)
// if (temp_ptr->getChildEntry(i).getEntityId() == 473738)
// {
// _ss << "entity id=473738 entry sig:" << endl;
// _ss << "entity id=473738 leaf node line: " << temp_ptr->getFileLine() << endl;
// _ss << Signature::BitSet2str(temp_ptr->getChildEntry(i).getEntitySig().entityBitSet) << endl;
// break;
// }
//
// _ss << "leaf node sig:" << endl;
// _ss << Signature::BitSet2str(temp_ptr->getEntry().getEntitySig().entityBitSet) << endl;
//
// temp_ptr = temp_ptr->getFather(*(this->node_buffer));
// while (temp_ptr != NULL)
// {
// _ss << "line=" << temp_ptr->getFileLine() << endl;
// _ss << Signature::BitSet2str(temp_ptr->getEntry().getEntitySig().entityBitSet) << endl;
// temp_ptr = temp_ptr->getFather(*(this->node_buffer));
// }
// Database::log(_ss.str());
// }
vector<BasicQuery*>& queryList = _query.getBasicQueryVec();
// enumerate each BasicQuery and retrieve their variables' mapping entity in the VSTree.
vector<BasicQuery*>::iterator iter=queryList.begin();
@ -88,7 +116,15 @@ void VSTree::retrieve(SPARQLquery& _query)
{
std::stringstream _ss;
_ss << "candidate num: " << idListPtr->size() << endl;
//_ss << (idListPtr->isExistID(4000001)?"true":"false") <<endl;
// if (i == 0)
// {
// for (int j=0;j<idListPtr->size();j++)
// _ss << idListPtr->getID(j) << " ";
// }
_ss << endl;
_ss << "isExist 473738: " << (idListPtr->isExistID(473738)?"true":"false") <<endl;
_ss << "isExist 473472: " << (idListPtr->isExistID(473472)?"true":"false") <<endl;
_ss << "isExist 473473: " << (idListPtr->isExistID(473473)?"true":"false") <<endl;
Database::log(_ss.str());
}
@ -156,6 +192,7 @@ bool VSTree::buildTree(std::string _entry_file_path)
{
stringstream _ss;
_ss << "tree height: " << this->getHeight() << endl;
_ss << "node num: " << this->node_num << endl;
Database::log(_ss.str());
}
@ -205,13 +242,28 @@ bool VSTree::updateEntry(int _entity_id, const EntityBitSet& _bitset)
for (int i=0;i<childNum;i++)
{
const SigEntry& entry = leafNodePtr->getChildEntry(i);
if (entry.getEntityId() == _entity_id)
{
SigEntry newEntry = entry;
newEntry |= SigEntry(EntitySig(_bitset), _entity_id);
//debug
// {
// if (_entity_id == 10)
// {
// stringstream _ss;
// _ss << "lead node line: " << leafNodePtr->getFileLine() << endl;
// _ss << "old entry:\n " << Signature::BitSet2str(entry.getEntitySig().entityBitSet) << endl;
// _ss << "new entry:\n " << Signature::BitSet2str(newEntry.getEntitySig().entityBitSet) << endl;
// Database::log(_ss.str());
// }
// }
leafNodePtr->setChildEntry(i, newEntry);
leafNodePtr->refreshAncestorSignature(*(this->node_buffer));
findFlag = true;
break;
}
}
@ -298,12 +350,30 @@ bool VSTree::insertEntry(const SigEntry& _entry)
{
/* if the choosed leaf node to insert is full, the node should be split.*/
this->split(choosedNodePtr, _entry, NULL);
//debug
// if (!choosedNodePtr->checkState())
// {
// stringstream _ss;
// _ss << "node " << choosedNodePtr->getFileLine() << " childFileLine error. after split" << endl;
// Database::log(_ss.str());
// }
}
else
{
choosedNodePtr->addChildEntry(_entry, false);
choosedNodePtr->refreshAncestorSignature(*(this->node_buffer));
//debug
// if (!choosedNodePtr->checkState())
// {
// stringstream _ss;
// _ss << "node " << choosedNodePtr->getFileLine() << " childFileLine error. after addChildEntry" << endl;
// _ss <<"child num=" << choosedNodePtr->getChildNum() << endl;
// _ss <<"node num=" << this->node_num << " entry num=" << this->entry_num << endl;
// Database::log(_ss.str());
// }
// update the entityID2FileLineMap.
this->entityID2FileLineMap[_entry.getEntityId()] = choosedNodePtr->getFileLine();
}
@ -377,6 +447,12 @@ bool VSTree::loadTree()
bool flag = this->loadTreeInfo();
//debug
{
stringstream _ss;
_ss << "tree node num: " << this->node_num << endl;
Database::log(_ss.str());
}
if (flag)
{
this->node_buffer->loadCache(VSTree::tree_node_file_path);
@ -690,6 +766,20 @@ void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VN
}
}
//debug
// if (!oldNodePtr->checkState())
// {
// stringstream _ss;
// _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl;
// Database::log(_ss.str());
// }
// if (!newNodePtr->checkState())
// {
// stringstream _ss;
// _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl;
// Database::log(_ss.str());
// }
// update the entityID2FileLineMap by these two nodes.
this->updateEntityID2FileLineMap(oldNodePtr);
this->updateEntityID2FileLineMap(newNodePtr);

View File

@ -119,6 +119,10 @@ int main(int argc, char * argv[])
ResultSet _rs;
_db.query(query, _rs);
//test...
// std::string answer_file = query_file+".out";
// util::save_to_file(answer_file.c_str(), _rs.to_str());
}
return 0;

View File

@ -16,12 +16,12 @@ int main(int argc, char * argv[])
{
std::stringstream ss(argv[1]);
ss >> port;
std::cout << "port=" << port << std::endl; //debug
}
Server server(port);
std::cout << "port=" << port << std::endl; //debug
server.createConnection();
server.listen();

23
main/insert_test.cpp Normal file
View File

@ -0,0 +1,23 @@
/*
* insert_test.cpp
*
* Created on: 2014-12-03
* Author: Caesar11
*/
#include<iostream>
#include "../Database/Database.h"
using namespace std;
int main(int argc, char * argv[])
{
string db_folder = string(argv[1]);
string insert_rdf_file = string(argv[2]);
Database _db(db_folder);
_db.insert(insert_rdf_file);
return 0;
}