gStore/VSTree/VSTree.cpp

1071 lines
33 KiB
C++
Raw Normal View History

/*
* VSTree.cpp
*
* Created on: 2014-6-20
* Author: liyouhuan
*/
#include"VSTree.h"
#include<stdio.h>
#include<queue>
#include"../Database/Database.h"
#include"../Signature/Signature.h"
#include<algorithm>
#include<vector>
#include<iostream>
using namespace std;
// Definitions of VSTree's static path members. All three are assigned by the
// VSTree constructor from the store path it is given, so they are shared by
// every VSTree instance in the process.
// NOTE: "foler" is a misspelling of "folder" in the identifier; it is kept
// because the name is declared outside this file (presumably in VSTree.h).
string VSTree::tree_file_foler_path;
string VSTree::tree_node_file_path; // assigned in the constructor: <folder>/tree_node_file.dat
string VSTree::tree_info_file_path; // assigned in the constructor: <folder>/tree_info_file.dat
/* Create an empty tree whose files live in the folder _store_path.
 * No buffer is allocated here: buildTree() or loadTree() does that. */
VSTree::VSTree(std::string _store_path)
{
    /* derive the (static) file locations from the store folder */
    VSTree::tree_file_foler_path = _store_path;
    VSTree::tree_node_file_path = _store_path + "/tree_node_file.dat";
    VSTree::tree_info_file_path = _store_path + "/tree_info_file.dat";

    /* buffers are created lazily */
    node_buffer = NULL;
    entry_buffer = NULL;

    /* an empty tree: no levels, no nodes, no entries, root at line zero */
    root_file_line = 0;
    entry_num = 0;
    node_num = 0;
    height = 0;
}
/* Release the node cache and the entry buffer held by this tree. */
VSTree::~VSTree()
{
    /* deleting a NULL pointer is a no-op, so a tree that was never
     * built or loaded is handled as well */
    delete node_buffer;
    delete entry_buffer;
}
int VSTree::getHeight()const
{
return this->height;
}
/* Fetch the root node from the node buffer, using the stored root file line. */
VNode* VSTree::getRoot()
{
    return node_buffer->get(root_file_line);
}
/* Get the node pointer by its file line.
 * _line : file line of the wanted node, valid range is [0, node_num).
 * return: the node fetched from the node buffer, or NULL (after printing an
 *         error) when _line is outside the valid range. */
VNode* VSTree::getNode(int _line)
{
    /* a negative line used to slip through the single upper-bound check */
    if (_line < 0)
    {
        cerr << "error, the parameter:_line is negative. @VSTree::getNode" << endl;
        return NULL;
    }
    if (_line >= this->node_num)
    {
        cerr << "error, the parameter:_line is bigger than the tree node_num. @VSTree::getNode" << endl;
        return NULL;
    }
    return this->node_buffer->get(_line);
}
/* For every variable of every BasicQuery in _query, fill the variable's
 * candidate list with the entities found by retrieveEntity() for the
 * variable's bit set. Progress is written to the Database log. */
void VSTree::retrieve(SPARQLquery& _query)
{
    Database::log("IN retrieve");

    vector<BasicQuery*>& basicQueries = _query.getBasicQueryVec();
    for (vector<BasicQuery*>::iterator it = basicQueries.begin(); it != basicQueries.end(); it++)
    {
        BasicQuery* basicQuery = *it;
        const int varCount = basicQuery->getVarNum();

        for (int var = 0; var < varCount; var++)
        {
            /* log which variable is being processed */
            std::stringstream beforeMsg;
            beforeMsg << "retrieve of var: " << var << endl;
            Database::log(beforeMsg.str());

            const EntityBitSet& varBits = basicQuery->getVarBitSet(var);
            IDList& candidates = basicQuery->getCandidateList(var);
            this->retrieveEntity(varBits, &candidates);

            /* log how many candidates the filter left */
            std::stringstream afterMsg;
            afterMsg << "candidate num: " << candidates.size() << endl;
            Database::log(afterMsg.str());
        }
    }

    Database::log("OUT retrieve");
}
/* Build the VSTree from the entity signature file.
 * _entry_file_path : path of the binary file holding the signature entries.
 * return           : false when the entry file can not be opened, otherwise
 *                    the result of saveTree().
 * Side effects: allocates the entry buffer and the node buffer, creates the
 * root node and the node cache file, and inserts every entry of the file. */
bool VSTree::buildTree(std::string _entry_file_path)
{
    Database::log("IN VSTree::buildTree");

    // create the entry buffer and node buffer.
    this->entry_buffer = new EntryBuffer(EntryBuffer::DEFAULT_CAPACITY);
    this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY);

    // create the root node; a fresh tree is a single leaf that is also the root.
    VNode* rootNodePtr = new VNode();
    rootNodePtr->setAsRoot(true);
    rootNodePtr->setAsLeaf(true);
    rootNodePtr->setFileLine(this->root_file_line);
    this->node_buffer->set(this->root_file_line, rootNodePtr);
    this->node_num ++;
    this->height ++;

    /* when building a new VSTree,
     * we should first create a new tree node file as the external storage
     * of the node buffer on hard disk.*/
    this->node_buffer->createCache(VSTree::tree_node_file_path);

    FILE* filePtr = fopen(_entry_file_path.c_str(), "rb");
    if (filePtr == NULL)
    {
        cerr << "error, can not open file. @VSTree::buildTree" << endl;
        return false;
    }

    /* load the entry file to entry buffer in memory chunk by chunk, and
     * insert each chunk into the tree. The loop stops on a non-positive
     * count so that an error code from the fill call can not spin forever. */
    int n = this->entry_buffer->fillElemsFromFile(filePtr);
    while (n > 0)
    {
        for (int i = 0; i < n; i++)
        {
            SigEntry* entryPtr = this->entry_buffer->getElem(i);
            /* the most important part of this function: insert one by one */
            this->insertEntry(*entryPtr);
        }
        n = this->entry_buffer->fillElemsFromFile(filePtr);
    }

    /* the entry file is fully consumed: release the handle (it used to leak) */
    fclose(filePtr);

    Database::log("insert entries to tree done.");

    bool flag = this->saveTree();

    //debug
    {
        stringstream _ss;
        _ss << "tree height: " << this->getHeight() << endl;
        Database::log(_ss.str());
    }
    Database::log("OUT VSTree::buildTree");

    return flag;
}
bool VSTree::deleteTree()
{
this->height = 0;
this->node_num = 0;
this->entry_num = 0;
this->root_file_line = 0;
delete this->node_buffer;
delete this->entry_buffer;
// backup the tree data file.
if (rename(VSTree::tree_file_foler_path.c_str(), (VSTree::tree_file_foler_path+"_bak").c_str()) == 0)
return true;
else
return false;
}
/* Incrementally update bitset of _entity_id
* conduct OR operation on Entry(_entity_id)'s EntityBitSet with _bitset
* Entry of _entity_id must exists */
bool VSTree::updateEntry(int _entity_id, const EntityBitSet& _bitset)
{
VNode* leafNodePtr = this->getLeafNodeByEntityID(_entity_id);
if (leafNodePtr == NULL)
{
cerr << "error, can not find the mapping leaf node. @VSTree::updateEntry" << endl;
return false;
}
// find the mapping child entry, update it and refresh signature.
int childNum = leafNodePtr->getChildNum();
bool findFlag = false;
for (int i=0;i<childNum;i++)
{
const SigEntry& entry = leafNodePtr->getChildEntry(i);
if (entry.getEntityId() == _entity_id)
{
SigEntry newEntry = entry;
newEntry |= SigEntry(EntitySig(_bitset), _entity_id);
leafNodePtr->setChildEntry(i, newEntry);
leafNodePtr->refreshAncestorSignature(*(this->node_buffer));
findFlag = true;
break;
}
}
if (!findFlag)
{
cerr<< "error, can not find the mapping child entry in the leaf node. @VSTree::updateEntry" << endl;
return false;
}
return true;
}
/* Replace the Entry(_enitty_id)'s EntityBitSet with _bitset
* Entry of _entity_id must exists */
bool VSTree::replaceEntry(int _entity_id, const EntityBitSet& _bitset)
{
VNode* leafNodePtr = this->getLeafNodeByEntityID(_entity_id);
if (leafNodePtr == NULL)
{
cerr << "error, can not find the mapping leaf node. @VSTree::replaceEntry" << endl;
return false;
}
// find the mapping child entry, update it and refresh signature.
int childNum = leafNodePtr->getChildNum();
bool findFlag = false;
for (int i=0;i<childNum;i++)
{
const SigEntry& entry = leafNodePtr->getChildEntry(i);
if (entry.getEntityId() == _entity_id)
{
SigEntry newEntry(EntitySig(_bitset), _entity_id);
leafNodePtr->setChildEntry(i, newEntry);
leafNodePtr->refreshAncestorSignature(*(this->node_buffer));
findFlag = true;
break;
}
}
if (!findFlag)
{
cerr << "error, can not find the mapping child entry in the leaf node. @VSTree::replaceEntry" << endl;
return false;
}
return true;
}
/* insert an new Entry, whose entity doesn't exist before */
bool VSTree::insertEntry(const SigEntry& _entry)
{
/* choose the best leaf node to insert the _entry */
VNode* choosedNodePtr = this->chooseNode(this->getRoot(), _entry);
//debug
// {
// if (_entry.getEntityId() == 4000001)
// {
// stringstream _ss;
// if (choosedNodePtr)
// {
// _ss << "insert " << _entry.getEntityId()
// << " into [" << choosedNodePtr->getFileLine() << "],\t";
// _ss << "whose childnum is " << choosedNodePtr->getChildNum() << endl;
// }
// else
// {
// _ss << "insert " << _entry.getEntityId() << " , can not choose a leaf node to insert entry. @VSTree::insert" << endl;
// }
// Database::log(_ss.str());
// }
// }
if (choosedNodePtr == NULL)
{
cerr << "error, can not choose a leaf node to insert entry. @VSTree::insert" << endl;
return false;
}
if (choosedNodePtr->isFull())
{
/* if the choosed leaf node to insert is full, the node should be split.*/
this->split(choosedNodePtr, _entry, NULL);
}
else
{
choosedNodePtr->addChildEntry(_entry, false);
choosedNodePtr->refreshAncestorSignature(*(this->node_buffer));
// update the entityID2FileLineMap.
this->entityID2FileLineMap[_entry.getEntityId()] = choosedNodePtr->getFileLine();
}
this->entry_num ++;
return true;
}
/* Remove an existing entry from the tree.
 * _entity_id : id of the entity whose entry is removed.
 * return     : false (after printing an error) when the entity has no mapped
 *              leaf or the leaf does not contain it; true otherwise.
 * The leaf's and its ancestors' signatures are refreshed and the entity is
 * dropped from the entity -> leaf line mapping. */
bool VSTree::removeEntry(int _entity_id)
{
    VNode* leafNodePtr = this->getLeafNodeByEntityID(_entity_id);
    if (leafNodePtr == NULL)
    {
        cerr<< "error, can not find the mapping leaf node. @VSTree::removeEntry" << endl;
        return false;
    }

    // seek the entry index of the leaf node.
    int entryIndex = -1;
    const int childNum = leafNodePtr->getChildNum();
    for (int i = 0; i < childNum; i++)
    {
        if (leafNodePtr->getChildEntry(i).getEntityId() == _entity_id)
        {
            entryIndex = i;
            break;
        }
    }
    if (entryIndex == -1)
    {
        cerr << "error, can not find the entry in leaf node. @VSTree::removeEntry" << endl;
        return false;
    }

    // remove the entry in this leaf node and refresh itself and its ancestors' signature.
    leafNodePtr->removeChild(entryIndex);
    leafNodePtr->refreshAncestorSignature(*(this->node_buffer));
    this->entry_num --;

    /* the entity is gone: forget its leaf, otherwise later lookups would
     * still resolve the removed id to this leaf. */
    this->entityID2FileLineMap.erase(_entity_id);

    /* Not handled yet: a leaf that becomes empty is kept. A better scheme
     * would delete empty nodes recursively, merge nodes that fall below
     * MIN_CHILD_NUM, and drop deleted nodes from the tree node file through
     * the node_buffer. */
    return true;
}
/* Persist the tree: first the tree information file, then - only if that
 * succeeded - flush the buffered nodes. Returns whether both steps succeeded. */
bool VSTree::saveTree()
{
    if (!this->saveTreeInfo())
    {
        return false;
    }
    bool flushed = this->node_buffer->flush();
    return flushed;
}
/* Load a previously saved tree: allocate the node buffer, read the tree
 * information file, load the node cache file and rebuild the
 * entity -> leaf line mapping.
 * Returns false when the tree information or the mapping can not be loaded.
 * NOTE(review): a node_buffer that already exists is replaced without being
 * released - confirm whether callers may load twice. */
bool VSTree::loadTree()
{
    cout << "loadTree..." << endl;
    this->node_buffer = new LRUCache(LRUCache::DEFAULT_CAPACITY);

    if (!this->loadTreeInfo())
    {
        return false;
    }

    this->node_buffer->loadCache(VSTree::tree_node_file_path);
    cout << "finish loadCache" << endl;

    bool mapLoaded = loadEntityID2FileLineMap();
    cout << "finish loadEntityID2FileLineMap" << endl;
    return mapLoaded;
}
/* Choose the leaf node into which _entry should be inserted (recursive).
 * _p_node : root of the subtree to search; NULL is tolerated.
 * _entry  : the entry about to be inserted.
 * return  : the chosen leaf, or NULL when _p_node is NULL or no leaf can be
 *           reached below it (insertEntry() reports that case).
 *
 * At an internal node every child whose entry has the smallest xEpsilen()
 * value against _entry is a candidate; each candidate subtree is searched
 * and the leaf whose own entry has the smallest xEpsilen() value wins. A
 * leaf with value 0 is returned immediately. */
VNode* VSTree::chooseNode(VNode* _p_node, const SigEntry& _entry)
{
    if (_p_node == NULL)
    {
        return NULL;
    }
    if (_p_node->isLeaf())
    {
        return _p_node;
    }

    /* step 1: collect the children that tie for the minimal distance. */
    int minDis = Signature::ENTITY_SIG_LENGTH + 1;
    std::vector<int> candidateIndex;
    const int childNum = _p_node->getChildNum();
    for (int i = 0; i < childNum; i++)
    {
        const int curDis = _p_node->getChildEntry(i).xEpsilen(_entry);
        if (curDis > minDis)
        {
            continue;
        }
        if (curDis < minDis)
        {
            /* strictly better: forget the previous ties */
            minDis = curDis;
            candidateIndex.clear();
        }
        candidateIndex.push_back(i);
    }

    /* step 2: descend into every candidate and keep the closest leaf. */
    minDis = Signature::ENTITY_SIG_LENGTH + 1;
    VNode* ret = NULL;
    for (size_t i = 0; i < candidateIndex.size(); i++)
    {
        VNode* p_child = _p_node->getChild(candidateIndex[i], *(this->node_buffer));
        /* Recursion */
        VNode* candidateLeafPtr = this->chooseNode(p_child, _entry);
        if (candidateLeafPtr == NULL)
        {
            /* missing child, or a subtree without any leaf: it used to be
             * dereferenced here. */
            continue;
        }

        const int curDis = candidateLeafPtr->getEntry().xEpsilen(_entry);
        if (curDis == 0)
        {
            return candidateLeafPtr;
        }
        if (minDis > curDis)
        {
            minDis = curDis;
            ret = candidateLeafPtr;
        }
    }
    return ret;
}
/* Split a full node while adding one more child to it.
 * _p_node_being_split : the full node.
 * _insert_entry       : the entry to add (the new child's entry).
 * _p_insert_node      : the child node to add, or NULL when only an entry is
 *                       added (i.e. the node being split is a leaf).
 *
 * Steps: (1) add the extra child, (2) pick two seed entries, (3) distribute
 * the remaining children between the seeds, (4) move seed B's group into a
 * newly created node and compact seed A's group inside the old node,
 * (5) hook the new node into the tree - by creating a new root when the old
 * node was the root, otherwise through the father, splitting the father
 * recursively when it is full - and (6) refresh entityID2FileLineMap for both
 * nodes.
 * NOTE(review): every loop below runs over exactly VNode::MAX_CHILD_NUM
 * indices, i.e. it assumes the node holds MAX_CHILD_NUM children after
 * step (1) - confirm against VNode::isFull(). */
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node)
{
//debug
// {
// stringstream _ss;
// _ss << "**********************split happen at "
// << _p_node_being_split->getFileLine() << endl;
// _ss << _p_node_being_split->to_str() << endl;
// Database::log(_ss.str());
// }
// first, add the new child node(if not leaf) or child entry(if leaf) to the full node.
// (the second argument `true` presumably allows the add although the node is full - TODO confirm)
bool just_insert_entry = (_p_insert_node == NULL);
if (just_insert_entry)
{
_p_node_being_split->addChildEntry(_insert_entry, true);
}
else
{
_p_node_being_split->addChildNode(_p_insert_node, true);
}
SigEntry entryA, entryB;
/* two seeds to generate two new nodes.
* seedA kernel: the child entry with the LARGEST getSigCount() value
* (the first such child wins a tie).
* seedB kernel: the child entry, other than seedA, with the LARGEST
* xEpsilen() value against seedA (the last such child wins a tie).
* */
int minCount = 0; // despite its name, tracks the largest value seen so far.
int entryA_index = 0; // record the seedA kernel index.
for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
{
int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount();
if (minCount < currentCount)
{
minCount = currentCount;
entryA_index = i;
}
}
entryA = _p_node_being_split->getChildEntry(entryA_index);
minCount = 0;
int entryB_index = 0; // record the seedB kernel index.
for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
{
int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
if (i != entryA_index && minCount <= currentCount)
{
minCount = currentCount;
entryB_index = i;
}
}
entryB = _p_node_being_split->getChildEntry(entryB_index);
// entryIndex_nearA: indices of the children assigned to seedA (stay in the old node).
// entryIndex_nearB: indices of the children assigned to seedB (move to the new node).
std::vector<int> entryIndex_nearA, entryIndex_nearB;
entryIndex_nearA.clear();
entryIndex_nearB.clear();
entryIndex_nearA.push_back(entryA_index);
entryIndex_nearB.push_back(entryB_index);
/* just tmp variables, for more readibility */
int nearA_max_size, nearB_max_size;
bool nearA_tooSmall, nearB_tooSmall;
for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
{
if (i == entryA_index || i == entryB_index) continue;
/* should guarantee that each new node has at least MIN_CHILD_NUM children. */
// once group B has grown so large that A could end up with at most
// MIN_CHILD_NUM children, all remaining children go to A and the loop ends.
nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size();
nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM);
if (nearA_tooSmall)
{
for (;i<VNode::MAX_CHILD_NUM;i++)
{
if (i == entryA_index || i == entryB_index) continue;
entryIndex_nearA.push_back(i);
}
break;
}
// the symmetric rule protects group B.
nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size();
nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM);
if (nearB_tooSmall)
{
for (;i<VNode::MAX_CHILD_NUM;i++)
{
if (i == entryA_index || i == entryB_index) continue;
entryIndex_nearB.push_back(i);
}
break;
}
/* calculate the distance from
* the i-th child entry signature to seedA(or seedB).*/
/*debug target 1*/
int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i));
// choose the near one seed to add into (a tie goes to seedA)
if (disToSeedA <= disToSeedB)
{
entryIndex_nearA.push_back(i);
}
else
{
entryIndex_nearB.push_back(i);
}
}
// then create a new node to act as BEntryIndex's father.
VNode* newNodePtr = this->createNode();
//debug
// {
// stringstream _ss;
// _ss << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl;
// Database::log(_ss.str());
// }
// the old one acts as AEntryIndex's father.
VNode* oldNodePtr = _p_node_being_split;
// if the old node is leaf, set the new node as a leaf.
if (oldNodePtr->isLeaf())
{
newNodePtr->setAsLeaf(true);
}
/* add all the entries in BEntryIndex into the new node child entry array,
and calculate the new node's entry.*/
for (int i=0;i<entryIndex_nearB.size();i++)
{
if (oldNodePtr->isLeaf())
{
newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false);
}
else
{
/*debug target 2*/
VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer));
newNodePtr->addChildNode(childPtr);
}
}
newNodePtr->refreshSignature();
/* compact the old node: the children kept by seedA are copied to the front
* slots in ascending index order (hence the sort, so that a slot is never
* overwritten before it has been read), then the child count is reduced
* and the old node's entry is updated.*/
std::sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>());
//debug
// {
// stringstream _ss;
// {
// _ss << "nearA: ";
// for(int i = 0; i < entryIndex_nearA.size(); i ++)
// {
// _ss << entryIndex_nearA[i] << " ";
// }
// _ss << endl;
//
// _ss << "nearB: ";
// for(int i = 0; i < entryIndex_nearB.size(); i ++)
// {
// _ss << entryIndex_nearB[i] << " ";
// }
// _ss << endl;
// }
// Database::log(_ss.str());
// }
for (int i=0;i<entryIndex_nearA.size();i++)
{
oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i]));
oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i]));
}
oldNodePtr->setChildNum(entryIndex_nearA.size());
oldNodePtr->refreshSignature();
// both values are fetched before the root check; they are only used in the non-root branch.
int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer));
// full node's father pointer.
VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer));
if (oldNodePtr->isRoot())
{
/* if the old node is root,
* split the root, create a new root,
* and the tree height will be increased.*/
VNode* RootNewPtr = this->createNode();
/* change the old root node to not-root node,
* and set the RootNew to root node.*/
oldNodePtr->setAsRoot(false);
RootNewPtr->setAsRoot(true);
/* set the split two node(old node and new node) as the new root's child,
* and update signatures.*/
RootNewPtr->addChildNode(oldNodePtr);
RootNewPtr->addChildNode(newNodePtr);
RootNewPtr->refreshSignature();
//debug
// {
// stringstream _ss;
// _ss << "create new root:" << endl;
// _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl;
// Database::log(_ss.str());
// }
/* should keep the root node always being
* at the first line(line zero) of the tree node file.*/
this->swapNodeFileLine(RootNewPtr, oldNodePtr);
this->height ++;
//debug
// {
// stringstream _ss;
// _ss << "create new root:" << endl;
// _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl;
// _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl;
// _ss << RootNewPtr->to_str() << endl;
// Database::log(_ss.str());
// }
}
else
{
/* if the (OldNode) is not Root,
* change the old node's signature to A's signature.*/
oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry());
if (oldNodeFatherPtr->isFull())
{
// the father has no room for the new node: refresh upwards, then split the father too (recursion).
oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer));
this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr);
}
else
{
oldNodeFatherPtr->addChildNode(newNodePtr);
oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer));
}
}
// update the entityID2FileLineMap by these two nodes.
// (updateEntityID2FileLineMap ignores non-leaf nodes.)
this->updateEntityID2FileLineMap(oldNodePtr);
this->updateEntityID2FileLineMap(newNodePtr);
}
/* Allocate a fresh node, give it the next free file line, register it in the
 * node buffer and count it. Used when a node is split. */
VNode* VSTree::createNode()
{
    VNode* created = new VNode();

    /* the next free line equals the current node count */
    created->setFileLine(node_num);
    node_buffer->set(node_num, created);
    node_num++;

    return created;
}
/* Exchange the file lines of two nodes and repair every reference to them:
 * each node's own line, the child-line slot in each (non-root) node's father,
 * the father-line of each (non-leaf) node's children, and finally the node
 * buffer slots. All related nodes are fetched BEFORE any line is changed. */
void VSTree::swapNodeFileLine(VNode* _p_node_a, VNode* _p_node_b)
{
    const int lineOfA = _p_node_a->getFileLine();
    const int lineOfB = _p_node_b->getFileLine();

    // collect fathers (and the nodes' positions in them) while the old lines are still valid.
    VNode* fatherOfA = _p_node_a->getFather(*(this->node_buffer));
    int rankOfA = _p_node_a->getIndexInFatherNode(*(this->node_buffer));
    VNode* fatherOfB = _p_node_b->getFather(*(this->node_buffer));
    int rankOfB = _p_node_b->getIndexInFatherNode(*(this->node_buffer));

    // collect the children of both nodes.
    VNode* childrenOfA[VNode::MAX_CHILD_NUM];
    VNode* childrenOfB[VNode::MAX_CHILD_NUM];
    int childCountA = _p_node_a->getChildNum();
    int childCountB = _p_node_b->getChildNum();
    for (int k = 0; k < childCountA; k++)
    {
        childrenOfA[k] = _p_node_a->getChild(k, *(this->node_buffer));
    }
    for (int k = 0; k < childCountB; k++)
    {
        childrenOfB[k] = _p_node_b->getChild(k, *(this->node_buffer));
    }

    // the swap itself: A takes B's line and B takes A's line.
    _p_node_a->setFileLine(lineOfB);
    _p_node_b->setFileLine(lineOfA);

    // fathers must point at the new lines (a root has no father to fix).
    if (!_p_node_a->isRoot())
    {
        fatherOfA->setChildFileLine(rankOfA, lineOfB);
    }
    if (!_p_node_b->isRoot())
    {
        fatherOfB->setChildFileLine(rankOfB, lineOfA);
    }

    // children must point back at the new lines (a leaf has no child node to fix).
    if (!_p_node_a->isLeaf())
    {
        for (int k = 0; k < childCountA; k++)
        {
            childrenOfA[k]->setFatherFileLine(lineOfB);
        }
    }
    if (!_p_node_b->isLeaf())
    {
        for (int k = 0; k < childCountB; k++)
        {
            childrenOfB[k]->setFatherFileLine(lineOfA);
        }
    }

    // finally let the node buffer know which node lives at which line.
    this->node_buffer->update(lineOfB, _p_node_a);
    this->node_buffer->update(lineOfA, _p_node_b);
}
/* save VSTree's information to tree_info_file_path, such as node_num, entry_num, height, etc. */
bool VSTree::saveTreeInfo()
{
FILE* filePtr = fopen(VSTree::tree_info_file_path.c_str(), "wb");
if (filePtr == NULL)
{
cerr << "error, can not create tree info file. @VSTree::saveTreeInfo" << endl;
return false;
}
fseek(filePtr, 0, SEEK_SET);
fwrite(&this->node_num, sizeof(int), 1, filePtr);
fwrite(&this->root_file_line, sizeof(int), 1, filePtr);
fwrite(&this->height, sizeof(int), 1, filePtr);
int minChildNum = VNode::MIN_CHILD_NUM;
fwrite(&minChildNum, sizeof(int), 1, filePtr);
int maxChildNum = VNode::MAX_CHILD_NUM;
fwrite(&maxChildNum, sizeof(int), 1, filePtr);
int nodeSize = sizeof(VNode);
fwrite(&nodeSize,sizeof(int), 1, filePtr);
int sigLength = Signature::ENTITY_SIG_LENGTH;
fwrite(&sigLength, sizeof(int), 1, filePtr);
fwrite(&this->entry_num, sizeof(int), 1, filePtr);
int nodeBufferSize = this->node_buffer->getCapacity();
fwrite(&nodeBufferSize, sizeof(int), 1, filePtr);
fclose(filePtr);
return true;
}
/* load VSTree's information from tree_info_file_path. */
bool VSTree::loadTreeInfo()
{
FILE* filePtr = fopen(VSTree::tree_info_file_path.c_str(), "rb");
if (filePtr == NULL)
{
cerr << "error, can not open tree file:[" <<
VSTree::tree_info_file_path <<
"]@VSTree::loadTreeInfo" << endl;
return false;
}
fseek(filePtr, 0, SEEK_SET);
int tmp;
fread(&this->node_num, sizeof(int), 1, filePtr);
fread(&this->root_file_line, sizeof(int), 1, filePtr);
fread(&this->height, sizeof(int), 1, filePtr);
fread(&tmp, sizeof(int), 1, filePtr);
fread(&tmp, sizeof(int), 1, filePtr);
fread(&tmp,sizeof(int), 1, filePtr);
int sigLength = Signature::ENTITY_SIG_LENGTH;
fread(&sigLength, sizeof(int), 1, filePtr);
if (sigLength > Signature::ENTITY_SIG_LENGTH)
{
cerr << "WARNING: signature length is too short. @VSTree::loadTreeInfo" << endl;
}
fread(&this->entry_num, sizeof(int), 1, filePtr);
int nodeBufferSize = this->node_buffer->getCapacity();
fread(&nodeBufferSize, sizeof(int), 1, filePtr);
if (nodeBufferSize > this->node_buffer->getCapacity())
{
cerr << "WARNING: node buffer size may be too small. @VSTree::loadTreeInfo" << endl;
}
fclose(filePtr);
return true;
}
/* traverse the tree_node_file_path file, load the mapping from entity id to file line. */
bool VSTree::loadEntityID2FileLineMap()
{
FILE* filePtr = fopen(VSTree::tree_node_file_path.c_str(), "rb");
if (filePtr == NULL)
{
cerr << "error, can not open tree node file. @VSTree::loadEntityID2FileLineMap" << endl;
return false;
}
size_t vNodeSize = sizeof(VNode);
int flag = 0;
flag = fseek(filePtr, 0, SEEK_SET);
if (flag != 0)
{
cerr << "error,can't seek to the fileLine. @VSTree::loadEntityID2FileLineMap" << endl;
return false;
}
this->entityID2FileLineMap.clear();
VNode* nodePtr = new VNode();
int cycle_count = 0;
while (!feof(filePtr))
{
bool is_node_read = (fread((char *)nodePtr,vNodeSize,1,filePtr) == 1);
if (is_node_read)
{
this->updateEntityID2FileLineMap(nodePtr);
//debug
{
stringstream _ss;
if (cycle_count != nodePtr->getFileLine())
{
_ss << "line=" << cycle_count << " nodeLine=" << nodePtr->getFileLine() << endl;
Database::log(_ss.str());
}
}
cycle_count ++;
}
}
delete nodePtr;
fclose(filePtr);
return true;
}
/* Record, for every child entry of the leaf _p_node, that its entity lives at
 * the leaf's file line. Nodes that are not leaves are ignored. */
void VSTree::updateEntityID2FileLineMap(VNode* _p_node)
{
    if (!_p_node->isLeaf())
    {
        return;
    }

    const int leafLine = _p_node->getFileLine();
    const int entryCount = _p_node->getChildNum();
    for (int pos = 0; pos < entryCount; pos++)
    {
        const int id = _p_node->getChildEntry(pos).getEntityId();
        this->entityID2FileLineMap[id] = leafLine;

        //debug: trace one hard-coded entity id.
        if (id == 4000001)
        {
            Database::log("entity(4000001) found in leaf node!!!");
        }
    }
}
/* Look up the leaf that holds _entityID through entityID2FileLineMap.
 * Returns NULL (after printing an error) when the entity is not mapped;
 * otherwise whatever getNode() returns for the mapped file line. */
VNode* VSTree::getLeafNodeByEntityID(int _entityID)
{
    map<int,int>::iterator found = this->entityID2FileLineMap.find(_entityID);
    if (found != this->entityID2FileLineMap.end())
    {
        return this->getNode(found->second);
    }

    cerr << "error,can not find the _entityID's mapping fileLine. @VSTree::getLeafNodeByEntityID" << endl;
    return NULL;
}
/* retrieve the candidate entity ID which signature can cover the_entity_bit_set, and add them to the _p_id_list. */
void VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list)
{
Database::log("IN retrieveEntity");
EntitySig filterSig(_entity_bit_set);
std::queue<int>nodeFileFileQueue; //searching node file line queue.
//debug
{
stringstream _ss;
_ss << "filterSig=" << Signature::BitSet2str(filterSig.entityBitSet) << endl;
Database::log(_ss.str());
}
const SigEntry& root_entry = (this->getRoot())->getEntry();
Database::log("Get Root Entry");
if(root_entry.cover(filterSig))
{
nodeFileFileQueue.push(this->getRoot()->getFileLine());
Database::log("root cover the filter_sig");
}
else
{
Database::log("warning: root is not cover the filter_sig");
}
//debug
// {
// Database::log(this->getRoot()->to_str());
// Database::log("Before BFS");
// }
/* using BFS algorithm to traverse the VSTree and retrieve the entry.*/
while (!nodeFileFileQueue.empty())
{
int currentNodeFileLine = nodeFileFileQueue.front();
nodeFileFileQueue.pop();
VNode* currentNodePtr = this->getNode(currentNodeFileLine);
int childNum = currentNodePtr->getChildNum();
//debug
// {
// std::stringstream _ss;
// _ss << "childNum of ["
// << currentNodePtr->getFileLine()
// << "] is " << childNum << endl;
//
// for (int i=0;i<childNum;i++)
// {
// _ss << currentNodePtr->getChildFileLine(i) << " ";
// }
// _ss << endl;
//
// Database::log(_ss.str());
// }
for (int i=0;i<childNum;i++)
{
const SigEntry& entry = currentNodePtr->getChildEntry(i);
if (entry.cover(filterSig))
{
if (currentNodePtr->isLeaf())
{
// if leaf node, add the satisfying entries' entity id to result list.
_p_id_list->addID(entry.getEntityId());
//debug
// {
// stringstream _ss;
// _ss << "child_" << i << " cover filter sig" << endl;
// _ss << Signature::BitSet2str(entry.getEntitySig().entityBitSet)<< endl;
// Database::log(_ss.str());
// }
}
else
{
// if non-leaf node, add the child node pointer to the searching queue.
//VNode* childPtr = currentNodePtr->getChild(i, *(this->node_buffer));
// if non-leaf node, add the child node file line to the searching queue.
int childNodeFileLine = currentNodePtr->getChildFileLine(i);
nodeFileFileQueue.push(childNodeFileLine);
//debug
// {
// stringstream _ss;
// _ss << "child[" << childPtr->getFileLine() << "] cover filter sig" << endl;
// Database::log(_ss.str());
// }
}
}
}
}
Database::log("OUT retrieveEntity");
}
std::string VSTree::to_str()
{
//debug
{
stringstream _ss;
_ss << "after build tree, root is:" << endl;
_ss << this->getRoot()->to_str() << endl;
Database::log(_ss.str());
}
std::stringstream _ss;
std::queue<int> nodeFileLineQueue;
nodeFileLineQueue.push(this->getRoot()->getFileLine());
while(! nodeFileLineQueue.empty())
{
int currentNodeFileLine = nodeFileLineQueue.front();
nodeFileLineQueue.pop();
VNode* currentNodePtr = this->getNode(currentNodeFileLine);
_ss << currentNodePtr->to_str();
int childNum = currentNodePtr->getChildNum();
for(int i = 0; i < childNum; i ++)
{
if(! currentNodePtr->isLeaf())
{
int childNodeFileLine = currentNodePtr->getChildFileLine(i);
nodeFileLineQueue.push(childNodeFileLine);
}
}
}
return _ss.str();
}