gStore/VSTree/LRUCache.cpp

576 lines
14 KiB
C++

/*
* LRUCache.cpp
*
* Created on: 2014-6-30
* Author: hanshuo
*/
#include "LRUCache.h"
#include "VNode.h"
using namespace std;
//NOTICE: we aim to support 1 billion triples on a single machine, whose entity num
//can not exceed the 2 billion limit, so the maximum VNODE num is 2000000000/100=20000000=20M
int LRUCache::DEFAULT_CAPACITY = 20000000;
//int LRUCache::DEFAULT_CAPACITY = 1 * 1000 * 1000;
//Build an empty cache.
//_capacity: requested number of VNode slots; a non-positive value selects
//LRUCache::DEFAULT_CAPACITY. The result is then raised to at least
//VNode::MAX_CHILD_NUM * 2000 so that the cache is guaranteed to be big enough.
LRUCache::LRUCache(int _capacity)
{
	cout << "LRUCache initial..." << endl;

	if (_capacity > 0)
	{
		this->capacity = _capacity;
	}
	else
	{
		this->capacity = LRUCache::DEFAULT_CAPACITY;
	}
	this->capacity = std::max(this->capacity, VNode::MAX_CHILD_NUM * 2000);

	//every array gets two slots more than the capacity
	//(presumably for the START_INDEX / END_INDEX list sentinels)
	const int slotCount = this->capacity + 2;

	//prev and next implement the doubly linked list behind the LRU strategy
	this->next = new int[slotCount];
	this->prev = new int[slotCount];

	//keys and values are kept dense: when one element is removed, the last one
	//is moved into the vacated position (each VNODE has the same size)
	this->keys = new int[slotCount];
	//the trailing () value-initializes every pointer, i.e. all slots start as NULL
	this->values = new VNode*[slotCount]();

	//empty list: START_INDEX <-> END_INDEX
	this->prev[LRUCache::START_INDEX] = LRUCache::NULL_INDEX;
	this->next[LRUCache::START_INDEX] = LRUCache::END_INDEX;
	this->prev[LRUCache::END_INDEX] = LRUCache::START_INDEX;
	this->next[LRUCache::END_INDEX] = LRUCache::NULL_INDEX;

	this->size = 0;

	cout << "LRUCache initial finish" << endl;
}
//Release the bookkeeping arrays and every VNode that is still held by the cache.
LRUCache::~LRUCache()
{
	delete[] this->next;
	delete[] this->prev;
	delete[] this->keys;

	//Cached nodes are stored from slot LRUCache::DEFAULT_NUM upwards (see set() and flush()),
	//so scanning only [0, size) would skip the last DEFAULT_NUM nodes and leak them.
	//Walk every allocated slot instead: the constructor sets all slots to NULL and
	//freeElem()/fillElem() reset vacated slots to NULL, and deleting NULL is a no-op.
	const int slotCount = this->capacity + 2;
	for (int i = 0; i < slotCount; ++i)
	{
		delete this->values[i];
	}
	delete[] this->values;
}
//Load the cache's elements from an existing data file.
//_filePath: path of the data file; it is remembered in dataFilePath for later disk accesses.
//Nodes are read sequentially, sizeof(VNode) bytes each, until the file ends or the cache
//holds `capacity` elements. Nodes whose file line is negative are invalid and are skipped.
//Returns false if the file can not be opened or positioned, true otherwise.
bool LRUCache::loadCache(string _filePath)
{
	this->dataFilePath = _filePath;
	FILE* filePtr = fopen(this->dataFilePath.c_str(), "rb");
	if (filePtr == NULL)
	{
		cerr << "error, can not load an exist data file. @LRUCache::loadCache" << endl;
		return false;
	}

	if (fseek(filePtr, 0, SEEK_SET) != 0)
	{
		cerr << "error,can't seek to the fileLine. @LRUCache::loadCache" << endl;
		//do not leak the handle on the error path
		fclose(filePtr);
		return false;
	}

	//NOTICE:here we set it to the maximum, to ensure all VNODE in memory
	const int defaultLoadSize = this->capacity;
	//const int defaultLoadSize = this->capacity / 2;
	const size_t vNodeSize = sizeof(VNode);

	while (this->size < defaultLoadSize)
	{
		VNode* nodePtr = new VNode();

		//a short read covers both end-of-file and I/O errors
		if (fread((char *)nodePtr, vNodeSize, 1, filePtr) != 1)
		{
			//the node was never handed over to the cache, so free it here
			delete nodePtr;
			break;
		}

		//NOTICE:not consider invalid node
		if (nodePtr->getFileLine() < 0)
		{
			delete nodePtr;
			continue;
		}

		//this->size is the real size, while DEFAULT_NUM is the prefix
		//To maintain a double-linked list, the pos 0 is head, while the pos 1 is tail
		int pos = LRUCache::DEFAULT_NUM + this->size;
		//setElem() takes the node and increases this->size
		this->setElem(pos, nodePtr->getFileLine(), nodePtr);
	}

	fclose(filePtr);
	return true;
}
//Create a new, empty data file; an already existing file is overwritten.
//_filePath: path of the data file; it is remembered in dataFilePath.
//Returns true if the file could be created, false otherwise.
bool LRUCache::createCache(string _filePath)
{
	this->dataFilePath = _filePath;

	//mode "wb" creates the file or truncates an existing one
	FILE* fp = fopen(this->dataFilePath.c_str(), "wb");
	if (fp != NULL)
	{
		fclose(fp);
		return true;
	}

	cerr << "error, can not create a new data file. @LRUCache::createCache" << endl;
	return false;
}
//DEBUG+WARN:the memory-disk swap strategy has serious bugs, however, we do not really use this strategy now!!!
//
//Set the key (node's file line) and value (node's pointer).
//If the key exists already, the old value of this key is freed and replaced.
//If the cache is full, the element at the head of the list is written out to disk and evicted.
//Returns: currently always false, on every path.
//NOTE(review): the constant false result looks unintentional; it is kept unchanged here
//because callers may depend on it -- confirm before changing.
bool LRUCache::set(int _key, VNode * _value)
{
	map<int, int>::iterator iter = this->key2pos.find(_key);
	// if the _key is found, overwrite its mapping value.
	if (iter != this->key2pos.end())
	{
		int pos = iter->second;
		// If the caller passes the very node that is already cached, detach it from the slot
		// first: otherwise freeElem() would delete it and a dangling pointer would be stored.
		if (this->values[pos] == _value)
		{
			this->values[pos] = NULL;
		}
		this->freeElem(pos);
		this->setElem(pos, _key, _value);
	}
	// if the cache is not full now, just put the key-value to the free slot.
	else if (this->size < this->capacity)
	{
		int pos = LRUCache::DEFAULT_NUM + this->size;
		this->setElem(pos, _key, _value);
	}
	// if the cache is full, should swap out the least recently used one to hard disk.
	else
	{
		cout<<"memory-disk swap hadppened in VSTree - LRUCache"<<endl;
		// write out and free the memory of the least recently used one.
		int pos = this->next[LRUCache::START_INDEX];
		this->writeOut(pos, this->keys[pos]);
		this->freeElem(pos);
		// set the new one to the memory pool.
		this->setElem(pos, _key, _value);
	}
	return false;
}
//Assume that the node of this key exist in memory now
bool
LRUCache::del(int _key)
{
#ifdef DEBUG
cout<<"to del in LRUCache "<<_key<<endl;
#endif
map<int, int>::iterator iter = this->key2pos.find(_key);
if (iter != this->key2pos.end())
{
int pos1 = iter->second;
int pos2 = LRUCache::DEFAULT_NUM + this->size - 1;
cout<<"pos 1: "<<pos1<<" pos2: "<<pos2<<endl;
if(this->values[pos1]->getFileLine() != _key)
{
cout<<"error in del() - file line not mapping"<<endl;
}
this->fillElem(pos1, pos2);
//NOTICE:we do not need to update the file now
//We only record the freed file_line, and not used now
//When this file_line is allocated again, then the new node can
//be written into the unused file part
//(VNode size is fixed)
return true;
}
return false;
}
//Look up the value (node's pointer) by key (node's file line).
//A key that is not cached is read in from the data file; if the cache is full,
//the element at the head of the list is written out and evicted first.
//Returns the node pointer, or NULL if the node could not be read in.
VNode* LRUCache::get(int _key)
{
	map<int, int>::iterator hit = this->key2pos.find(_key);
	if (hit != this->key2pos.end())
	{
		//already in memory
		return this->values[hit->second];
	}

	//the value is not in memory now, should load it from hard disk.
	int slot;
	if (this->size < this->capacity)
	{
		//there is still a free slot behind the occupied ones
		slot = LRUCache::DEFAULT_NUM + this->size;
	}
	else
	{
		//the memory pool is full: swap out the least recently used one to make room
		cout<<"memory-disk swap hadppened in VSTree - LRUCache"<<endl;
		slot = this->next[LRUCache::START_INDEX];
		this->writeOut(slot, this->keys[slot]);
		this->freeElem(slot);
	}

	if (this->readIn(slot, _key))
	{
		return this->values[slot];
	}
	return NULL;
}
//Replace the value mapped to _key by _value.
//The key is first brought into the cache through get(); if that yields no node,
//or the slot found for the key holds a different key, the operation fails.
//Returns true on success, false otherwise.
bool LRUCache::update(int _key, VNode* _value)
{
	//should swap it into cache first.
	if (this->get(_key) == NULL)
	{
		cerr << "error:the key not exist!"<<endl;
		return false;
	}

	const int slot = this->key2pos[_key];
	//BETTER:remove the below cerr
	if (this->keys[slot] == _key)
	{
		this->values[slot] = _value;
		return true;
	}

	cerr << "error, the pos is wrong. @LRUCache::update" << endl;
	return false;
}
//Returns the total number of elements the cache can hold.
int LRUCache::getCapacity()
{
	const int total = this->capacity;
	return total;
}
//Returns how many more elements fit into the cache (capacity minus current size).
int LRUCache::getRestAmount()
{
	const int rest = this->capacity - this->size;
	return rest;
}
//Print capacity, current size and the used percentage to stdout.
void LRUCache::showAmount()
{
	const double usedPercent = (double)this->size / this->capacity * 100.0;
	printf("TotalAmount=%d\tUsedAmount=%d\tUsedPercent=%.2f%%\n",
		this->capacity, this->size, usedPercent);
}
//Returns true when the number of cached elements equals the capacity.
bool LRUCache::isFull()
{
	const bool full = (this->capacity == this->size);
	return full;
}
//Move the element at _pos to the tail of the linked list (the most recently visited end).
void LRUCache::refresh(int _pos)
{
	//step 1: unlink _pos from its current neighbours
	const int before = this->prev[_pos];
	const int after = this->next[_pos];
	this->next[before] = after;
	this->prev[after] = before;

	//step 2: insert _pos between the current last element and END_INDEX
	const int oldTail = this->prev[LRUCache::END_INDEX];
	this->next[oldTail] = _pos;
	this->prev[LRUCache::END_INDEX] = _pos;
	this->next[_pos] = LRUCache::END_INDEX;
	this->prev[_pos] = oldTail;
}
//Free the memory of the element at _pos and remove it from the cache:
//the node is deleted, its key mapping erased, the slot unlinked from the list
//and the size decreased. A position outside the occupied range is rejected.
void LRUCache::freeElem(int _pos)
{
	const int firstSlot = LRUCache::DEFAULT_NUM;
	const int endSlot = LRUCache::DEFAULT_NUM + this->size;
	const bool occupied = (_pos >= firstSlot) && (_pos < endSlot);
	if (!occupied)
	{
		cerr << "error in LRUCache::freeElem() -- invalid pos" << endl;
		return;
	}

	//deleting a NULL pointer is a no-op, so no explicit check is needed
	delete this->values[_pos];
	this->values[_pos] = NULL;

	this->key2pos.erase(this->keys[_pos]);
	this->keys[_pos] = LRUCache::NULL_INDEX;

	//update the double linked list: connect the two neighbours with each other
	const int before = this->prev[_pos];
	const int after = this->next[_pos];
	this->next[before] = after;
	this->prev[after] = before;
	this->next[_pos] = LRUCache::NULL_INDEX;
	this->prev[_pos] = LRUCache::NULL_INDEX;

	--this->size;
}
//Store (_key, _value) in slot _pos, append the slot to the tail of the linked list
//and increase the size.
void LRUCache::setElem(int _pos, int _key, VNode* _value)
{
	this->key2pos[_key] = _pos;
	this->keys[_pos] = _key;
	this->values[_pos] = _value;

	//put the new element to the tail of the linked list, right before END_INDEX
	const int oldTail = this->prev[LRUCache::END_INDEX];
	this->next[oldTail] = _pos;
	this->prev[LRUCache::END_INDEX] = _pos;
	this->next[_pos] = LRUCache::END_INDEX;
	this->prev[_pos] = oldTail;

	//NOTICE: this cannot be placed in loadCache() because this may be called by other functions
	++this->size;
}
//move pos2 ele to pos1, and pos1 ele should be freed
void LRUCache::fillElem(int _pos1, int _pos2)
{
cout<<"fill elem in LRUCache() happen"<<endl;
//NOTICE:update to disk, set the node as invalid
this->freeDisk(_pos1);
//NOTICE:size is reduced in freeElem
this->freeElem(_pos1);
if(_pos1 >= _pos2) //0 ele or 1 ele(just remove the only one)
{
cout<<"LRUCache::fillElem() - no need to fill"<<endl;
return;
}
int key = this->keys[_pos2];
cout<<"another key in fillElem() - "<<key<<endl;
if(this->values[_pos2] == NULL)
{
cout<<"error in fillElem() - value for pos2 is NULL"<<endl;
}
this->key2pos[key] = _pos1;
this->keys[_pos1] = key;
this->values[_pos1] = this->values[_pos2];
this->keys[_pos2] = LRUCache::NULL_INDEX;
this->values[_pos2] = NULL;
int prevPos = this->prev[_pos2];
int nextPos = this->next[_pos2];
//QUERY:if pos1 and pos2 are neighbors in prev-next relations
//can this conflict with freeElem?
this->next[prevPos] = _pos1;
this->prev[nextPos] = _pos1;
this->next[_pos1] = nextPos;
this->prev[_pos1] = prevPos;
}
bool
LRUCache::freeDisk(int _pos)
{
VNode* nodePtr = this->values[_pos];
FILE* filePtr = fopen(this->dataFilePath.c_str(), "r+b");
if (nodePtr == NULL)
{
cerr << "error, VNode do not exist. @LRUCache::freeDisk" << endl;
return false;
}
if (filePtr == NULL)
{
cerr << "error, can't open file. @LRUCache::freeDisk" << endl;
return false;
}
size_t vNodeSize = sizeof(VNode);
int line = nodePtr->getFileLine();
int flag = 0;
long long seekPos = (long long)line * vNodeSize;
flag = fseek(filePtr, seekPos, SEEK_SET);
if (flag != 0)
{
cerr << "error, can't seek to the fileLine. @LRUCache::writeOut" << endl;
return false;
}
nodePtr->setFileLine(-1);
fwrite((char *)nodePtr, vNodeSize, 1, filePtr);
fclose(filePtr);
return true;
}
//just write the values[_pos] to the hard disk, the VNode in memory will not be free.
bool
LRUCache::writeOut(int _pos, int _fileLine)
{
VNode* nodePtr = this->values[_pos];
FILE* filePtr = fopen(this->dataFilePath.c_str(), "r+b");
if (nodePtr == NULL)
{
cerr << "error, VNode do not exist. @LRUCache::writeOut" << endl;
return false;
}
if (filePtr == NULL)
{
cerr << "error, can't open file. @LRUCache::writeOut" << endl;
return false;
}
if (nodePtr->getFileLine() != _fileLine)
{
cerr << "error, fileLine " << _fileLine << "wrong. @LRUCache::writeOut" << endl;
}
int line = _fileLine == -1 ? nodePtr->getFileLine() : _fileLine;
size_t vNodeSize = sizeof(VNode);
int flag = 0;
long long seekPos = (long long)line * vNodeSize;
flag = fseek(filePtr, seekPos, SEEK_SET);
if (flag != 0)
{
cerr << "error, can't seek to the fileLine. @LRUCache::writeOut" << endl;
return false;
}
fwrite((char *)nodePtr, vNodeSize, 1, filePtr);
fclose(filePtr);
return true;
}
//read the value from hard disk, and put it to the values[_pos].
//before use it, you must make sure that the _pos element in cache is free(unoccupied).
bool LRUCache::readIn(int _pos, int _fileLine)
{
VNode* nodePtr = new VNode();
FILE* filePtr = fopen(this->dataFilePath.c_str(), "rb");
if (nodePtr == NULL)
{
cerr << "error, can not new a VNode. @LRUCache::readIn" << endl;
return false;
}
if (filePtr == NULL)
{
cerr << "error, can't open " <<
"[" << this->dataFilePath << "]" <<
". @LRUCache::readIn" << endl;
return false;
}
int line = _fileLine;
size_t vNodeSize = sizeof(VNode);
int flag = 0;
long long seekPos = (long long)line * vNodeSize;
flag = fseek(filePtr, seekPos, SEEK_SET);
if (flag != 0)
{
cerr << "error,can't seek to the fileLine. @LRUCache::readIn" << endl;
return false;
}
//bool is_node_read = (fread((char *)nodePtr, vNodeSize, 1, filePtr) == 1);
fread((char *)nodePtr, vNodeSize, 1, filePtr);
fclose(filePtr);
if (nodePtr == NULL || nodePtr->getFileLine() != _fileLine)
{
cerr << "error,node fileLine error. @LRUCache::readIn" << endl;
}
this->setElem(_pos, _fileLine, nodePtr);
return true;
}
//write out all the elements to hard disk.
bool LRUCache::flush()
{
cout<<"to flush in LRUCache"<<endl;
FILE* filePtr = fopen(this->dataFilePath.c_str(), "r+b");
if (filePtr == NULL)
{
cerr << "error, can't open file. @LRUCache::flush" << endl;
return false;
}
int startIndex = LRUCache::DEFAULT_NUM;
int endIndex = startIndex + this->size;
size_t vNodeSize = sizeof(VNode);
//NOTICE:values are continuous
for (int i = startIndex; i < endIndex; ++i)
{
VNode* nodePtr = this->values[i];
int line = this->keys[i];
//cout<<"file line to write "<<line<<endl;
#ifdef DEBUG
if (nodePtr->getFileLine() != line)
{
cout << "line error at !!!" << line << " " << nodePtr->getFileLine() << endl;
}
#endif
if (nodePtr == NULL)
{
cerr << "error, VNode do not exist. @LRUCache::flush" << endl;
return false;
}
int flag = 0;
long long seekPos = (long long)line * vNodeSize;
flag = fseek(filePtr, seekPos, SEEK_SET);
if (flag != 0)
{
cerr << "error, can't seek to the fileLine. @LRUCache::flush" << endl;
return false;
}
fwrite((char *)nodePtr, vNodeSize, 1, filePtr);
}
fclose(filePtr);
return true;
}