1508 lines
32 KiB
C++
1508 lines
32 KiB
C++
/*=============================================================================
|
|
# Filename: Util.cpp
|
|
# Author: Bookug Lobert
|
|
# Mail: 1181955272@qq.com
|
|
# Last Modified: 2015-10-16 10:43
|
|
# Description:
|
|
1. firstly written by liyouhuan, modified by zengli
|
|
2. achieve functions in Util.h
|
|
=============================================================================*/
|
|
|
|
#include "Util.h"
|
|
|
|
using namespace std;
|
|
|
|
//==================================================================================================================
|
|
//configure() to config the basic options of gStore system
|
|
//==================================================================================================================
|
|
|
|
//string Util::profile = "../init.conf";
|
|
string Util::profile = "init.conf";
|
|
|
|
map<string, string> Util::global_config;
|
|
|
|
//database home directory, which is an absolute path by config
|
|
//TODO:everywhere using database, the prefix should be it
|
|
//string Util::db_home = ".";
|
|
|
|
//false:single true:distribute
|
|
//bool Util::gstore_mode = false;
|
|
|
|
//control the debug information
|
|
//string Util::debug_level = "simple";
|
|
|
|
//database placed in which path
|
|
//string Util::db_path = ".";
|
|
|
|
//the suffix to be added to database name
|
|
//string Util::db_suffix = ".db";
|
|
|
|
//the maxium buffer size assigned to gStore system
|
|
//string Util::buffer_maxium = "100"; //the unit is GB
|
|
|
|
//the maxium thread num assigned to gStore system
|
|
//string Util::thread_maxium = "1000";
|
|
|
|
//if record logs in gStore system(to be recoverable or faster)
|
|
//string Util::operation_logs = "true";
|
|
|
|
//==================================================================================================================
|
|
|
|
//NOTICE:used in Database, Join and Strategy
|
|
//int Util::triple_num = 0;
|
|
//int Util::pre_num = 0;
|
|
//int Util::entity_num = 0;
|
|
//int Util::literal_num = 0;
|
|
|
|
//string Util::tmp_path = "../.tmp/";
|
|
//string Util::debug_path = "../.debug/";
|
|
string Util::tmp_path = ".tmp/";
|
|
string Util::debug_path = ".debug/";
|
|
|
|
//QUERY: assign all in Util()?
|
|
//BETTER:assigned in KVstore, not one tree?
|
|
FILE* Util::debug_kvstore = NULL; //used by KVstore
|
|
FILE* Util::debug_database = NULL; //used by Database
|
|
FILE* Util::debug_vstree = NULL; //used by VSTree
|
|
|
|
//set hash table
|
|
HashFunction Util::hash[] = { Util::simpleHash, Util::APHash, Util::BKDRHash, Util::DJBHash, Util::ELFHash, \
|
|
Util::DEKHash, Util::BPHash, Util::FNVHash, Util::HFLPHash, Util::HFHash, Util::JSHash, \
|
|
Util::PJWHash, Util::RSHash, Util::SDBMHash, Util::StrHash, Util::TianlHash, NULL};
|
|
|
|
//remove spaces in the left
|
|
char*
|
|
Util::l_trim(char* szOutput, const char* szInput)
|
|
{
|
|
assert(szInput != NULL);
|
|
assert(szOutput != NULL);
|
|
assert(szOutput != szInput);
|
|
for (; *szInput != '\0' && isspace(*szInput); ++szInput);
|
|
return strcpy(szOutput, szInput);
|
|
}
|
|
|
|
//remove spaces in the right
|
|
char*
|
|
Util::r_trim(char *szOutput, const char* szInput)
|
|
{
|
|
char *p = NULL;
|
|
assert(szInput != NULL);
|
|
assert(szOutput != NULL);
|
|
assert(szOutput != szInput);
|
|
strcpy(szOutput, szInput);
|
|
for(p = szOutput + strlen(szOutput) - 1; p >= szOutput && isspace(*p); --p);
|
|
*(++p) = '\0';
|
|
return szOutput;
|
|
}
|
|
|
|
//remove spaces in the two sides
|
|
char*
|
|
Util::a_trim(char * szOutput, const char * szInput)
|
|
{
|
|
char *p = NULL;
|
|
assert(szInput != NULL);
|
|
assert(szOutput != NULL);
|
|
l_trim(szOutput, szInput);
|
|
for (p = szOutput + strlen(szOutput) - 1; p >= szOutput && isspace(*p); --p);
|
|
*(++p) = '\0';
|
|
return szOutput;
|
|
}
|
|
|
|
bool
|
|
Util::configure()
|
|
{
|
|
const unsigned len = 505;
|
|
char *buf, *c;
|
|
char buf_i[len], buf_o[len];
|
|
FILE *fp = NULL;
|
|
char keyname[len];
|
|
char keyval[len];
|
|
|
|
//initialize the settings
|
|
Util::global_config["gstore_mode"] = "single";
|
|
//NOTICE+BETTER+TODO:use macro is better to avoid too many judging on this variable(add a DEBUG macro at the outer)
|
|
Util::global_config["debug_level"] = "simple";
|
|
Util::global_config["db_home"] = ".";
|
|
Util::global_config["db_suffix"] = ".db";
|
|
Util::global_config["buffer_maxium"] = "100";
|
|
Util::global_config["thread_maxium"] = "1000";
|
|
//TODO:to be recoverable
|
|
Util::global_config["operation_logs"] = "true";
|
|
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "profile: %s\n", profile.c_str());
|
|
#endif
|
|
if((fp = fopen(profile.c_str(), "r")) == NULL) //NOTICE: this is not a binary file
|
|
{
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "openfile [%s] error [%s]\n", profile.c_str(), strerror(errno));
|
|
#endif
|
|
return false;
|
|
}
|
|
fseek(fp, 0, SEEK_SET);
|
|
|
|
while(!feof(fp) && fgets(buf_i, len, fp) != NULL)
|
|
{
|
|
//fprintf(stderr, "buffer: %s\n", buf_i);
|
|
Util::l_trim(buf_o, buf_i);
|
|
if(strlen(buf_o) <= 0)
|
|
continue;
|
|
buf = NULL;
|
|
buf = buf_o;
|
|
if(buf[0] == '#')
|
|
{
|
|
continue;
|
|
}
|
|
else if(buf[0] == '[')
|
|
{
|
|
continue;
|
|
}
|
|
if((c = (char*)strchr(buf, '=')) == NULL)
|
|
continue;
|
|
memset(keyname, 0, sizeof(keyname));
|
|
sscanf(buf, "%[^=|^ |^\t]", keyname);
|
|
#ifdef DEBUG
|
|
//fprintf(stderr, "keyname: %s\n", keyname);
|
|
#endif
|
|
sscanf(++c, "%[^\n]", keyval);
|
|
char *keyval_o = (char *)calloc(strlen(keyval) + 1, sizeof(char));
|
|
if(keyval_o != NULL)
|
|
{
|
|
Util::a_trim(keyval_o, keyval);
|
|
#ifdef DEBUG
|
|
//fprintf(stderr, "keyval: %s\n", keyval_o);
|
|
#endif
|
|
if(keyval_o && strlen(keyval_o) > 0)
|
|
{
|
|
//strcpy(keyval, keyval_o);
|
|
global_config[string(keyname)] = string(keyval_o);
|
|
}
|
|
xfree(keyval_o);
|
|
}
|
|
}
|
|
|
|
fclose(fp);
|
|
//display all settings here
|
|
cout<<"the current settings are as below: "<<endl;
|
|
cout<<"key : value"<<endl;
|
|
cout<<"------------------------------------------------------------"<<endl;
|
|
for(map<string, string>::iterator it = global_config.begin(); it != global_config.end(); ++it)
|
|
{
|
|
cout<<it->first<<" : "<<it->second<<endl;
|
|
}
|
|
cout<<endl;
|
|
|
|
return true;
|
|
//return Util::config_setting() && Util::config_debug() && Util::config_advanced();
|
|
}
|
|
|
|
bool
|
|
Util::config_debug()
|
|
{
|
|
const unsigned len1 = 100;
|
|
const unsigned len2 = 505;
|
|
char AppName[] = "setting";
|
|
char KeyName[] = "mode";
|
|
char appname[len1], keyname[len1];
|
|
char KeyVal[len1];
|
|
char *buf, *c;
|
|
char buf_i[len1], buf_o[len1];
|
|
FILE *fp = NULL;
|
|
int status = 0; // 1 AppName 2 KeyName
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Util::config_advanced()
|
|
{
|
|
const unsigned len1 = 100;
|
|
const unsigned len2 = 505;
|
|
char AppName[] = "setting";
|
|
char KeyName[] = "mode";
|
|
char appname[len1], keyname[len1];
|
|
char KeyVal[len1];
|
|
char *buf, *c;
|
|
char buf_i[len1], buf_o[len1];
|
|
FILE *fp = NULL;
|
|
int status = 0; // 1 AppName 2 KeyName
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Util::config_setting()
|
|
{
|
|
const unsigned len1 = 100;
|
|
const unsigned len2 = 505;
|
|
char AppName[] = "setting";
|
|
char KeyName[] = "mode";
|
|
char appname[len1], keyname[len1];
|
|
char KeyVal[len1];
|
|
char *buf, *c;
|
|
char buf_i[len2], buf_o[len2];
|
|
FILE *fp = NULL;
|
|
int status = 0; // 1 AppName 2 KeyName
|
|
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "profile: %s\n", profile.c_str());
|
|
#endif
|
|
if((fp = fopen(profile.c_str(), "r")) == NULL) //NOTICE: this is not a binary file
|
|
{
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "openfile [%s] error [%s]\n", profile.c_str(), strerror(errno));
|
|
#endif
|
|
return false;
|
|
}
|
|
fseek(fp, 0, SEEK_SET);
|
|
memset(appname, 0, sizeof(appname));
|
|
sprintf(appname,"[%s]", AppName);
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "appname: %s\n", appname);
|
|
#endif
|
|
|
|
while(!feof(fp) && fgets(buf_i, len2, fp) != NULL)
|
|
{
|
|
//fprintf(stderr, "buffer: %s\n", buf_i);
|
|
Util::l_trim(buf_o, buf_i);
|
|
if(strlen(buf_o) <= 0)
|
|
continue;
|
|
buf = NULL;
|
|
buf = buf_o;
|
|
if(buf[0] == '#')
|
|
{
|
|
continue;
|
|
}
|
|
if(status == 0)
|
|
{
|
|
if(strncmp(buf, appname, strlen(appname)) == 0)
|
|
{
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "app found!\n");
|
|
#endif
|
|
status = 1;
|
|
continue;
|
|
}
|
|
}
|
|
else if(status == 1)
|
|
{
|
|
if(buf[0] == '[')
|
|
{
|
|
//NOTICE: nested module is not allowed
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
if((c = (char*)strchr(buf, '=')) == NULL)
|
|
continue;
|
|
memset(keyname, 0, sizeof(keyname));
|
|
sscanf(buf, "%[^=|^ |^\t]", keyname);
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "keyname: %s\n", keyname);
|
|
#endif
|
|
if(strcmp(keyname, KeyName) == 0)
|
|
{
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "key found!\n");
|
|
#endif
|
|
sscanf(++c, "%[^\n]", KeyVal);
|
|
char *KeyVal_o = (char *)calloc(strlen(KeyVal) + 1, sizeof(char));
|
|
if(KeyVal_o != NULL)
|
|
{
|
|
Util::a_trim(KeyVal_o, KeyVal);
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "KeyVal: %s\n", KeyVal_o);
|
|
#endif
|
|
if(KeyVal_o && strlen(KeyVal_o) > 0)
|
|
strcpy(KeyVal, KeyVal_o);
|
|
xfree(KeyVal_o);
|
|
}
|
|
status = 2;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
fclose(fp);
|
|
//if(found == 2)
|
|
//return(0);
|
|
//else
|
|
//return(-1);
|
|
//fprintf(stderr, "%s\n", KeyVal);
|
|
if(strcmp(KeyVal, "distribute") == 0)
|
|
{
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "the gStore will run in distributed mode!\n");
|
|
#endif
|
|
//Util::gStore_mode = true;
|
|
}
|
|
|
|
return true; //config success
|
|
}
|
|
|
|
Util::Util()
|
|
{
|
|
Util::configure();
|
|
#ifdef DEBUG_KVSTORE
|
|
if(this->debug_kvstore == NULL)
|
|
{
|
|
string s = this->debug_path + "kv.log";
|
|
this->debug_kvstore = fopen(s.c_str(), "w+");
|
|
if(this->debug_kvstore == NULL)
|
|
{
|
|
cerr << "open error: kv.log\n";
|
|
this->debug_kvstore = stderr;
|
|
}
|
|
}
|
|
#endif
|
|
#ifdef DEBUG_DATABASE
|
|
if(this->debug_database == NULL)
|
|
{
|
|
string s = this->debug_path + "db.log";
|
|
this->debug_database = fopen(s.c_str(), "w+");
|
|
if(this->debug_database == NULL)
|
|
{
|
|
cerr << "open error: db.log\n";
|
|
this->debug_database = stderr;
|
|
}
|
|
}
|
|
#endif
|
|
#ifdef DEBUG_VSTREE
|
|
if(this->debug_vstree == NULL)
|
|
{
|
|
string s = this->debug_path + "vs.log";
|
|
this->debug_vstree = fopen(s.c_str(), "w+");
|
|
if(this->debug_vstree == NULL)
|
|
{
|
|
cerr << "open error: vs.log\n";
|
|
this->debug_vstree = stderr;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
Util::~Util()
|
|
{
|
|
#ifdef DEBUG_KVSTORE
|
|
fclose(this->debug_kvstore); //NULL is ok, just like free(NULL)
|
|
this->debug_kvstore = NULL;
|
|
#endif
|
|
#ifdef DEBUG_DATABASE
|
|
fclose(this->debug_database); //NULL is ok, just like free(NULL)
|
|
this->debug_database = NULL;
|
|
#endif
|
|
}
|
|
|
|
int
|
|
Util::memUsedPercentage()
|
|
{
|
|
FILE* fp = fopen("/proc/meminfo", "r");
|
|
if(fp == NULL)
|
|
return -1;
|
|
char str[20], tail[3];
|
|
unsigned t, sum, used = 0; //WARN:unsigned,memory cant be too large!
|
|
fscanf(fp, "%s%u%s", str, &sum, tail); //MemTotal, KB
|
|
fscanf(fp, "%s%u%s", str, &used, tail); //MemFree
|
|
fscanf(fp, "%s%u%s", str, &t, tail);
|
|
if(strcmp(str, "MemAvailable") == 0)
|
|
{
|
|
//QUERY:what is the relation between MemFree and MemAvailable?
|
|
used = t;
|
|
//scanf("%s%u%s", str, &t, tail); //Buffers
|
|
//used += t;
|
|
//scanf("%s%u%s", str, &t, tail); //Cached
|
|
//used += t;
|
|
}
|
|
//else //Buffers
|
|
//{
|
|
// scanf("%s%u%s", str, &t, tail); //Cached
|
|
// used += t;
|
|
//}
|
|
used = sum - used;
|
|
fclose(fp);
|
|
return (int)(used * 100.0 / sum);
|
|
}
|
|
|
|
int
|
|
Util::memoryLeft()
|
|
{
|
|
FILE* fp = fopen("/proc/meminfo", "r");
|
|
if(fp == NULL)
|
|
return -1;
|
|
char str[20], tail[3];
|
|
unsigned t, sum, unuse = 0; //WARN:unsigned,memory cant be too large!
|
|
fscanf(fp, "%s%u%s", str, &sum, tail); //MemTotal, KB
|
|
fscanf(fp, "%s%u%s", str, &unuse, tail); //MemFree
|
|
fscanf(fp, "%s%u%s", str, &t, tail);
|
|
if(strcmp(str, "MemAvailable") == 0)
|
|
{
|
|
unuse = t;
|
|
}
|
|
fclose(fp);
|
|
return unuse / Util::MB;
|
|
}
|
|
|
|
bool
|
|
Util::is_literal_ele(int _id)
|
|
{
|
|
return _id >= Util::LITERAL_FIRST_ID;
|
|
}
|
|
|
|
//NOTICE: require that the list is ordered
|
|
int
|
|
Util::removeDuplicate(int* _list, int _len)
|
|
{
|
|
if (_list == NULL || _len == 0) {
|
|
return 0;
|
|
}
|
|
int valid = 0, limit = _len - 1;
|
|
for(int i = 0; i < limit; ++i)
|
|
{
|
|
if(_list[i] != _list[i+1])
|
|
{
|
|
_list[valid++] = _list[i];
|
|
}
|
|
}
|
|
_list[valid++] = _list[limit];
|
|
return valid;
|
|
}
|
|
|
|
int
|
|
Util::cmp_int(const void* _i1, const void* _i2)
|
|
{
|
|
return *(int*)_i1 - *(int*)_i2;
|
|
}
|
|
|
|
void
|
|
Util::sort(int*& _id_list, int _list_len)
|
|
{
|
|
qsort(_id_list, _list_len, sizeof(int), Util::cmp_int);
|
|
}
|
|
|
|
int
|
|
Util::bsearch_int_uporder(int _key, const int* _array,int _array_num)
|
|
{
|
|
if (_array_num == 0)
|
|
{
|
|
return -1;
|
|
}
|
|
if (_array == NULL)
|
|
{
|
|
return -1;
|
|
}
|
|
int _first = _array[0];
|
|
int _last = _array[_array_num - 1];
|
|
|
|
if (_last == _key)
|
|
{
|
|
return _array_num - 1;
|
|
}
|
|
|
|
if (_last < _key || _first > _key)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
int low = 0;
|
|
int high = _array_num - 1;
|
|
|
|
int mid;
|
|
while (low <= high)
|
|
{
|
|
mid = (high - low) / 2 + low;
|
|
if (_array[mid] == _key)
|
|
{
|
|
return mid;
|
|
}
|
|
if (_array[mid] > _key)
|
|
{
|
|
high = mid - 1;
|
|
}
|
|
else
|
|
{
|
|
low = mid + 1;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
bool
|
|
Util::bsearch_preid_uporder(int _preid, int* _pair_idlist, int _list_len)
|
|
{
|
|
if(_list_len == 0)
|
|
{
|
|
return false;
|
|
}
|
|
int pair_num = _list_len / 2;
|
|
int _first = _pair_idlist[2*0 + 0];
|
|
int _last = _pair_idlist[2*(pair_num-1) + 0];
|
|
|
|
if(_preid == _last)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
bool not_find = (_last < _preid || _first > _preid);
|
|
if(not_find)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
int low = 0;
|
|
int high = pair_num - 1;
|
|
int mid;
|
|
|
|
while(low <= high)
|
|
{
|
|
mid = (high - low) / 2 + low;
|
|
if(_pair_idlist[2*mid + 0] == _preid)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
if(_pair_idlist[2*mid + 0] > _preid)
|
|
{
|
|
high = mid - 1;
|
|
}
|
|
else
|
|
{
|
|
low = mid + 1;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
int
|
|
Util::bsearch_vec_uporder(int _key, const vector<int>* _vec)
|
|
{
|
|
int tmp_size = _vec->size();
|
|
if (tmp_size == 0)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
int _first = (*_vec)[0];
|
|
int _last = (*_vec)[tmp_size - 1];
|
|
|
|
if (_key == _last)
|
|
{
|
|
return tmp_size - 1;
|
|
}
|
|
|
|
bool not_find = (_last < _key || _first > _key);
|
|
if (not_find)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
int low = 0;
|
|
int high = tmp_size - 1;
|
|
int mid;
|
|
|
|
while (low <= high)
|
|
{
|
|
mid = (high - low) / 2 + low;
|
|
if ((*_vec)[mid] == _key)
|
|
{
|
|
return mid;
|
|
}
|
|
|
|
if ((*_vec)[mid] > _key)
|
|
{
|
|
high = mid - 1;
|
|
}
|
|
else
|
|
{
|
|
low = mid + 1;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
string
|
|
Util::result_id_str(vector<int*>& _v, int _var_num)
|
|
{
|
|
stringstream _ss;
|
|
|
|
for(unsigned i = 0; i < _v.size(); i ++)
|
|
{
|
|
int* _p_int = _v[i];
|
|
_ss << "[";
|
|
for(int j = 0; j < _var_num-1; j ++)
|
|
{
|
|
_ss << _p_int[j] << ",";
|
|
}
|
|
_ss << _p_int[_var_num-1] << "]\t";
|
|
}
|
|
|
|
return _ss.str();
|
|
}
|
|
|
|
bool
|
|
Util::dir_exist(const string _dir)
|
|
{
|
|
return (opendir(_dir.c_str()) != NULL);
|
|
}
|
|
|
|
bool
|
|
Util::create_dir(const string _dir)
|
|
{
|
|
if(! Util::dir_exist(_dir))
|
|
{
|
|
mkdir(_dir.c_str(), 0755);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
long
|
|
Util::get_cur_time()
|
|
{
|
|
timeval tv;
|
|
gettimeofday(&tv, NULL);
|
|
return (tv.tv_sec*1000 + tv.tv_usec/1000);
|
|
}
|
|
|
|
bool
|
|
Util::save_to_file(const char* _dir, const string _content)
|
|
{
|
|
ofstream fout(_dir);
|
|
|
|
if (fout.is_open())
|
|
{
|
|
fout << _content;
|
|
fout.close();
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
int
|
|
Util::compare(const char* _str1, unsigned _len1, const char* _str2, unsigned _len2)
|
|
{
|
|
int ifswap = 1; //1 indicate: not swapped
|
|
if(_len1 > _len2)
|
|
{
|
|
const char* str = _str1;
|
|
_str1 = _str2;
|
|
_str2 = str;
|
|
unsigned len = _len1;
|
|
_len1 = _len2;
|
|
_len2 = len;
|
|
ifswap = -1;
|
|
}
|
|
unsigned i;
|
|
//DEBUG: if char can be negative, which cause problem when comparing(128+)
|
|
//
|
|
//NOTICE:little-endian-storage, when string buffer poniter is changed to
|
|
//unsigned long long*, the first char is the lowest byte!
|
|
/*
|
|
unsigned long long *p1 = (unsigned long long*)_str1, *p2 = (unsigned long long*)_str2;
|
|
unsigned limit = _len1/8;
|
|
for(i = 0; i < limit; ++i, ++p1, ++p2)
|
|
{
|
|
if((*p1 ^ *p2) == 0) continue;
|
|
else
|
|
{
|
|
if(*p1 < *p2) return -1 * ifswap;
|
|
else return 1 * ifswap;
|
|
}
|
|
}
|
|
for(i = 8 * limit; i < _len1; ++i)
|
|
{
|
|
if(_str1[i] < _str2[i]) return -1 * ifswap;
|
|
else if(_str1[i] > _str2[i]) return 1 * ifswap;
|
|
else continue;
|
|
}
|
|
if(i == _len2) return 0;
|
|
else return -1 * ifswap;
|
|
*/
|
|
for(i = 0; i < _len1; ++i)
|
|
{ //ASCII: 0~127 but c: 0~255(-1) all transfered to unsigned char when comparing
|
|
if((unsigned char)_str1[i] < (unsigned char)_str2[i])
|
|
return -1 * ifswap;
|
|
else if((unsigned char)_str1[i] > (unsigned char)_str2[i])
|
|
return 1 * ifswap;
|
|
else;
|
|
}
|
|
if(i == _len2)
|
|
return 0;
|
|
else
|
|
return -1 * ifswap;
|
|
}
|
|
|
|
int
|
|
Util::string2int(string s)
|
|
{
|
|
return atoi(s.c_str());
|
|
}
|
|
|
|
string
|
|
Util::int2string(long n)
|
|
{
|
|
string s;
|
|
stringstream ss;
|
|
ss<<n;
|
|
ss>>s;
|
|
return s;
|
|
}
|
|
|
|
string
|
|
Util::showtime()
|
|
{
|
|
// fputs("\n\n", logsfp);
|
|
time_t now;
|
|
time(&now);
|
|
// fputs(ctime(&now), logsfp);
|
|
return string("\n\n") + ctime(&now);
|
|
}
|
|
|
|
string
|
|
Util::getQueryFromFile(const char* _file_path)
|
|
{
|
|
#ifdef DEBUG_PRECISE
|
|
cerr << "file to open: " << _file_path <<endl;
|
|
#endif
|
|
char buf[10000];
|
|
std::string query_file;
|
|
|
|
ifstream fin(_file_path);
|
|
if(!fin)
|
|
{
|
|
printf("can not open: %s\n", _file_path);
|
|
return "";
|
|
}
|
|
|
|
memset(buf, 0, sizeof(buf));
|
|
stringstream _ss;
|
|
while(!fin.eof())
|
|
{
|
|
fin.getline(buf, 9999);
|
|
_ss << buf << "\n";
|
|
}
|
|
fin.close();
|
|
|
|
return _ss.str();
|
|
}
|
|
|
|
string
|
|
Util::getItemsFromDir(string _path)
|
|
{
|
|
DIR* dp = NULL;
|
|
struct dirent* entry;
|
|
string ret = "";
|
|
if((dp = opendir(_path.c_str())) == NULL)
|
|
{
|
|
fprintf(stderr, "error opening directory!\n");
|
|
}
|
|
else
|
|
{
|
|
while((entry = readdir(dp)) != NULL)
|
|
{
|
|
#ifdef DEBUG_PRECISE
|
|
fprintf(stderr, "%s\n", entry->d_name);
|
|
#endif
|
|
string name= string(entry->d_name);
|
|
int len = name.length();
|
|
if(len <= 3)
|
|
{
|
|
continue;
|
|
}
|
|
if(name.substr(len-3, 3) == ".db")
|
|
{
|
|
if(ret == "")
|
|
ret = name;
|
|
else
|
|
ret = ret + " " + name;
|
|
}
|
|
}
|
|
closedir(dp);
|
|
}
|
|
#ifdef DEBUG_PRECISE
|
|
fprintf(stderr, "OUT getItemsFromDir\n");
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
string
|
|
Util::getSystemOutput(string cmd)
|
|
{
|
|
string ans = "";
|
|
string file = Util::tmp_path;
|
|
file += "ans.txt";
|
|
cmd += " > ";
|
|
cmd += file;
|
|
cerr << cmd << endl;
|
|
int ret = system(cmd.c_str());
|
|
cmd = "rm -rf " + file;
|
|
if(ret < 0)
|
|
{
|
|
fprintf(stderr, "system call failed!\n");
|
|
system(cmd.c_str());
|
|
return NULL;
|
|
}
|
|
|
|
ifstream fin(file.c_str());
|
|
if(!fin)
|
|
{
|
|
cerr << "getSystemOutput: Fail to open : " << file << endl;
|
|
return NULL;
|
|
}
|
|
|
|
string temp;
|
|
getline(fin, temp);
|
|
while(!fin.eof())
|
|
{
|
|
//cout<<"system line"<<endl;
|
|
if(ans == "")
|
|
ans = temp;
|
|
else
|
|
ans = ans + "\n" + temp;
|
|
getline(fin, temp);
|
|
}
|
|
fin.close();
|
|
//FILE *fp = NULL;
|
|
//if((fp = fopen(file.c_str(), "r")) == NULL)
|
|
//{
|
|
//fprintf(stderr, "unbale to open file: %s\n", file.c_str());
|
|
//}
|
|
//else
|
|
//{
|
|
//char *ans = (char *)malloc(100);
|
|
//fgets(path, 100, fp);
|
|
//char *find = strchr(path, '\n');
|
|
//if(find != NULL)
|
|
//*find = '\0';
|
|
//fclose(fp);
|
|
//}
|
|
system(cmd.c_str());
|
|
//cerr<<"ans: "<<ans<<endl;
|
|
return ans;
|
|
}
|
|
|
|
// Get the exact file path from given string: ~, ., symbol links
|
|
string
|
|
Util::getExactPath(const char *str)
|
|
{
|
|
string cmd = "realpath ";
|
|
cmd += string(str);
|
|
|
|
return getSystemOutput(cmd);
|
|
}
|
|
|
|
void
|
|
Util::logging(string _str)
|
|
{
|
|
_str += "\n";
|
|
#ifdef DEBUG_DATABASE
|
|
fputs(_str.c_str(), Util::debug_database);
|
|
fflush(Util::debug_database);
|
|
#endif
|
|
|
|
#ifdef DEBUG_VSTREE
|
|
fputs(_str.c_str(), Util::debug_vstree);
|
|
fflush(Util::debug_vstree);
|
|
#endif
|
|
}
|
|
|
|
unsigned
|
|
Util::BKDRHash(const char *_str)
|
|
{
|
|
unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
|
|
unsigned int key = 0;
|
|
|
|
//for(unsigned i = 0; i < i; ++i)
|
|
while(*_str)
|
|
{
|
|
//key = key * seed + _str[i];
|
|
key = key * seed + *(_str++);
|
|
}
|
|
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::simpleHash(const char *_str)
|
|
{
|
|
unsigned int key;
|
|
unsigned char *p;
|
|
|
|
for(key = 0, p = (unsigned char *)_str; *p; p++)
|
|
key = 31 * key + *p;
|
|
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::RSHash(const char *_str)
|
|
{
|
|
unsigned int b = 378551;
|
|
unsigned int a = 63689;
|
|
unsigned int key = 0;
|
|
|
|
while (*_str)
|
|
{
|
|
key = key * a + (*_str++);
|
|
a *= b;
|
|
}
|
|
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::JSHash(const char *_str)
|
|
{
|
|
unsigned int key = 1315423911;
|
|
|
|
while (*_str)
|
|
{
|
|
key ^= ((key << 5) + (*_str++) + (key >> 2));
|
|
}
|
|
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::PJWHash(const char *_str)
|
|
{
|
|
unsigned int bits_in_unsigned_int = (unsigned int)(sizeof(unsigned int) * 8);
|
|
unsigned int three_quarters = (unsigned int)((bits_in_unsigned_int * 3) / 4);
|
|
unsigned int one_eighth = (unsigned int)(bits_in_unsigned_int / 8);
|
|
|
|
unsigned int high_bits = (unsigned int)(0xFFFFFFFF) << (bits_in_unsigned_int - one_eighth);
|
|
unsigned int key = 0;
|
|
unsigned int test = 0;
|
|
|
|
while (*_str)
|
|
{
|
|
key = (key << one_eighth) + (*_str++);
|
|
if ((test = key & high_bits) != 0)
|
|
{
|
|
key = ((key ^ (test >> three_quarters)) & (~high_bits));
|
|
}
|
|
}
|
|
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::ELFHash(const char *_str)
|
|
{
|
|
unsigned int key = 0;
|
|
unsigned int x = 0;
|
|
|
|
while (*_str)
|
|
{
|
|
key = (key << 4) + (*_str++);
|
|
if ((x = key & 0xF0000000L) != 0)
|
|
{
|
|
key ^= (x >> 24);
|
|
key &= ~x;
|
|
}
|
|
}
|
|
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::SDBMHash(const char *_str)
|
|
{
|
|
unsigned int key = 0;
|
|
|
|
while (*_str)
|
|
{
|
|
key = (*_str++) + (key << 6) + (key << 16) - key;
|
|
}
|
|
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::DJBHash(const char *_str)
|
|
{
|
|
unsigned int key = 5381;
|
|
while (*_str) {
|
|
key += (key << 5) + (*_str++);
|
|
}
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::APHash(const char *_str)
|
|
{
|
|
unsigned int key = 0;
|
|
int i;
|
|
|
|
for (i=0; *_str; i++)
|
|
{
|
|
if ((i & 1) == 0)
|
|
{
|
|
key ^= ((key << 7) ^ (*_str++) ^ (key >> 3));
|
|
}
|
|
else
|
|
{
|
|
key ^= (~((key << 11) ^ (*_str++) ^ (key >> 5)));
|
|
}
|
|
}
|
|
|
|
return (key & 0x7FFFFFFF);
|
|
}
|
|
|
|
unsigned
|
|
Util::DEKHash(const char* _str)
|
|
{
|
|
unsigned int hash = strlen(_str);
|
|
for(; *_str; _str++)
|
|
{
|
|
hash = ((hash << 5) ^ (hash >> 27)) ^ (*_str);
|
|
}
|
|
return hash;
|
|
}
|
|
|
|
unsigned
|
|
Util::BPHash(const char* _str)
|
|
{
|
|
unsigned int hash = 0;
|
|
for(; *_str; _str++)
|
|
{
|
|
hash = hash << 7 ^ (*_str);
|
|
}
|
|
|
|
return hash;
|
|
}
|
|
|
|
unsigned
|
|
Util::FNVHash(const char* _str)
|
|
{
|
|
const unsigned int fnv_prime = 0x811C9DC5;
|
|
unsigned int hash = 0;
|
|
|
|
for(; *_str; _str++)
|
|
{
|
|
hash *= fnv_prime;
|
|
hash ^= (*_str);
|
|
}
|
|
|
|
return hash;
|
|
}
|
|
|
|
unsigned
|
|
Util::HFLPHash(const char* _str)
|
|
{
|
|
unsigned int n = 0;
|
|
char* b = (char*)&n;
|
|
unsigned int len = strlen(_str);
|
|
for(unsigned i = 0; i < len; ++i)
|
|
{
|
|
b[i%4] ^= _str[i];
|
|
}
|
|
return n%len;
|
|
}
|
|
|
|
unsigned
|
|
Util::HFHash(const char* _str)
|
|
{
|
|
int result=0;
|
|
const char* ptr = _str;
|
|
int c;
|
|
unsigned int len = strlen(_str);
|
|
for(int i=1; (c=*ptr++); i++)
|
|
result += c*3*i;
|
|
if (result<0)
|
|
result = -result;
|
|
return result%len;
|
|
}
|
|
|
|
unsigned
|
|
Util::StrHash(const char* _str)
|
|
{
|
|
register unsigned int h;
|
|
register unsigned char *p;
|
|
for(h = 0, p = (unsigned char *)_str; *p; p++)
|
|
{
|
|
h = 31 * h + *p;
|
|
}
|
|
|
|
return h;
|
|
|
|
}
|
|
|
|
unsigned
|
|
Util::TianlHash(const char* _str)
|
|
{
|
|
unsigned long urlHashValue=0;
|
|
int ilength=strlen(_str);
|
|
int i;
|
|
unsigned char ucChar;
|
|
if(!ilength) {
|
|
return 0;
|
|
}
|
|
if(ilength<=256) {
|
|
urlHashValue=16777216*(ilength-1);
|
|
} else {
|
|
urlHashValue = 42781900080;
|
|
}
|
|
if(ilength<=96) {
|
|
for(i=1; i<=ilength; i++) {
|
|
ucChar = _str[i-1];
|
|
if(ucChar<='Z'&&ucChar>='A') {
|
|
ucChar=ucChar+32;
|
|
}
|
|
urlHashValue+=(3*i*ucChar*ucChar+5*i*ucChar+7*i+11*ucChar)%1677216;
|
|
}
|
|
} else {
|
|
for(i=1; i<=96; i++)
|
|
{
|
|
ucChar = _str[i+ilength-96-1];
|
|
if(ucChar<='Z'&&ucChar>='A')
|
|
{
|
|
ucChar=ucChar+32;
|
|
}
|
|
urlHashValue+=(3*i*ucChar*ucChar+5*i*ucChar+7*i+11*ucChar)%1677216;
|
|
}
|
|
}
|
|
|
|
return urlHashValue;
|
|
}
|
|
|
|
//NOTICE: the time of log() and sqrt() in C can be seen as constant
|
|
|
|
//NOTICE:_b must >= 1
|
|
double
|
|
Util::logarithm(double _a, double _b)
|
|
{
|
|
//REFRENCE: http://blog.csdn.net/liyuanbhu/article/details/8997850
|
|
//a>0 != 1; b>0 (b>=2 using log/log10/change, 1<b<2 using log1p, b<=1?)
|
|
if(_a <= 1 || _b < 1)
|
|
return -1.0;
|
|
double under = log(_a);
|
|
if(_b == 1)
|
|
return 0.0;
|
|
else if(_b < 2)
|
|
return log1p(_b - 1) / under;
|
|
else //_b >= 2
|
|
return log(_b) / under;
|
|
return -1.0;
|
|
}
|
|
|
|
void
|
|
Util::intersect(int*& _id_list, int& _id_list_len, const int* _list1, int _len1, const int* _list2, int _len2)
|
|
{
|
|
vector<int> res;
|
|
//cout<<"intersect prevar: "<<_len1<<" "<<_len2<<endl;
|
|
if(_list1 == NULL || _len1 == 0 || _list2 == NULL || _len2 == 0)
|
|
{
|
|
_id_list = NULL;
|
|
_id_list_len = 0;
|
|
}
|
|
|
|
//when size is almost the same, intersect O(n)
|
|
//when one size is small ratio, search in the larger one O(mlogn)
|
|
//
|
|
//n>0 m=nk(0<k<1)
|
|
//compare n(k+1) and nklogn: k0 = log(n/2)2 requiring that n>2
|
|
//k<=k0 binary search; k>k0 intersect
|
|
int method = -1; //0: intersect 1: search in list1 2: search in list2
|
|
int n = _len1;
|
|
double k = 0;
|
|
if(n < _len2)
|
|
{
|
|
k = (double)n / (double)_len2;
|
|
n = _len2;
|
|
method = 2;
|
|
}
|
|
else
|
|
{
|
|
k = (double)_len2 / (double)n;
|
|
method = 1;
|
|
}
|
|
if(n <= 2)
|
|
method = 0;
|
|
else
|
|
{
|
|
double limit = Util::logarithm(n/2, 2);
|
|
if(k > limit)
|
|
method = 0;
|
|
}
|
|
|
|
switch(method)
|
|
{
|
|
case 0:
|
|
{ //this bracket is needed if vars are defined in case
|
|
int id_i = 0;
|
|
int num = _len1;
|
|
for(int i = 0; i < num; ++i)
|
|
{
|
|
int can_id = _list1[i];
|
|
while((id_i < _len2) && (_list2[id_i] < can_id))
|
|
{
|
|
id_i ++;
|
|
}
|
|
|
|
if(id_i == _len2)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if(can_id == _list2[id_i])
|
|
{
|
|
res.push_back(can_id);
|
|
id_i ++;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case 1:
|
|
{
|
|
for(int i = 0; i < _len2; ++i)
|
|
{
|
|
if(Util::bsearch_int_uporder(_list2[i], _list1, _len1) != -1)
|
|
res.push_back(_list2[i]);
|
|
}
|
|
break;
|
|
}
|
|
case 2:
|
|
{
|
|
int m = _len1, i;
|
|
for(i = 0; i < m; ++i)
|
|
{
|
|
int t = _list1[i];
|
|
if(Util::bsearch_int_uporder(t, _list2, _len2) != -1)
|
|
res.push_back(t);
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
cerr << "no such method in Util::intersect()" << endl;
|
|
break;
|
|
}
|
|
|
|
_id_list_len = res.size();
|
|
|
|
if (_id_list_len == 0) {
|
|
_id_list = NULL;
|
|
}
|
|
else {
|
|
_id_list = new int[_id_list_len];
|
|
for (int i = 0; i < _id_list_len; ++i)
|
|
_id_list[i] = res[i];
|
|
}
|
|
delete[] _list1;
|
|
delete[] _list2;
|
|
}
|
|
|
|
int
|
|
Util::compIIpair(int _a1, int _b1, int _a2, int _b2)
|
|
{
|
|
if(_a1 == _a2 && _b1 == _b2)
|
|
return 0;
|
|
else if(_a1 < _a2 || (_a1 == _a2 && _b1 <= _b2))
|
|
return -1;
|
|
else
|
|
return 1;
|
|
}
|
|
|
|
bool
|
|
Util::isValidPort(string str)
|
|
{
|
|
//valid port number: 0 - 65535
|
|
if(str.length() < 1 || str.length() > 5)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
unsigned i;
|
|
for(i = 0; i < str.length(); i++)
|
|
{
|
|
if(str[i] < '0' || str[i] > '9')
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
int port = Util::string2int(str);
|
|
if(port < 0 || port>65535)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Util::isValidIP(string str)
|
|
{
|
|
if(str == "localhost")
|
|
{
|
|
return true;
|
|
}
|
|
return (Util::isValidIPV4(str) || Util::isValidIPV6(str));
|
|
}
|
|
|
|
bool
|
|
Util::isValidIPV4(string str)
|
|
{
|
|
regex_t reg;
|
|
char pattern[] = "^(([01]?[0-9][0-9]?|2[0-4][0-9]|25[0-5])\\.){3}([01]?[0-9][0-9]?|2[0-4][0-9]|25[0-5])$";
|
|
regcomp(®, pattern, REG_EXTENDED | REG_NOSUB);
|
|
regmatch_t pm[1];
|
|
int status = regexec(®, str.c_str(), 1, pm, 0);
|
|
regfree(®);
|
|
if(status == REG_NOMATCH)
|
|
{
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Util::isValidIPV6(string str)
|
|
{
|
|
//TO BE IMPLEMENTED
|
|
return false;
|
|
}
|
|
|
|
string
|
|
Util::getTimeString() {
|
|
static const int max = 20; // max length of time string
|
|
char time_str[max];
|
|
time_t timep;
|
|
time(&timep);
|
|
strftime(time_str, max, "%Y%m%d %H:%M:%S\t", gmtime(&timep));
|
|
return string(time_str);
|
|
}
|
|
|
|
string
|
|
Util::node2string(const char* _raw_str) {
|
|
string _output;
|
|
unsigned _first_quote = 0;
|
|
unsigned _last_quote = 0;
|
|
bool _has_quote = false;
|
|
for (unsigned i = 0; _raw_str[i] != '\0'; i++) {
|
|
if (_raw_str[i] == '\"') {
|
|
if (!_has_quote) {
|
|
_first_quote = i;
|
|
_last_quote = i;
|
|
_has_quote = true;
|
|
}
|
|
else {
|
|
_last_quote = i;
|
|
}
|
|
}
|
|
}
|
|
if (_first_quote==_last_quote) {
|
|
_output += _raw_str;
|
|
return _output;
|
|
}
|
|
for (unsigned i = 0; i <= _first_quote; i++) {
|
|
_output += _raw_str[i];
|
|
}
|
|
for (unsigned i = _first_quote + 1; i < _last_quote; i++) {
|
|
switch (_raw_str[i]) {
|
|
case '\n':
|
|
_output += "\\n";
|
|
break;
|
|
case '\r':
|
|
_output += "\\r";
|
|
break;
|
|
case '\t':
|
|
_output += "\\t";
|
|
break;
|
|
case '\"':
|
|
_output += "\\\"";
|
|
break;
|
|
case '\\':
|
|
_output += "\\\\";
|
|
break;
|
|
default:
|
|
_output += _raw_str[i];
|
|
}
|
|
}
|
|
for (unsigned i = _last_quote; _raw_str[i] != 0; i++) {
|
|
_output += _raw_str[i];
|
|
}
|
|
return _output;
|
|
}
|
|
|
|
int
|
|
Util::_spo_cmp(const void* _a, const void* _b)
|
|
{
|
|
int** _p_a = (int**)_a;
|
|
int** _p_b = (int**)_b;
|
|
|
|
int _sub_id_a = (*_p_a)[0];
|
|
int _sub_id_b = (*_p_b)[0];
|
|
if (_sub_id_a != _sub_id_b) {
|
|
return _sub_id_a - _sub_id_b;
|
|
}
|
|
|
|
int _pre_id_a = (*_p_a)[1];
|
|
int _pre_id_b = (*_p_b)[1];
|
|
if (_pre_id_a != _pre_id_b) {
|
|
return _pre_id_a - _pre_id_b;
|
|
}
|
|
|
|
int _obj_id_a = (*_p_a)[2];
|
|
int _obj_id_b = (*_p_b)[2];
|
|
if (_obj_id_a != _obj_id_b) {
|
|
return _obj_id_a - _obj_id_b;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
Util::_ops_cmp(const void* _a, const void* _b)
|
|
{
|
|
int** _p_a = (int**)_a;
|
|
int** _p_b = (int**)_b;
|
|
|
|
int _obj_id_a = (*_p_a)[2];
|
|
int _obj_id_b = (*_p_b)[2];
|
|
if (_obj_id_a != _obj_id_b) {
|
|
return _obj_id_a - _obj_id_b;
|
|
}
|
|
|
|
int _pre_id_a = (*_p_a)[1];
|
|
int _pre_id_b = (*_p_b)[1];
|
|
if (_pre_id_a != _pre_id_b) {
|
|
return _pre_id_a - _pre_id_b;
|
|
}
|
|
|
|
int _sub_id_a = (*_p_a)[0];
|
|
int _sub_id_b = (*_p_b)[0];
|
|
if (_sub_id_a != _sub_id_b) {
|
|
return _sub_id_a - _sub_id_b;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
Util::_pso_cmp(const void* _a, const void* _b)
|
|
{
|
|
int** _p_a = (int**)_a;
|
|
int** _p_b = (int**)_b;
|
|
|
|
int _pre_id_a = (*_p_a)[1];
|
|
int _pre_id_b = (*_p_b)[1];
|
|
if (_pre_id_a != _pre_id_b) {
|
|
return _pre_id_a - _pre_id_b;
|
|
}
|
|
|
|
int _sub_id_a = (*_p_a)[0];
|
|
int _sub_id_b = (*_p_b)[0];
|
|
if (_sub_id_a != _sub_id_b) {
|
|
return _sub_id_a - _sub_id_b;
|
|
}
|
|
|
|
int _obj_id_a = (*_p_a)[2];
|
|
int _obj_id_b = (*_p_b)[2];
|
|
if (_obj_id_a != _obj_id_b) {
|
|
return _obj_id_a - _obj_id_b;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|