/*============================================================================= # Filename: Signature.cpp # Author: Bookug Lobert # Mail: zengli-bookug@pku.edu.cn # Last Modified: 2016-04-11 13:18 # Description: =============================================================================*/ #include "Signature.h" using namespace std; std::string Signature::BitSet2str(const EntityBitSet& _bitset) { std::stringstream _ss; bool any = false; for (unsigned i = 0; i < _bitset.size(); i++) { if (_bitset.test(i)) { _ss << "[" << i << "] "; any = true; } } if (!any) { _ss << "empty" << endl; } _ss << endl; return _ss.str(); } void Signature::encodePredicate2Entity(int _pre_id, EntityBitSet& _entity_bs, const char _type) { if (Signature::PREDICATE_ENCODE_METHOD == 0) { //WARN:change if need to use again, because the encoding method has changed now! int pos = ((_pre_id + 10) % Signature::EDGE_SIG_LENGTH) + Signature::STR_SIG_LENGTH; _entity_bs.set(pos); } else { int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF; if (_type == Util::EDGE_OUT) { seed_num += Signature::EDGE_SIG_INTERVAL_NUM_HALF; } //int primeSize = 5; //int prime1[]={5003,5009,5011,5021,5023}; //int prime2[]={49943,49957,49991,49993,49999}; //NOTICE: more ones in the bitset(use more primes) means less conflicts, but also weakens the filtration of VSTree. // when the data set is big enough, cutting down the size of candidate list should come up to our primary consideration. // in this case we should not encode too many ones in entities' signature. // also, when the data set is small, hash conflicts can hardly happen. // therefore, I think using 2 primes(set up two ones in bitset) is enough. // --by hanshuo. //int primeSize = 2; //int prime1[] = {5003, 5011}; //int prime2[] = {49957, 49993}; //for(int i = 0; i < primeSize; i++) //{ //int seed = _pre_id * prime1[i] % prime2[i]; //int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num; //_entity_bs.set(pos); //} int seed = _pre_id * 5003 % 49957; int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::STR_SIG_LENGTH + Signature::EDGE_SIG_INTERVAL_BASE * seed_num; _entity_bs.set(pos); } } void Signature::encodePredicate2Edge(int _pre_id, EdgeBitSet& _edge_bs) { if (Signature::PREDICATE_ENCODE_METHOD == 0) { int pos = (_pre_id + 10) % Signature::EDGE_SIG_LENGTH; _edge_bs.set(pos); } else { int seed_num = _pre_id % Signature::EDGE_SIG_INTERVAL_NUM_HALF; //int primeSize = 5; //int prime1[]={5003,5009,5011,5021,5023}; //int prime2[]={49943,49957,49991,49993,49999}; //int primeSize = 2; //int prime1[] = {5003,5011}; //int prime2[] = {49957,49993}; //for (int i = 0; i < primeSize; i++) //{ //int seed = _pre_id * prime1[i] % prime2[i]; //int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num; //_edge_bs.set(pos); //} int seed = _pre_id * 5003 % 49957; int pos = (seed % Signature::EDGE_SIG_INTERVAL_BASE) + Signature::EDGE_SIG_INTERVAL_BASE * seed_num; _edge_bs.set(pos); } } //NOTICE: no need to encode itself because only variable in query need to be filtered! //So only consider all neighbors! void Signature::encodeStr2Entity(const char* _str, EntityBitSet& _entity_bs) { //_str is subject or object or literal if (strlen(_str) >0 && _str[0] == '?') return; int length = (int)strlen(_str); unsigned int hashKey = 0; unsigned int pos = 0; char *str2 = (char*)calloc(length + 1, sizeof(char)); strcpy(str2, _str); char *str = str2; unsigned base = Signature::STR_SIG_BASE * (Signature::HASH_NUM - 1); for (int i = Signature::HASH_NUM - 1; i >= 0; --i) { HashFunction hf = Util::hash[i]; if (hf == NULL) break; hashKey = hf(str); str = str2; pos = base + hashKey % Signature::STR_SIG_BASE; base -= Signature::STR_SIG_BASE; if (_str[0] == '"') { pos += Signature::STR_SIG_LENGTH2; } else if (_str[0] != '<') { #ifdef DEBUG_VSTREE cerr << "error in encodeStr2Entity(): neighbor is neither a literal or entity!" << endl; #endif } _entity_bs.set(pos); } //BETTER: use multiple threads for different hash functions #ifdef DEBUG_VSTREE //std::stringstream _ss; //_ss << "encodeStr2Entity:" << str2 << endl; //Util::logging(_ss.str()); #endif free(str2); } void Signature::encodeStrID2Entity(int _str_id, EntityBitSet& _entity_bs) { //TODO } EntitySig::EntitySig() { this->entityBitSet.reset(); } EntitySig::EntitySig(const EntitySig* _p_sig) { this->entityBitSet.reset(); this->entityBitSet |= _p_sig->entityBitSet; } EntitySig::EntitySig(const EntitySig& _sig) { this->entityBitSet.reset(); this->entityBitSet |= _sig.entityBitSet; } EntitySig::EntitySig(const EntityBitSet& _bitset) { this->entityBitSet.reset(); this->entityBitSet |= _bitset; } EntitySig& EntitySig::operator|=(const EntitySig& _sig) { this->entityBitSet |= _sig.entityBitSet; return *this; } bool EntitySig::operator==(const EntitySig& _sig)const { return (this->entityBitSet == _sig.entityBitSet); } bool EntitySig::operator!=(const EntitySig& _sig)const { return (this->entityBitSet != _sig.entityBitSet); } EntitySig& EntitySig::operator=(const EntitySig& _sig) { this->entityBitSet.reset(); this->entityBitSet |= _sig.getBitset(); return *this; } const EntityBitSet& EntitySig::getBitset()const { return this->entityBitSet; } EdgeSig::EdgeSig() { this->edgeBitSet.reset(); } EdgeSig::EdgeSig(const EdgeSig* _p_sig) { this->edgeBitSet.reset(); this->edgeBitSet |= _p_sig->edgeBitSet; } EdgeSig::EdgeSig(const EdgeSig& _sig) { this->edgeBitSet.reset(); this->edgeBitSet |= _sig.edgeBitSet; } EdgeSig::EdgeSig(const EdgeBitSet& _bitset) { this->edgeBitSet.reset(); this->edgeBitSet |= _bitset; } EdgeSig& EdgeSig::operator|=(const EdgeSig& _sig) { this->edgeBitSet |= _sig.edgeBitSet; return *this; } string EntitySig::to_str() const { std::stringstream _ss; _ss << Signature::BitSet2str(this->entityBitSet); return _ss.str(); }