/*=============================================================================
# Filename: IDList.cpp
# Author: Bookug Lobert
# Mail: zengli-bookug@pku.edu.cn
# Last Modified: 2016-05-08 12:44
# Description: originally written by liyouhuan, modified by zengli
=============================================================================*/

#include "IDList.h"

using namespace std;

namespace
{
	//Strategies used to intersect the ID vector with a raw int array.
	//The numeric values match the 0/1/2 codes used historically in this file.
	enum IntersectMethod
	{
		METHOD_MERGE = 0,             //linear two-pointer merge over both lists
		METHOD_SEARCH_IN_VECTOR = 1,  //for each array element, binary search in the vector
		METHOD_SEARCH_IN_ARRAY = 2    //for each vector element, binary search in the array
	};

	//Pick the intersection strategy from the two list sizes.
	//
	//Let n be the larger size and m = n*k the smaller one (0 < k <= 1).
	//A linear merge costs about n + m = n(k+1); binary-searching every element
	//of the smaller list in the larger one costs about m*log2(n) = n*k*log2(n).
	//They break even at k0 = 1 / log2(n/2) = log_{n/2}(2), which needs n > 2.
	//k <= k0: binary search; k > k0: merge.
	//NOTE(review): the original comment was garbled; the derivation above is
	//reconstructed. Util::logarithm(n / 2, 2) is presumably log base n/2 of 2
	//- confirm against Util.
	//
	//_vec_len: size of the ID vector; _arr_len: length of the raw array.
	//Callers guarantee _arr_len > 0, so the divisions below are well defined.
	IntersectMethod chooseIntersectMethod(int _vec_len, int _arr_len)
	{
		IntersectMethod search_method;
		int n = _vec_len;
		double k = 0;
		if (n < _arr_len)
		{
			//the vector is the smaller side: search its elements in the array
			k = static_cast<double>(n) / static_cast<double>(_arr_len);
			n = _arr_len;
			search_method = METHOD_SEARCH_IN_ARRAY;
		}
		else
		{
			//the array is the smaller (or equal) side: search its elements in the vector
			k = static_cast<double>(_arr_len) / static_cast<double>(n);
			search_method = METHOD_SEARCH_IN_VECTOR;
		}

		if (n <= 2)
		{
			return METHOD_MERGE;
		}
		double limit = Util::logarithm(n / 2, 2);
		if (k > limit)
		{
			return METHOD_MERGE;
		}
		return search_method;
	}
}

//Build an empty list.
IDList::IDList()
{
	this->id_list.clear();
}

//Return the _i-th id of the list; if _i is out of range
//(negative or >= size), return -1.
int
IDList::getID(int _i)const
{
	if (_i >= 0 && _i < this->size())
	{
		return this->id_list[_i];
	}
	return -1;
}

//Append _id at the end of the list; always returns true.
//NOTICE: no duplicate check is done here (a check would be more reliable),
//and appending may break the ascending order other methods rely on.
bool
IDList::addID(int _id)
{
	this->id_list.push_back(_id);
	return true;
}

//Number of ids stored.
int
IDList::size()const
{
	return static_cast<int>(this->id_list.size());
}

//True when the list holds no id.
bool
IDList::empty()const
{
	return this->id_list.empty();
}

//True when _id occurs in the list.
//Linear search, so it also works on an unsorted list; use bsearch_uporder
//when the list is known to be sorted.
bool
IDList::isExistID(int _id)const
{
	return std::find(this->id_list.begin(), this->id_list.end(), _id) != this->id_list.end();
}

//Read-only access to the underlying vector (never NULL).
const vector<int>*
IDList::getList()const
{
	return &(this->id_list);
}

//Mutable access to the _i-th id.
//An out-of-range index falls back to the first element (historical
//behaviour). When the list is empty there is no element to fall back to, so
//a reference to a dummy slot holding -1 is returned instead of indexing an
//empty vector; writes to that slot are discarded on the next such call.
int&
IDList::operator[](const int& _i)
{
	if (_i >= 0 && _i < this->size())
	{
		return this->id_list[_i];
	}
	if (!this->id_list.empty())
	{
		return this->id_list[0];
	}
	static int invalid_slot;
	invalid_slot = -1;
	return invalid_slot;
}

//Render the list as "size=N" followed by one "\t[id]" per element.
string
IDList::to_str()
{
	std::stringstream _ss;
	_ss << "size=" << this->id_list.size() << "";
	for (unsigned i = 0; i < this->id_list.size(); i++)
	{
		_ss << "\t[" << this->id_list[i] << "]";
	}
	return _ss.str();
}

//Sort the ids in ascending order; always returns 0.
int
IDList::sort()
{
	std::sort(id_list.begin(), id_list.end());
	return 0;
}

//Remove every id.
void
IDList::clear()
{
	this->id_list.clear();
}

//Replace the content with a copy of _new_idlist.
void
IDList::copy(const vector<int>& _new_idlist)
{
	this->id_list = _new_idlist;
}

//Replace the content with a copy of the ids held by *_new_idlist.
//_new_idlist must not be NULL.
void
IDList::copy(const IDList* _new_idlist)
{
	this->id_list = *(_new_idlist->getList());
}

//Keep only the ids that also occur in _id_list[0.._list_len).
//Both this list and _id_list are expected to be in ascending order: the
//merge strategy advances monotonically through both, and the other two
//strategies use the Util::bsearch_*_uporder helpers.
//A NULL or non-positive-length input empties this list.
//Returns the number of ids removed from this list.
int
IDList::intersectList(const int* _id_list, int _list_len)
{
	const int origin_size = this->size();
	if (_id_list == NULL || _list_len <= 0)
	{
		this->id_list.clear();
		return origin_size;
	}

	int remove_number = 0;
	switch (chooseIntersectMethod(origin_size, _list_len))
	{
	case METHOD_MERGE:
	{
		//in-place compaction: survivors are written to the front, the tail is cut off
		int other_pos = 0;
		int kept = 0;
		for (int self_pos = 0; self_pos < origin_size; ++self_pos)
		{
			const int can_id = this->id_list[self_pos];
			while (other_pos < _list_len && _id_list[other_pos] < can_id)
			{
				++other_pos;
			}
			if (other_pos == _list_len)
			{
				break;
			}
			if (can_id == _id_list[other_pos])
			{
				this->id_list[kept] = can_id;
				++kept;
				++other_pos;
			}
		}
		remove_number = origin_size - kept;
		this->id_list.erase(this->id_list.begin() + kept, this->id_list.end());
		break;
	}
	case METHOD_SEARCH_IN_VECTOR:
	{
		vector<int> new_id_list;
		for (int i = 0; i < _list_len; ++i)
		{
			if (Util::bsearch_vec_uporder(_id_list[i], this->getList()) != -1)
			{
				new_id_list.push_back(_id_list[i]);
			}
		}
		this->id_list.swap(new_id_list);
		remove_number = origin_size - this->size();
		break;
	}
	case METHOD_SEARCH_IN_ARRAY:
	{
		vector<int> new_id_list;
		for (int i = 0; i < origin_size; ++i)
		{
			if (Util::bsearch_int_uporder(this->id_list[i], _id_list, _list_len) != -1)
			{
				new_id_list.push_back(this->id_list[i]);
			}
		}
		this->id_list.swap(new_id_list);
		remove_number = origin_size - this->size();
		break;
	}
	default:
		cout << "no such method in IDList::intersectList()" << endl;
		break;
	}

	return remove_number;
}

//Keep only the ids that also occur in _id_list.
//Returns the number of ids removed from this list.
int
IDList::intersectList(const IDList& _id_list)
{
	//Work on a private copy: the pointer overload rewrites this->id_list
	//while reading its input, which must stay valid even if _id_list is *this.
	const vector<int> snapshot(*(_id_list.getList()));
	const int* raw = snapshot.empty() ? NULL : &snapshot[0];
	return this->intersectList(raw, static_cast<int>(snapshot.size()));
}

//Merge _id_list[0.._list_len) into this list.
//only_literal == false: ordered merge of two ascending lists; an id present
//  in both is stored once.
//only_literal == true: this list is assumed to hold no literals; every
//  element of _id_list from the first one accepted by Util::is_literal_ele
//  onwards is appended (literal id > entity id and the input is ordered,
//  so that tail is taken to be all literals).
//A NULL or non-positive-length input changes nothing and returns 0.
//Returns the number of ids added.
int
IDList::unionList(const int* _id_list, int _list_len, bool only_literal)
{
	if (_id_list == NULL || _list_len <= 0)
	{
		return 0;
	}

	if (only_literal)
	{
		int first_literal = 0;
		while (first_literal < _list_len && !Util::is_literal_ele(_id_list[first_literal]))
		{
			++first_literal;
		}
		for (int k = first_literal; k < _list_len; ++k)
		{
			this->addID(_id_list[k]);
		}
		//count from the remembered start; the loop index has already reached _list_len
		return _list_len - first_literal;
	}

	//O(n) ordered merge into a scratch vector, then swap it in
	const int origin_size = this->size();
	vector<int> merged;
	merged.reserve(static_cast<size_t>(origin_size) + static_cast<size_t>(_list_len));

	int i = 0, j = 0;
	while (i < origin_size && j < _list_len)
	{
		const int mine = this->id_list[i];
		const int theirs = _id_list[j];
		if (mine == theirs)
		{
			merged.push_back(mine);
			++i;
			++j;
		}
		else if (mine < theirs)
		{
			merged.push_back(mine);
			++i;
		}
		else
		{
			merged.push_back(theirs);
			++j;
		}
	}
	//at most one of the two tails is non-empty
	merged.insert(merged.end(), this->id_list.begin() + i, this->id_list.end());
	merged.insert(merged.end(), _id_list + j, _id_list + _list_len);

	const int add_number = static_cast<int>(merged.size()) - origin_size;
	this->id_list.swap(merged);
	return add_number;
}

//Merge the ids of _id_list into this list; see the pointer overload for the
//meaning of only_literal. Returns the number of ids added.
int
IDList::unionList(const IDList& _id_list, bool only_literal)
{
	//Work on a private copy: the pointer overload appends to this->id_list
	//while reading its input, which would dangle if _id_list is *this.
	const vector<int> snapshot(*(_id_list.getList()));
	const int* raw = snapshot.empty() ? NULL : &snapshot[0];
	return this->unionList(raw, static_cast<int>(snapshot.size()), only_literal);
}

//Build a new list holding the ids present in both _id_list and
//_list[0.._len). Neither input is modified. Both are expected to be in
//ascending order (same strategies as intersectList).
//A NULL or non-positive-length _list yields an empty result (not a copy of
//_id_list).
//The result is allocated with new; the caller owns it and must delete it.
IDList*
IDList::intersect(const IDList& _id_list, const int* _list, int _len)
{
	IDList* p = new IDList;
	if (_list == NULL || _len <= 0)
	{
		return p;
	}

	const int num = _id_list.size();
	switch (chooseIntersectMethod(num, _len))
	{
	case METHOD_MERGE:
	{
		int other_pos = 0;
		for (int i = 0; i < num; ++i)
		{
			const int can_id = _id_list.getID(i);
			while (other_pos < _len && _list[other_pos] < can_id)
			{
				++other_pos;
			}
			if (other_pos == _len)
			{
				break;
			}
			if (can_id == _list[other_pos])
			{
				p->addID(can_id);
				++other_pos;
			}
		}
		break;
	}
	case METHOD_SEARCH_IN_VECTOR:
	{
		for (int i = 0; i < _len; ++i)
		{
			if (Util::bsearch_vec_uporder(_list[i], _id_list.getList()) != -1)
			{
				p->addID(_list[i]);
			}
		}
		break;
	}
	case METHOD_SEARCH_IN_ARRAY:
	{
		for (int i = 0; i < num; ++i)
		{
			const int t = _id_list.getID(i);
			if (Util::bsearch_int_uporder(t, _list, _len) != -1)
			{
				p->addID(t);
			}
		}
		break;
	}
	default:
		cout << "no such method in IDList::intersect()" << endl;
		break;
	}

	return p;
}

//Truncate the list: remove every id from position i to the end, so that i
//ids remain. An index outside [0, size) leaves the list unchanged.
//Always returns 0.
int
IDList::erase(int i)
{
	if (i >= 0 && i < this->size())
	{
		id_list.erase(id_list.begin() + i, id_list.end());
	}
	return 0;
}

//Search _key in this list via Util::bsearch_vec_uporder and return its
//result unchanged (other code in this file treats -1 as "not found").
//presumably requires the list to be in ascending order - confirm against Util
int
IDList::bsearch_uporder(int _key)
{
	return Util::bsearch_vec_uporder(_key, this->getList());
}