mirror of https://gitee.com/jasder/isearch.git
add index_write
This commit is contained in:
parent
09c1c6a45d
commit
8f5fe9d199
|
@ -0,0 +1,28 @@
|
|||
# Build script for the index_write executable.
#
# target_include_directories() only exists since CMake 2.8.11, so that is the
# lowest version this script can really be configured with.
cmake_minimum_required(VERSION 2.8.11)

PROJECT(index_write)

# Short hash of the latest commit touching this directory; handed to the code
# as GIT_VERSION. Run it from the source directory so that out-of-source builds
# still find the repository.
EXECUTE_PROCESS(
    COMMAND git log -1 --pretty=format:%h .
    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
    OUTPUT_VARIABLE version
)

# Compiler flags only. librt is a link input and is listed in
# target_link_libraries() below, after the libraries that may depend on it.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall")

# Every source file in this directory is part of the executable.
AUX_SOURCE_DIRECTORY(. main)

LINK_DIRECTORIES(
    ${PROJECT_SOURCE_DIR}/../../comm
    ${PROJECT_SOURCE_DIR}/../../3rdlib/jsoncpp/lib
    ${PROJECT_SOURCE_DIR}/../../comm/stat
)

ADD_EXECUTABLE(index_write ${main})

target_include_directories(index_write PUBLIC
    ../../3rdlib/jsoncpp/include
    ../../comm
    ../../comm/stat
    ../index_storage/api/c_api_cc
)

add_definitions(-DGIT_VERSION="${version}" -DMAIN)

target_link_libraries(index_write libcommon.a libdtc.so jsoncpp stat ssl rt)
SET_TARGET_PROPERTIES(index_write PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./bin")
|
|
@ -0,0 +1,529 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: add_request_proc.cc
|
||||
*
|
||||
* Description: AddReqProc class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "add_request_proc.h"
|
||||
#include "index_tbl_op.h"
|
||||
#include "geohash.h"
|
||||
#include "split_manager.h"
|
||||
#include <sstream>
|
||||
#include <arpa/inet.h>
|
||||
#include <netinet/in.h>
|
||||
#include <iomanip>
|
||||
|
||||
// Default constructor.
// The numeric identifiers are zero-initialised so that an object built this
// way never carries indeterminate values; the string, vector and Json::Value
// members default-construct to empty.
AddReqProc::AddReqProc()
    : app_id(0)
    , doc_version(0)
    , trans_version(0)
{
}
|
||||
|
||||
// Builds a processor for one document.
//   jf           - JSON object holding the document's fields (copied).
//   insert_param - appid, doc id and version numbers of the document.
AddReqProc::AddReqProc(const Json::Value& jf, InsertParam& insert_param)
    : json_field(jf)
    , app_id(insert_param.appid)
    , doc_version(insert_param.doc_version)
    , trans_version(insert_param.trans_version)
    , doc_id(insert_param.doc_id)
{
}
|
||||
|
||||
// Nothing to release explicitly: every member cleans up after itself.
AddReqProc::~AddReqProc()
{
}
|
||||
|
||||
void AddReqProc::do_stat_word_freq(vector<vector<string> > &strss, map<string, item> &word_map, string extend) {
|
||||
string word;
|
||||
uint32_t id = 0;
|
||||
ostringstream oss;
|
||||
vector<vector<string> >::iterator iters = strss.begin();
|
||||
uint32_t index = 0;
|
||||
|
||||
for(;iters != strss.end(); iters++){
|
||||
index++;
|
||||
vector<string>::iterator iter = iters->begin();
|
||||
|
||||
log_debug("start do_stat_word_freq, appid = %u\n",app_id);
|
||||
for (; iter != iters->end(); iter++) {
|
||||
|
||||
word = *iter;
|
||||
if (!SplitManager::Instance()->wordValid(word, app_id, id)){
|
||||
continue;
|
||||
}
|
||||
if (word_map.find(word) == word_map.end()) {
|
||||
item it;
|
||||
it.doc_id = doc_id;
|
||||
it.freq = 1;
|
||||
it.extend = extend;
|
||||
it.indexs.push_back(index);
|
||||
word_map.insert(make_pair(word, it));
|
||||
}
|
||||
else {
|
||||
word_map[word].freq++;
|
||||
word_map[word].indexs.push_back(index);
|
||||
}
|
||||
|
||||
oss << (*iter) << "|";
|
||||
}
|
||||
}
|
||||
log_debug("split: %s",oss.str().c_str());
|
||||
}
|
||||
|
||||
void AddReqProc::do_stat_word_freq(vector<string> &strss, map<string, item> &word_map) {
|
||||
string word;
|
||||
vector<string>::iterator iters = strss.begin();
|
||||
uint32_t index = 0;
|
||||
|
||||
for (; iters != strss.end(); iters++) {
|
||||
index++;
|
||||
word = *iters;
|
||||
if (word_map.find(word) == word_map.end()) {
|
||||
item it;
|
||||
it.doc_id = doc_id;
|
||||
it.freq = 1;
|
||||
it.indexs.push_back(index);
|
||||
word_map.insert(make_pair(word, it));
|
||||
}
|
||||
else {
|
||||
word_map[word].freq++;
|
||||
word_map[word].indexs.push_back(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int AddReqProc::deal_index_tag(struct table_info *tbinfo, string field_name){
|
||||
int ret =0;
|
||||
map<string, item> word_map;
|
||||
vector<vector<string> > split_content;
|
||||
switch(tbinfo->field_type){
|
||||
case FIELD_STRING:
|
||||
case FIELD_TEXT:
|
||||
if(json_field[field_name].isString()){
|
||||
if (tbinfo->segment_tag == SEGMENT_NGRAM) { // NGram split mode
|
||||
vector<string> ngram_content = SplitManager::Instance()->split(json_field[field_name].asString());
|
||||
do_stat_word_freq(ngram_content, word_map);
|
||||
}
|
||||
else if (tbinfo->segment_tag == SEGMENT_CHINESE || tbinfo->segment_tag == SEGMENT_ENGLISH) { // use intelligent_info
|
||||
string str = json_field[field_name].asString();
|
||||
// segment_tag为3对应的字段内容必须为全中文,为4对应的的字段不能包含中文
|
||||
if (tbinfo->segment_tag == SEGMENT_CHINESE && allChinese(str) == false) {
|
||||
log_error("segment_tag is 3, the content[%s] must be Chinese.", str.c_str());
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
if (tbinfo->segment_tag == SEGMENT_ENGLISH && noChinese(str) == false) {
|
||||
log_error("segment_tag is 4, the content[%s] can not contain Chinese.", str.c_str());
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
item it;
|
||||
it.doc_id = doc_id;
|
||||
it.freq = 1;
|
||||
if(tbinfo->segment_feature == SEGMENT_FEATURE_SNAPSHOT){
|
||||
Json::FastWriter ex_writer;
|
||||
it.extend = ex_writer.write(snapshot_content);
|
||||
}
|
||||
word_map.insert(make_pair(str, it));
|
||||
vector<IntelligentInfo> info;
|
||||
bool flag = false;
|
||||
get_intelligent(str, info, flag);
|
||||
if (flag) {
|
||||
stringstream ss;
|
||||
ss << app_id << "#" << tbinfo->field_value;
|
||||
ret = g_hanpinIndexInstance.do_insert_intelligent(ss.str(), doc_id, str, info, doc_version);
|
||||
if(0 != ret){
|
||||
roll_back();
|
||||
return ret;
|
||||
}
|
||||
intelligent_keys.push_back(ss.str());
|
||||
}
|
||||
}
|
||||
else {
|
||||
split_content = SplitManager::Instance()->split(json_field[field_name].asString(), app_id);
|
||||
string extend = "";
|
||||
if(tbinfo->segment_feature == SEGMENT_FEATURE_SNAPSHOT){
|
||||
Json::FastWriter ex_writer;
|
||||
extend = ex_writer.write(snapshot_content);
|
||||
}
|
||||
do_stat_word_freq(split_content, word_map, extend);
|
||||
split_content.clear();
|
||||
}
|
||||
ret = g_IndexInstance.do_insert_index(word_map, app_id, doc_version, tbinfo->field_value, docid_index_map);
|
||||
if (0 != ret) {
|
||||
roll_back();
|
||||
return ret;
|
||||
}
|
||||
word_map.clear();
|
||||
}else{
|
||||
log_error("field type error, not FIELD_STRING.");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_INT:
|
||||
if(json_field[field_name].isInt()){
|
||||
int ret;
|
||||
struct item it;
|
||||
it.doc_id = doc_id;
|
||||
it.freq = 0;
|
||||
string key = "";
|
||||
if(tbinfo->segment_tag == SEGMENT_RANGE){ // 范围查的字段将key补全到20位
|
||||
stringstream ss;
|
||||
ss << setw(20) << setfill('0') << json_field[field_name].asInt();
|
||||
key = gen_dtc_key_string(app_id, "00", ss.str());
|
||||
} else {
|
||||
key = gen_dtc_key_string(app_id, "00", (uint32_t)json_field[field_name].asInt());
|
||||
}
|
||||
ret = g_IndexInstance.insert_index_dtc(key, it, tbinfo->field_value, doc_version, docid_index_map);
|
||||
if(ret != 0){
|
||||
roll_back();
|
||||
return RT_ERROR_INSERT_INDEX_DTC;
|
||||
}
|
||||
}else{
|
||||
log_error("field type error, not FIELD_INT.");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_LONG:
|
||||
if(json_field[field_name].isInt64()){
|
||||
struct item it;
|
||||
it.doc_id = doc_id;
|
||||
it.freq = 0;
|
||||
string key = gen_dtc_key_string(app_id, "00", (int64_t)json_field[field_name].asInt64());
|
||||
int ret = g_IndexInstance.insert_index_dtc(key, it, tbinfo->field_value, doc_version, docid_index_map);
|
||||
if(0 != ret){
|
||||
roll_back();
|
||||
log_error("insert_index_dtc error, appid[%d], key[%s]", app_id, key.c_str());
|
||||
return RT_ERROR_INSERT_INDEX_DTC;
|
||||
}
|
||||
} else {
|
||||
log_error("field type error, not FIELD_LONG.");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_DOUBLE:
|
||||
if(json_field[field_name].isDouble()){
|
||||
struct item it;
|
||||
it.doc_id = doc_id;
|
||||
it.freq = 0;
|
||||
string key = gen_dtc_key_string(app_id, "00", (double)json_field[field_name].asDouble());
|
||||
int ret = g_IndexInstance.insert_index_dtc(key, it, tbinfo->field_value, doc_version, docid_index_map);
|
||||
if(0 != ret){
|
||||
roll_back();
|
||||
log_error("insert_index_dtc error, appid[%d], key[%s]", app_id, key.c_str());
|
||||
return RT_ERROR_INSERT_INDEX_DTC;
|
||||
}
|
||||
} else {
|
||||
log_error("field type error, not FIELD_DOUBLE.");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_IP:
|
||||
uint32_t s;
|
||||
int ret;
|
||||
if(json_field[field_name].isString()){
|
||||
ret = inet_pton(AF_INET, json_field[field_name].asString().c_str(), (void *)&s);
|
||||
if(ret == 0){
|
||||
log_error("ip format is error\n");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
struct item it;
|
||||
it.doc_id = doc_id;
|
||||
it.freq = 0;
|
||||
string key = gen_dtc_key_string(app_id, "00", ntohl(s));
|
||||
ret = g_IndexInstance.insert_index_dtc(key, it, tbinfo->field_value, doc_version, docid_index_map);
|
||||
if(ret != 0){
|
||||
roll_back();
|
||||
return RT_ERROR_INSERT_INDEX_DTC;
|
||||
}
|
||||
}else{
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_LNG:
|
||||
if(json_field[field_name].isString()){
|
||||
lng = json_field[field_name].asString();
|
||||
}else{
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_LAT:
|
||||
if(json_field[field_name].isString()){
|
||||
lat = json_field[field_name].asString();
|
||||
}else{
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_LNG_ARRAY:
|
||||
if(json_field[field_name].isArray()){
|
||||
Json::Value lngs = json_field[field_name];
|
||||
for (uint32_t lng_idx = 0; lng_idx < lngs.size(); ++lng_idx) {
|
||||
if (lngs[lng_idx].isString()){
|
||||
lng_arr.push_back(lngs[lng_idx].asString());
|
||||
} else {
|
||||
log_error("longitude must be string");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
}
|
||||
}else{
|
||||
log_error("FIELD_LNG_ARRAY must be array");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_LAT_ARRAY:
|
||||
if(json_field[field_name].isArray()){
|
||||
Json::Value lats = json_field[field_name];
|
||||
for (uint32_t lat_idx = 0; lat_idx < lats.size(); ++lat_idx) {
|
||||
if (lats[lat_idx].isString()){
|
||||
lat_arr.push_back(lats[lat_idx].asString());
|
||||
} else {
|
||||
log_error("latitude must be string");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
}
|
||||
}else{
|
||||
log_error("FIELD_LAT_ARRAY must be array");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
case FIELD_WKT:
|
||||
if(json_field[field_name].isString()){
|
||||
string str = json_field[field_name].asString();
|
||||
str = delPrefix(str);
|
||||
vector<string> str_vec = splitEx(str, ",");
|
||||
for(uint32_t str_vec_idx = 0; str_vec_idx < str_vec.size(); str_vec_idx++){
|
||||
string wkt_str = trim(str_vec[str_vec_idx]);
|
||||
vector<string> wkt_vec = splitEx(wkt_str, " ");
|
||||
if(wkt_vec.size() == 2){
|
||||
lng_arr.push_back(wkt_vec[0]);
|
||||
lat_arr.push_back(wkt_vec[1]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log_error("FIELD_WKT must be string");
|
||||
return RT_ERROR_FIELD_FORMAT;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Writes all index data for the document held by this object.
//
// Steps, in order:
//   1. collect the fields flagged snapshot_tag == 1 into snapshot_content;
//   2. index every field flagged index_tag == 1 via deal_index_tag();
//   3. write geohash (GIS) index entries for lng/lat and lng_arr/lat_arr;
//   4. write union-key index entries;
//   5. update the snapshot row and store the doc-id -> index-key map.
//
// content_fields.content is overwritten with the serialised snapshot.
// Returns 0 on success or an RT_* error code. On every error return the
// already written data is undone through roll_back().
int AddReqProc::do_insert_index(UserTableContent& content_fields){
    int ret = 0;
    Json::Value::Members member = json_field.getMemberNames();
    Json::Value::Members::iterator iter;

    // Step 1: build the snapshot first, because index entries written in
    // step 2/3 may embed it in their `extend` value.
    for(iter = member.begin(); iter != member.end(); ++iter)
    {
        string field_name = *iter;
        struct table_info *tbinfo = SplitManager::Instance()->get_table_info(app_id, field_name);
        if(tbinfo == NULL || tbinfo->snapshot_tag != 1){
            continue;
        }
        const Json::Value &value = json_field[field_name];
        if(tbinfo->field_type == FIELD_INT && value.isInt()){
            snapshot_content[field_name] = value.asInt();
        }else if(tbinfo->field_type > FIELD_INT && value.isString()){
            snapshot_content[field_name] = value.asString();
        }else if(tbinfo->field_type > FIELD_INT && value.isDouble()){
            snapshot_content[field_name] = value.asDouble();
        }else if(tbinfo->field_type > FIELD_INT && value.isInt64()){
            snapshot_content[field_name] = value.asInt64();
        }else if(tbinfo->field_type > FIELD_INT && value.isArray()){
            snapshot_content[field_name] = value;
        }
    }

    // Step 2: per-field inverted index.
    for(iter = member.begin(); iter != member.end(); ++iter)
    {
        string field_name = *iter;
        struct table_info *tbinfo = SplitManager::Instance()->get_table_info(app_id, field_name);
        if(tbinfo == NULL || tbinfo->index_tag != 1){
            continue;
        }
        ret = deal_index_tag(tbinfo, field_name);
        if(0 != ret){
            log_error("deal index tag process error, ret: %d", ret);
            roll_back();
            return ret;
        }
    }

    // Step 3a: single longitude/latitude pair.
    if(lng.length() != 0 && lat.length() != 0){
        struct table_info *tbinfo = SplitManager::Instance()->get_table_info(app_id, "gis");
        if(tbinfo == NULL){
            roll_back();
            return RT_NO_GIS_DEFINE;
        }

        string gisid = encode(atof(lat.c_str()), atof(lng.c_str()), 6);
        log_debug("gis code = %s",gisid.c_str());
        struct item it;
        it.doc_id = doc_id;
        it.freq = 0;
        Json::FastWriter gis_writer;
        it.extend = gis_writer.write(snapshot_content);
        string key = gen_dtc_key_string(app_id, "00", gisid);
        ret = g_IndexInstance.insert_index_dtc(key, it, tbinfo->field_value, doc_version, docid_index_map);
        if(ret != 0){
            roll_back();
            return RT_ERROR_INSERT_INDEX_DTC;
        }
        log_debug("gis code = %s,doc_vesion = %d,docid = %s\n",gisid.c_str(),doc_version,it.doc_id.c_str());
    }

    // Step 3b: arrays of longitudes/latitudes (also filled by WKT fields).
    log_debug("lng_arr size: %d, lat_arr size: %d", (int)lng_arr.size(), (int)lat_arr.size());
    if(lng_arr.size() > 0 && lat_arr.size() > 0){
        if(lng_arr.size() != lat_arr.size()){
            log_error("lng_arr size not equal with lat_arr size");
            // Index entries from step 2/3a already exist at this point.
            roll_back();
            return RT_ERROR_FIELD_FORMAT;
        }
        struct table_info *tbinfo = SplitManager::Instance()->get_table_info(app_id, "gis");
        if(tbinfo == NULL){
            roll_back();
            log_error("gis field not defined");
            return RT_NO_GIS_DEFINE;
        }
        Json::FastWriter gis_writer;
        const string gis_extend = gis_writer.write(snapshot_content);
        set<string> gis_set; // geohash codes already written for this document
        for(uint32_t arr_idx = 0; arr_idx < lng_arr.size(); arr_idx++){
            string gisid = encode(atof(lat_arr[arr_idx].c_str()), atof(lng_arr[arr_idx].c_str()), 6);
            if(!gis_set.insert(gisid).second){
                continue; // several points fell into the same geohash cell
            }
            struct item it;
            it.doc_id = doc_id;
            it.freq = 0;
            it.extend = gis_extend;
            string key = gen_dtc_key_string(app_id, "00", gisid);
            ret = g_IndexInstance.insert_index_dtc(key, it, tbinfo->field_value, doc_version, docid_index_map);
            if(ret != 0){
                roll_back();
                return RT_ERROR_INSERT_INDEX_DTC;
            }
            log_debug("gis code = %s,doc_vesion = %d,docid = %s\n",gisid.c_str(),doc_version,it.doc_id.c_str());
        }
    }

    // Step 4: union keys. Each entry of union_key_vec is a comma separated
    // list of field ids; the keys of those fields are combined.
    vector<string> union_key_vec;
    SplitManager::Instance()->getUnionKeyField(app_id, union_key_vec);
    vector<string>::iterator union_key_iter = union_key_vec.begin();
    for(; union_key_iter != union_key_vec.end(); union_key_iter++){
        string union_key = *union_key_iter;
        vector<int> union_field_vec = splitInt(union_key, ",");
        vector<int>::iterator union_field_iter = union_field_vec.begin();
        vector<vector<string> > keys_vvec;
        for(; union_field_iter != union_field_vec.end(); union_field_iter++){
            int union_field_value = *union_field_iter;
            if(union_field_value < 0 || union_field_value >= (int)docid_index_map.size()){
                log_error("appid[%d] field[%d] is invalid", app_id, *union_field_iter);
                break;
            }
            if(!docid_index_map[union_field_value].isArray()){
                log_debug("doc_id[%s] union_field_value[%d] has no keys", doc_id.c_str(), union_field_value);
                break;
            }
            vector<string> key_vec;
            for (int key_index = 0; key_index < (int)docid_index_map[union_field_value].size(); key_index++){
                if(!docid_index_map[union_field_value][key_index].isString()){
                    continue;
                }
                string union_index_key = docid_index_map[union_field_value][key_index].asString();
                // An inverted-index key looks like "<appid>#00#<word>"; only
                // the part after the second '#' is wanted. Locate that '#'
                // instead of assuming a fixed 9-character prefix, which is
                // only right for 5-digit appids.
                string::size_type sep = union_index_key.find('#');
                if(sep != string::npos){
                    sep = union_index_key.find('#', sep + 1);
                }
                if(sep != string::npos && sep + 1 < union_index_key.size()){
                    key_vec.push_back(union_index_key.substr(sep + 1));
                }
            }
            keys_vvec.push_back(key_vec);
        }
        if(keys_vvec.size() != union_field_vec.size()){
            // This union key cannot be built for the document; the other
            // union keys are independent of it and are still processed.
            log_debug("keys_vvec.size not equal union_field_vec.size");
            continue;
        }
        vector<string> union_keys = combination(keys_vvec);
        for(int m = 0 ; m < (int)union_keys.size(); m++){
            ret = g_IndexInstance.insert_union_index_dtc(union_keys[m], doc_id, app_id, doc_version);
            if(ret != 0){
                // Best effort: a failed union key is logged, not fatal.
                log_error("insert union key[%s] error", union_keys[m].c_str());
            }
        }
    }

    // Step 5: snapshot and doc-id -> index-key map.
    Json::FastWriter writer;
    content_fields.content = writer.write(snapshot_content);
    Json::FastWriter doc_index_writer;
    string doc_index_map_string = doc_index_writer.write(docid_index_map);

    const bool is_update = (doc_version != 1);
    if(is_update){
        // Register the index keys of the previous document version for deletion.
        map<uint32_t, vector<string> > index_res;
        g_IndexInstance.GetIndexData(gen_dtc_key_string(content_fields.appid, "20", doc_id), doc_version - 1, index_res);
        map<uint32_t, vector<string> >::iterator map_iter = index_res.begin();
        for(; map_iter != index_res.end(); map_iter++){
            uint32_t field = map_iter->first;
            const vector<string> &words = map_iter->second;
            for(int i = 0; i < (int)words.size(); i++){
                DeleteTask::GetInstance().RegisterInfo(words[i], doc_id, doc_version - 1, field);
            }
        }
    }

    int affected_rows = 0;
    ret = g_IndexInstance.update_sanpshot_dtc(content_fields, doc_version, trans_version, affected_rows);
    if(ret != 0 || affected_rows == 0){
        log_error("update_sanpshot_dtc error, roll back, ret: %d, affected_rows: %d.", ret, affected_rows);
        roll_back();
        return RT_ERROR_UPDATE_SNAPSHOT;
    }

    if(is_update){
        g_IndexInstance.update_docid_index_dtc(doc_index_map_string, doc_id, app_id, doc_version);
    }else{
        g_IndexInstance.insert_docid_index_dtc(doc_index_map_string, doc_id, app_id, doc_version);
    }
    return 0;
}
|
||||
|
||||
int AddReqProc::roll_back(){
|
||||
// 删除hanpin_index
|
||||
for(int i = 0; i < (int)intelligent_keys.size(); i++){
|
||||
g_hanpinIndexInstance.delete_intelligent(intelligent_keys[i], doc_id, trans_version);
|
||||
}
|
||||
|
||||
// 删除keyword_index
|
||||
if(docid_index_map.isArray()){
|
||||
for(int i = 0;i < (int)docid_index_map.size();i++){
|
||||
Json::Value info = docid_index_map[i];
|
||||
if(info.isArray()){
|
||||
for(int j = 0;j < (int)info.size();j++){
|
||||
if(info[j].isString()){
|
||||
string key = info[j].asString();
|
||||
g_IndexInstance.delete_index(key, doc_id, trans_version, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// 如果trans_version=1,删除快照,否则更新快照的trans_version=trans_version-1
|
||||
Json::Value res;
|
||||
if(trans_version == 1){
|
||||
g_IndexInstance.delete_snapshot_dtc(doc_id, app_id, res);
|
||||
} else {
|
||||
g_IndexInstance.update_sanpshot_dtc(app_id, doc_id, trans_version);
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: add_request_proc.h
|
||||
*
|
||||
* Description: AddReqProc class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef ADD_REQUEST_PROC_H
|
||||
#define ADD_REQUEST_PROC_H
|
||||
|
||||
#include "log.h"
|
||||
#include "json/json.h"
|
||||
#include "comm.h"
|
||||
|
||||
class UserTableContent;
|
||||
class SplitManager;
|
||||
class AddReqProc
|
||||
{
|
||||
public:
|
||||
AddReqProc();
|
||||
AddReqProc(const Json::Value& jf, InsertParam& insert_param);
|
||||
~AddReqProc();
|
||||
|
||||
int do_insert_index(UserTableContent& content_fields);
|
||||
|
||||
private:
|
||||
void do_stat_word_freq(vector<vector<string> > &strss, map<string, item> &word_map, string extend);
|
||||
void do_stat_word_freq(vector<string> &strss, map<string, item> &word_map);
|
||||
int deal_index_tag(struct table_info *tbinfo, string field_name);
|
||||
int roll_back();
|
||||
|
||||
private:
|
||||
Json::Value json_field;
|
||||
uint32_t app_id;
|
||||
uint32_t doc_version;
|
||||
uint32_t trans_version;
|
||||
string doc_id;
|
||||
string lng;
|
||||
string lat;
|
||||
vector<string> lng_arr;
|
||||
vector<string> lat_arr;
|
||||
vector<string> intelligent_keys;
|
||||
Json::Value snapshot_content;
|
||||
Json::Value docid_index_map;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: comm.h
|
||||
*
|
||||
* Description: common enumeration classes definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __COMM_H__
|
||||
#define __COMM_H__
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
// Packs `a` into the upper 32 bits and ORs `b` into the result.
// The expansion is fully parenthesised and carries no trailing ';', so the
// macro can be used inside larger expressions.
#define BUILD_BIGINT_KEY(a,b) (((((unsigned long long)(a)) << 32) & 0xffffffff00000000ull) | (b))
// Extracts the upper 32 bits of a key built by BUILD_BIGINT_KEY.
// Parenthesised so that e.g. `KETTOAPPID(k) == id` compares the whole result.
#define KETTOAPPID(a) ((((unsigned long long)(a)) >> 32) & 0xFFFFFFFF)
#define MESSAGE "message"
|
||||
|
||||
|
||||
// One inverted-index entry for a word in a document.
struct item {
    std::string doc_id;             // document the entry belongs to
    uint32_t freq;                  // number of occurrences counted for the word
    std::vector<uint32_t> indexs;   // positions recorded for the word
    std::string extend;             // extra payload stored with the entry
};
|
||||
|
||||
// Identifiers of the document an insert request refers to.
struct InsertParam {
    uint32_t appid;
    std::string doc_id;
    uint32_t doc_version;
    uint32_t trans_version;
};
|
||||
|
||||
// Kinds of character data.
enum CHARACTERTYPE {
    CHINESE     = 1,
    INITIAL     = 2,
    WHOLE_SPELL = 3
};
|
||||
|
||||
// Field types of an application table. The numeric values are spelled out
// because they are compared against stored field_type numbers.
enum FieldType {
    FIELD_INT       = 1,
    FIELD_STRING    = 2,
    FIELD_TEXT      = 3,
    FIELD_IP        = 4,
    FIELD_LNG       = 5,
    FIELD_LAT       = 6,
    FIELD_GIS       = 7,
    FIELD_DISTANCE  = 8,
    FIELD_DOUBLE    = 9,
    FIELD_LONG      = 10,
    FIELD_INDEX     = 11,
    FIELD_LNG_ARRAY = 12,
    FIELD_LAT_ARRAY = 13,
    FIELD_WKT       = 14
};
|
||||
|
||||
// Segmentation mode of a field (table_info::segment_tag).
enum SEGMENTTAG {
    SEGMENT_DEFAULT = 1,
    SEGMENT_NGRAM   = 2,
    SEGMENT_CHINESE = 3,
    SEGMENT_ENGLISH = 4,
    SEGMENT_RANGE   = 5
};
|
||||
|
||||
// Extra segmentation feature of a field (table_info::segment_feature).
enum SegmentFeature
{
    // Default: only prefix fuzzy matching is supported.
    SEGMENT_FEATURE_DEFAULT   = 0,
    // Fuzzy matching at any position is supported.
    SEGMENT_FEATURE_ALLLOCATE = 1,
    // The `extend` value of this field's inverted-index entries must carry
    // the snapshot information.
    SEGMENT_FEATURE_SNAPSHOT  = 2
};
|
||||
|
||||
// Command codes.
enum CmdType {
    CMD_INDEX_GEN    = 106,
    CMD_TOP_INDEX    = 107,
    CMD_SNAPSHOT     = 108,
    CMD_IMAGE_REPORT = 109
};
|
||||
|
||||
// Return codes. The 10000 block names commands, the 20001 block names errors;
// every value is written out so a numeric code can be looked up directly.
enum RetCode {
    RT_CMD_ADD                    = 10000,
    RT_CMD_UPDATE                 = 10001,
    RT_CMD_GET                    = 10002,
    RT_CMD_DELETE                 = 10003,

    RT_PARSE_JSON_ERR             = 20001,
    RT_PARSE_CONF_ERR             = 20002,
    RT_INIT_ERR                   = 20003,
    RT_NO_TABLE_CONTENT           = 20004,
    RT_NO_FIELD_COUNT             = 20005,
    RT_NO_APPID                   = 20006,
    RT_NO_DOCID                   = 20007,
    RT_NO_GIS_DEFINE              = 20008,
    RT_ERROR_FIELD_COUNT          = 20009,
    RT_ERROR_FIELD_CMD            = 20010,
    RT_ERROR_FIELD                = 20011,
    RT_ERROR_SERVICE_TYPE         = 20012,
    RT_ERROR_GET_SNAPSHOT         = 20013,
    RT_ERROR_DELETE_SNAPSHOT      = 20014,
    RT_ERROR_UPDATE_SNAPSHOT      = 20015,
    RT_ERROR_INSERT_SNAPSHOT      = 20016,
    RT_ERROR_INSERT_TOP_INDEX_DTC = 20017,
    RT_ERROR_INSERT_INDEX_DTC     = 20018,
    RT_ERROR_INVALID_SP_WORD      = 20019,
    RT_ERROR_FIELD_FORMAT         = 20020,
    RT_ERROR_GET_GISCODE          = 20021,
    RT_NO_THIS_DOC                = 20022,
    RT_UPDATE_SNAPSHOT_CONFLICT   = 20023,
    RT_ERROR_INDEX_READONLY       = 20024
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,52 @@
|
|||
{
|
||||
"program_name" : "index_write v1.0",
|
||||
"pid_file" : "index_write.pid",
|
||||
"log" : "../log/",
|
||||
"log_level" : 7,
|
||||
"daemon": true,
|
||||
"listen_addr": "*:11017/tcp",
|
||||
"timeout": 6000,
|
||||
"stop_words_path":"../conf/stop_words.dict",
|
||||
"training_path":"../conf/msr_training.utf8",
|
||||
"words_base_path":"../conf/words_base.dict",
|
||||
"words_file":"../conf/words_base.txt",
|
||||
"character_path":"../conf/character_map.txt",
|
||||
"phonetic_path":"../conf/phonetic_map.txt",
|
||||
"phonetic_base_file" : "../conf/phonetic_base.txt",
|
||||
"service_type":"index_gen",
|
||||
"dtc_index_config" :
|
||||
{
|
||||
"table_name": "keyword_index_data",
|
||||
"accesskey": "000020942f22577f38c66a20d8cb8ba30cbb3d75",
|
||||
"timeout": 4000,
|
||||
"keytype": 4,
|
||||
"route":
|
||||
[
|
||||
{
|
||||
"ip": "127.0.0.1",
|
||||
"bid": 2094,
|
||||
"port": 20000,
|
||||
"weight": 1,
|
||||
"status": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"dtc_intelligent_config" :
|
||||
{
|
||||
"table_name": "hanpin_index_data",
|
||||
"accesskey": "000020915b27ecebfbb0dfa6e4cf32397c2bf7be",
|
||||
"timeout": 4000,
|
||||
"keytype": 4,
|
||||
"route":
|
||||
[
|
||||
{
|
||||
"ip": "127.0.0.1",
|
||||
"bid": 2091,
|
||||
"port": 20001,
|
||||
"weight": 1,
|
||||
"status": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"split_mode":"Post"
|
||||
}
|
|
@ -0,0 +1,434 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: dtc_tools.cc
|
||||
*
|
||||
* Description: DTCTools class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "dtc_tools.h"
|
||||
#include "split_manager.h"
|
||||
#include "log.h"
|
||||
#include "comm.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string.h>
|
||||
|
||||
// Table of 23 short consonant strings (presumably the pinyin initials --
// confirm against the code that consumes it).
string initial_table[] = {
    "b", "p", "m", "f",
    "d", "t", "n", "l",
    "g", "k", "h",
    "j", "q", "x",
    "zh", "ch", "sh", "r",
    "z", "c", "s",
    "y", "w"
};
|
||||
|
||||
// Configures a DTC server group from `dtc_config`: table name, route list,
// timeout, access key and key type, in that order.
// Returns 0 on success. On failure the error is printed to stdout and the
// non-zero code of the failing DTC call is returned; -1 is returned when a
// configured route address does not fit into DTC::ROUTE_NODE::ip.
int DTCTools::init_servers(DTC::DTCServers &servers, SDTCHost &dtc_config)
{
    int ret = 0;
    ret = servers.SetTableName(dtc_config.szTablename.c_str());
    if (0 != ret)
    {
        cout << "SetTableName error !\n";
        return ret;
    }

    std::vector<DTC::ROUTE_NODE> list;
    for(std::vector<SDTCroute>::const_iterator route_elem = dtc_config.vecRoute.begin(); route_elem != dtc_config.vecRoute.end(); route_elem++)
    {
        DTC::ROUTE_NODE route;
        route.bid = route_elem->uBid;
        route.port = route_elem->uPort;
        route.status = route_elem->uStatus;
        route.weight = route_elem->uWeight;

        // route.ip is a fixed-size character buffer: refuse addresses that
        // would not fit (including the terminating NUL) instead of writing
        // past its end.
        const char *ip = route_elem->szIpadrr.c_str();
        const size_t ip_len = strlen(ip);
        if (ip_len >= sizeof(route.ip))
        {
            cout << "route ip is too long !\n";
            return -1;
        }
        memcpy(route.ip, ip, ip_len);
        route.ip[ip_len] = '\0';
        list.push_back(route);
    }

    ret = servers.SetRouteList(list);
    if (0 != ret) {
        cout << "SetRouteList error!\n";
        return ret;
    }

    servers.SetMTimeout(dtc_config.uTimeout);
    ret = servers.SetAccessKey(dtc_config.szAccesskey.c_str());
    if (0 != ret)
    {
        cout << "SetAccessKey error !\n";
        return ret;
    }

    ret = servers.SetKeyType(dtc_config.uKeytype);
    if (0 != ret)
    {
        cout << "SetKeyType error !\n";
        return ret;
    }
    return ret;
}
|
||||
|
||||
// Configures a single DTC server handle: table name, address, timeout and
// key kind (integer key for key type 1 or 2, string key otherwise).
// Returns the result of SetTableName(); when that is non-zero nothing else
// is configured.
int DTCTools::init_dtc_server(DTC::Server &server, const char *ip_str, const char *dtc_port, SDTCHost &dtc_config)
{
    const int rc = server.SetTableName(dtc_config.szTablename.c_str());
    if (rc != 0)
    {
        cout << "SetTableName error !\n";
        return rc;
    }

    server.SetAddress(ip_str, dtc_port);
    server.SetMTimeout(dtc_config.uTimeout);

    const bool use_int_key = (dtc_config.uKeytype == 1) || (dtc_config.uKeytype == 2);
    if (use_int_key)
    {
        server.IntKey();
    }
    else
    {
        server.StringKey();
    }
    return rc;
}
|
||||
|
||||
bool DTCTools::insert_dtc_server(u_int64_t ip_port_key,const char *ip_str,const char *port_str,SDTCHost &dtc_config){
|
||||
DTC::Server s;
|
||||
init_dtc_server(s,ip_str,port_str,dtc_config);
|
||||
dtc_handle.insert(make_pair(ip_port_key,s));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Shared formatter for the gen_dtc_key_string() overloads:
// "<appid>#<type>#<key>", with `key` written by its stream operator.
template <typename KeyT>
static string join_dtc_key(uint32_t appid, const string &type, const KeyT &key) {
    ostringstream joined;
    joined << appid << '#' << type << '#' << key;
    return joined.str();
}

// Builds the DTC key "<appid>#<type>#<key>" for a string key.
string gen_dtc_key_string(uint32_t appid, string type, string key) {
    return join_dtc_key(appid, type, key);
}

// Same, for an unsigned 32-bit key (decimal).
string gen_dtc_key_string(uint32_t appid, string type, uint32_t key) {
    return join_dtc_key(appid, type, key);
}

// Same, for a signed 64-bit key (decimal).
string gen_dtc_key_string(uint32_t appid, string type, int64_t key) {
    return join_dtc_key(appid, type, key);
}

// Same, for a floating point key (default stream formatting).
string gen_dtc_key_string(uint32_t appid, string type, double key) {
    return join_dtc_key(appid, type, key);
}
|
||||
|
||||
// True when `ch` is one of the letters "aeiouv". Because strchr() is used,
// the NUL character matches as well.
static bool pinyin_is_vowel(char ch) {
    return strchr("aeiouv", ch) != NULL;
}

// If pinyin[pos] is 'z', 'c' or 's' and the next character is 'h', writes
// that 'h' to `out` and moves `pos` onto it.
static void pinyin_take_h(const string &pinyin, int &pos, stringstream &out) {
    if (pos + 1 >= (int)pinyin.size()) {
        return;
    }
    const char cur = pinyin.at(pos);
    if ((cur == 'z' || cur == 'c' || cur == 's') && pinyin.at(pos + 1) == 'h') {
        out << 'h';
        pos++;
    }
}

// Inserts spaces into a run of pinyin letters, writing the result to
// `split_str`.
//   - vowels are copied unchanged;
//   - a consonant other than 'n' starts a new part (a space is put before
//     it unless it is the first character); "zh"/"ch"/"sh" stay together;
//   - 'n' (and "ng") is attached to the part on its left when a consonant
//     follows, and starts a new part when a vowel follows.
void split_func(string pinyin, string &split_str) {
    stringstream out;
    const int len = (int)pinyin.size();

    for (int pos = 0; pos < len; ++pos) {
        const char cur = pinyin.at(pos);

        if (pinyin_is_vowel(cur)) {
            out << cur;
            continue;
        }

        if (cur != 'n') {
            // Ordinary consonant: split in front of it.
            if (pos != 0) {
                out << ' ';
            }
            out << cur;
            pinyin_take_h(pinyin, pos, out);
            continue;
        }

        // cur == 'n'
        if (pos == len - 1) {
            out << cur;
            continue;
        }

        const char next = pinyin.at(++pos);
        if (pinyin_is_vowel(next)) {
            // "n" + vowel: split in front of the 'n'.
            if (pos != 1) {
                out << ' ';
            }
            out << 'n' << next;
            continue;
        }

        if (next != 'g') {
            // 'n' followed by another consonant: split after the 'n'.
            out << 'n' << ' ' << next;
            pinyin_take_h(pinyin, pos, out);
            continue;
        }

        // "ng"
        if (pos == len - 1) {
            out << 'n' << next;
            continue;
        }

        const char after = pinyin.at(++pos);
        if (pinyin_is_vowel(after)) {
            // "ng" + vowel: the 'g' belongs to the following part.
            out << 'n' << ' ' << 'g' << after;
        } else {
            // "ng" + consonant: split after the "ng".
            out << 'n' << 'g' << ' ' << after;
            pinyin_take_h(pinyin, pos, out);
        }
    }

    split_str = out.str();
}
|
||||
|
||||
void convert_intelligent_alpha_num(const vector<Content> &result, vector<IntelligentInfo> &info_vec, bool &flag) {
|
||||
int i = 0;
|
||||
flag = true;
|
||||
IntelligentInfo basic_info;
|
||||
vector<Content>::const_iterator content_iter = result.begin();
|
||||
for (; content_iter != result.end(); content_iter++, i++) {
|
||||
if (i >= 16) {
|
||||
log_info("content length[%d] must be less than 16", (int)result.size());
|
||||
break;
|
||||
}
|
||||
basic_info.initial_char[i] = ((*content_iter).str)[0];
|
||||
}
|
||||
info_vec.push_back(basic_info);
|
||||
}
|
||||
|
||||
void convert_intelligent(const vector<Content> &result, vector<IntelligentInfo> &info_vec, bool &flag) {
|
||||
int i = 0;
|
||||
flag = true;
|
||||
IntelligentInfo basic_info;
|
||||
vector<vector<string> > phonetic_id_vecs;
|
||||
vector<uint32_t> length_vec;
|
||||
vector<Content>::const_iterator content_iter = result.begin();
|
||||
for (; content_iter != result.end(); content_iter++, i++) {
|
||||
if (i >= 8) {
|
||||
log_info("content length[%d] must be less than 8", (int)result.size());
|
||||
break;
|
||||
}
|
||||
uint32_t charact_id = 0;
|
||||
uint32_t phonetic_id = 0;
|
||||
vector<string> phonetic_id_vec;
|
||||
if ((*content_iter).type == CHINESE) { // 查找字id
|
||||
SplitManager::Instance()->GetCharactId((*content_iter).str, charact_id);
|
||||
basic_info.charact_id[i] = charact_id;
|
||||
vector<string> vec = SplitManager::Instance()->GetPhonetic((*content_iter).str);
|
||||
if (vec.size() == 1) {
|
||||
phonetic_id_vec.push_back(vec[0]);
|
||||
}
|
||||
else if (vec.size() > 1) { // 多音字
|
||||
int j = 0;
|
||||
for (; j < (int)vec.size(); j++) {
|
||||
SplitManager::Instance()->GetPhoneticId(vec[j], phonetic_id);
|
||||
phonetic_id_vec.push_back(vec[j]);
|
||||
}
|
||||
}
|
||||
phonetic_id_vecs.push_back(phonetic_id_vec);
|
||||
length_vec.push_back(phonetic_id_vec.size());
|
||||
}
|
||||
else {
|
||||
basic_info.initial_char[i] = (*content_iter).str[0];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* 以下为计算不定长数组取值交叉遍历组合生成的算法
|
||||
* 如词语 重传 会生成以下4种组合
|
||||
* chongchuan chongzhuan zhongchuan zhongzhuan
|
||||
* 由于词语本身长度不固定,每个字对应的多音字数量不固定,因此采用该方法
|
||||
* 示例:
|
||||
* int factor[3][4] =
|
||||
* {
|
||||
* {0, 1, 2, 3},
|
||||
* {0, 1},
|
||||
* {0, 1, 2},
|
||||
* };
|
||||
* 将位置3x2x4的24种组合理解为[0-2] [0-1] [0-3]的三个方框的组合方式,把每个方框看成一
|
||||
* 位的话,那个方框就使用了一个固定的进制,所以0 - 23 之间的值都可以用三个位表示,
|
||||
* 每一位就代表在每个方框中的取值,也即在二维数组中的位置。
|
||||
* 而0 - 23这24个值恰好覆盖了三个方框所有种组合,所以用这种多进制组合位的方式可以实现多组值的交叉遍历。
|
||||
*/
|
||||
i = 0;
|
||||
int j = 0;
|
||||
int k = 0;
|
||||
int len = 0;
|
||||
int len_num = 0;
|
||||
int totalLength = 1;
|
||||
uint32_t phonetic_id = 0;
|
||||
int colum = phonetic_id_vecs.size();
|
||||
for (i = 0; i < colum; i++)
|
||||
{
|
||||
totalLength *= length_vec[i];
|
||||
}
|
||||
for (i = 0; i < totalLength; i++) {
|
||||
k = i;
|
||||
len_num = 0;
|
||||
IntelligentInfo info = basic_info;
|
||||
for (j = 0; j < colum; j++) {
|
||||
len = length_vec[len_num];
|
||||
string phonetic = phonetic_id_vecs[j][k % len];
|
||||
SplitManager::Instance()->GetPhoneticId(phonetic, phonetic_id);
|
||||
info.phonetic_id[j] = phonetic_id;
|
||||
if (phonetic.size() > 1) {
|
||||
info.initial_char[j] = phonetic[0];
|
||||
}
|
||||
k = k / len;
|
||||
len_num++;
|
||||
}
|
||||
info_vec.push_back(info);
|
||||
}
|
||||
if (info_vec.size() == 0 && phonetic_id_vecs.size() == 0) {
|
||||
info_vec.push_back(basic_info);
|
||||
}
|
||||
}
|
||||
|
||||
// Splits `str` into per-character Content entries and converts them to
// IntelligentInfo records appended to info_vec.
//
//   * When noChinese(str) holds, every byte becomes an INITIAL entry and the
//     list goes through convert_intelligent_alpha_num().
//   * Otherwise the string is walked one UTF-8 character at a time; characters
//     longer than one byte are tagged CHINESE, the rest INITIAL, and the list
//     goes through convert_intelligent().
//
// `flag` is set by the converter that is called.
void get_intelligent(string str, vector<IntelligentInfo> &info_vec, bool &flag) {
    vector<Content> pieces;

    if (noChinese(str)) {
        for (int i = 0; i < (int)str.length(); i++) {
            Content piece;
            piece.str = str[i];
            piece.type = INITIAL;
            pieces.push_back(piece);
        }
        convert_intelligent_alpha_num(pieces, info_vec, flag);
        return;
    }

    // Decoding is only needed on this path, so it is done here rather than up front.
    iutf8string utf8_str(str);
    for (int i = 0; i < utf8_str.length(); i++) {
        Content piece;
        piece.type = (utf8_str[i].size() > 1) ? CHINESE : INITIAL;
        piece.str = utf8_str[i];
        pieces.push_back(piece);
    }
    convert_intelligent(pieces, info_vec, flag);
}
|
||||
|
||||
// Returns true when `str` has as many UTF-8 characters as bytes, i.e. when
// every character is encoded in a single byte.
bool noChinese(string str) {
    iutf8string decoded(str);
    return decoded.length() == (int)str.length();
}
|
||||
|
||||
// Returns false as soon as one UTF-8 character of `str` is a single byte long;
// returns true otherwise (including for an empty string).
bool allChinese(string str) {
    iutf8string decoded(str);
    int pos = 0;
    while (pos < decoded.length()) {
        if (decoded[pos].length() == 1) {
            return false;
        }
        ++pos;
    }
    return true;
}
|
||||
|
||||
/*
** Builds every combination obtained by picking one string from each inner
** vector, joined with '_' in dimension order.
**   input : [[a],[b1,b2],[c1,c2,c3]]
**   output: [a_b1_c1,a_b1_c2,a_b1_c3,a_b2_c1,a_b2_c2,a_b2_c3]
**
** The earlier dimension varies slowest. A single dimension is returned as is.
** An empty outer vector yields an empty result, and so does any empty inner
** vector once there are two or more dimensions.
*/
std::vector<std::string> combination(std::vector<std::vector<std::string> > &dimensionalArr){
    std::vector<std::string> joined;
    if (dimensionalArr.empty()) {
        // Nothing to combine; indexing element 0 here would be out of range.
        return joined;
    }

    joined = dimensionalArr[0];
    for (size_t dim = 1; dim < dimensionalArr.size(); ++dim) {
        const std::vector<std::string> &choices = dimensionalArr[dim];

        // Fold the next dimension into the combinations built so far.
        std::vector<std::string> widened;
        widened.reserve(joined.size() * choices.size());
        for (size_t a = 0; a < joined.size(); ++a) {
            for (size_t b = 0; b < choices.size(); ++b) {
                widened.push_back(joined[a] + "_" + choices[b]);
            }
        }
        joined.swap(widened);
    }
    return joined;
}
|
||||
|
||||
// Splits `src` on `separate_character` (which may be longer than one
// character, e.g. ",,") and converts every token with atoi().
//
//   * A token that is exactly " " is skipped.
//   * An empty token between two separators is kept and converts to 0.
//   * The text after the last separator is skipped when it is empty or " ".
//   * An empty separator performs no splitting: the whole string is treated as
//     the final token (searching for an empty pattern would otherwise match at
//     every position without ever advancing).
std::vector<int> splitInt(const std::string& src, std::string separate_character)
{
    std::vector<int> numbers;
    const std::string::size_type sep_len = separate_character.size();
    std::string::size_type token_start = 0;

    if (sep_len > 0) {
        std::string::size_type hit;
        while ((hit = src.find(separate_character, token_start)) != std::string::npos) {
            const std::string token = src.substr(token_start, hit - token_start);
            if (token != " ") {
                numbers.push_back(atoi(token.c_str()));
            }
            token_start = hit + sep_len;
        }
    }

    // Whatever follows the last separator.
    const std::string tail = src.substr(token_start);
    if (!tail.empty() && tail != " ") {
        numbers.push_back(atoi(tail.c_str()));
    }
    return numbers;
}
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: dtc_tools.h
|
||||
*
|
||||
* Description: DTCTools class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef SRC_INDEX_GEN_DTC_TOOLS_H_
|
||||
#define SRC_INDEX_GEN_DTC_TOOLS_H_
|
||||
#include "index_conf.h"
|
||||
#include "dtcapi.h"
|
||||
#include <map>
|
||||
|
||||
class DTCTools{
|
||||
public:
|
||||
DTCTools(){
|
||||
|
||||
}
|
||||
static DTCTools *Instance()
|
||||
{
|
||||
return CSingleton<DTCTools>::Instance();
|
||||
}
|
||||
|
||||
static void Destroy()
|
||||
{
|
||||
CSingleton<DTCTools>::Destroy();
|
||||
}
|
||||
int init_servers(DTC::DTCServers &servers, SDTCHost &dtc_config);
|
||||
int init_dtc_server(DTC::Server &server, const char *ip_str, const char *dtc_port,SDTCHost &dtc_config);
|
||||
DTC::Server *find_dtc_server(u_int64_t ip_port_key){
|
||||
if(dtc_handle.find(ip_port_key) != dtc_handle.end()){
|
||||
return &dtc_handle[ip_port_key];
|
||||
}else{
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
bool insert_dtc_server(u_int64_t ip_port_key, const char *ip_str, const char *port_str, SDTCHost &dtc_config);
|
||||
private:
|
||||
map<u_int64_t, DTC::Server> dtc_handle;
|
||||
};
|
||||
|
||||
// Per-word record: up to 8 character ids, up to 8 phonetic ids and up to 16
// initial strings. A new record has all ids zero and all strings empty.
struct IntelligentInfo {
    IntelligentInfo() {
        // Both id arrays have the same length, so one pass zeroes them.
        for (int slot = 0; slot < 8; ++slot) {
            charact_id[slot] = 0;
            phonetic_id[slot] = 0;
        }
        // initial_char needs no work: its elements are default-constructed empty.
    }

    uint16_t charact_id[8];
    uint16_t phonetic_id[8];
    std::string initial_char[16];
};
|
||||
|
||||
// One character of the text being analysed.
struct Content {
    uint32_t type;      // category tag; the visible code stores CHINESE or INITIAL here
    std::string str;    // the bytes of the character
};
|
||||
|
||||
|
||||
string gen_dtc_key_string(uint32_t appid, string type, string key);
|
||||
string gen_dtc_key_string(uint32_t appid, string type, uint32_t key);
|
||||
string gen_dtc_key_string(uint32_t appid, string type, int64_t key);
|
||||
string gen_dtc_key_string(uint32_t appid, string type, double key);
|
||||
void split_func(string pinyin, string &split_str);
|
||||
void get_intelligent(string str, vector<IntelligentInfo> &info_vec, bool &flag);
|
||||
void convert_intelligent(const vector<Content> &result, vector<IntelligentInfo> &info_vec, bool &flag);
|
||||
void convert_intelligent_alpha_num(const vector<Content> &result, vector<IntelligentInfo> &info_vec, bool &flag);
|
||||
vector<string> combination(vector<vector<string> > &dimensionalArr);
|
||||
vector<int> splitInt(const string& src, string separate_character);
|
||||
bool noChinese(string str);
|
||||
bool allChinese(string str);
|
||||
|
||||
#endif /* SRC_INDEX_GEN_DTC_TOOLS_H_ */
|
|
@ -0,0 +1,474 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: image_service.cc
|
||||
*
|
||||
* Description: CTaskImage class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "image_service.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "log.h"
|
||||
#include "poll_thread.h"
|
||||
#include "task_request.h"
|
||||
#include "dtc_tools.h"
|
||||
#include "comm.h"
|
||||
#include "index_clipping.h"
|
||||
#include "monitor.h"
|
||||
#include "chash.h"
|
||||
|
||||
// Hands the owning poll thread to the dispatcher base class and to the output
// channel, and remembers it in ownerThread.
CTaskImage::CTaskImage(CPollThread * o)
    : CTaskDispatcher<CTaskRequest>(o), ownerThread(o), output(o)
{
}
|
||||
|
||||
// Nothing to release explicitly; members clean up after themselves.
CTaskImage::~CTaskImage() {}
|
||||
|
||||
int CTaskImage::insert_snapshot_dtc(const UserTableContent &fields,int &doc_version,Json::Value &res){
|
||||
int ret;
|
||||
DTC::Server* dtc_server = index_servers.GetServer();
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
DTC::InsertRequest insertReq(dtc_server);
|
||||
insertReq.SetKey(gen_dtc_key_string(fields.appid, "10", fields.doc_id).c_str());
|
||||
insertReq.Set("doc_id", fields.doc_id.c_str());
|
||||
insertReq.Set("doc_version", doc_version);
|
||||
insertReq.Set("extend", fields.content.c_str());
|
||||
insertReq.Set("field", fields.top);
|
||||
insertReq.Set("weight", fields.weight);
|
||||
insertReq.Set("created_time", fields.publish_time);
|
||||
insertReq.Set("word_freq", 0);
|
||||
insertReq.Set("location", "");
|
||||
insertReq.Set("start_time", 0);
|
||||
insertReq.Set("end_time", 0);
|
||||
DTC::Result rst;
|
||||
ret = insertReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
|
||||
log_error("insert request error! ,errno %d ,errmsg %s, errfrom %s\n", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return -1;
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Deletes the snapshot row stored under "<appid>#10#<doc_id>".
//
// Returns 0 on success, RT_ERROR_GET_SNAPSHOT when no server handle is
// available, and RT_ERROR_DELETE_SNAPSHOT when the request fails (logged).
// `res` is not written by this function.
int CTaskImage::delete_snapshot_dtc(string &doc_id,uint32_t appid,Json::Value &res){
    DTC::Server* snapshot_server = index_servers.GetServer();
    if(snapshot_server == NULL){
        log_error("snapshot server connect error!");
        return RT_ERROR_GET_SNAPSHOT;
    }

    const string snapshot_key = gen_dtc_key_string(appid, "10", doc_id);

    DTC::DeleteRequest request(snapshot_server);
    request.SetKey(snapshot_key.c_str());

    DTC::Result outcome;
    int status = request.Execute(outcome);
    if (status == 0) {
        return 0;
    }
    log_error("delete request error! ,errno %d ,errmsg %s, errfrom %s\n", status, outcome.ErrorMessage(), outcome.ErrorFrom());
    return RT_ERROR_DELETE_SNAPSHOT;
}
|
||||
|
||||
|
||||
// Runs one get request for the doc_version column of the snapshot row
// "<appid>#10#<doc_id>" and returns the status reported by Execute().
// Only the Execute() status is returned; SetKey()/Need() results are not checked.
static int get_snapshot_execute(DTC::Server* dtc_server,const UserTableContent &fields,DTC::Result &rst){
    const string snapshot_key = gen_dtc_key_string(fields.appid, "10", fields.doc_id);

    DTC::GetRequest query(dtc_server);
    query.SetKey(snapshot_key.c_str());
    query.Need("doc_version");
    return query.Execute(rst);
}
|
||||
|
||||
int CTaskImage::get_snapshot_active_doc(const UserTableContent &fields,int &doc_version,Json::Value &res){
|
||||
int ret;
|
||||
DTC::Server* dtc_server = index_servers.GetServer();
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
DTC::Result rst;
|
||||
ret = get_snapshot_execute(dtc_server,fields,rst);
|
||||
if (ret != 0) {
|
||||
if (ret == -110) {
|
||||
rst.Reset();
|
||||
ret = get_snapshot_execute(dtc_server,fields,rst);
|
||||
if (ret != 0) {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
}
|
||||
else {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
}
|
||||
int cnt = rst.NumRows();
|
||||
struct index_item item;
|
||||
if (rst.NumRows() <= 0) {
|
||||
return RT_NO_THIS_DOC;
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < cnt; i++) {
|
||||
rst.FetchRow();
|
||||
doc_version = rst.IntValue("doc_version");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Inserts one inverted-index row for `key`.
//
// The word positions in it.indexs are serialised into the "location" column as
// a bracketed, comma-separated list such as "[1,4,9]". Only the first 26
// positions are written. An empty position list is written as "[]".
// Returns the status reported by Execute().
static int insert_index_execute(DTC::Server* dtcServer,string key,struct item &it,u_int8_t field_type,int doc_version,DTC::Result &rst){
    // Write the separator before every element except the first, so there is
    // no trailing comma to strip and the empty case stays well-formed.
    stringstream positions;
    positions << "[";
    int written = 0;
    for (vector<uint32_t>::iterator pos = it.indexs.begin(); pos != it.indexs.end() && written < 26; ++pos, ++written) {
        if (written > 0) {
            positions << ",";
        }
        positions << *pos;
    }
    positions << "]";
    const string location = positions.str();

    DTC::InsertRequest insertReq(dtcServer);
    insertReq.SetKey(key.c_str());
    insertReq.Set("doc_id", it.doc_id.c_str());
    insertReq.Set("field", field_type);
    insertReq.Set("word_freq", it.freq);
    insertReq.Set("weight", 1);
    insertReq.Set("extend","");
    insertReq.Set("doc_version",doc_version);
    insertReq.Set("created_time",time(NULL));
    insertReq.Set("location", location.c_str());
    return insertReq.Execute(rst);
}
|
||||
|
||||
// Inserts one index row through insert_index_execute().
//
// Before the insert, the server's access key is set to the document id
// right-aligned in a 40-character field (longer ids are cut to 40 characters).
// Returns 0 on success; on failure the error is logged, the DTC error message
// is stored in res[MESSAGE], and -1 is returned.
int CTaskImage::insert_index_dtc(DTC::Server* dtcServer,string key,struct item &it,u_int8_t field_type,int doc_version,Json::Value &res){
    char access_key[41];
    snprintf(access_key, sizeof(access_key), "%40s", it.doc_id.c_str());
    dtcServer->SetAccessKey(access_key);

    DTC::Result outcome;
    int status = insert_index_execute(dtcServer,key,it,field_type,doc_version,outcome);
    if (status == 0) {
        return 0;
    }

    log_error("insert request error! ,errno %d ,errmsg %s, errfrom %s\n", status,outcome.ErrorMessage(), outcome.ErrorFrom());
    res[MESSAGE] = outcome.ErrorMessage();
    return -1;
}
|
||||
|
||||
// Writes one index row per word, first for every entry of word_map and then
// for every entry of title_map. Both maps use the key "<app_id>#00#<word>" and
// field type 3.
// Stops at the first failed insert and returns RT_ERROR_INSERT_INDEX_DTC;
// returns 0 when every row was written.
int CTaskImage::do_insert_index(DTC::Server* dtcServer, map<string, item> &word_map, map<string, item> &title_map,uint64_t app_id,int doc_version,Json::Value &res) {
    map<string, item> *batches[2] = { &word_map, &title_map };

    for (int b = 0; b < 2; b++) {
        map<string, item> &batch = *batches[b];
        for (map<string, item>::iterator entry = batch.begin(); entry != batch.end(); ++entry) {
            const string key = gen_dtc_key_string(app_id, "00", entry->first);
            if (insert_index_dtc(dtcServer,key,entry->second,3,doc_version,res) != 0) {
                return RT_ERROR_INSERT_INDEX_DTC;
            }
        }
    }
    return 0;
}
|
||||
|
||||
int CTaskImage::pre_process(void){
|
||||
DTCTools *dtc_tools = DTCTools::Instance();
|
||||
dtc_tools->init_servers(index_servers,IndexConf::Instance()->GetDTCIndexConfig());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CTaskImage::do_stat_word_freq(vector<vector<string> > &strss, string &doc_id, uint32_t appid, map<string, item> &word_map,Json::Value &res) {
|
||||
string word;
|
||||
uint32_t id = 0;
|
||||
ostringstream oss;
|
||||
vector<vector<string> >::iterator iters = strss.begin();
|
||||
uint32_t index = 0;
|
||||
|
||||
for(;iters != strss.end(); iters++){
|
||||
index++;
|
||||
vector<string>::iterator iter = iters->begin();
|
||||
|
||||
log_debug("start do_stat_word_freq, appid = %u\n",appid);
|
||||
for (; iter != iters->end(); iter++) {
|
||||
|
||||
word = *iter;
|
||||
if (!SplitManager::Instance()->wordValid(word, appid, id)){
|
||||
continue;
|
||||
}
|
||||
log_debug("id == %u\n",id);
|
||||
if (word_map.find(word) == word_map.end()) {
|
||||
item it;
|
||||
it.doc_id = doc_id;
|
||||
it.freq = 1;
|
||||
it.indexs.push_back(index);
|
||||
word_map.insert(make_pair(word, it));
|
||||
}
|
||||
else {
|
||||
word_map[word].freq++;
|
||||
word_map[word].indexs.push_back(index);
|
||||
}
|
||||
|
||||
oss << (*iter) << "|";
|
||||
}
|
||||
}
|
||||
log_debug("split: %s",oss.str().c_str());
|
||||
}
|
||||
|
||||
// Extracts the three mandatory top-level members of a request.
//
//   "table_content" (array)  -> subreq
//   "appid"         (int)    -> id
//   "fields_count"  (int)    -> count
//
// Members are checked in that order; the first one that is missing or has the
// wrong type yields RT_NO_TABLE_CONTENT, RT_NO_APPID or RT_NO_FIELD_COUNT.
// Outputs assigned before the failing check keep their new value.
// Returns 0 when all three are present.
static int decode_request(const Json::Value &req, Json::Value &subreq, uint32_t &id, uint32_t &count){
    if(!req.isMember("table_content") || !req["table_content"].isArray()){
        return RT_NO_TABLE_CONTENT;
    }
    subreq = req["table_content"];

    if(!req.isMember("appid") || !req["appid"].isInt()){
        return RT_NO_APPID;
    }
    id = req["appid"].asInt();

    if(!req.isMember("fields_count") || !req["fields_count"].isInt()){
        return RT_NO_FIELD_COUNT;
    }
    count = req["fields_count"].asInt();

    return 0;
}
|
||||
|
||||
int CTaskImage::update_sanpshot_dtc(const UserTableContent &fields,int doc_version,Json::Value &res){
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = index_servers.GetServer();
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
DTC::UpdateRequest updateReq(dtc_server);
|
||||
ret = updateReq.SetKey(gen_dtc_key_string(fields.appid, "00", fields.doc_id).c_str());
|
||||
updateReq.Set("doc_version", doc_version);
|
||||
if (fields.content.length() > 0)
|
||||
updateReq.Set("extend", fields.content.c_str());
|
||||
updateReq.Set("weight", fields.weight);
|
||||
updateReq.Set("created_time", fields.publish_time);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = updateReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("updateReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_UPDATE_SNAPSHOT;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Decodes one element of "table_content" into `fields`.
//
// Returns
//   RT_CMD_ADD          for cmd "add"/"update" with an object-valued "fields";
//   RT_CMD_DELETE       for cmd "delete" whose "fields" object has a string "doc_id";
//   RT_ERROR_FIELD_CMD  for any other cmd string;
//   0                   when "cmd" is missing or not a string, when "fields"
//                       is missing or not an object, or when a delete carries
//                       no string "doc_id".
//
// For add/update, "weight" defaults to 1 and "publish_time" to the current
// time when absent or not an int; the other members are copied only when
// present with the expected type.
static int decode_fields(Json::Value table_content,UserTableContent &fields){
    if(!table_content.isMember("cmd") || !table_content["cmd"].isString()){
        return 0;
    }
    const string cmd = table_content["cmd"].asString();

    if(cmd != "add" && cmd != "update" && cmd != "delete"){
        return RT_ERROR_FIELD_CMD;
    }

    // Both commands read members of "fields". Looking up a member of a
    // non-object value is a precondition violation in jsoncpp, so the type is
    // checked for delete as well as for add/update.
    if(!table_content.isMember("fields") || !table_content["fields"].isObject()){
        return 0;
    }
    Json::Value field = table_content["fields"];

    if(cmd == "delete"){
        if(field.isMember("doc_id") && field["doc_id"].isString()){
            fields.doc_id = field["doc_id"].asString();
            return RT_CMD_DELETE;
        }
        return 0;
    }

    // cmd is "add" or "update".
    if(field.isMember("doc_id") && field["doc_id"].isString()){
        fields.doc_id = field["doc_id"].asString();
    }
    if(field.isMember("title") && field["title"].isString()){
        fields.title = field["title"].asString();
    }
    if(field.isMember("content") && field["content"].isString()){
        fields.content = field["content"].asString();
    }
    if(field.isMember("author") && field["author"].isString()){
        fields.author = field["author"].asString();
    }
    if(field.isMember("url") && field["url"].isString()){
        // NOTE(review): "url" is stored in the description member — confirm this is intended.
        fields.description = field["url"].asString();
    }
    if(field.isMember("weight") && field["weight"].isInt()){
        fields.weight = field["weight"].asInt();
    }else{
        fields.weight = 1;
    }
    if(field.isMember("publish_time") && field["publish_time"].isInt()){
        fields.publish_time = field["publish_time"].asInt();
    }else{
        fields.publish_time = time(NULL);
    }
    return RT_CMD_ADD;
}
|
||||
|
||||
// Processes one decoded request: validates the envelope, then handles every
// element of "table_content" in order.
//
//   add/update : read the stored doc_version (a found document gets version
//                old + 1, a new one keeps 0), split content and title into
//                words, write the index rows, then update or insert the
//                snapshot row.
//   delete     : delete the snapshot row.
//   otherwise  : the element is skipped.
//
// Returns early with the error code when the envelope is invalid, the
// snapshot lookup fails, no index server is available, or an index row cannot
// be written. Otherwise returns the status left by the last element.
int CTaskImage::index_gen_process(Json::Value &req,Json::Value &res){
    uint32_t app_id = 0, fields_count = 0;
    Json::Value table_content;

    int ret = decode_request(req, table_content, app_id,fields_count);
    if(ret != 0){
        return ret;
    }
    if(fields_count == 0 || fields_count != table_content.size()){
        return RT_ERROR_FIELD_COUNT;
    }

    DTC::Server* dtcServer = index_servers.GetServer();
    for(int i = 0;i < (int)table_content.size();i++){
        UserTableContent fields(app_id);
        ret = decode_fields(table_content[i],fields);

        if(RT_CMD_DELETE == ret){
            ret = delete_snapshot_dtc(fields.doc_id,fields.appid,res);//not use the doc_version curr
            continue;
        }
        if(RT_CMD_ADD != ret){
            continue;
        }

        int doc_version = 0, old_version = 0;
        ret = get_snapshot_active_doc(fields,old_version,res);
        if(0 == ret){
            doc_version = ++old_version;
        }else if(ret != RT_NO_THIS_DOC){
            return ret;
        }

        // insert_index_dtc() dereferences the handle, so refuse to continue without one.
        if(NULL == dtcServer){
            log_error("index server connect error!");
            return RT_ERROR_GET_SNAPSHOT;
        }

        vector<vector<string> > split_content = SplitManager::Instance()->split(fields.content,fields.appid);
        vector<vector<string> > split_title = SplitManager::Instance()->split(fields.title,fields.appid);

        map<string, item> word_map;
        map<string, item> title_map;
        do_stat_word_freq(split_content, fields.doc_id,fields.appid, word_map,res);
        do_stat_word_freq(split_title, fields.doc_id, fields.appid, title_map,res);

        ret = do_insert_index(dtcServer, word_map, title_map,app_id,doc_version,res);
        if(0 != ret){
            return ret;
        }

        // NOTE(review): the snapshot write results are not propagated; a
        // failure here is only visible in the log written by the callee.
        if(doc_version != 0){//need update
            update_sanpshot_dtc(fields,doc_version,res);
        }else{
            insert_snapshot_dtc(fields,doc_version,res);//insert the snapshot doc
        }
    }

    return ret;
}
|
||||
|
||||
|
||||
// Entry point for one incoming task: checks the service command, parses the
// request body as JSON, runs index_gen_process() and replies with a JSON
// result whose "code" is 0 on success or the error code otherwise.
//
// A NULL task is only recorded as a profiler error. For every other task the
// reply is always sent, whatever the outcome.
void CTaskImage::TaskNotify(CTaskRequest * curr)
{
    log_debug("CTaskImage::TaskNotify start");
    common::CallerInfo caller_info = common::ProfilerMonitor::GetInstance().RegisterInfo(std::string("searchEngine.searchService.imageReportTask"));

    //there is a race condition here:
    //curr may be deleted during process (in task->ReplyNotify())
    CTaskRequest * task = curr;
    if(NULL == curr){
        common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
        common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
        return;
    }

    Json::Reader reader;
    Json::FastWriter writer;
    Json::Value value, res;
    std::string req;
    res["code"] = 0;

    if(SERVICE_PIC != task->GetReqCmd()){
        res["code"] = RT_ERROR_SERVICE_TYPE;
        res["reqcmd"] = task->GetReqCmd();
        res["message"] = "service type wrong! need 109";
    }else{
        req = task->buildRequsetString();
        log_debug("recv:%s\n",req.c_str());

        if(!reader.parse(req,value,false)){
            log_error("parse json error!\ndata:%s errors:%s\n",req.c_str(),reader.getFormattedErrorMessages().c_str());
            res["code"] = RT_PARSE_JSON_ERR;
            res["message"] = reader.getFormattedErrorMessages();
            res["data"] = req;
        }else if(!value.isObject()){
            log_error("parse json error!\ndata:%s errors:%s\n",req.c_str(),reader.getFormattedErrorMessages().c_str());
            res["code"] = RT_PARSE_JSON_ERR;
            res["message"] = "it's not a json";
            res["data"] = req;
        }else{
            int ret = index_gen_process(value,res);
            if(0 != ret){
                res["code"] = ret;
            }
        }
    }

    task->setResult(writer.write(res));
    task->ReplyNotify();

    common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
    return;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: image_service.h
|
||||
*
|
||||
* Description: CTaskImage class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef IMAGE_SERVICE_H_
|
||||
#define IMAGE_SERVICE_H_
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include "request_base.h"
|
||||
#include "index_conf.h"
|
||||
#include "dtcapi.h"
|
||||
#include "comm.h"
|
||||
#include "split_manager.h"
|
||||
using namespace std;
|
||||
|
||||
|
||||
class CPollThread;
|
||||
class CTaskRequest;
|
||||
class SplitManager;
|
||||
class DTCServers;
|
||||
|
||||
// Task dispatcher that turns an incoming request into index rows and a
// per-document snapshot row stored through DTC.
class CTaskImage : public CTaskDispatcher<CTaskRequest>
{
public:
    CTaskImage(CPollThread * o);
    virtual ~CTaskImage();

    // Handles one decoded JSON request; returns 0 or an error code.
    int index_gen_process(Json::Value &req,Json::Value &res);
    // Initialises index_servers from the configuration.
    int pre_process(void);

    // Forwards the downstream dispatcher to the output channel.
    inline void BindDispatcher(CTaskDispatcher<CTaskRequest> *p)
    {
        output.BindDispatcher(p);
    }
    // Entry point called for every incoming task.
    virtual void TaskNotify(CTaskRequest * curr);

private:
    // Index rows.
    int insert_index_dtc(DTC::Server* dtcServer,string key,struct item &it,u_int8_t field_type,int doc_version,Json::Value &res);
    int do_insert_index(DTC::Server* dtcServer, map<string, item> &word_map, map<string, item> &title_map,uint64_t app_id,int doc_version,Json::Value &res);
    void do_stat_word_freq(vector<vector<string> > &strss, string &doc_id, uint32_t appid, map<string, item> &word_map,Json::Value &res);

    // Snapshot rows.
    int get_snapshot_active_doc(const UserTableContent &fields,int &active,Json::Value &res);
    int delete_snapshot_dtc(string &doc_id,uint32_t appid,Json::Value &res);
    int insert_snapshot_dtc(const UserTableContent &fields,int &doc_version,Json::Value &res);
    int update_sanpshot_dtc(const UserTableContent &fields,int doc_version,Json::Value &res);

private:
    CPollThread * ownerThread;
    CRequestOutput<CTaskRequest> output;
    DTC::DTCServers index_servers;
};
|
||||
|
||||
|
||||
|
||||
#endif /* IMAGE_SERVICE_H_ */
|
|
@ -0,0 +1,274 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: index_clipping.cc
|
||||
*
|
||||
* Description: IndexClipping class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "log.h"
|
||||
#include "index_clipping.h"
|
||||
|
||||
// Remembers the server used for snapshot lookups and starts with an empty index set.
IndexClipping::IndexClipping(DTC::Server* server) {
    snapshot_server = server;
    indexSet.clear();
}
|
||||
|
||||
// Nothing to release explicitly.
IndexClipping::~IndexClipping() {}
|
||||
|
||||
// Runs one get request keyed by doc_id, filtered on doc_version, appid,
// active == 1 and top, and returns the status reported by Execute().
// Only the Execute() status is returned; SetKey()/EQ() results are not checked.
static int get_snapshot_execute(DTC::Server* dtc_server,string &doc_id,uint32_t appid,int doc_version,DTC::Result &rst,int top){
    DTC::GetRequest query(dtc_server);
    query.SetKey(doc_id.c_str());
    query.EQ("doc_version",doc_version);
    query.EQ("appid",appid);
    query.EQ("active",1);
    query.EQ("top",top);
    return query.Execute(rst);
}
|
||||
|
||||
// Tells whether a snapshot row matching doc_id / appid / doc_version / top
// (with active == 1) exists.
//
// Returns true when at least one row is found and false when none is found.
// When the lookup itself fails, the error is logged and true is returned, so
// that a failed lookup never causes the entry to be clipped.
// A request that returns -110 is retried exactly once.
bool IndexClipping::is_active_doc(string &doc_id,uint32_t appid,int doc_version,int top){
    DTC::Result rst;
    int ret = get_snapshot_execute(snapshot_server,doc_id,appid,doc_version,rst,top);
    if (ret == -110) {
        // NOTE(review): -110 presumably signals a timeout; confirm against the DTC API.
        rst.Reset();
        ret = get_snapshot_execute(snapshot_server,doc_id,appid,doc_version,rst,top);
    }
    if (ret != 0) {
        log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
        return true;//not clipping
    }

    return rst.NumRows() > 0;
}
|
||||
|
||||
bool IndexClipping::do_delete_index_dtc(DTC::Server* dtc_server, string key, const struct index_item& item){
|
||||
int ret = 0;
|
||||
|
||||
DTC::DeleteRequest deleteReq(dtc_server);
|
||||
ret = deleteReq.SetKey(key.c_str());
|
||||
ret = deleteReq.EQ("doc_id", item.doc_id.c_str());
|
||||
ret = deleteReq.EQ("created_time", item.created_time);
|
||||
ret = deleteReq.EQ("field", item.field);
|
||||
ret = deleteReq.EQ("doc_version",item.doc_version);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = deleteReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("delete request error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
log_debug("delete key = %s doc_id = %s doc_verson = %d field = %d createdtime = %d",key.c_str(),item.doc_id.c_str(),item.doc_version,item.field,item.created_time);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IndexClipping::do_delete_top_index_dtc(DTC::Server* dtc_server,string key, const struct index_item&item){
|
||||
int ret = 0;
|
||||
|
||||
DTC::DeleteRequest deleteReq(dtc_server);
|
||||
ret = deleteReq.SetKey(key.c_str());
|
||||
ret = deleteReq.EQ("doc_id", item.doc_id.c_str());
|
||||
ret = deleteReq.EQ("created_time", item.created_time);
|
||||
ret = deleteReq.EQ("doc_version",item.doc_version);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = deleteReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("delete request error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
log_debug("delete key = %s doc_id = %s doc_verson = %d createdtime = %d",key.c_str(),item.doc_id.c_str(),item.doc_version,item.created_time);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IndexClipping::do_index_clipping(DTC::Server* dtc_server,string key,uint32_t rows_limit){
|
||||
if(indexSet.size() <= ((rows_limit * 80) / 100)){
|
||||
indexSet.clear();
|
||||
return true;
|
||||
}
|
||||
uint64_t slipping_count = indexSet.size() - ((rows_limit * 80) / 100);
|
||||
set<struct index_item>::iterator it = indexSet.begin();
|
||||
for(uint count = 0;it != indexSet.end() && count < slipping_count;it++){
|
||||
if(!do_delete_index_dtc(dtc_server,key,*it))
|
||||
log_error("do delete dtc error!!");
|
||||
count ++;
|
||||
}
|
||||
indexSet.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IndexClipping::do_top_index_clipping(DTC::Server* dtc_server,string key,uint32_t rows_limit){
|
||||
if(indexSet.size() <= ((rows_limit * 80) / 100)){
|
||||
indexSet.clear();
|
||||
return true;
|
||||
}
|
||||
uint64_t slipping_count = indexSet.size() - ((rows_limit * 80) / 100);
|
||||
set<struct index_item>::iterator it = indexSet.begin();
|
||||
for(uint count = 0;it != indexSet.end() && count < slipping_count;it++){
|
||||
if(!do_delete_top_index_dtc(dtc_server,key,*it))
|
||||
log_error("do delete dtc error!!");
|
||||
count ++;
|
||||
}
|
||||
indexSet.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
static int get_index_dtc_execute(DTC::Server* dtc_server,string key, DTC::Result &rst){
|
||||
int ret = 0;
|
||||
DTC::GetRequest getReq(dtc_server);
|
||||
|
||||
ret = getReq.SetKey(key.c_str());
|
||||
|
||||
ret = getReq.Need("created_time");
|
||||
ret = getReq.Need("doc_id");
|
||||
ret = getReq.Need("field");
|
||||
ret = getReq.Need("word_freq");
|
||||
ret = getReq.Need("doc_version");
|
||||
ret = getReq.Execute(rst);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool IndexClipping::get_rows_and_index_clipping(DTC::Server* dtc_server,string key,uint32_t rows_limit){
|
||||
log_debug("get_rows_and_index_clipping start!");
|
||||
int ret;
|
||||
pair<set<struct index_item>::iterator,bool> ret_p;
|
||||
DTC::Result rst;
|
||||
ret = get_index_dtc_execute(dtc_server,key,rst);
|
||||
if (ret != 0) {
|
||||
if (ret == -110) {
|
||||
rst.Reset();
|
||||
ret = get_index_dtc_execute(dtc_server,key,rst);
|
||||
if (ret != 0) {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
int cnt = rst.NumRows();
|
||||
if (rst.NumRows() <= 0) {
|
||||
log_debug("no data in this node");
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < cnt; i++) {
|
||||
rst.FetchRow();
|
||||
struct index_item item;
|
||||
item.created_time = rst.IntValue("created_time");
|
||||
item.doc_id = rst.StringValue("doc_id");
|
||||
item.field = rst.IntValue("field");
|
||||
item.freq = rst.IntValue("word_freq");
|
||||
item.doc_version = rst.IntValue("doc_version");
|
||||
ret_p = indexSet.insert(item);
|
||||
if(ret_p.second == false){
|
||||
if(!do_delete_index_dtc(dtc_server,key,item))
|
||||
log_error("do delete dtc error!");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return do_index_clipping(dtc_server,key,rows_limit);
|
||||
}
|
||||
|
||||
static int get_top_index_dtc_execute(DTC::Server* dtc_server,string key, DTC::Result &rst){
|
||||
int ret = 0;
|
||||
DTC::GetRequest getReq(dtc_server);
|
||||
|
||||
ret = getReq.SetKey(key.c_str());
|
||||
|
||||
ret = getReq.Need("created_time");
|
||||
ret = getReq.Need("doc_id");
|
||||
ret = getReq.Need("doc_version");
|
||||
ret = getReq.Need("end_time");
|
||||
ret = getReq.Execute(rst);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool IndexClipping::get_rows_and_top_index_clipping(DTC::Server* dtc_server,string key,uint32_t rows_limit){
|
||||
log_debug("get_rows_and_top_index_clipping start!");
|
||||
int ret;
|
||||
time_t now_time = time(NULL);
|
||||
pair<set<struct index_item>::iterator,bool> ret_p;
|
||||
DTC::Result rst;
|
||||
ret = get_top_index_dtc_execute(dtc_server,key,rst);
|
||||
if (ret != 0) {
|
||||
if (ret == -110) {
|
||||
rst.Reset();
|
||||
ret = get_top_index_dtc_execute(dtc_server,key,rst);
|
||||
if (ret != 0) {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
int cnt = rst.NumRows();
|
||||
struct index_item item;
|
||||
if (rst.NumRows() <= 0) {
|
||||
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < cnt; i++) {
|
||||
rst.FetchRow();
|
||||
item.created_time = rst.IntValue("created_time");
|
||||
item.doc_id = rst.StringValue("doc_id");
|
||||
item.freq = 0;
|
||||
item.field = 0;
|
||||
item.doc_version = rst.IntValue("doc_version");
|
||||
item.end_time = rst.IntValue("end_time");
|
||||
if(item.end_time < now_time){
|
||||
if(!do_delete_top_index_dtc(dtc_server,key,item))
|
||||
log_error("do delete dtc error!");
|
||||
continue;
|
||||
}
|
||||
ret_p = indexSet.insert(item);
|
||||
if(ret_p.second == false){
|
||||
if(!do_delete_top_index_dtc(dtc_server,key,item))
|
||||
log_error("do delete dtc error!");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return do_top_index_clipping(dtc_server,key,rows_limit);
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: index_clipping.h
|
||||
*
|
||||
* Description: IndexClipping class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef SRC_INDEX_GEN_INDEX_CLIPPING_H_
|
||||
#define SRC_INDEX_GEN_INDEX_CLIPPING_H_
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include "index_conf.h"
|
||||
#include "dtcapi.h"
|
||||
using namespace std;
|
||||
|
||||
/*
 * One index row as handled by IndexClipping.
 *
 * Ordering (operator<) is lexicographic over field, freq, created_time and
 * doc_id, in that order. doc_version and end_time take no part in the
 * comparison, so two items that differ only in those compare equivalent.
 */
struct index_item {
    uint32_t created_time;
    uint8_t field;
    uint32_t freq;
    std::string doc_id;
    int doc_version;
    time_t end_time;

    // Overload of < : compare key by key, deciding on the first key that differs.
    friend bool operator<(const index_item &left, const index_item &right)
    {
        if (left.field != right.field)
            return left.field < right.field;
        if (left.freq != right.freq)
            return left.freq < right.freq;
        if (left.created_time != right.created_time)
            return left.created_time < right.created_time;
        return left.doc_id < right.doc_id;
    }
};
|
||||
|
||||
class IndexClipping {
|
||||
public:
|
||||
IndexClipping(DTC::Server* server);
|
||||
~IndexClipping();
|
||||
|
||||
bool get_rows_and_index_clipping(DTC::Server* dtc_server,string key,uint32_t rows_limit);
|
||||
bool get_rows_and_top_index_clipping(DTC::Server* dtc_server,string key,uint32_t rows_limit);
|
||||
private:
|
||||
bool do_delete_index_dtc(DTC::Server* dtc_server,string key,const struct index_item &item);
|
||||
bool do_index_clipping(DTC::Server* dtc_server,string key,uint32_t rows_limit);
|
||||
|
||||
bool do_delete_top_index_dtc(DTC::Server* dtc_server,string key,const struct index_item &item);
|
||||
bool do_top_index_clipping(DTC::Server* dtc_server,string key,uint32_t rows_limit);
|
||||
|
||||
bool is_active_doc(string &doc_id,uint32_t appid,int doc_version,int top);
|
||||
|
||||
|
||||
private:
|
||||
set<struct index_item> indexSet;
|
||||
DTC::Server* snapshot_server;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif /* SRC_INDEX_GEN_INDEX_CLIPPING_H_ */
|
|
@ -0,0 +1,276 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: index_conf.cc
|
||||
*
|
||||
* Description: IndexConf class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "index_conf.h"
|
||||
#include "log.h"
|
||||
#include "comm.h"
|
||||
#include <fstream>
|
||||
|
||||
/*
 * Default settings used until a configuration file is parsed.
 * String members start out empty.
 */
SGlobalIndexConfig::SGlobalIndexConfig()
    : iTimeout(300),
      iTimeInterval(0),
      iLogLevel(4),
      service_type(106),
      background(true) {
}
|
||||
|
||||
/*
 * Creates an empty document record for application `app_id`.
 * weight defaults to 1, publish_time to the current time, and the "top"
 * fields to 0; string members start out empty.
 */
UserTableContent::UserTableContent(uint32_t app_id)
    : appid(app_id),
      weight(1),
      publish_time(time(NULL)),
      top(0),
      top_start_time(0),
      top_end_time(0) {
}
|
||||
|
||||
int IndexConf::ParseDTCPara(const char *dtc_name,SDTCHost &dtchost) {
|
||||
Json::Value dtc_config;
|
||||
if (m_value.isMember(dtc_name) && m_value[dtc_name].isObject()) {
|
||||
dtc_config = m_value[dtc_name];
|
||||
if (dtc_config.isMember("table_name") && dtc_config["table_name"].isString()) {
|
||||
dtchost.szTablename = dtc_config["table_name"].asString();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (dtc_config.isMember("accesskey") && dtc_config["accesskey"].isString()) {
|
||||
dtchost.szAccesskey = dtc_config["accesskey"].asString();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (dtc_config.isMember("timeout") && dtc_config["timeout"].isInt()) {
|
||||
dtchost.uTimeout = dtc_config["timeout"].asInt();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (dtc_config.isMember("keytype") && dtc_config["keytype"].isInt()) {
|
||||
dtchost.uKeytype = dtc_config["keytype"].asInt();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (dtc_config.isMember("route") && dtc_config["route"].isArray()) {
|
||||
for (int i = 0; i < (int)dtc_config["route"].size(); i++) {
|
||||
SDTCroute dtc_route;
|
||||
Json::Value route = dtc_config["route"][i];
|
||||
if (route.isMember("ip") && route["ip"].isString()) {
|
||||
dtc_route.szIpadrr = route["ip"].asString();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (route.isMember("bid") && route["bid"].isInt()) {
|
||||
dtc_route.uBid = route["bid"].asInt();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (route.isMember("port") && route["port"].isInt()) {
|
||||
dtc_route.uPort = route["port"].asInt();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (route.isMember("weight") && route["weight"].isInt()) {
|
||||
dtc_route.uWeight = route["weight"].asInt();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (route.isMember("status") && route["status"].isInt()) {
|
||||
dtc_route.uStatus = route["status"].asInt();
|
||||
}else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
dtchost.vecRoute.push_back(dtc_route);
|
||||
}
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int IndexConf::ParseGlobalPara()
|
||||
{
|
||||
if (m_value.isMember("listen_addr") && m_value["listen_addr"].isString()) {
|
||||
m_GlobalConf.listen_addr = m_value["listen_addr"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("log") && m_value["log"].isString()) {
|
||||
m_GlobalConf.logPath = m_value["log"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (m_value.isMember("log_level") && m_value["log_level"].isInt()) {
|
||||
m_GlobalConf.iLogLevel = m_value["log_level"].asInt();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("pid_file") && m_value["pid_file"].isString()) {
|
||||
m_GlobalConf.pid_file = m_value["pid_file"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("timeout") && m_value["timeout"].isInt()) {
|
||||
m_GlobalConf.iTimeout = m_value["timeout"].asInt();
|
||||
}
|
||||
else {
|
||||
m_GlobalConf.iTimeout = 5000;
|
||||
}
|
||||
if (m_value.isMember("words_file") && m_value["words_file"].isString()) {
|
||||
m_GlobalConf.sWordsPath = m_value["words_file"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("service_type") && m_value["service_type"].isString()) {
|
||||
if(m_value["service_type"].asString() == "top_index"){
|
||||
m_GlobalConf.service_type = CMD_TOP_INDEX;//top_index
|
||||
}else if(m_value["service_type"].asString() == "snapshot"){
|
||||
m_GlobalConf.service_type = CMD_SNAPSHOT;//snapshot
|
||||
}else if(m_value["service_type"].asString() == "image"){
|
||||
m_GlobalConf.service_type = CMD_IMAGE_REPORT;//image_report
|
||||
}
|
||||
else
|
||||
m_GlobalConf.service_type = CMD_INDEX_GEN;//index_gen
|
||||
m_GlobalConf.service_name = m_value["service_type"].asString();
|
||||
|
||||
}
|
||||
else {
|
||||
m_GlobalConf.service_type = CMD_INDEX_GEN;//index_gen
|
||||
}
|
||||
if (m_value.isMember("stop_words_path") && m_value["stop_words_path"].isString()) {
|
||||
m_GlobalConf.stopWordsPath = m_value["stop_words_path"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("words_base_path") && m_value["words_base_path"].isString()) {
|
||||
m_GlobalConf.wordsBasePath = m_value["words_base_path"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("training_path") && m_value["training_path"].isString()) {
|
||||
m_GlobalConf.trainingPath = m_value["training_path"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("daemon") && m_value["daemon"].isBool()) {
|
||||
m_GlobalConf.background = m_value["daemon"].asBool();
|
||||
}
|
||||
else {
|
||||
log_error("parse data error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("split_mode") && m_value["split_mode"].isString()) {
|
||||
m_GlobalConf.sSplitMode = m_value["split_mode"].asString();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_GlobalConf.sSplitMode = "PrePostNGram";
|
||||
}
|
||||
if (m_value.isMember("phonetic_path") && m_value["phonetic_path"].isString()) {
|
||||
m_GlobalConf.sPhoneticPath = m_value["phonetic_path"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data[phonetic_path] error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("character_path") && m_value["character_path"].isString()) {
|
||||
m_GlobalConf.sCharacterPath = m_value["character_path"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data[character_path] error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if (m_value.isMember("phonetic_base_file") && m_value["phonetic_base_file"].isString()) {
|
||||
m_GlobalConf.sPhoneticBasePath = m_value["phonetic_base_file"].asString();
|
||||
}
|
||||
else {
|
||||
log_error("parse data[phonetic_base_file] error!");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool IndexConf::ParseConf(string path) {
|
||||
bool ret = false;
|
||||
Json::Reader reader;
|
||||
ifstream file(path.c_str());
|
||||
if (file) {
|
||||
ret = reader.parse(file, m_value);
|
||||
if (ret == false) {
|
||||
log_error("parse json error!");
|
||||
return false;
|
||||
}
|
||||
if (ParseGlobalPara() != 0) {
|
||||
log_error("parse json error!");
|
||||
return false;
|
||||
}
|
||||
if (ParseDTCPara("dtc_index_config",m_DTCIndexHost) != 0) {
|
||||
log_error("parse json error!");
|
||||
return false;
|
||||
}
|
||||
if (ParseDTCPara("dtc_intelligent_config", m_DTCIntelligentHost) != 0) {
|
||||
log_error("parse json error!");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
log_error("open file error!");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: index_conf.h
|
||||
*
|
||||
* Description: IndexConf class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __INDEX_CONF_H__
|
||||
#define __INDEX_CONF_H__
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
#include "singleton.h"
|
||||
#include "json/json.h"
|
||||
#include <stdint.h>
|
||||
#include "split_tool.h"
|
||||
using namespace std;
|
||||
|
||||
class SGlobalIndexConfig {
|
||||
public:
|
||||
SGlobalIndexConfig();
|
||||
~SGlobalIndexConfig(){}
|
||||
int iTimeout;
|
||||
int iTimeInterval;
|
||||
int iLogLevel;
|
||||
int service_type;
|
||||
string programName;
|
||||
string listen_addr;
|
||||
string pid_file;
|
||||
string logPath;
|
||||
string sWordsPath;
|
||||
string sEnWordsPath;
|
||||
string sCharacterPath;
|
||||
string sPhoneticPath;
|
||||
string sPhoneticBasePath;
|
||||
string stopWordsPath;
|
||||
string wordsBasePath;
|
||||
string trainingPath;
|
||||
string service_name;
|
||||
bool background;
|
||||
string sSplitMode;
|
||||
};
|
||||
|
||||
class UserTableContent{
|
||||
public:
|
||||
UserTableContent(uint32_t app_id);
|
||||
~UserTableContent(){}
|
||||
uint32_t appid;
|
||||
string doc_id;
|
||||
string title;
|
||||
string content;
|
||||
string author;
|
||||
string description;
|
||||
string sp_words;
|
||||
int weight;
|
||||
int publish_time;
|
||||
int top;
|
||||
int top_start_time;
|
||||
int top_end_time;
|
||||
};
|
||||
|
||||
class IndexConf {
|
||||
|
||||
public:
|
||||
IndexConf() {
|
||||
}
|
||||
static IndexConf *Instance()
|
||||
{
|
||||
return CSingleton<IndexConf>::Instance();
|
||||
}
|
||||
|
||||
static void Destroy()
|
||||
{
|
||||
CSingleton<IndexConf>::Destroy();
|
||||
}
|
||||
|
||||
bool ParseConf(string path);
|
||||
|
||||
SGlobalIndexConfig &GetGlobalConfig(){
|
||||
return m_GlobalConf;
|
||||
}
|
||||
|
||||
SDTCHost &GetDTCIndexConfig(){
|
||||
return m_DTCIndexHost;
|
||||
}
|
||||
|
||||
SDTCHost &GetDTCIntelligentConfig() {
|
||||
return m_DTCIntelligentHost;
|
||||
}
|
||||
|
||||
private:
|
||||
int ParseDTCPara(const char *dtc_name,SDTCHost &dtchost) ;
|
||||
int ParseGlobalPara();
|
||||
int ParseMYSQLPara();
|
||||
private:
|
||||
SGlobalIndexConfig m_GlobalConf;
|
||||
SDTCHost m_DTCIndexHost;
|
||||
SDTCHost m_DTCIntelligentHost;
|
||||
Json::Value m_value;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,689 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: index_tbl_op.cc
|
||||
*
|
||||
* Description: CIndexTableManager class implementation (DTC index table operations).
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "index_tbl_op.h"
|
||||
#include "index_clipping.h"
|
||||
#include "add_request_proc.h"
|
||||
#include <sstream>
|
||||
|
||||
// Global table-manager instances; each one is expected to be set up
// through InitServer() before use.
CIndexTableManager g_IndexInstance;
CIndexTableManager g_delIndexInstance;
CIndexTableManager g_hanpinIndexInstance;
|
||||
|
||||
/*
 * Formats `doc_id` into a 40-character access key: shorter ids are
 * right-aligned and padded with spaces on the left, longer ids are cut
 * off after 40 characters.
 *
 * The returned pointer refers to a function-local static buffer, so the
 * text is overwritten by the next call.
 */
static char* gen_access_key(std::string doc_id){
    // The initial contents are irrelevant: snprintf rewrites the buffer on every call.
    static char key_buf[41] = {'0'};
    const int width = 40;

    snprintf(key_buf, sizeof(key_buf), "%*s", width, doc_id.c_str());
    return key_buf;
}
|
||||
|
||||
/*
 * Reads the trans_version column of the snapshot row that belongs to
 * `fields` (key built from appid, "10" and doc_id).
 *
 * Returns the code reported by Execute(); rows land in `rst`.
 */
static int get_snapshot_execute(DTC::Server* dtc_server, const UserTableContent &fields, DTC::Result &rst){
    DTC::GetRequest request(dtc_server);

    request.SetKey(gen_dtc_key_string(fields.appid, "10", fields.doc_id).c_str());
    request.Need("trans_version");

    return request.Execute(rst);
}
|
||||
|
||||
static int insert_index_execute(DTC::Server* dtcServer,string key,struct item &it,u_int8_t field_type,int doc_version,DTC::Result &rst){
|
||||
int ret = 0;
|
||||
|
||||
stringstream index_sstr;
|
||||
index_sstr << "[";
|
||||
int count = 0;
|
||||
vector<uint32_t>::iterator iter = it.indexs.begin();
|
||||
for (; iter != it.indexs.end(); iter++) {
|
||||
if (count++ > 25) {
|
||||
break;
|
||||
}
|
||||
index_sstr << *iter << ",";
|
||||
}
|
||||
string index_str = index_sstr.str();
|
||||
index_str = index_str.substr(0, index_str.size()-1);
|
||||
index_str.append("]");
|
||||
if (it.indexs.size() == 0) {
|
||||
index_str = "";
|
||||
}
|
||||
DTC::InsertRequest insertReq(dtcServer);
|
||||
insertReq.SetKey(key.c_str());
|
||||
insertReq.Set("doc_id", it.doc_id.c_str());
|
||||
insertReq.Set("field", field_type);
|
||||
insertReq.Set("word_freq", it.freq);
|
||||
insertReq.Set("weight", 1);
|
||||
insertReq.Set("extend", it.extend.c_str());
|
||||
insertReq.Set("doc_version",doc_version);
|
||||
insertReq.Set("trans_version",doc_version);
|
||||
insertReq.Set("created_time",time(NULL));
|
||||
insertReq.Set("location", index_str.c_str());
|
||||
insertReq.Set("start_time", 0);
|
||||
insertReq.Set("end_time", 0);
|
||||
ret = insertReq.Execute(rst);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int insert_intelligent_execute(DTC::Server* dtcServer, string key, string doc_id, string word, const IntelligentInfo &info, DTC::Result &rst, int doc_version) {
|
||||
int ret = 0;
|
||||
DTC::InsertRequest insertReq(dtcServer);
|
||||
insertReq.SetKey(key.c_str());
|
||||
insertReq.Set("word", word.c_str());
|
||||
insertReq.Set("doc_id", doc_id.c_str());
|
||||
insertReq.Set("doc_version", doc_version);
|
||||
insertReq.Set("charact_id_01", info.charact_id[0]);
|
||||
insertReq.Set("charact_id_02", info.charact_id[1]);
|
||||
insertReq.Set("charact_id_03", info.charact_id[2]);
|
||||
insertReq.Set("charact_id_04", info.charact_id[3]);
|
||||
insertReq.Set("charact_id_05", info.charact_id[4]);
|
||||
insertReq.Set("charact_id_06", info.charact_id[5]);
|
||||
insertReq.Set("charact_id_07", info.charact_id[6]);
|
||||
insertReq.Set("charact_id_08", info.charact_id[7]);
|
||||
insertReq.Set("phonetic_id_01", info.phonetic_id[0]);
|
||||
insertReq.Set("phonetic_id_02", info.phonetic_id[1]);
|
||||
insertReq.Set("phonetic_id_03", info.phonetic_id[2]);
|
||||
insertReq.Set("phonetic_id_04", info.phonetic_id[3]);
|
||||
insertReq.Set("phonetic_id_05", info.phonetic_id[4]);
|
||||
insertReq.Set("phonetic_id_06", info.phonetic_id[5]);
|
||||
insertReq.Set("phonetic_id_07", info.phonetic_id[6]);
|
||||
insertReq.Set("phonetic_id_08", info.phonetic_id[7]);
|
||||
insertReq.Set("initial_char_01", info.initial_char[0].c_str());
|
||||
insertReq.Set("initial_char_02", info.initial_char[1].c_str());
|
||||
insertReq.Set("initial_char_03", info.initial_char[2].c_str());
|
||||
insertReq.Set("initial_char_04", info.initial_char[3].c_str());
|
||||
insertReq.Set("initial_char_05", info.initial_char[4].c_str());
|
||||
insertReq.Set("initial_char_06", info.initial_char[5].c_str());
|
||||
insertReq.Set("initial_char_07", info.initial_char[6].c_str());
|
||||
insertReq.Set("initial_char_08", info.initial_char[7].c_str());
|
||||
insertReq.Set("initial_char_09", info.initial_char[8].c_str());
|
||||
insertReq.Set("initial_char_10", info.initial_char[9].c_str());
|
||||
insertReq.Set("initial_char_11", info.initial_char[10].c_str());
|
||||
insertReq.Set("initial_char_12", info.initial_char[11].c_str());
|
||||
insertReq.Set("initial_char_13", info.initial_char[12].c_str());
|
||||
insertReq.Set("initial_char_14", info.initial_char[13].c_str());
|
||||
insertReq.Set("initial_char_15", info.initial_char[14].c_str());
|
||||
insertReq.Set("initial_char_16", info.initial_char[15].c_str());
|
||||
ret = insertReq.Execute(rst);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int CIndexTableManager::InitServer(const SDTCHost &dtchost) {
|
||||
string _MasterAddress = "127.0.0.1";
|
||||
stringstream ss;
|
||||
uint32_t port = 0;
|
||||
if (dtchost.vecRoute.size() > 0) {
|
||||
SDTCroute route = dtchost.vecRoute[0];
|
||||
port = route.uPort;
|
||||
_MasterAddress = route.szIpadrr;
|
||||
}
|
||||
ss << ":" << port << "/tcp";
|
||||
string master_bind_port = ss.str();
|
||||
_MasterAddress.append(master_bind_port);
|
||||
|
||||
log_info("master address is [%s]", _MasterAddress.c_str());
|
||||
|
||||
server.StringKey();
|
||||
server.SetTableName(dtchost.szTablename.c_str());
|
||||
server.SetAddress(_MasterAddress.c_str());
|
||||
server.SetMTimeout(300);
|
||||
|
||||
int ret;
|
||||
if ((ret = server.Ping()) != 0 && ret != -DTC::EC_TABLE_MISMATCH) {
|
||||
log_error("ping server[%s] failed, err: %d", _MasterAddress.c_str(), ret);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool CIndexTableManager::DeleteIndex(std::string word, const std::string& doc_id, uint32_t doc_version, uint32_t field){
|
||||
DTC::Server* dtc_server = &server;
|
||||
if (NULL == dtc_server) {
|
||||
log_error("dtc_server is null !");
|
||||
return false;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(doc_id));
|
||||
|
||||
DTC::DeleteRequest delReq(dtc_server);
|
||||
int ret = delReq.SetKey(word.c_str());
|
||||
ret |= delReq.EQ("doc_id", doc_id.c_str());
|
||||
ret |= delReq.EQ("doc_version", doc_version);
|
||||
ret |= delReq.EQ("field", field);
|
||||
DTC::Result rst;
|
||||
ret = delReq.Execute(rst);
|
||||
if(ret != 0)
|
||||
{
|
||||
log_error("delete request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int CIndexTableManager::delete_snapshot_dtc(string &doc_id, uint32_t appid, Json::Value &res){
|
||||
int ret;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(doc_id));
|
||||
DTC::DeleteRequest deleteReq(dtc_server);
|
||||
ret = deleteReq.SetKey(gen_dtc_key_string(appid, "10", doc_id).c_str());
|
||||
|
||||
DTC::Result rst;
|
||||
ret = deleteReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("delete request error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_DELETE_SNAPSHOT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CIndexTableManager::delete_hanpin_index(string key, string doc_id) {
|
||||
int ret = 0;
|
||||
DTC::Server* dtcServer = &server;
|
||||
if(NULL == dtcServer){
|
||||
log_error("dtc server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtcServer->SetAccessKey(gen_access_key(doc_id));
|
||||
DTC::DeleteRequest deleteReq(dtcServer);
|
||||
ret = deleteReq.SetKey(key.c_str());
|
||||
ret = deleteReq.EQ("doc_id", doc_id.c_str());
|
||||
|
||||
DTC::Result rst;
|
||||
ret = deleteReq.Execute(rst);
|
||||
if (ret != 0){
|
||||
log_error("delete request error! ,errno %d ,errmsg %s, errfrom %s\n", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
}
|
||||
else {
|
||||
log_debug("delete key = %s doc_id = %s", key.c_str(), doc_id.c_str());
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int CIndexTableManager::get_snapshot_active_doc(const UserTableContent &fields, int &doc_version, Json::Value &res){
|
||||
int ret;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
DTC::Result rst;
|
||||
ret = get_snapshot_execute(dtc_server,fields,rst);
|
||||
if (ret != 0) {
|
||||
if (ret == -110) {
|
||||
rst.Reset();
|
||||
ret = get_snapshot_execute(dtc_server,fields,rst);
|
||||
if (ret != 0) {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
}
|
||||
else {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
}
|
||||
int cnt = rst.NumRows();
|
||||
if (rst.NumRows() <= 0) {
|
||||
return RT_NO_THIS_DOC;
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < cnt; i++) {
|
||||
rst.FetchRow();
|
||||
doc_version = rst.IntValue("trans_version");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Insert one index row per entry of `word_map`.
 *
 * Each map key is turned into a DTC key with gen_dtc_key_string(app_id, "00", word)
 * and written through insert_index_dtc(). Stops at the first failure and
 * returns RT_ERROR_INSERT_INDEX_DTC; returns 0 when every entry was written.
 */
int CIndexTableManager::do_insert_index(map<string, item> &word_map, uint64_t app_id,int doc_version,int field,Json::Value &res) {
    for (map<string, item>::iterator entry = word_map.begin(); entry != word_map.end(); ++entry) {
        const string dtc_key = gen_dtc_key_string(app_id, "00", entry->first);
        // insert_index_dtc() takes the item by non-const reference, so hand
        // it a private copy rather than the map's element.
        item posting = entry->second;
        const int rc = insert_index_dtc(dtc_key, posting, field, doc_version, res);
        log_debug("key = %s,doc_vesion = %d,docid = %s\n",dtc_key.c_str(),doc_version,posting.doc_id.c_str());
        if (rc != 0) {
            return RT_ERROR_INSERT_INDEX_DTC;
        }
    }
    return 0;
}
|
||||
|
||||
/*
 * Write a single index row for `key` and record the key in `res[field_type]`.
 *
 * The key is appended to `res` before the insert is attempted, so it is
 * recorded even when the insert fails. Returns 0 on success, -1 on failure.
 */
int CIndexTableManager::insert_index_dtc(string key, struct item &it, u_int8_t field_type, int doc_version, Json::Value &res){
    res[field_type].append(key);

    // Access key: doc_id right-aligned in a 40-character field ("%40s");
    // the 41-byte buffer cuts anything longer.
    char access_key[41] = { '0' };
    snprintf(access_key, sizeof(access_key), "%40s", it.doc_id.c_str());

    DTC::Server* index_server = &server;
    index_server->SetAccessKey(access_key);

    DTC::Result result;
    const int rc = insert_index_execute(index_server, key, it, field_type, doc_version, result);
    if (rc == 0) {
        return 0;
    }

    log_error("insert request error! ,errno %d ,errmsg %s, errfrom %s\n", rc,result.ErrorMessage(), result.ErrorFrom());
    return -1;
}
|
||||
|
||||
/*
 * Insert one row per element of `info_vec` via insert_intelligent_execute().
 *
 * Stops at the first failing insert and returns -1; returns 0 when all
 * elements were written (or the vector is empty).
 */
int CIndexTableManager::do_insert_intelligent(string key, string doc_id, string word, const vector<IntelligentInfo> & info_vec, int doc_version) {
    // Access key: doc_id right-aligned in a 40-character field.
    char access_key[41] = { '0' };
    snprintf(access_key, sizeof(access_key), "%40s", doc_id.c_str());

    DTC::Server* intelligent_server = &server;
    intelligent_server->SetAccessKey(access_key);

    for (vector<IntelligentInfo>::const_iterator pos = info_vec.begin(); pos != info_vec.end(); ++pos) {
        IntelligentInfo current = *pos;
        DTC::Result result;
        const int rc = insert_intelligent_execute(intelligent_server, key, doc_id, word, current, result, doc_version);
        if (rc != 0) {
            log_error("insert request error! ,errno %d ,errmsg %s, errfrom %s\n", rc, result.ErrorMessage(), result.ErrorFrom());
            return -1;
        }
    }
    return 0;
}
|
||||
|
||||
int CIndexTableManager::update_sanpshot_dtc(const UserTableContent &fields,int doc_version,int trans_version,int &affected_rows){
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(fields.doc_id));
|
||||
DTC::UpdateRequest updateReq(dtc_server);
|
||||
ret = updateReq.SetKey(gen_dtc_key_string(fields.appid, "10", fields.doc_id).c_str());
|
||||
updateReq.Set("doc_version", doc_version);
|
||||
if(fields.content.length() > 0)
|
||||
updateReq.Set("extend", fields.content.c_str());
|
||||
updateReq.Set("weight",fields.weight);
|
||||
updateReq.Set("created_time",fields.publish_time);
|
||||
updateReq.EQ("trans_version", trans_version);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = updateReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("updateReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_UPDATE_SNAPSHOT;
|
||||
}
|
||||
affected_rows = rst.AffectedRows();
|
||||
return ret;
|
||||
}
|
||||
|
||||
int CIndexTableManager::update_sanpshot_dtc(uint32_t appid, string doc_id, int trans_version){
|
||||
DTC::Server* dtc_server = &server;
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(doc_id));
|
||||
DTC::UpdateRequest updateReq(dtc_server);
|
||||
int ret = updateReq.SetKey(gen_dtc_key_string(appid, "10", doc_id).c_str());
|
||||
updateReq.Set("trans_version", trans_version - 1);
|
||||
updateReq.EQ("trans_version", trans_version);
|
||||
DTC::Result rst;
|
||||
ret = updateReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("delete request error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_DELETE_SNAPSHOT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CIndexTableManager::update_snapshot_version(const UserTableContent &fields,int doc_version,int &affected_rows){
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(fields.doc_id));
|
||||
DTC::UpdateRequest updateReq(dtc_server);
|
||||
ret = updateReq.SetKey(gen_dtc_key_string(fields.appid, "10", fields.doc_id).c_str());
|
||||
updateReq.Set("trans_version", doc_version);
|
||||
updateReq.EQ("trans_version", doc_version - 1);
|
||||
DTC::Result rst;
|
||||
ret = updateReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("updateReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_UPDATE_SNAPSHOT;
|
||||
}
|
||||
affected_rows = rst.AffectedRows();
|
||||
return ret;
|
||||
}
|
||||
|
||||
int CIndexTableManager::insert_snapshot_version(const UserTableContent &fields,int doc_version){
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(fields.doc_id));
|
||||
DTC::InsertRequest insertReq(dtc_server);
|
||||
ret = insertReq.SetKey(gen_dtc_key_string(fields.appid, "10", fields.doc_id).c_str());
|
||||
insertReq.Set("doc_version", 0);
|
||||
insertReq.Set("trans_version", doc_version);
|
||||
insertReq.Set("doc_id", fields.doc_id.c_str());
|
||||
DTC::Result rst;
|
||||
ret = insertReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("insertReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_INSERT_SNAPSHOT;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int CIndexTableManager::update_docid_index_dtc(const string & invert_keys, const string & doc_id, uint32_t appid, int doc_version)
|
||||
{
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if (NULL == dtc_server) {
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(doc_id));
|
||||
DTC::UpdateRequest updateReq(dtc_server);
|
||||
ret = updateReq.SetKey(gen_dtc_key_string(appid, "20", doc_id).c_str());
|
||||
updateReq.Set("doc_version", doc_version);
|
||||
if (invert_keys.length() > 0)
|
||||
updateReq.Set("extend", invert_keys.c_str());
|
||||
|
||||
DTC::Result rst;
|
||||
ret = updateReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("updateReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_UPDATE_SNAPSHOT;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int CIndexTableManager::insert_docid_index_dtc(const string & invert_keys, const string & doc_id, uint32_t appid, int doc_version)
|
||||
{
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if (NULL == dtc_server) {
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(doc_id));
|
||||
DTC::InsertRequest insertReq(dtc_server);
|
||||
ret = insertReq.SetKey(gen_dtc_key_string(appid, "20", doc_id).c_str());
|
||||
insertReq.Set("doc_id", doc_id.c_str());
|
||||
insertReq.Set("doc_version", doc_version);
|
||||
if (invert_keys.length() > 0)
|
||||
insertReq.Set("extend", invert_keys.c_str());
|
||||
|
||||
DTC::Result rst;
|
||||
ret = insertReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("updateReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_INSERT_INDEX_DTC;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int CIndexTableManager::insert_union_index_dtc(const string & union_key, const string & doc_id, uint32_t appid, int doc_version)
|
||||
{
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if (NULL == dtc_server) {
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(doc_id));
|
||||
DTC::InsertRequest insertReq(dtc_server);
|
||||
ret = insertReq.SetKey(gen_dtc_key_string(appid, "00", union_key).c_str());
|
||||
insertReq.Set("doc_id", doc_id.c_str());
|
||||
insertReq.Set("doc_version", doc_version);
|
||||
insertReq.Set("trans_version", doc_version);
|
||||
insertReq.Set("created_time", time(NULL));
|
||||
insertReq.Set("word_freq", 1);
|
||||
insertReq.Set("weight", 1);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = insertReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("updateReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_INSERT_INDEX_DTC;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int CIndexTableManager::delete_docid_index_dtc(const string & key, const string & doc_id){
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = &server;
|
||||
if (NULL == dtc_server) {
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(doc_id));
|
||||
DTC::DeleteRequest deleteReq(dtc_server);
|
||||
ret = deleteReq.SetKey(key.c_str());
|
||||
DTC::Result rst;
|
||||
ret = deleteReq.Execute(rst);
|
||||
if(ret != 0)
|
||||
{
|
||||
log_error("deleteReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_DELETE_SNAPSHOT;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
bool CIndexTableManager::GetIndexData(const std::string& doc_id, uint32_t doc_version, map<uint32_t, vector<string> > &res){
|
||||
DTC::Server* dtc_server = &server;
|
||||
if (NULL == dtc_server) {
|
||||
log_error("dtc_server is null !");
|
||||
return false;
|
||||
}
|
||||
|
||||
DTC::GetRequest getReq(dtc_server);
|
||||
int ret = getReq.SetKey(doc_id.c_str());
|
||||
ret |= getReq.EQ("doc_version", doc_version);
|
||||
ret |= getReq.Need("extend");
|
||||
|
||||
DTC::Result rst;
|
||||
ret = getReq.Execute(rst);
|
||||
if(ret != 0)
|
||||
{
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
int cnt = rst.NumRows();
|
||||
if(cnt <= 0)
|
||||
{
|
||||
log_debug("can not find any result. key:%s", doc_id.c_str());
|
||||
return false;
|
||||
}
|
||||
rst.FetchRow();
|
||||
string extend = rst.StringValue("extend");
|
||||
Json::Reader reader;
|
||||
Json::Value value;
|
||||
if(!reader.parse(extend, value, false))
|
||||
{
|
||||
log_error("parse json error!\ndata:%s errors:%s\n",extend.c_str(),reader.getFormattedErrorMessages().c_str());
|
||||
return false;
|
||||
}
|
||||
if(value.isArray()){
|
||||
for(int i = 0;i < (int)value.size();i++){
|
||||
Json::Value info = value[i];
|
||||
if(info.isArray()){
|
||||
for(int j = 0;j < (int)info.size();j++){
|
||||
if(info[j].isString()){
|
||||
res[i].push_back(info[j].asString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CIndexTableManager::delete_index(std::string word, const std::string& doc_id, uint32_t doc_version, uint32_t field){
|
||||
DTC::Server* dtc_server = &server;
|
||||
if (NULL == dtc_server) {
|
||||
log_error("dtc_server is null !");
|
||||
return false;
|
||||
}
|
||||
dtc_server->SetAccessKey(gen_access_key(doc_id));
|
||||
|
||||
DTC::DeleteRequest delReq(dtc_server);
|
||||
int ret = delReq.SetKey(word.c_str());
|
||||
ret |= delReq.EQ("doc_id", doc_id.c_str());
|
||||
ret |= delReq.EQ("doc_version", doc_version);
|
||||
ret |= delReq.EQ("field", field);
|
||||
DTC::Result rst;
|
||||
ret = delReq.Execute(rst);
|
||||
if(ret != 0)
|
||||
{
|
||||
log_error("delete request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CIndexTableManager::delete_intelligent(std::string key, std::string doc_id, uint32_t trans_version){
|
||||
DTC::Server* intelligent_server = &server;
|
||||
if(NULL == intelligent_server){
|
||||
log_error("GetServer error!");
|
||||
return false;
|
||||
}
|
||||
intelligent_server->SetAccessKey(gen_access_key(doc_id));
|
||||
DTC::DeleteRequest deleteReq(intelligent_server);
|
||||
int ret = deleteReq.SetKey(key.c_str());
|
||||
deleteReq.EQ("doc_id", doc_id.c_str());
|
||||
deleteReq.EQ("doc_version", trans_version);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = deleteReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("delete request error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void * DeleteTask::ProcessCycle(void * arg)
|
||||
{
|
||||
int statistic_period = 5;
|
||||
pthread_mutex_lock(&DeleteTask::GetInstance()._Mutex);
|
||||
std::vector<DeleteItem> temp_result;
|
||||
int last_append_time = time(NULL);
|
||||
while (!DeleteTask::GetInstance()._StopFlag) {
|
||||
if (DeleteTask::GetInstance()._InfoHead == NULL) {
|
||||
if(temp_result.size() != 0){
|
||||
for (std::vector<DeleteItem>::iterator it = temp_result.begin(); it != temp_result.end(); it++) {
|
||||
DeleteItem item = *it;
|
||||
g_delIndexInstance.DeleteIndex(item.word, item.doc_id, item.doc_version, item.field);
|
||||
}
|
||||
temp_result.clear();
|
||||
}
|
||||
pthread_cond_wait(&DeleteTask::GetInstance()._NotEmpty, &DeleteTask::GetInstance()._Mutex);
|
||||
continue;
|
||||
}
|
||||
DeleteItem *head = DeleteTask::GetInstance()._InfoHead;
|
||||
DeleteTask::GetInstance()._InfoHead = DeleteTask::GetInstance()._InfoTail = NULL;
|
||||
pthread_mutex_unlock(&DeleteTask::GetInstance()._Mutex);
|
||||
|
||||
DeleteTask::GetInstance().Coalesce(head, temp_result);
|
||||
int now_time = time(NULL);
|
||||
if (now_time - last_append_time >= statistic_period) {
|
||||
last_append_time = now_time;
|
||||
for (std::vector<DeleteItem>::iterator it = temp_result.begin(); it != temp_result.end(); it++) {
|
||||
DeleteItem item = *it;
|
||||
g_delIndexInstance.DeleteIndex(item.word, item.doc_id, item.doc_version, item.field);
|
||||
}
|
||||
temp_result.clear();
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&DeleteTask::GetInstance()._Mutex);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Consume the linked list starting at `head`: every node that does not
 * already compare equal to an element of `temp_result` is appended to it,
 * and every node is deleted afterwards.
 */
void DeleteTask::Coalesce(DeleteItem * head, std::vector<DeleteItem>& temp_result)
{
    DeleteItem *node = head;
    while (node != NULL) {
        bool already_queued = false;
        for (std::vector<DeleteItem>::iterator seen = temp_result.begin(); seen != temp_result.end(); seen++) {
            if (*seen == *node) {
                already_queued = true;
                break;
            }
        }
        if (!already_queued) {
            temp_result.push_back(*node);
        }

        // Grab the successor before the node is freed.
        DeleteItem *next = node->_Next;
        delete node;
        node = next;
    }
}
|
||||
|
||||
/* Reset the pending-delete queue to empty. Always returns true. */
bool DeleteTask::Initialize(){
    _InfoTail = NULL;
    _InfoHead = _InfoTail;
    return true;
}
|
||||
|
||||
void DeleteTask::RegisterInfo(const std::string& word, const std::string& doc_id, uint32_t doc_version, uint32_t field) {
|
||||
DeleteItem *item = new DeleteItem();
|
||||
if (item != NULL) {
|
||||
item->word = word;
|
||||
item->doc_id = doc_id;
|
||||
item->doc_version = doc_version;
|
||||
item->field = field;
|
||||
PushReportItem(item);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: index_tbl_op.h
|
||||
*
|
||||
* Description: CIndexTableManager, DeleteItem and DeleteTask class definitions.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef INDEX_TBL_OP_H
|
||||
#define INDEX_TBL_OP_H
|
||||
|
||||
#include "log.h"
|
||||
#include "dtcapi.h"
|
||||
#include "split_tool.h"
|
||||
#include "index_conf.h"
|
||||
#include "json/json.h"
|
||||
#include "dtc_tools.h"
|
||||
#include "comm.h"
|
||||
|
||||
class CIndexTableManager
|
||||
{
|
||||
public:
|
||||
int InitServer(const SDTCHost &dtchost);
|
||||
bool DeleteIndex(std::string word, const std::string& doc_id, uint32_t doc_version, uint32_t field);
|
||||
int delete_snapshot_dtc(string &doc_id, uint32_t appid, Json::Value &res);
|
||||
int delete_hanpin_index(string key, string doc_id);
|
||||
int get_snapshot_active_doc(const UserTableContent &fields, int &doc_version, Json::Value &res);
|
||||
int do_insert_index(map<string, item> &word_map, uint64_t app_id,int doc_version,int field,Json::Value &res);
|
||||
int insert_index_dtc(string key, struct item &it, u_int8_t field_type, int doc_version, Json::Value &res);
|
||||
int do_insert_intelligent(string key, string doc_id, string word, const vector<IntelligentInfo> & info_vec, int doc_version);
|
||||
int update_sanpshot_dtc(const UserTableContent &fields,int doc_version,int trans_version,int &affected_rows);
|
||||
int update_sanpshot_dtc(uint32_t appid, string doc_id, int trans_version);
|
||||
int update_snapshot_version(const UserTableContent &fields,int doc_version,int &affected_rows);
|
||||
int insert_snapshot_version(const UserTableContent &fields,int doc_version);
|
||||
int update_docid_index_dtc(const string & invert_keys, const string & doc_id, uint32_t appid, int doc_version);
|
||||
int insert_docid_index_dtc(const string & invert_keys, const string & doc_id, uint32_t appid, int doc_version);
|
||||
int insert_union_index_dtc(const string & union_key, const string & doc_id, uint32_t appid, int doc_version);
|
||||
int delete_docid_index_dtc(const string & key, const string & doc_id);
|
||||
bool GetIndexData(const std::string& doc_id, uint32_t doc_version, map<uint32_t, vector<string> > &res);
|
||||
bool delete_index(std::string word, const std::string& doc_id, uint32_t doc_version, uint32_t field);
|
||||
bool delete_intelligent(std::string key, std::string doc_id, uint32_t trans_version);
|
||||
private:
|
||||
DTC::Server server;
|
||||
};
|
||||
|
||||
/* Process-wide table managers; their definitions live outside this header. */
extern CIndexTableManager g_IndexInstance;        /* used by the index request path */
extern CIndexTableManager g_delIndexInstance;     /* used by the DeleteTask worker thread */
extern CIndexTableManager g_hanpinIndexInstance;  /* presumably the hanpin index table -- TODO confirm */
|
||||
|
||||
/*
 * One pending index delete: (word, doc_id, doc_version, field).
 *
 * Instances are chained through _Next to form the queue owned by DeleteTask,
 * which is the only class allowed to touch the fields.
 */
class DeleteItem {
public:
    friend class DeleteTask;

    /* All scalar members are initialised so a fresh item is well-defined. */
    DeleteItem() : doc_version(0), field(0), _Next(NULL) {}

    /* Two items are equal when all four payload fields match; the _Next
     * link does not take part in the comparison. */
    bool operator==(const DeleteItem& a) const {
        return this->word == a.word &&
               this->doc_id == a.doc_id &&
               this->doc_version == a.doc_version &&
               this->field == a.field;
    }

private:
    std::string word;
    std::string doc_id;
    uint32_t doc_version;
    uint32_t field;
    DeleteItem *_Next;   /* next queued item, NULL at the tail */
};
|
||||
|
||||
class DeleteTask{
|
||||
public:
|
||||
static DeleteTask& GetInstance() {
|
||||
static DeleteTask instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
bool Initialize();
|
||||
void RegisterInfo(const std::string& word, const std::string& doc_id, uint32_t doc_version, uint32_t field);
|
||||
|
||||
private:
|
||||
pthread_t _ReportThread;
|
||||
pthread_cond_t _NotEmpty;
|
||||
pthread_mutex_t _Mutex;
|
||||
DeleteItem *_InfoHead;
|
||||
DeleteItem *_InfoTail;
|
||||
bool _StopFlag;
|
||||
|
||||
private:
|
||||
static void *ProcessCycle(void *arg);
|
||||
DeleteTask() {
|
||||
pthread_mutex_init(&_Mutex, NULL);
|
||||
pthread_cond_init(&_NotEmpty, NULL);
|
||||
pthread_create(&_ReportThread, NULL, ProcessCycle, NULL);
|
||||
_StopFlag = false;
|
||||
}
|
||||
~DeleteTask() {
|
||||
_StopFlag = true;
|
||||
pthread_cond_signal(&_NotEmpty);
|
||||
pthread_join(_ReportThread, NULL);
|
||||
}
|
||||
void Coalesce(DeleteItem *head, std::vector<DeleteItem>& temp_result);
|
||||
|
||||
void PushReportItem(DeleteItem* item) {
|
||||
pthread_mutex_lock(&_Mutex);
|
||||
if (_InfoHead == NULL) {
|
||||
_InfoHead = _InfoTail = item;
|
||||
}
|
||||
else {
|
||||
_InfoTail->_Next = item;
|
||||
_InfoTail = item;
|
||||
}
|
||||
|
||||
pthread_cond_signal(&_NotEmpty);
|
||||
pthread_mutex_unlock(&_Mutex);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,307 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: index_write.cc
|
||||
*
|
||||
* Description: CTaskIndexGen class implementation.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "index_write.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "log.h"
|
||||
#include "poll_thread.h"
|
||||
#include "task_request.h"
|
||||
#include "dtc_tools.h"
|
||||
#include "index_clipping.h"
|
||||
#include "monitor.h"
|
||||
#include "chash.h"
|
||||
#include "index_tbl_op.h"
|
||||
#include "geohash.h"
|
||||
#include "add_request_proc.h"
|
||||
|
||||
/* Bind the dispatcher and its output to poll thread `o`; the instance starts
 * out writable (read_only = 0). */
CTaskIndexGen::CTaskIndexGen(CPollThread * o)
    : CTaskDispatcher<CTaskRequest>(o), ownerThread(o), output(o), read_only(0)
{
}
|
||||
|
||||
/* Nothing to release explicitly. */
CTaskIndexGen::~CTaskIndexGen() {}
|
||||
|
||||
/*
 * Pull the three mandatory top-level members out of a request:
 *   "table_content" (array) -> subreq
 *   "appid"         (int)   -> id
 *   "fields_count"  (int)   -> count
 *
 * They are checked in that order; the first missing or mistyped member
 * yields RT_NO_TABLE_CONTENT, RT_NO_APPID or RT_NO_FIELD_COUNT and leaves
 * the later outputs untouched. Returns 0 when all three are present.
 */
int CTaskIndexGen::decode_request(const Json::Value & req, Json::Value & subreq, uint32_t & id, uint32_t & count)
{
    const bool has_content = req.isMember("table_content") && req["table_content"].isArray();
    if (!has_content) {
        return RT_NO_TABLE_CONTENT;
    }
    subreq = req["table_content"];

    const bool has_appid = req.isMember("appid") && req["appid"].isInt();
    if (!has_appid) {
        return RT_NO_APPID;
    }
    id = req["appid"].asInt();

    const bool has_count = req.isMember("fields_count") && req["fields_count"].isInt();
    if (!has_count) {
        return RT_NO_FIELD_COUNT;
    }
    count = req["fields_count"].asInt();

    return 0;
}
|
||||
|
||||
static int decode_fields(Json::Value table_content,Json::Value &json_fields,UserTableContent &fields){
|
||||
string cmd;
|
||||
if(table_content.isMember("cmd") && table_content["cmd"].isString()){
|
||||
cmd = table_content["cmd"].asString();
|
||||
if(cmd == "add" || cmd == "update"){
|
||||
if(table_content.isMember("fields") && table_content["fields"].isObject()){
|
||||
json_fields = table_content["fields"];
|
||||
|
||||
if(json_fields.isMember("id") && (json_fields["id"].isString() || json_fields["id"].isInt())){
|
||||
fields.doc_id = json_fields["id"].asString();
|
||||
}else{
|
||||
if(json_fields.isMember("doc_id") && json_fields["doc_id"].isString()){
|
||||
fields.doc_id = json_fields["doc_id"].asString();
|
||||
}else
|
||||
return RT_NO_DOCID;
|
||||
}
|
||||
|
||||
if(json_fields.isMember("weight") && json_fields["weight"].isInt()){
|
||||
fields.weight = json_fields["weight"].asInt();
|
||||
}else{
|
||||
fields.weight = 1;
|
||||
}
|
||||
return RT_CMD_ADD;
|
||||
}
|
||||
else{
|
||||
return RT_ERROR_FIELD;
|
||||
}
|
||||
}else if(cmd == "delete"){
|
||||
json_fields = table_content["fields"];
|
||||
if(json_fields.isMember("doc_id") && json_fields["doc_id"].isString()){
|
||||
fields.doc_id = json_fields["doc_id"].asString();
|
||||
}else if(json_fields.isMember("id") && (json_fields["id"].isString() || json_fields["id"].isInt())){
|
||||
fields.doc_id = json_fields["id"].asString();
|
||||
}else{
|
||||
return RT_NO_DOCID;
|
||||
}
|
||||
return RT_CMD_DELETE;
|
||||
}else{
|
||||
return RT_ERROR_FIELD_CMD;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Handle one decoded index-write request.
 *
 * A request carrying an integer "read_only" member only toggles the
 * read-only switch and returns 0. Otherwise every element of
 * "table_content" is decoded and executed in order:
 *   - add/update: bump (or create) the snapshot version, then build and
 *     insert the index rows through AddReqProc;
 *   - delete: remove the hanpin rows, queue the word-index deletes on
 *     DeleteTask, then delete the snapshot and per-document rows.
 *
 * Snapshot-version failures and conflicts skip the current document and move
 * on; other failures abort the request. The return value is the status left
 * by the last step that ran (0 on success).
 */
int CTaskIndexGen::index_gen_process(Json::Value &req, Json::Value &res){
    uint32_t app_id, fields_count = 0;
    int ret = 0;
    Json::Value table_content;

    // Control request: flip the read-only switch and finish.
    if (req.isMember("read_only") && req["read_only"].isInt()) {
        read_only = req["read_only"].asInt();
        return 0;
    }

    ret = decode_request(req, table_content, app_id, fields_count);
    if (ret != 0) {
        return ret;
    }
    log_debug("table_content: %s", table_content.toStyledString().c_str());

    if (fields_count == 0 || fields_count != table_content.size()) {
        return RT_ERROR_FIELD_COUNT;
    }
    if (!SplitManager::Instance()->is_effective_appid(app_id)) {
        return RT_NO_APPID;
    }
    if (read_only) {
        return RT_ERROR_INDEX_READONLY;
    }

    const int doc_count = (int)table_content.size();
    for (int doc_idx = 0; doc_idx < doc_count; doc_idx++) {
        int doc_version = 0;
        int old_version = 0;
        int trans_version = 0;
        UserTableContent content_fields(app_id);
        Json::Value json_field;

        ret = decode_fields(table_content[doc_idx], json_field, content_fields);

        if (RT_CMD_ADD == ret) {
            ret = g_IndexInstance.get_snapshot_active_doc(content_fields, old_version, res);
            if (ret == RT_NO_THIS_DOC) {
                trans_version = 1;
                doc_version = 1;
            } else if (ret == 0) {
                trans_version = old_version + 1;
                doc_version = old_version + 1;
            } else {
                log_error("get_snapshot_active_doc error.");
                return ret;
            }

            // Version 1 means a new snapshot row has to be inserted first;
            // anything else bumps the existing row.
            bool bump_existing = (trans_version != 1);
            if (!bump_existing) {
                ret = g_IndexInstance.insert_snapshot_version(content_fields, trans_version);
                if (ret != 0) {
                    // Insert failed: query the snapshot once more and fall
                    // back to bumping the row that is there.
                    ret = g_IndexInstance.get_snapshot_active_doc(content_fields, old_version, res);
                    if (ret != 0) {
                        log_error("doc_id[%s] insert error, continue.", content_fields.doc_id.c_str());
                        continue;
                    }
                    trans_version = old_version + 1;
                    doc_version = old_version + 1;
                    bump_existing = true;
                }
            }

            if (bump_existing) {
                // Update the snapshot's trans_version field.
                int affected_rows = 0;
                ret = g_IndexInstance.update_snapshot_version(content_fields, trans_version, affected_rows);
                if (ret != 0) {
                    log_error("doc_id[%s] update snapshot version error, continue.", content_fields.doc_id.c_str());
                    continue;
                }
                if (affected_rows == 0) {
                    ret = RT_UPDATE_SNAPSHOT_CONFLICT;
                    log_info("doc_id[%s] update snapshot conflict, continue.", content_fields.doc_id.c_str());
                    continue;
                }
            }

            InsertParam insert_param;
            insert_param.appid = app_id;
            insert_param.doc_id = content_fields.doc_id;
            insert_param.doc_version = doc_version;
            insert_param.trans_version = trans_version;

            AddReqProc add_req_proc(json_field, insert_param);
            ret = add_req_proc.do_insert_index(content_fields);
            if (ret != 0) {
                return ret;
            }
        }
        else if (RT_CMD_DELETE == ret) {
            // Delete from hanpin_index_data.
            vector<uint32_t> hanpin_fields;
            SplitManager::Instance()->getHanpinField(content_fields.appid, hanpin_fields);
            for (vector<uint32_t>::iterator hp = hanpin_fields.begin(); hp != hanpin_fields.end(); hp++) {
                stringstream hanpin_key;
                hanpin_key << content_fields.appid << "#" << *hp;
                ret = g_IndexInstance.delete_hanpin_index(hanpin_key.str(), content_fields.doc_id);
                if (ret != 0) {
                    log_error("delete error! errcode %d", ret);
                    return ret;
                }
            }

            ret = g_IndexInstance.get_snapshot_active_doc(content_fields, old_version,res);
            if (ret != 0 && ret != RT_NO_THIS_DOC) {
                log_error("get_snapshot_active_doc error! errcode %d", ret);
                return ret;
            }

            // Queue every indexed word of this document for asynchronous
            // deletion; a failed lookup simply leaves the map empty.
            map<uint32_t, vector<string> > index_res;
            g_IndexInstance.GetIndexData(gen_dtc_key_string(content_fields.appid, "20", content_fields.doc_id), old_version, index_res);
            for (map<uint32_t, vector<string> >::iterator per_field = index_res.begin(); per_field != index_res.end(); per_field++) {
                const uint32_t field = per_field->first;
                const vector<string> &words = per_field->second;
                for (int w = 0; w < (int)words.size(); w++) {
                    DeleteTask::GetInstance().RegisterInfo(words[w], content_fields.doc_id, old_version, field);
                }
            }

            // The snapshot delete does not use the doc_version currently.
            ret = g_IndexInstance.delete_snapshot_dtc(content_fields.doc_id, content_fields.appid, res);
            g_IndexInstance.delete_docid_index_dtc(gen_dtc_key_string(content_fields.appid, "20", content_fields.doc_id), content_fields.doc_id);
        }
    }

    return ret;
}
|
||||
|
||||
/*
 * Entry point for one incoming task: validate the command, parse the JSON
 * payload, run index_gen_process() and send the JSON reply.
 *
 * The reply always carries "code" (0 on success). A NULL task is only
 * reported to the profiler; there is nothing to reply to.
 */
void CTaskIndexGen::TaskNotify(CTaskRequest * curr)
{
    log_debug("CTaskIndexGen::TaskNotify start");
    common::CallerInfo caller_info = common::ProfilerMonitor::GetInstance().RegisterInfo(std::string("searchEngine.searchService.indexGenTask"));

    Json::Value res;
    res["code"] = 0;

    if (curr == NULL) {
        common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
        common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
        return;
    }

    // there is a race condition here:
    // curr may be deleted during process (in task->ReplyNotify())
    CTaskRequest * const task = curr;

    // Single-pass block: each validation failure fills `res` and breaks out
    // to the common reply code below.
    do {
        if (task->GetReqCmd() != SERVICE_INDEXGEN) {
            res["code"] = RT_ERROR_SERVICE_TYPE;
            res["reqcmd"] = task->GetReqCmd();
            res["message"] = "service type wrong! need 106";
            break;
        }

        const std::string req = task->buildRequsetString();
        log_debug("recv:%s\n",req.c_str());

        Json::Reader reader;
        Json::Value value;
        if (!reader.parse(req, value, false)) {
            log_error("parse json error!\ndata:%s errors:%s\n",req.c_str(),reader.getFormattedErrorMessages().c_str());
            res["code"] = RT_PARSE_JSON_ERR;
            res["message"] = reader.getFormattedErrorMessages();
            res["data"] = req;
            break;
        }
        if (!value.isObject()) {
            log_error("parse json error!\ndata:%s errors:%s\n",req.c_str(),reader.getFormattedErrorMessages().c_str());
            res["code"] = RT_PARSE_JSON_ERR;
            res["message"] = "it's not a json";
            res["data"] = req;
            break;
        }

        const int status = index_gen_process(value, res);
        if (status != 0) {
            res["code"] = status;
        }
    } while (0);

    Json::FastWriter writer;
    task->setResult(writer.write(res));
    task->ReplyNotify();

    common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
    return;
}
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: index_write.h
|
||||
*
|
||||
* Description: CTaskIndexGen class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef INDEX_GEN_H_
|
||||
#define INDEX_GEN_H_
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include "request_base.h"
|
||||
#include "index_conf.h"
|
||||
#include "dtcapi.h"
|
||||
#include "comm.h"
|
||||
#include "split_manager.h"
|
||||
using namespace std;
|
||||
|
||||
class CPollThread;
|
||||
class CTaskRequest;
|
||||
class SplitManager;
|
||||
|
||||
// Task dispatcher for the index-generation service. TaskNotify() receives a
// CTaskRequest, and the JSON request it carries is handled by
// index_gen_process().
class CTaskIndexGen : public CTaskDispatcher<CTaskRequest>
{
private:
    CPollThread * ownerThread;              // poll thread passed to the constructor
    CRequestOutput<CTaskRequest> output;    // downstream output; target set via BindDispatcher()
    int read_only;                          // NOTE(review): meaning not visible in this header

private:
    // Splits a request into its sub-request plus two numeric fields.
    // Returns an int status; presumably 0 on success -- TODO confirm in the .cc.
    int decode_request(const Json::Value &req, Json::Value &subreq, uint32_t &id, uint32_t &count);

public:
    CTaskIndexGen(CPollThread * o);
    virtual ~CTaskIndexGen();
    // Handles one parsed JSON request and fills `res`; TaskNotify() stores a
    // non-zero return value into res["code"].
    int index_gen_process(Json::Value &req,Json::Value &res);
    int pre_process(void);

    // Forwards the downstream dispatcher to the internal output object.
    inline void BindDispatcher(CTaskDispatcher<CTaskRequest> *p){
        output.BindDispatcher(p);
    }
    // Entry point invoked with each incoming task.
    virtual void TaskNotify(CTaskRequest * curr);
};
|
||||
|
||||
|
||||
|
||||
#endif /* INDEX_GEN_H_ */
|
|
@ -0,0 +1,281 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: main.cc
|
||||
*
|
||||
* Description: Entrance.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "agent_listen_pkg.h"
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "stat_index.h"
|
||||
#include "task_request.h"
|
||||
#include "config.h"
|
||||
#include "poll_thread.h"
|
||||
#include "log.h"
|
||||
#include "pipetask.h"
|
||||
#include "memcheck.h"
|
||||
#include "agent_process.h"
|
||||
#include "index_conf.h"
|
||||
#include "index_write.h"
|
||||
#include "snapshot_service.h"
|
||||
#include "top_index_service.h"
|
||||
#include "image_service.h"
|
||||
#include "dtc_tools.h"
|
||||
#include "comm.h"
|
||||
#include "version.h"
|
||||
#include "monitor.h"
|
||||
#include "index_tbl_op.h"
|
||||
|
||||
// Two-level stringification so that macro arguments are expanded before '#'.
#define STRING_HELPER(str) #str
#define STRING(x) STRING_HELPER(x)
#define VERSION_MAJOR 1
#define VERSION_MINOR 1
#define VERSION_BUILD 0
// "major.minor.build" as a string literal.
#define MAIN_VERSION \
    STRING(VERSION_MAJOR) "." \
    STRING(VERSION_MINOR) "." \
    STRING(VERSION_BUILD)

// Fallback used when the build does not define GIT_VERSION.
#ifndef GIT_VERSION
#define GIT_VERSION 0000000
#endif
// Full version string printed in the start/stop log lines.
#define INDEX_VERSION_STR MAIN_VERSION "-" STRING(GIT_VERSION)

// Set to 1 by sigterm_handler() (or when Startup_Thread() fails); polled by main().
volatile int stop = 0;
// Non-zero means daemonize; cleared in DaemonStart() when the config disables it.
int background = 1;
// Thread id of the main thread, recorded in DaemonStart().
pthread_t mainthreadid;

const char progname[] = "index_write";
const char *conf_filename = "../conf/index_write.conf";
int gMaxConnCnt;
// Service objects created in Startup_Thread().
static CAgentListenPkg *agentListener;
static CAgentProcess *agentProcess;
static CTaskIndexGen *indexGen;
static CTaskTopIndex *topIndex;
static CTaskSnapShot *snapShot;
static CTaskImage *image;
//single thread version
static CPollThread *workerThread;
|
||||
|
||||
static int Startup_Thread()
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
indexGen = NULL;
|
||||
topIndex = NULL;
|
||||
snapShot = NULL;
|
||||
|
||||
workerThread = new CPollThread("worker");
|
||||
if (workerThread->InitializeThread() == -1){
|
||||
log_error("InitializeThread error");
|
||||
return -1;
|
||||
}
|
||||
agentProcess = new CAgentProcess(workerThread);
|
||||
if(NULL == agentProcess){
|
||||
return -1;
|
||||
}
|
||||
switch(IndexConf::Instance()->GetGlobalConfig().service_type)
|
||||
{
|
||||
default:
|
||||
case SERVICE_INDEXGEN:
|
||||
indexGen = new CTaskIndexGen(workerThread);
|
||||
if(NULL == indexGen)
|
||||
return -1;
|
||||
agentProcess->BindDispatcher(indexGen);
|
||||
break;
|
||||
case SERVICE_TOPINDEX:
|
||||
topIndex = new CTaskTopIndex(workerThread);
|
||||
if(NULL == topIndex)
|
||||
return -1;
|
||||
ret = topIndex->pre_process();
|
||||
if(ret != 0){
|
||||
return -1;
|
||||
}
|
||||
agentProcess->BindDispatcher(topIndex);
|
||||
break;
|
||||
case SERVICE_SNAPSHOT:
|
||||
snapShot = new CTaskSnapShot(workerThread);
|
||||
if(NULL == snapShot)
|
||||
return -1;
|
||||
ret = snapShot->pre_process();
|
||||
if(ret != 0){
|
||||
return -1;
|
||||
}
|
||||
agentProcess->BindDispatcher(snapShot);
|
||||
break;
|
||||
case SERVICE_PIC:
|
||||
image = new CTaskImage(workerThread);
|
||||
if(NULL == image)
|
||||
return -1;
|
||||
ret = image->pre_process();
|
||||
if(0!= ret)
|
||||
return -1;
|
||||
agentProcess->BindDispatcher(image);
|
||||
break;
|
||||
}
|
||||
|
||||
agentListener = new CAgentListenPkg();
|
||||
if(agentListener->Bind(IndexConf::Instance()->GetGlobalConfig().listen_addr.c_str(), agentProcess, 0) < 0){
|
||||
log_error("bind addr error");
|
||||
return -1;
|
||||
}
|
||||
|
||||
workerThread->RunningThread();
|
||||
agentListener->Run();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int configInit(void)
|
||||
{
|
||||
if (!IndexConf::Instance()->ParseConf(conf_filename)) {
|
||||
cout << "load conf file error " << conf_filename << endl;
|
||||
return -RT_PARSE_CONF_ERR;
|
||||
}
|
||||
SGlobalIndexConfig &globalconfig = IndexConf::Instance()->GetGlobalConfig();
|
||||
|
||||
stat_init_log_(progname, globalconfig.logPath.c_str());
|
||||
stat_set_log_level_(globalconfig.iLogLevel);
|
||||
log_info("%s v%s: log level %d starting....", progname, INDEX_VERSION_STR, globalconfig.iLogLevel);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sigterm_handler(int signo)
|
||||
{
|
||||
stop = 1;
|
||||
}
|
||||
|
||||
// Writes the current process id into str_pid_file, truncating any previous
// content. A failure to open the file is logged and otherwise ignored.
void index_create_pid(string str_pid_file) {
    ofstream pid_file(str_pid_file.c_str(), ios::out | ios::trunc);
    if (!pid_file.is_open()) {
        log_error("open pid file error. file:%s, errno:%d, errmsg:%s.",
            str_pid_file.c_str(), errno, strerror(errno));
        return;
    }
    pid_file << getpid();
    pid_file.close();
}
|
||||
|
||||
static int DaemonStart()
|
||||
{
|
||||
struct sigaction sa;
|
||||
sigset_t sset;
|
||||
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_handler = sigterm_handler;
|
||||
sigaction(SIGINT, &sa, NULL);
|
||||
sigaction(SIGTERM, &sa, NULL);
|
||||
sigaction(SIGQUIT, &sa, NULL);
|
||||
sigaction(SIGHUP, &sa, NULL);
|
||||
|
||||
signal(SIGPIPE, SIG_IGN);
|
||||
signal(SIGCHLD, SIG_IGN);
|
||||
|
||||
sigemptyset(&sset);
|
||||
sigaddset(&sset, SIGTERM);
|
||||
sigaddset(&sset, SIGSEGV);
|
||||
sigaddset(&sset, SIGBUS);
|
||||
sigaddset(&sset, SIGABRT);
|
||||
sigaddset(&sset, SIGILL);
|
||||
sigaddset(&sset, SIGCHLD);
|
||||
sigaddset(&sset, SIGFPE);
|
||||
sigprocmask(SIG_UNBLOCK, &sset, &sset);
|
||||
if(!IndexConf::Instance()->GetGlobalConfig().background){
|
||||
background = 0;
|
||||
}
|
||||
|
||||
int ret = background ? daemon (1, 1) : 0;
|
||||
mainthreadid = pthread_self();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Shutdown cleanup: destroys the configuration and split-manager singletons,
// removes the pid file and destroys DTCTools.
// The path is taken by value on purpose: main() passes a string owned by the
// IndexConf singleton, which is destroyed here before the path is used.
void ServicePostRun(string str_pid_file) {
    IndexConf::Instance()->Destroy();
    SplitManager::Instance()->Destroy();

    const char *pid_path = str_pid_file.c_str();
    unlink(pid_path);

    DTCTools::Destroy();
}
|
||||
|
||||
int main()
|
||||
{
|
||||
CThread *mainThread;
|
||||
NEW(CThread("main", CThread::ThreadTypeProcess), mainThread);
|
||||
if(mainThread != NULL) {
|
||||
mainThread->InitializeThread();
|
||||
}
|
||||
if(configInit() != 0){
|
||||
log_error("config init error");
|
||||
return -1;
|
||||
}
|
||||
if (DaemonStart () < 0){
|
||||
log_error("DaemonStart error");
|
||||
return -1;
|
||||
}
|
||||
index_create_pid(IndexConf::Instance()->GetGlobalConfig().pid_file);
|
||||
InitStat(IndexConf::Instance()->GetGlobalConfig().service_name.c_str());
|
||||
SDTCHost &dtchost = IndexConf::Instance()->GetDTCIndexConfig();
|
||||
if (g_IndexInstance.InitServer(dtchost) != 0) {
|
||||
log_error("dtc init error");
|
||||
return -1;
|
||||
}
|
||||
if (g_delIndexInstance.InitServer(dtchost) != 0) {
|
||||
log_error("dtc init error");
|
||||
return -1;
|
||||
}
|
||||
SDTCHost &indexHost = IndexConf::Instance()->GetDTCIntelligentConfig();
|
||||
if (g_hanpinIndexInstance.InitServer(indexHost) != 0) {
|
||||
log_error("dtc init error");
|
||||
return -1;
|
||||
}
|
||||
if (!SplitManager::Instance()->Init(IndexConf::Instance()->GetGlobalConfig())) {
|
||||
log_error("g_splitManager init error");
|
||||
return -1;
|
||||
}
|
||||
//start statistic thread.
|
||||
statmgr.StartBackgroundThread();
|
||||
//ump monitor initialize
|
||||
common::ProfilerMonitor::GetInstance().Initialize();
|
||||
DeleteTask::GetInstance().Initialize();
|
||||
if(Startup_Thread() < 0){
|
||||
stop = 1;
|
||||
}
|
||||
log_info("%s v%s: running...", progname, INDEX_VERSION_STR);
|
||||
while(!stop){
|
||||
sleep(10);
|
||||
}
|
||||
|
||||
log_info("%s v%s: stoppping...", progname, INDEX_VERSION_STR);
|
||||
|
||||
if(workerThread){
|
||||
workerThread->interrupt();
|
||||
}
|
||||
|
||||
//DELETE(workerThread);
|
||||
DELETE(agentListener);
|
||||
DELETE(indexGen);
|
||||
DELETE(agentProcess);
|
||||
ServicePostRun(IndexConf::Instance()->GetGlobalConfig().pid_file);
|
||||
statmgr.StopBackgroundThread();
|
||||
log_info("%s v%s: stopped", progname, INDEX_VERSION_STR);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ends here */
|
|
@ -0,0 +1,278 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: snapshot_service.cc
|
||||
*
|
||||
* Description: CTaskSnapShot class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "log.h"
|
||||
#include "poll_thread.h"
|
||||
#include "task_request.h"
|
||||
#include "dtc_tools.h"
|
||||
#include "comm.h"
|
||||
#include "snapshot_service.h"
|
||||
#include "monitor.h"
|
||||
#include "chash.h"
|
||||
|
||||
|
||||
// Builds the dispatcher on poll thread `o`; the same thread is handed to the
// base class, kept as ownerThread and used for the output object.
CTaskSnapShot::CTaskSnapShot(CPollThread * o)
    : CTaskDispatcher<CTaskRequest>(o)
    , ownerThread(o)
    , output(o)
{
}

// Nothing to release explicitly.
CTaskSnapShot::~CTaskSnapShot()
{
}
|
||||
|
||||
// Extracts the three mandatory top-level members of a snapshot request:
//   "table_content" (array) -> subreq
//   "appid"         (int)   -> id
//   "fields_count"  (int)   -> count
// Returns 0 on success, otherwise the RT_NO_* code of the first member that
// is missing or has the wrong type. Outputs assigned before the failing
// check keep their new values.
static int decode_request(const Json::Value &req, Json::Value &subreq, int &id, int &count){
    if (!req.isMember("table_content") || !req["table_content"].isArray()) {
        return RT_NO_TABLE_CONTENT;
    }
    subreq = req["table_content"];

    if (!req.isMember("appid") || !req["appid"].isInt()) {
        return RT_NO_APPID;
    }
    id = req["appid"].asInt();

    if (!req.isMember("fields_count") || !req["fields_count"].isInt()) {
        return RT_NO_FIELD_COUNT;
    }
    count = req["fields_count"].asInt();

    return 0;
}
|
||||
|
||||
// Decodes one entry of "table_content".
// Returns:
//   0                  when there is no string "cmd" member,
//   RT_CMD_GET         for cmd == "snapshot",
//   RT_CMD_UPDATE      for cmd == "update_snapshot",
//   RT_ERROR_FIELD_CMD for any other cmd.
// When "fields" is an object, doc_id (string) and top (int) are copied into
// `fields`; weight (int) is copied for the update command only. Missing or
// mistyped members are left untouched.
static int decode_field(Json::Value table_content,UserTableContent &fields){
    if (!table_content.isMember("cmd") || !table_content["cmd"].isString()) {
        return 0;
    }

    string cmd = table_content["cmd"].asString();
    const bool is_get = (cmd == "snapshot");
    const bool is_update = (cmd == "update_snapshot");
    if (!is_get && !is_update) {
        return RT_ERROR_FIELD_CMD;
    }

    if (table_content.isMember("fields") && table_content["fields"].isObject()) {
        Json::Value field = table_content["fields"];
        if (field.isMember("doc_id") && field["doc_id"].isString()) {
            fields.doc_id = field["doc_id"].asString();
        }
        if (field.isMember("top") && field["top"].isInt()) {
            fields.top = field["top"].asInt();
        }
        if (is_update && field.isMember("weight") && field["weight"].isInt()) {
            fields.weight = field["weight"].asInt();
        }
    }

    if (is_get) {
        return RT_CMD_GET;
    }
    return RT_CMD_UPDATE;
}
|
||||
|
||||
// Issues one DTC get for the snapshot row of fields.doc_id, requesting the
// "extend" and "created_time" columns. The key tag is "11" when fields.top
// is 1, otherwise "10". Returns the result of Execute(); the return values
// of SetKey()/Need() are not checked.
static int get_snapshot_execute(DTC::Server* dtc_server,const UserTableContent &fields,DTC::Result &rst){
    DTC::GetRequest getReq(dtc_server);

    string top_tag = (fields.top == 1) ? "11" : "10";

    getReq.SetKey(gen_dtc_key_string(fields.appid, top_tag, fields.doc_id).c_str());
    getReq.Need("extend");
    getReq.Need("created_time");

    return getReq.Execute(rst);
}
|
||||
|
||||
// Fetches the snapshot of fields.doc_id from DTC and stores it in `fields`
// (content <- "extend", publish_time <- "created_time"; title and author are
// cleared).
// Returns 0 on success, RT_ERROR_GET_SNAPSHOT when no server is available or
// the request fails (the error text goes to res[MESSAGE]), and
// RT_NO_THIS_DOC when the result has no rows.
int CTaskSnapShot::get_snapshot_dtc(UserTableContent &fields,Json::Value &res){
    DTC::Server* dtc_server = index_servers.GetServer();
    if (NULL == dtc_server) {
        log_error("snapshot server connect error!");
        return RT_ERROR_GET_SNAPSHOT;
    }

    DTC::Result rst;
    int ret = get_snapshot_execute(dtc_server, fields, rst);
    if (-110 == ret) {
        // Retry exactly once on -110.
        // NOTE(review): -110 looks like -ETIMEDOUT -- confirm against the DTC API.
        rst.Reset();
        ret = get_snapshot_execute(dtc_server, fields, rst);
    }
    if (0 != ret) {
        log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
        res[MESSAGE] = rst.ErrorMessage();
        return RT_ERROR_GET_SNAPSHOT;
    }

    const int cnt = rst.NumRows();
    if (cnt <= 0) {
        res["doc_id"] = fields.doc_id;
        res[MESSAGE] = "no this doc";
        return RT_NO_THIS_DOC;
    }

    // Every row is visited and the values of the last row win. The previous
    // early exit required a non-empty title, but title is always reset to ""
    // just before the check, so it could never fire and has been dropped.
    for (int i = 0; i < cnt; i++) {
        rst.FetchRow();
        fields.title = "";
        fields.content = rst.StringValue("extend");
        fields.publish_time = rst.IntValue("created_time");
        fields.author = "";
    }

    return 0;
}
|
||||
|
||||
int CTaskSnapShot::pre_process(void){
|
||||
DTCTools *dtc_tools = DTCTools::Instance();
|
||||
dtc_tools->init_servers(index_servers, IndexConf::Instance()->GetDTCIndexConfig());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CTaskSnapShot::update_sanpshot_dtc(const UserTableContent &fields,Json::Value &res){
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = index_servers.GetServer();
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
string top_tag = "10";
|
||||
if (fields.top == 1) {
|
||||
top_tag = "11";
|
||||
}
|
||||
DTC::UpdateRequest updateReq(dtc_server);
|
||||
ret = updateReq.SetKey(gen_dtc_key_string(fields.appid, top_tag, fields.doc_id).c_str());
|
||||
updateReq.Set("weight",fields.weight);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = updateReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("updateReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_UPDATE_SNAPSHOT;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Handles one parsed snapshot request. The request must carry exactly one
// table_content entry with fields_count == 1. A "snapshot" command reads the
// document and copies it into `res`; an "update_snapshot" command updates it.
// Returns 0 on success or a non-zero RT_* code.
// NOTE(review): when the command is valid but doc_id is empty, the RT_CMD_*
// value produced by decode_field() is returned unchanged -- confirm that
// callers expect this as the result code.
int CTaskSnapShot::snapshot_process(Json::Value &req,Json::Value &res){
    int app_id, fields_count = 0;
    Json::Value table_content;

    int ret = decode_request(req, table_content, app_id, fields_count);
    if (0 != ret) {
        return ret;
    }

    const bool single_entry =
        (1 == fields_count) && (fields_count == (int)table_content.size());
    if (!single_entry) {
        res["message"] = "fields_count and table size must be 1";
        return RT_ERROR_FIELD_COUNT;
    }

    UserTableContent content_fields(app_id);
    ret = decode_field(table_content[0],content_fields);
    const bool has_doc_id = content_fields.doc_id.length() > 0;

    if (RT_CMD_GET == ret && has_doc_id) {
        ret = get_snapshot_dtc(content_fields,res);
        if (0 == ret) {
            res["doc_id"] = content_fields.doc_id;
            res["title"] = content_fields.title;
            res["content"] = content_fields.content;
            res["author"] = content_fields.author;
            res["publish_time"] = content_fields.publish_time;
        }
    } else if (RT_CMD_UPDATE == ret && has_doc_id) {
        ret = update_sanpshot_dtc(content_fields,res);
    }
    return ret;
}
|
||||
|
||||
// Entry point for one incoming task: validates the request command, parses
// the JSON payload, runs snapshot_process() and replies with the serialized
// result. res["code"] is 0 unless a step fails.
void CTaskSnapShot::TaskNotify(CTaskRequest * curr)
{
    log_debug("CTaskSnapShot::TaskNotify start");
    common::CallerInfo caller_info = common::ProfilerMonitor::GetInstance().RegisterInfo(std::string("searchEngine.searchService.snapshotTask"));

    if (NULL == curr) {
        common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
        common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
        return;
    }

    //there is a race condition here:
    //curr may be deleted during process (in task->ReplyNotify())
    CTaskRequest * task = curr;

    Json::Reader reader;
    Json::FastWriter writer;
    Json::Value value, res;
    std::string req;
    res["code"] = 0;

    if (SERVICE_SNAPSHOT != task->GetReqCmd()) {
        res["code"] = RT_ERROR_SERVICE_TYPE;
        res["reqcmd"] = task->GetReqCmd();
        res["message"] = "service type wrong! need 108";
    } else {
        req = task->buildRequsetString();
        log_debug("recv:%s\n",req.c_str());

        if (!reader.parse(req,value,false)) {
            log_error("parse json error!\ndata:%s errors:%s\n",req.c_str(),reader.getFormattedErrorMessages().c_str());
            res["code"] = RT_PARSE_JSON_ERR;
            res["message"] = reader.getFormattedErrorMessages();
            res["data"] = req;
        } else if (!value.isObject()) {
            log_error("parse json error!\ndata:%s errors:%s\n",req.c_str(),reader.getFormattedErrorMessages().c_str());
            res["code"] = RT_PARSE_JSON_ERR;
            res["message"] = "it's not a json";
            res["data"] = req;
        } else {
            const int ret = snapshot_process(value,res);
            if (0 != ret) {
                res["code"] = ret;
            }
        }
    }

    // Every path past the NULL check replies exactly once.
    task->setResult(writer.write(res));
    task->ReplyNotify();

    common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
    return;
}
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: snapshot_service.h
|
||||
*
|
||||
* Description: CTaskSnapShot class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef SNAPSHOT_SERVICE_H_
|
||||
#define SNAPSHOT_SERVICE_H_
|
||||
#include "request_base.h"
|
||||
#include "index_conf.h"
|
||||
#include "dtcapi.h"
|
||||
#include "split_manager.h"
|
||||
using namespace std;
|
||||
|
||||
// Task dispatcher for the snapshot service: reads a stored document snapshot
// from DTC or updates its weight, driven by JSON requests delivered through
// TaskNotify().
class CTaskSnapShot : public CTaskDispatcher<CTaskRequest>
{
private:
    CPollThread * ownerThread;              // poll thread passed to the constructor
    CRequestOutput<CTaskRequest> output;    // downstream output; target set via BindDispatcher()
    DTC::DTCServers index_servers;          // DTC servers, initialised in pre_process()

private:
    // Reads the snapshot of fields.doc_id into `fields`; 0 on success.
    int get_snapshot_dtc(UserTableContent &fields,Json::Value &res);
    // Writes fields.weight for fields.doc_id; 0 on success.
    int update_sanpshot_dtc(const UserTableContent &fields,Json::Value &res);

public:
    CTaskSnapShot(CPollThread * o);
    virtual ~CTaskSnapShot();
    // Initialises index_servers from the DTC index configuration.
    int pre_process(void);
    // Handles one parsed JSON request and fills `res`; 0 on success.
    int snapshot_process(Json::Value &req,Json::Value &res);

    // Forwards the downstream dispatcher to the internal output object.
    inline void BindDispatcher(CTaskDispatcher<CTaskRequest> *p)
    {
        output.BindDispatcher(p);
    }
    // Entry point invoked with each incoming task.
    virtual void TaskNotify(CTaskRequest * curr);
};
|
||||
|
||||
|
||||
|
||||
#endif /* SNAPSHOT_SERVICE_H_ */
|
|
@ -0,0 +1,335 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: split_manager.cc
|
||||
*
|
||||
* Description: SplitManager class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "split_manager.h"
|
||||
#include "log.h"
|
||||
#include "stem.h"
|
||||
#include "comm.h"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
using namespace std;
|
||||
|
||||
// (word, count) pair ordered by CmpByValue.
typedef std::pair<std::string, int> PAIR;

// Strict-weak-ordering functor that sorts PAIRs by descending `second`.
// operator() is const so the functor can be used through const references
// and as the comparator of standard containers and algorithms.
struct CmpByValue {
    bool operator()(const PAIR& lhs, const PAIR& rhs) const {
        return lhs.second > rhs.second;
    }
};
|
||||
|
||||
// Starts with an empty stop-word set; all dictionaries are loaded by Init().
SplitManager::SplitManager()
{
    stop_word_set.clear();
}

// Members clean up after themselves.
SplitManager::~SplitManager()
{
}
|
||||
|
||||
// Converts a C string to an integer with atoi(); a NULL pointer yields 0.
static int32_t ToInt(const char* str) {
    return (NULL == str) ? 0 : atoi(str);
}
|
||||
|
||||
// Converts a C string to a std::string; a NULL pointer yields "".
static std::string ToString(const char* str) {
    if (str != NULL) {
        return str;
    }
    return "";
}
|
||||
|
||||
bool SplitManager::fetch_tbinfo_from_mysql_to_map(){
|
||||
ifstream app_filed_infile;
|
||||
app_filed_infile.open("../conf/app_field_define.txt");
|
||||
if (app_filed_infile.is_open() == false) {
|
||||
log_error("open file error: ../conf/app_field_define.txt");
|
||||
return false;
|
||||
}
|
||||
|
||||
string str;
|
||||
while (getline(app_filed_infile, str))
|
||||
{
|
||||
vector<string> str_vec = splitEx(str, "\t");
|
||||
if (str_vec.size() >= 11) {
|
||||
struct table_info tbinfo;
|
||||
int32_t row_index = 1;
|
||||
uint32_t appid = ToInt(str_vec[row_index++].c_str());
|
||||
string field_name = ToString(str_vec[row_index++].c_str());
|
||||
tbinfo.is_primary_key = ToInt(str_vec[row_index++].c_str());
|
||||
tbinfo.field_type = ToInt(str_vec[row_index++].c_str());
|
||||
tbinfo.index_tag = ToInt(str_vec[row_index++].c_str());
|
||||
tbinfo.snapshot_tag = ToInt(str_vec[row_index++].c_str());
|
||||
tbinfo.segment_tag = ToInt(str_vec[row_index++].c_str());
|
||||
tbinfo.field_value = ToInt(str_vec[row_index++].c_str());
|
||||
row_index++;
|
||||
tbinfo.segment_feature = ToInt(str_vec[row_index++].c_str());
|
||||
if (str_vec.size() >= 12){
|
||||
// union_key的格式是:27,1,26,数字代表的是field对应的value值
|
||||
tbinfo.index_info = ToString(str_vec[row_index].c_str());
|
||||
log_debug("union key[%s]", tbinfo.index_info.c_str());
|
||||
}
|
||||
log_debug("appid: %d, field_name: %s", appid, field_name.c_str());
|
||||
tableDefine[appid][field_name] = tbinfo;
|
||||
}
|
||||
}
|
||||
log_debug("tableDefine size: %d", (int)tableDefine.size());
|
||||
app_filed_infile.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SplitManager::Init(const SGlobalIndexConfig &global_cfg) {
|
||||
|
||||
bool ret = seg.Init3(global_cfg.trainingPath,global_cfg.sWordsPath);
|
||||
if (ret == false) {
|
||||
log_error("seg init error.");
|
||||
return false;
|
||||
}
|
||||
|
||||
ifstream inf;
|
||||
string s;
|
||||
string word;
|
||||
split_mode = global_cfg.sSplitMode;
|
||||
//load stop words
|
||||
inf.open(global_cfg.stopWordsPath.c_str());
|
||||
if (inf.is_open() == false) {
|
||||
printf("open file error: %s.\n", "./stop_words.dict");
|
||||
return false;
|
||||
}
|
||||
while (getline(inf, s)) {
|
||||
stop_word_set.insert(s);
|
||||
}
|
||||
inf.close();
|
||||
log_info("load %d words from stop_words.dict",(int)stop_word_set.size());
|
||||
|
||||
string str;
|
||||
ifstream phonetic_infile;
|
||||
uint32_t phonetic_id = 0;
|
||||
uint32_t character_id = 0;
|
||||
string phonetic;
|
||||
string charact;
|
||||
phonetic_infile.open(global_cfg.sPhoneticPath.c_str());
|
||||
if (phonetic_infile.is_open() == false) {
|
||||
log_error("open file error: %s.", global_cfg.sPhoneticPath.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
while (getline(phonetic_infile, str))
|
||||
{
|
||||
vector<string> str_vec = splitEx(str, "\t");
|
||||
if (str_vec.size() == 2) {
|
||||
phonetic_id = atoi(str_vec[0].c_str());
|
||||
phonetic = str_vec[1];
|
||||
phonetic_map[phonetic] = phonetic_id;
|
||||
}
|
||||
}
|
||||
phonetic_infile.close();
|
||||
|
||||
ifstream phonetic_base_infile;
|
||||
phonetic_base_infile.open(global_cfg.sPhoneticBasePath.c_str());
|
||||
if (phonetic_base_infile.is_open() == false) {
|
||||
log_error("open file error: %s.", global_cfg.sPhoneticBasePath.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
while (getline(phonetic_base_infile, str))
|
||||
{
|
||||
vector<string> str_vec = splitEx(str, "\t");
|
||||
if (str_vec.size() == 2) {
|
||||
charact = str_vec[0];
|
||||
phonetic = str_vec[1];
|
||||
charact_phonetic_map.insert(make_pair(charact, phonetic));
|
||||
}
|
||||
}
|
||||
phonetic_base_infile.close();
|
||||
|
||||
ifstream character_infile;
|
||||
character_infile.open(global_cfg.sCharacterPath.c_str());
|
||||
if (character_infile.is_open() == false) {
|
||||
log_error("open file error: %s.", global_cfg.sCharacterPath.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
while (getline(character_infile, str))
|
||||
{
|
||||
vector<string> str_vec = splitEx(str, "\t");
|
||||
if (str_vec.size() == 2) {
|
||||
character_id = atoi(str_vec[0].c_str());
|
||||
charact = str_vec[1];
|
||||
charact_map[charact] = character_id;
|
||||
}
|
||||
}
|
||||
character_infile.close();
|
||||
log_info("load %d words from phonetic_map, %d words from charact_map", (int)phonetic_map.size(), (int)charact_map.size());
|
||||
|
||||
return fetch_tbinfo_from_mysql_to_map();
|
||||
}
|
||||
|
||||
// Decides whether `word` can be indexed and, if so, stores its id in `id`.
//  - Stop words are rejected.
//  - Words starting with an ASCII letter are stemmed and looked up through
//    seg.GetWordInfo() first.
//  - Otherwise (or if that lookup left word_id at 0) the dictionary-only
//    lookup is used.
//  - Words unknown to both are accepted only if they parse as a non-zero
//    decimal number; the number is divided by 10 until it is <= MAXNUMBER
//    and the id becomes NUMBER_ID + number.
bool SplitManager::wordValid(string word, uint32_t appid, uint32_t &id) {
    if (stop_word_set.find(word) != stop_word_set.end()) {
        log_debug("word:%s invalid,in the stop.dict",word.c_str());
        return false;
    }

    string output_word = word;
    WordInfo wordinfo;
    const bool starts_lower = (word[0] >= 'a' && word[0] <= 'z');
    const bool starts_upper = (word[0] >= 'A' && word[0] <= 'Z');
    if (starts_lower || starts_upper) {
        output_word = stem(word);
        // English words must be looked up through seg.GetWordInfo() first.
        seg.GetWordInfo(output_word, appid, wordinfo);
    }

    // Known word: either the lookup above found it or the dictionary does.
    if (0 != wordinfo.word_id || GetWordInfo(output_word,appid,wordinfo)) {
        id = wordinfo.word_id;
        return true;
    }

    uint64_t int_word = strtoull(word.c_str(),NULL,10);
    if (0 == int_word) {
        log_debug("word:%s invalid,not in the wordbase",output_word.c_str());
        return false;
    }

    while (int_word > MAXNUMBER) {
        int_word = int_word/10;
    }
    id = NUMBER_ID + int_word;
    return true;
}
|
||||
|
||||
// Looks up the id of a character in charact_map. `id` is set to 0 when the
// character is unknown. Always returns true.
bool SplitManager::GetCharactId(string charact, uint32_t &id) {
    map<string, uint32_t>::iterator hit = charact_map.find(charact);
    if (hit == charact_map.end()) {
        id = 0;
    } else {
        id = hit->second;
    }
    return true;
}
|
||||
|
||||
// Looks up the id of a phonetic string in phonetic_map. `id` is set to 0
// when the phonetic is unknown. Always returns true.
bool SplitManager::GetPhoneticId(string phonetic, uint32_t &id) {
    map<string, uint32_t>::iterator hit = phonetic_map.find(phonetic);
    if (hit == phonetic_map.end()) {
        id = 0;
    } else {
        id = hit->second;
    }
    return true;
}
|
||||
|
||||
// Returns every phonetic registered for `charact` in charact_phonetic_map,
// or an empty vector when the character is unknown.
// equal_range() yields exactly the run of entries with this key, so the
// loop can neither start in the middle of the run nor step past its end,
// and the key is searched only once.
vector<string> SplitManager::GetPhonetic(string charact) {
    vector<string> vec;
    typedef multimap<string, string>::iterator PhoneticIter;
    pair<PhoneticIter, PhoneticIter> range = charact_phonetic_map.equal_range(charact);
    for (PhoneticIter iter = range.first; iter != range.second; ++iter) {
        vec.push_back(iter->second);
    }
    return vec;
}
|
||||
|
||||
struct table_info *SplitManager::get_table_info(uint32_t appid, string field_name){
|
||||
if(tableDefine.find(appid) != tableDefine.end()){
|
||||
if(tableDefine[appid].find(field_name) != tableDefine[appid].end()){
|
||||
return &(tableDefine[appid][field_name]);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool SplitManager::getHanpinField(uint32_t appid, vector<uint32_t> & field_vec) {
|
||||
if (tableDefine.find(appid) != tableDefine.end()) {
|
||||
map<string, table_info> stMap = tableDefine[appid];
|
||||
map<string, table_info>::iterator iter = stMap.begin();
|
||||
for (; iter != stMap.end(); iter++) {
|
||||
table_info tInfo = iter->second;
|
||||
if (tInfo.segment_tag == 3 || tInfo.segment_tag == 4) {
|
||||
field_vec.push_back(tInfo.field_value);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SplitManager::getUnionKeyField(uint32_t appid, vector<string> & field_vec){
|
||||
if (tableDefine.find(appid) != tableDefine.end()) {
|
||||
map<string, table_info> stMap = tableDefine[appid];
|
||||
map<string, table_info>::iterator iter = stMap.begin();
|
||||
for (; iter != stMap.end(); iter++) {
|
||||
table_info tInfo = iter->second;
|
||||
if(tInfo.field_type == FIELD_INDEX){
|
||||
field_vec.push_back(tInfo.index_info);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reports whether `appid` may be used.
// NOTE(review): the lookup result does not influence the answer -- both the
// known and the unknown case return true. Confirm whether unknown
// applications are meant to be accepted before changing this.
bool SplitManager::is_effective_appid(uint32_t appid){
    const bool known = (tableDefine.count(appid) > 0);
    if (known) {
        return true;
    }
    return true;
}
|
||||
|
||||
// Drops the loaded stop words; the other dictionaries are left as they are.
void SplitManager::DeInit()
{
    stop_word_set.clear();
}
|
||||
|
||||
// Segments `str` for application `appid` with the configured split mode and
// returns the result of seg.cut_for_search(). The elapsed time of the
// segmentation call is written to the debug log in milliseconds.
vector<vector<string> > SplitManager::split(string str,uint32_t appid) {
    iutf8string test(str);
    vector<vector<string> > res_all;
    struct timeval tv;

    gettimeofday(&tv, NULL);
    unsigned int started_us = tv.tv_sec * 1000000 + tv.tv_usec;

    seg.cut_for_search(test,appid,res_all,split_mode);

    gettimeofday(&tv, NULL);
    unsigned int finished_us = tv.tv_sec * 1000000 + tv.tv_usec;

    log_debug("split time:%u ms",(finished_us-started_us)/1000);
    return res_all;
}
|
||||
|
||||
// Cuts `str` into n-grams through seg.cut_ngram(), passing the UTF-8 length
// of the string as the last argument, and returns the pieces.
vector<string> SplitManager::split(string str) {
    iutf8string text(str);
    vector<string> pieces;
    seg.cut_ngram(text, pieces, text.length());
    return pieces;
}
|
||||
|
||||
// Removes leading and trailing space characters (' ' only) from `str` in
// place and returns a copy of the trimmed string.
std::string trim(std::string& str)
{
    // Strip leading spaces; an all-space string is emptied entirely.
    const std::string::size_type first_kept = str.find_first_not_of(' ');
    str.erase(0, first_kept);

    // Strip trailing spaces; npos + 1 wraps to 0 for an empty string.
    const std::string::size_type last_kept = str.find_last_not_of(' ');
    str.erase(last_kept + 1);

    return str;
}
|
||||
|
||||
// Returns the text enclosed by the first "((" and the last "))" of `str`.
// When the string does not contain such a pair, with "))" starting after the
// end of "((", the input is returned unchanged. `str` itself is not
// modified.
// The markers are searched as two-character substrings, so a lone '(' or
// ')' or a ")...(" ordering can neither yield a mangled result nor make
// substr() throw.
std::string delPrefix(std::string& str){
    const std::string::size_type open_pos = str.find("((");
    const std::string::size_type close_pos = str.rfind("))");
    if (open_pos == std::string::npos || close_pos == std::string::npos) {
        return str;
    }
    if (close_pos < open_pos + 2) {
        return str;
    }
    return str.substr(open_pos + 2, close_pos - open_pos - 2);
}
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: split_manager.h
|
||||
*
|
||||
* Description: SplitManager class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __SPLIT_MANAGER_H__
|
||||
#define __SPLIT_MANAGER_H__
|
||||
#include <string>
|
||||
#include <sys/types.h>
|
||||
#include <vector>
|
||||
#include "index_conf.h"
|
||||
#include "split_tool.h"
|
||||
using namespace std;
|
||||
|
||||
#define NUMBER_ID 500000000
|
||||
#define MAXNUMBER 100000000
|
||||
|
||||
/*
 * Definition of one field of an application table, as returned by
 * SplitManager::get_table_info().
 *
 * NOTE(review): the meaning of most members is not visible in this file;
 * only the uses noted below are confirmed by callers.
 */
struct table_info{
    int is_primary_key;
    int field_type;        // callers treat 1 as an integer field and >1 as a string field
    int index_tag;
    int snapshot_tag;      // 1: the field is copied into the snapshot document
    int field_value;
    int segment_tag;
    int segment_feature;
    string index_info;
};
|
||||
|
||||
class SplitManager {
|
||||
public:
|
||||
SplitManager();
|
||||
~SplitManager();
|
||||
static SplitManager *Instance(){
|
||||
return CSingleton<SplitManager>::Instance();
|
||||
}
|
||||
|
||||
static void Destroy(){
|
||||
CSingleton<SplitManager>::Destroy();
|
||||
}
|
||||
|
||||
bool Init(const SGlobalIndexConfig &global_cfg);
|
||||
void DeInit();
|
||||
vector<vector<string> > split(string str,uint32_t appid);
|
||||
vector<string> split(string str);
|
||||
bool wordValid(string word, uint32_t appid, uint32_t &id);
|
||||
bool GetWordInfo(string word, uint32_t appid, WordInfo &word_info) {
|
||||
return seg.GetWordInfoFromDictOnly(word, appid, word_info);
|
||||
}
|
||||
struct table_info *get_table_info(uint32_t appid, string filed_name);
|
||||
bool is_effective_appid(uint32_t appid);
|
||||
bool GetCharactId(string charact, uint32_t &id);
|
||||
bool GetPhoneticId(string phonetic, uint32_t &id);
|
||||
vector<string> GetPhonetic(string charact);
|
||||
bool getHanpinField(uint32_t appid, vector<uint32_t> & field_vec);
|
||||
bool getUnionKeyField(uint32_t appid, vector<string> & field_vec);
|
||||
|
||||
private:
|
||||
bool fetch_tbinfo_from_mysql_to_map();
|
||||
|
||||
FBSegment seg;
|
||||
set<string> stop_word_set;
|
||||
map<string, u_int32_t> word_map;
|
||||
map<uint32_t,map<string,table_info> > tableDefine;
|
||||
string split_mode;
|
||||
map<string, uint32_t> charact_map;
|
||||
map<string, uint32_t> phonetic_map;
|
||||
multimap<string, string> charact_phonetic_map;
|
||||
};
|
||||
|
||||
string trim(string& str);
|
||||
string delPrefix(string& str);
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,12 @@
|
|||
#!/bin/sh
# Start the index_write binary from the directory this script lives in.

# Quote the path so install directories containing spaces work, and stop if
# the directory change fails instead of looking for the binary elsewhere.
cd "$(dirname "$0")" || exit 1

proname="index_write"

if [ -f "$proname" ] ; then
    chmod 755 "$proname"
    ./"$proname"
else
    echo "no program"
fi
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
#!/bin/sh
# Stop index_write: signal the pid recorded in index_write.pid (next to this
# script) and remove the pid file.

# Quote the path so install directories containing spaces work; give up if
# the script directory cannot be resolved.
root=$(cd "$(dirname "$0")" && pwd) || exit 1

pid_file="$root/index_write.pid"

if [ -f "$pid_file" ] ; then
    pid=$(cat "$pid_file")
    # An empty pid file would otherwise run `kill` with no arguments.
    if [ -n "$pid" ] ; then
        # Left unquoted on purpose: tolerates stray whitespace in the file.
        kill $pid
    fi
    /bin/rm -f "$pid_file"
else
    echo "No pid file."
fi
|
||||
|
|
@ -0,0 +1,466 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: top_index_service.cc
|
||||
*
|
||||
* Description: class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "top_index_service.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "log.h"
|
||||
#include "poll_thread.h"
|
||||
#include "task_request.h"
|
||||
#include "dtc_tools.h"
|
||||
#include "comm.h"
|
||||
#include "index_clipping.h"
|
||||
#include "monitor.h"
|
||||
#include "chash.h"
|
||||
|
||||
/*
 * Bind the dispatcher base, the owner-thread pointer and the request output
 * to poll thread `o`.
 */
CTaskTopIndex::CTaskTopIndex(CPollThread * o)
    : CTaskDispatcher<CTaskRequest>(o)
    , ownerThread(o)
    , output(o)
{
}
|
||||
|
||||
/* Nothing to release explicitly; members clean up after themselves. */
CTaskTopIndex::~CTaskTopIndex() {}
|
||||
|
||||
int CTaskTopIndex::insert_snapshot_dtc(const UserTableContent &fields,int &doc_version,Json::Value &res){
|
||||
int ret;
|
||||
DTC::Server* dtc_server = index_servers.GetServer();
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
DTC::InsertRequest insertReq(dtc_server);
|
||||
insertReq.SetKey(gen_dtc_key_string(fields.appid, "11", fields.doc_id).c_str());
|
||||
insertReq.Set("doc_id", fields.doc_id.c_str());
|
||||
insertReq.Set("doc_version", doc_version);
|
||||
insertReq.Set("created_time", fields.publish_time);
|
||||
insertReq.Set("field", fields.top);
|
||||
insertReq.Set("word_freq", 0);
|
||||
insertReq.Set("weight", 0);
|
||||
insertReq.Set("location", "");
|
||||
insertReq.Set("start_time", 0);
|
||||
insertReq.Set("end_time", 0);
|
||||
insertReq.Set("extend", fields.content.c_str());
|
||||
DTC::Result rst;
|
||||
ret = insertReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
|
||||
log_error("insert request error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return -1;
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CTaskTopIndex::delete_snapshot_dtc(const string &doc_id, uint32_t appid, Json::Value &res) {
|
||||
int ret;
|
||||
DTC::Server* dtc_server = index_servers.GetServer();
|
||||
if (NULL == dtc_server) {
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
DTC::DeleteRequest deleteReq(dtc_server);
|
||||
ret = deleteReq.SetKey(gen_dtc_key_string(appid, "11", doc_id).c_str());
|
||||
ret = deleteReq.EQ("doc_id", doc_id.c_str());
|
||||
|
||||
DTC::Result rst;
|
||||
ret = deleteReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("delete request error! ,errno %d ,errmsg %s, errfrom %s\n", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_DELETE_SNAPSHOT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Run one DTC get request for the snapshot row of `fields.doc_id`,
 * requesting only the "doc_version" column. The outcome is stored in `rst`.
 *
 * Returns whatever DTC::GetRequest::Execute() returns.
 */
static int get_snapshot_execute(DTC::Server* dtc_server,const UserTableContent &fields,DTC::Result &rst){
    const string snapshot_key = gen_dtc_key_string(fields.appid, "11", fields.doc_id);

    DTC::GetRequest getReq(dtc_server);
    // NOTE(review): the results of SetKey()/Need() are not checked.
    getReq.SetKey(snapshot_key.c_str());
    getReq.Need("doc_version");

    return getReq.Execute(rst);
}
|
||||
|
||||
int CTaskTopIndex::get_snapshot_active_doc(const UserTableContent &fields,int &doc_version,Json::Value &res){
|
||||
int ret;
|
||||
DTC::Server* dtc_server = index_servers.GetServer();
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
DTC::Result rst;
|
||||
ret = get_snapshot_execute(dtc_server,fields,rst);
|
||||
if (ret != 0) {
|
||||
if (ret == -110) {
|
||||
rst.Reset();
|
||||
ret = get_snapshot_execute(dtc_server,fields,rst);
|
||||
if (ret != 0) {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
}
|
||||
else {
|
||||
log_error("get request error! errcode %d,errmsg %s, errfrom %s", ret, rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
}
|
||||
int cnt = rst.NumRows();
|
||||
struct index_item item;
|
||||
if (rst.NumRows() <= 0) {
|
||||
return RT_NO_THIS_DOC;
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < cnt; i++) {
|
||||
rst.FetchRow();
|
||||
doc_version = rst.IntValue("doc_version");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Extract the three mandatory top-level members of a request.
 *
 *   "table_content" (array) -> subreq
 *   "appid"         (int)   -> id
 *   "fields_count"  (int)   -> count
 *
 * Members are checked in that order; outputs assigned before a failing
 * check keep their new value. Returns 0 on success, otherwise
 * RT_NO_TABLE_CONTENT, RT_NO_APPID or RT_NO_FIELD_COUNT for the first
 * member that is missing or has the wrong type.
 */
static int decode_request(const Json::Value &req, Json::Value &subreq, int &id, int &count){
    if(!req.isMember("table_content") || !req["table_content"].isArray()){
        return RT_NO_TABLE_CONTENT;
    }
    subreq = req["table_content"];

    if(!req.isMember("appid") || !req["appid"].isInt()){
        return RT_NO_APPID;
    }
    id = req["appid"].asInt();

    if(!req.isMember("fields_count") || !req["fields_count"].isInt()){
        return RT_NO_FIELD_COUNT;
    }
    count = req["fields_count"].asInt();

    return 0;
}
|
||||
|
||||
/*
 * Decode one element of "table_content" into `fields`.
 *
 * "cmd" selects the operation:
 *   - "top_add" / "top_update": copies the "fields" object into
 *     `json_fields` and fills doc_id ("id", falling back to "doc_id"),
 *     sp_words/description, weight (default 1), publish_time (default now),
 *     top_start_time (default now) and top_end_time (default start + 24h).
 *     An explicit top_end_time earlier than top_start_time is clamped to
 *     top_start_time.
 *   - "top_delete": fills doc_id from fields["doc_id"].
 *
 * Returns RT_CMD_ADD or RT_CMD_DELETE when the element was decoded,
 * RT_NO_DOCID when an add/update has no string id, RT_ERROR_FIELD_CMD for
 * an unknown cmd, and 0 when the element carries no usable command
 * (no string "cmd", add/update without a "fields" object, or delete
 * without a string "doc_id").
 *
 * NOTE(review): the caller treats 0 as "nothing to do", so such elements
 * are skipped silently -- confirm that this is intended.
 */
static int decode_field(Json::Value table_content,Json::Value &json_fields,UserTableContent &fields){
    // isMember() is only valid on object (or null) values in jsoncpp, so
    // reject other element types before probing for members.
    if(!table_content.isObject() || !table_content.isMember("cmd") || !table_content["cmd"].isString()){
        return 0;
    }

    const string cmd = table_content["cmd"].asString();
    const time_t now = time(NULL);

    if(cmd == "top_add" || cmd == "top_update"){
        fields.top = 1;
        if(!table_content.isMember("fields") || !table_content["fields"].isObject()){
            return 0;
        }
        json_fields = table_content["fields"];

        // "id" takes precedence over "doc_id".
        if(json_fields.isMember("id") && json_fields["id"].isString()){
            fields.doc_id = json_fields["id"].asString();
        }else if(json_fields.isMember("doc_id") && json_fields["doc_id"].isString()){
            fields.doc_id = json_fields["doc_id"].asString();
        }else{
            return RT_NO_DOCID;
        }

        if(json_fields.isMember("sp_words") && json_fields["sp_words"].isString()){
            fields.sp_words = json_fields["sp_words"].asString();
            fields.description = json_fields["sp_words"].asString();//description is using as sp_words section;
        }

        if(json_fields.isMember("weight") && json_fields["weight"].isInt()){
            fields.weight = json_fields["weight"].asInt();
        }else{
            fields.weight = 1;
        }

        if(json_fields.isMember("publish_time") && json_fields["publish_time"].isInt()){
            fields.publish_time = json_fields["publish_time"].asInt();
        }else{
            fields.publish_time = now;
        }

        if(json_fields.isMember("top_start_time") && json_fields["top_start_time"].isInt()){
            fields.top_start_time = json_fields["top_start_time"].asInt();
        }else{
            fields.top_start_time = now;
        }

        if(json_fields.isMember("top_end_time") && json_fields["top_end_time"].isInt()){
            // Clamp an end time that lies before the start time. The clamp
            // used to be overwritten unconditionally by the raw value.
            if(json_fields["top_end_time"].asInt() < fields.top_start_time){
                fields.top_end_time = fields.top_start_time;
            }else{
                fields.top_end_time = json_fields["top_end_time"].asInt();
            }
        }else{
            fields.top_end_time = fields.top_start_time + (24*60*60);
        }
        return RT_CMD_ADD;
    }

    if(cmd == "top_delete"){
        fields.top = 1;
        Json::Value field = table_content["fields"];
        // Guard with isObject(): "fields" may be any JSON type here.
        if(field.isObject() && field.isMember("doc_id") && field["doc_id"].isString()){
            fields.doc_id = field["doc_id"].asString();
            return RT_CMD_DELETE;
        }
        return 0;
    }

    return RT_ERROR_FIELD_CMD;
}
|
||||
|
||||
int CTaskTopIndex::pre_process(void){
|
||||
DTCTools *dtc_tools = DTCTools::Instance();
|
||||
dtc_tools->init_servers(index_servers, IndexConf::Instance()->GetDTCIndexConfig());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CTaskTopIndex::do_split_sp_words(string &str, string &doc_id, uint32_t appid, set<string> &word_set,Json::Value &res) {
|
||||
string word;
|
||||
uint32_t id = 0;
|
||||
vector<string> strs = splitEx(str, "|");
|
||||
vector<string>::iterator iter = strs.begin();
|
||||
uint32_t index = 0;
|
||||
for (; iter != strs.end(); iter++) {
|
||||
index++;
|
||||
word = *iter;
|
||||
if (!SplitManager::Instance()->wordValid(word, appid, id)){
|
||||
log_error("invalued sp_word!%s",word.c_str());
|
||||
return RT_ERROR_INVALID_SP_WORD;
|
||||
}
|
||||
word_set.insert(word);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Build and execute one DTC insert request for a top-index row under `key`.
 *
 * The row records the document id and version, the current time as
 * created_time, the top start/end times and the weight from `fields`, and
 * an empty "extend" column. The outcome is stored in `rst`.
 *
 * Returns whatever DTC::InsertRequest::Execute() returns.
 */
static int insert_top_index_execute(DTC::Server* dtcServer,string key,const UserTableContent &fields,int doc_version,DTC::Result &rst){
    DTC::InsertRequest request(dtcServer);

    request.SetKey(key.c_str());
    request.Set("doc_id", fields.doc_id.c_str());
    request.Set("doc_version",doc_version);
    request.Set("created_time",time(NULL));
    request.Set("start_time",fields.top_start_time);
    request.Set("end_time",fields.top_end_time);
    request.Set("weight", fields.weight);
    request.Set("extend","");

    return request.Execute(rst);
}
|
||||
|
||||
int CTaskTopIndex::insert_top_index_dtc(string key,const UserTableContent &fields,int doc_version,Json::Value &res){
|
||||
int ret = 0;
|
||||
|
||||
DTC::Server* dtcServer = index_servers.GetServer();
|
||||
if(dtcServer == NULL){
|
||||
log_error("GetServer error");
|
||||
return -1;
|
||||
}
|
||||
char tmp[41] = { '0' };
|
||||
snprintf(tmp, sizeof(tmp), "%40s", fields.doc_id.c_str());
|
||||
|
||||
dtcServer->SetAccessKey(tmp);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = insert_top_index_execute(dtcServer,key,fields,doc_version,rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("insert request error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
res[MESSAGE] = rst.ErrorMessage();
|
||||
return -1;
|
||||
}
|
||||
log_debug("insert word:%s sp_word:%s doc_id:%s doc_version: %d to top index!",key.c_str(),fields.sp_words.c_str(),fields.doc_id.c_str(),doc_version);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CTaskTopIndex::do_insert_top_index(const UserTableContent &fields,int doc_version, set<string> &word_set,Json::Value &res) {
|
||||
|
||||
int ret;
|
||||
|
||||
set<string>::iterator iter = word_set.begin();
|
||||
for (; iter != word_set.end(); iter++) {
|
||||
string key = gen_dtc_key_string(fields.appid, "01", *iter);
|
||||
ret = insert_top_index_dtc(key,fields,doc_version,res);
|
||||
if(ret < 0)
|
||||
return RT_ERROR_INSERT_TOP_INDEX_DTC;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CTaskTopIndex::update_sanpshot_dtc(const UserTableContent &fields,int doc_version,Json::Value &res){
|
||||
int ret = 0;
|
||||
DTC::Server* dtc_server = index_servers.GetServer();
|
||||
if(NULL == dtc_server){
|
||||
log_error("snapshot server connect error!");
|
||||
return RT_ERROR_GET_SNAPSHOT;
|
||||
}
|
||||
DTC::UpdateRequest updateReq(dtc_server);
|
||||
ret = updateReq.SetKey(gen_dtc_key_string(fields.appid, "11", fields.doc_id).c_str());
|
||||
updateReq.Set("doc_version", doc_version);
|
||||
if(fields.content != "null\n")
|
||||
updateReq.Set("extend", fields.content.c_str());
|
||||
updateReq.Set("created_time",fields.publish_time);
|
||||
|
||||
DTC::Result rst;
|
||||
ret = updateReq.Execute(rst);
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("updateReq error! ,errno %d ,errmsg %s, errfrom %s\n", ret,rst.ErrorMessage(), rst.ErrorFrom());
|
||||
return RT_ERROR_UPDATE_SNAPSHOT;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Process one decoded top-index request.
 *
 * Validates the envelope (table_content / appid / fields_count), then walks
 * every element of "table_content":
 *   - add/update: looks up the current snapshot version, collects the
 *     snapshot-tagged fields, validates and indexes the sp_words, then
 *     updates (existing document) or inserts (new document) the snapshot;
 *   - delete: removes the snapshot row.
 *
 * Returns the first hard error encountered, otherwise the status of the
 * last element processed.
 *
 * NOTE(review): a decode_field() error on an element that is not the last
 * one is overwritten by later iterations, and the results of the snapshot
 * update/insert calls are ignored -- confirm that this is intended.
 */
int CTaskTopIndex::top_index_process(Json::Value &req,Json::Value &res){
    int app_id, fields_count = 0;
    Json::Value table_content;
    set<string> word_set;

    int ret = decode_request(req, table_content, app_id,fields_count);
    if(ret != 0){
        return ret;
    }
    if(fields_count == 0 || fields_count != (int)table_content.size()){
        return RT_ERROR_FIELD_COUNT;
    }
    if(!SplitManager::Instance()->is_effective_appid(app_id)){
        return RT_NO_APPID;
    }

    for(int i = 0;i < (int)table_content.size();i++){
        UserTableContent content_fields(app_id);
        Json::Value json_field;

        ret = decode_field(table_content[i],json_field,content_fields);

        if(RT_CMD_DELETE == ret){
            ret = delete_snapshot_dtc(content_fields.doc_id,content_fields.appid,res);//not use the doc_version curr
            continue;
        }
        if(RT_CMD_ADD != ret){
            continue;
        }

        // An existing snapshot means this is an update: bump its version.
        int doc_version = 0;
        int old_version = 0;
        ret = get_snapshot_active_doc(content_fields,old_version,res);
        if(0 == ret){
            doc_version = ++old_version;
        }else if(ret != RT_NO_THIS_DOC){
            return ret;
        }

        // Copy every snapshot-tagged field whose JSON type matches the
        // table definition (field_type 1: int, >1: string).
        Json::Value snapshot_content;
        Json::Value::Members names = json_field.getMemberNames();
        for(Json::Value::Members::iterator name = names.begin(); name != names.end(); ++name){
            string field_name = *name;
            struct table_info *tbinfo = SplitManager::Instance()->get_table_info(app_id,field_name);
            if(tbinfo == NULL || tbinfo->snapshot_tag != 1){
                continue;
            }
            if(tbinfo->field_type == 1 && json_field[field_name].isInt()){
                snapshot_content[field_name] = json_field[field_name].asInt();
            }else if(tbinfo->field_type > 1 && json_field[field_name].isString()){
                snapshot_content[field_name] = json_field[field_name].asString();
            }
        }

        log_debug("sp_words:%s\n",content_fields.sp_words.c_str());
        ret = do_split_sp_words(content_fields.sp_words,content_fields.doc_id,content_fields.appid,word_set,res);
        if(0 != ret){
            res[MESSAGE] = "do_split_sp_words error";
            return ret;
        }

        ret = do_insert_top_index(content_fields,doc_version,word_set,res);
        if(0 != ret){
            return ret;
        }

        Json::FastWriter writer;
        content_fields.content = writer.write(snapshot_content);
        if(doc_version != 0){//need update
            update_sanpshot_dtc(content_fields,doc_version,res);
        }else{
            insert_snapshot_dtc(content_fields,doc_version,res);//insert the snapshot doc
        }
        word_set.clear();
    }

    return ret;
}
|
||||
|
||||
/*
 * Entry point for one top-index task.
 *
 * Checks the service type, parses the request body as JSON, runs
 * top_index_process() and replies with a JSON result whose "code" is 0 on
 * success or the error code otherwise. A NULL task is reported to the
 * profiler as a function error and dropped without a reply.
 */
void CTaskTopIndex::TaskNotify(CTaskRequest * curr)
{
    log_debug("CTaskTopIndex::TaskNotify start");
    common::CallerInfo caller_info = common::ProfilerMonitor::GetInstance().RegisterInfo(std::string("searchEngine.searchService.topIndexTask"));

    if(NULL == curr){
        common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
        common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
        return;
    }

    //there is a race condition here:
    //curr may be deleted during process (in task->ReplyNotify())
    CTaskRequest * task = curr;

    Json::Reader reader;
    Json::FastWriter writer;
    Json::Value value, res;
    std::string req;
    res["code"] = 0;

    if(SERVICE_TOPINDEX != task->GetReqCmd()){
        res["code"] = RT_ERROR_SERVICE_TYPE;
        res["reqcmd"] = task->GetReqCmd();
        res["message"] = "service type wrong! need 107";
    }else{
        req = task->buildRequsetString();
        log_debug("recv:%s\n",req.c_str());

        if(!reader.parse(req,value,false)){
            log_error("parse json error!\ndata:%s errors:%s\n",req.c_str(),reader.getFormattedErrorMessages().c_str());
            res["code"] = RT_PARSE_JSON_ERR;
            res["message"] = reader.getFormattedErrorMessages();
            res["data"] = req;
        }else if(!value.isObject()){
            // Valid JSON, but the top level is not an object.
            log_error("parse json error!\ndata:%s errors:%s\n",req.c_str(),reader.getFormattedErrorMessages().c_str());
            res["code"] = RT_PARSE_JSON_ERR;
            res["message"] = "it's not a json";
            res["data"] = req;
        }else{
            const int ret = top_index_process(value,res);
            if(0 != ret){
                res["code"] = ret;
            }
        }
    }

    task->setResult(writer.write(res));
    task->ReplyNotify();

    common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
    return;
}
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: top_index_service.h
|
||||
*
|
||||
* Description: class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2020 10:02:05 PM
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: shrewdlin, linjinming@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef TOP_INDEX_SERVICE_H_
|
||||
#define TOP_INDEX_SERVICE_H_
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include "request_base.h"
|
||||
#include "index_conf.h"
|
||||
#include "dtcapi.h"
|
||||
#include "split_manager.h"
|
||||
using namespace std;
|
||||
|
||||
class CPollThread;
|
||||
class CTaskRequest;
|
||||
class SplitManager;
|
||||
class DTCServers;
|
||||
|
||||
/*
 * Task dispatcher for top-index requests: decodes a JSON request, writes
 * top-index rows and maintains the document snapshot through DTC.
 */
class CTaskTopIndex : public CTaskDispatcher<CTaskRequest>
{
private:
    CPollThread * ownerThread;              // poll thread passed to the constructor
    CRequestOutput<CTaskRequest> output;
    DTC::DTCServers index_servers;          // initialised by pre_process()

private:
    // Top-index rows: one per key; do_insert_top_index() loops over word_set.
    int insert_top_index_dtc(string key,const UserTableContent &fields,int doc_version,Json::Value &res);
    int do_insert_top_index(const UserTableContent &fields,int doc_version, set<string> &word_set,Json::Value &res);

    // Snapshot rows: read the stored version, delete, insert or update.
    int get_snapshot_active_doc(const UserTableContent &fields,int &doc_version,Json::Value &res);
    int delete_snapshot_dtc(const string &doc_id,uint32_t appid,Json::Value &res);
    int insert_snapshot_dtc(const UserTableContent &fields,int &doc_version,Json::Value &res);

    // Splits `str` on "|" and validates each word into word_set.
    int do_split_sp_words(string &str, string &doc_id, uint32_t appid, set<string> &word_set,Json::Value &res);
    int update_sanpshot_dtc(const UserTableContent &fields,int doc_version,Json::Value &res);

public:
    CTaskTopIndex(CPollThread * o);
    virtual ~CTaskTopIndex();

    // Sets up index_servers from the DTC index configuration.
    int pre_process(void);
    // Handles one parsed request; returns 0 or an RT_* error code.
    int top_index_process(Json::Value &req,Json::Value &res);

    inline void BindDispatcher(CTaskDispatcher<CTaskRequest> *p)
    {
        output.BindDispatcher(p);
    }

    virtual void TaskNotify(CTaskRequest * curr);
};
|
||||
|
||||
|
||||
|
||||
#endif /* TOP_INDEX_SERVICE_H_ */
|
Loading…
Reference in New Issue