From dc84f31e4029bb4d0240c9b3d8b9017dad86c503 Mon Sep 17 00:00:00 2001 From: tlk-dsg <467460833@qq.com> Date: Mon, 8 Nov 2021 20:41:18 +0800 Subject: [PATCH] fix typo in _serialize_sentence: assign replacement result to 'sent' (was misspelled 'snet', so entity/attribute_value markers were never inserted); drop unused 'cfg' parameter --- .../attribution_extraction/standard/tools/preprocess.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/deepke/attribution_extraction/standard/tools/preprocess.py b/src/deepke/attribution_extraction/standard/tools/preprocess.py index f8e4d9b..441f77e 100644 --- a/src/deepke/attribution_extraction/standard/tools/preprocess.py +++ b/src/deepke/attribution_extraction/standard/tools/preprocess.py @@ -53,10 +53,10 @@ def _convert_tokens_into_index(data: List[Dict], vocab): d['token2idx'] = [vocab.word2idx.get(i, unk_idx) for i in d['tokens']] d['seq_len'] = len(d['token2idx']) -def _serialize_sentence(data: List[Dict], serial, cfg): +def _serialize_sentence(data: List[Dict], serial): for d in data: sent = d['sentence'].strip() - snet = sent.replace(d['entity'] , ' entity ' , 1).replace(d['attribute_value'] , ' attribute_value ' , 1) + sent = sent.replace(d['entity'] , ' entity ' , 1).replace(d['attribute_value'] , ' attribute_value ' , 1) d['tokens'] = serial(sent, never_split=['entity','attribute_value']) entity_index, attribute_value_index = d['entity_offset'] , d['attribute_value_offset'] d['entity_index'],d['attribute_value_index'] = int(entity_index) , int(attribute_value_index)