Merge remote-tracking branch 'origin/dygraph' into dygraph
Commit e4d160e9e5
@@ -185,11 +185,11 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs

 <a name="数据增强"></a>
 #### 2.1 Data Augmentation

-PaddleOCR provides a variety of data augmentation methods. If you want to add perturbations during training, set `distort: true` in the configuration file.
+PaddleOCR provides a variety of data augmentation methods; data augmentation is already enabled in the default configuration files.

-The default perturbations are: color space conversion (cvtColor), blur, jitter, Gaussian noise, random crop, perspective, and color reverse.
+The default perturbations are: color space conversion (cvtColor), blur, jitter, Gaussian noise, random crop, perspective, color reverse, and TIA augmentation.

-Each perturbation is selected with a 50% probability during training. For the implementation, see [img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py).
+Each perturbation is selected with a 40% probability during training. For the implementation, see [rec_img_aug.py](../../ppocr/data/imaug/rec_img_aug.py).

 *Due to OpenCV compatibility issues, the perturbation operations currently only support Linux.*
@@ -177,11 +177,11 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs

 <a name="Data_Augmentation"></a>
 #### 2.1 Data Augmentation

-PaddleOCR provides a variety of data augmentation methods. If you want to add disturbance during training, please set `distort: true` in the configuration file.
+PaddleOCR provides a variety of data augmentation methods. All the augmentation methods are enabled by default.

-The default perturbation methods are: cvtColor, blur, jitter, Gaussian noise, random crop, perspective, and color reverse.
+The default perturbation methods are: cvtColor, blur, jitter, Gaussian noise, random crop, perspective, color reverse, and TIA augmentation.

-Each perturbation method is selected with a 50% probability during training. For the specific implementation, please refer to [img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py).
+Each perturbation method is selected with a 40% probability during training. For the specific implementation, please refer to [rec_img_aug.py](../../ppocr/data/imaug/rec_img_aug.py).
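The 40% figure above means each perturbation is an independent per-sample coin flip, not a choice of exactly one method. The sketch below only illustrates that selection pattern; `blur` and `reverse` are hypothetical stand-ins, and the real operators live in rec_img_aug.py.

```python
# Illustrative sketch of per-sample augmentation selection: each perturbation
# is applied independently with probability 0.4. The helpers here are
# hypothetical stand-ins, not the actual PaddleOCR operators.
import random

import cv2
import numpy as np


def blur(img):
    # stand-in perturbation: mild Gaussian blur
    return cv2.GaussianBlur(img, (5, 5), 1)


def reverse(img):
    # stand-in perturbation: color reverse
    return 255 - img


def rec_augment(img, ops=(blur, reverse), prob=0.4):
    """Apply each perturbation independently with probability `prob`."""
    for op in ops:
        if random.random() < prob:
            img = op(img)
    return img


# example: augment one dummy 32x100 BGR image
img = np.random.randint(0, 256, (32, 100, 3), dtype=np.uint8)
aug = rec_augment(img)
```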
<a name="Training"></a>
|
<a name="Training"></a>
|
||||||
#### 2.2 Training
|
#### 2.2 Training
|
||||||
|
|
|
@@ -49,14 +49,12 @@ def term_mp(sig_num, frame):
     os.killpg(pgid, signal.SIGKILL)


-signal.signal(signal.SIGINT, term_mp)
-signal.signal(signal.SIGTERM, term_mp)


 def build_dataloader(config, mode, device, logger, seed=None):
     config = copy.deepcopy(config)

-    support_dict = ['SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet']
+    support_dict = [
+        'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet'
+    ]
     module_name = config[mode]['dataset']['name']
     assert module_name in support_dict, Exception(
         'DataSet only support {}'.format(support_dict))

@@ -96,4 +94,8 @@ def build_dataloader(config, mode, device, logger, seed=None):
         return_list=True,
         use_shared_memory=use_shared_memory)

+    # support exit using ctrl+c
+    signal.signal(signal.SIGINT, term_mp)
+    signal.signal(signal.SIGTERM, term_mp)

     return data_loader
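The net effect of the two hunks above is that the SIGINT/SIGTERM handlers are registered inside build_dataloader, after the DataLoader and its worker processes exist, so that Ctrl+C kills the whole process group instead of leaving workers behind. Below is a minimal standalone sketch of that pattern, assuming a Unix-like OS; the worker setup is a placeholder, not the PaddleOCR DataLoader.

```python
# Minimal sketch of the "support exit using ctrl+c" pattern adopted above:
# a handler that kills the whole process group, registered only after the
# worker processes have been started. Unix-only (os.killpg / SIGKILL).
import os
import signal
import time
from multiprocessing import Process


def term_mp(sig_num, frame):
    """On SIGINT/SIGTERM, kill the entire process group (main proc + workers)."""
    pid = os.getpid()
    pgid = os.getpgid(pid)
    print("main proc {} exit, kill process group {}".format(pid, pgid))
    os.killpg(pgid, signal.SIGKILL)


def build_workers(num_workers=2):
    # placeholder for starting DataLoader-style worker processes
    workers = [Process(target=time.sleep, args=(3600,)) for _ in range(num_workers)]
    for w in workers:
        w.start()
    # support exit using ctrl+c: register handlers once the workers exist
    signal.signal(signal.SIGINT, term_mp)
    signal.signal(signal.SIGTERM, term_mp)
    return workers


if __name__ == "__main__":
    build_workers()
    time.sleep(3600)  # Ctrl+C now tears down main and workers together
```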
@@ -74,7 +74,7 @@ class TextE2E(object):

         self.preprocess_op = create_operators(pre_process_list)
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor(
+        self.predictor, self.input_tensor, self.output_tensors, _ = utility.create_predictor(
             args, 'e2e', logger)  # paddle.jit.load(args.det_model_dir)
         # self.predictor.eval()
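This last hunk only widens the unpacking at the call site: utility.create_predictor evidently returns a fourth value now, which TextE2E discards with `_`. A tiny sketch of why callers must change follows; `make_predictor` is a hypothetical stand-in, and the meaning of the fourth value is not stated in this diff.

```python
# Widening a function's return tuple breaks existing 3-value unpacking, so
# every caller must be updated. `make_predictor` is a hypothetical stand-in
# for utility.create_predictor; the values are dummies.
def make_predictor():
    predictor, input_tensor, output_tensors, extra = "pred", "in", ["out"], "cfg"
    return predictor, input_tensor, output_tensors, extra


# old-style call site now fails:
try:
    predictor, input_tensor, output_tensors = make_predictor()
except ValueError as err:
    print(err)  # too many values to unpack (expected 3)

# new-style call site discards the extra value explicitly:
predictor, input_tensor, output_tensors, _ = make_predictor()
```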