deepke/example/ae/regular/conf/model/lm.yaml

model_name: lm
# When using a pretrained language model, this is where the pretrained model is stored.
# lm_name: 'bert-base-chinese'  # use this to download the model automatically
# lm_file: 'pretrained'
lm_file: '/home/yhy/transformers/bert-base-chinese'
# Number of transformer layers; base BERT starts with 12.
# With a small dataset, lowering this tends to converge faster and perform better.
num_hidden_layers: 1
# Parameters of the BiLSTM that follows BERT
type_rnn: 'LSTM' # [RNN, GRU, LSTM]
input_size: 768 # determined by BERT's hidden size
hidden_size: 100 # must be an even number
num_layers: 1
dropout: 0.3
bidirectional: True
last_layer_hn: True
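
Below is a minimal, illustrative sketch (assuming the HuggingFace transformers and PyTorch APIs, not DeepKE's actual model code) of how these fields could map onto a truncated BERT encoder followed by a BiLSTM; the even hidden_size requirement is presumably because that size is split across the two directions.

```python
import torch
from torch import nn
from transformers import BertConfig, BertModel

cfg = dict(
    lm_file='bert-base-chinese',   # or the local path set in lm_file above
    num_hidden_layers=1,
    type_rnn='LSTM',
    input_size=768,
    hidden_size=100,
    num_layers=1,
    dropout=0.3,
    bidirectional=True,
)

# Keep only the first num_hidden_layers transformer layers of BERT;
# weights for the dropped layers are simply not loaded.
bert_config = BertConfig.from_pretrained(cfg['lm_file'],
                                         num_hidden_layers=cfg['num_hidden_layers'])
bert = BertModel.from_pretrained(cfg['lm_file'], config=bert_config)

# Build the RNN named by type_rnn; splitting hidden_size across the two
# directions is what would make an even value necessary.
rnn_cls = {'RNN': nn.RNN, 'GRU': nn.GRU, 'LSTM': nn.LSTM}[cfg['type_rnn']]
rnn = rnn_cls(
    input_size=cfg['input_size'],            # 768 = BERT hidden size
    hidden_size=cfg['hidden_size'] // 2 if cfg['bidirectional'] else cfg['hidden_size'],
    num_layers=cfg['num_layers'],
    dropout=cfg['dropout'] if cfg['num_layers'] > 1 else 0.0,  # avoid PyTorch warning
    bidirectional=cfg['bidirectional'],
    batch_first=True,
)

tokens = torch.randint(0, bert_config.vocab_size, (2, 16))  # dummy batch of token ids
seq_out = bert(input_ids=tokens).last_hidden_state          # (2, 16, 768)
rnn_out, _ = rnn(seq_out)                                    # (2, 16, 100)
```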