12 lines
358 B
YAML
12 lines
358 B
YAML
|
# Name of the model this configuration describes.
# NOTE(review): presumably used to select the model implementation — confirm.
model_name: transformer
|
||
|
|
||
|
# Taken from the embedding output, so it does not need to be specified here.
# NOTE(review): `???` looks like the OmegaConf/Hydra "mandatory value"
# placeholder that must be filled in before use — confirm against the loader.
hidden_size: ???
|
||
|
# Must evenly divide hidden_size (i.e. hidden_size % num_heads == 0).
num_heads: 4
|
||
|
# Number of hidden layers.
# NOTE(review): presumably the count of stacked transformer layers — confirm.
num_hidden_layers: 3
|
||
|
# NOTE(review): presumably the width of each layer's feed-forward sublayer —
# confirm against the model code.
intermediate_size: 256
|
||
|
# NOTE(review): presumably a dropout probability — confirm where it is applied.
dropout: 0.1
|
||
|
# Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) read
# a bare `1e-12` as the string "1e-12" rather than a float.
layer_norm_eps: 1.0e-12
|
||
|
# Activation function name; listed options: relu, gelu, swish, gelu_new.
hidden_act: gelu_new
|
||
|
|
||
|
# Canonical lowercase boolean; `True` is only a boolean under some schemas.
output_attentions: true
|
||
|
# Canonical lowercase boolean; `True` is only a boolean under some schemas.
output_hidden_states: true
|