# Hyperparameters for the transformer encoder model.
model_name: transformer

# hidden_size is taken from the embedding layer's output, so it is not set here.
# NOTE(review): `???` looks like a "required, filled in at runtime" placeholder
# (e.g. OmegaConf/Hydra mandatory-missing marker) -- confirm against the loader.
hidden_size: ???
num_heads: 4  # must evenly divide hidden_size
num_hidden_layers: 3
intermediate_size: 256
dropout: 0.1
# Written with an explicit mantissa decimal point so YAML 1.1 loaders
# (e.g. PyYAML) parse it as a float rather than the string "1e-12".
layer_norm_eps: 1.0e-12
hidden_act: gelu_new  # one of: [relu, gelu, swish, gelu_new]

output_attentions: true
output_hidden_states: true