train_dlcl.yaml 912 Bytes
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
train-subset: train
valid-subset: valid

max-epoch: 50
max-update: 100000

num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True

#load-pretrained-encoder-from:
xuchen committed
16
#load-pretrained-decoder-from:
17

xuchen committed
18
arch: dlcl_transformer
19 20 21 22 23 24
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 8000
xuchen committed
25 26
lr: 1e-3
adam-betas: (0.9,0.997)
27 28 29 30 31

criterion: label_smoothed_cross_entropy
label-smoothing: 0.1

dropout: 0.1
xuchen committed
32 33 34
attention-dropout: 0.1
activation-dropout: 0.1

35 36 37 38 39 40 41 42 43 44 45 46
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 8

decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
47 48 49

use-enc-dlcl: True
use-dec-dlcl: True