arch: transformer_wmt_en_de_big_t2t
share-all-embeddings: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 8000
lr: 7e-4
adam-betas: '(0.9, 0.997)'
criterion: label_smoothed_cross_entropy
label-smoothing: 0.1
dropout: 0.3
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 1024
encoder-ffn-embed-dim: 4096
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 16
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
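
# The keys above follow fairseq's command-line flag names. As a hedged sketch,
# they correspond to a fairseq-train invocation like the one below (the
# data-bin path is hypothetical, and the commented-out load-pretrained-* keys
# are left unset, matching the config):
#
#   fairseq-train data-bin/wmt_en_de \
#     --arch transformer_wmt_en_de_big_t2t --share-all-embeddings \
#     --optimizer adam --adam-betas '(0.9, 0.997)' --clip-norm 10.0 \
#     --lr-scheduler inverse_sqrt --warmup-init-lr 1e-7 \
#     --warmup-updates 8000 --lr 7e-4 \
#     --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
#     --dropout 0.3 --attention-dropout 0.1 --activation-dropout 0.1 \
#     --activation-fn relu \
#     --encoder-normalize-before --decoder-normalize-before \
#     --encoder-embed-dim 1024 --encoder-ffn-embed-dim 4096 \
#     --encoder-layers 6 --decoder-layers 6 --encoder-attention-heads 16 \
#     --decoder-embed-dim 512 --decoder-ffn-embed-dim 2048 \
#     --decoder-attention-heads 8
#
# Boolean keys (share-all-embeddings, encoder/decoder-normalize-before) map to
# store_true flags, so True means the flag is passed and False means it is
# omitted. Note the asymmetric sizing: a big-style encoder (1024/4096, 16
# heads) paired with a base-style decoder (512/2048, 8 heads), as specified
# in this config.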