Commit 42ea101c by xuchen

load the tokenizer during decoding

parent 2ba93df9
......@@ -18,6 +18,7 @@ from itertools import chain
import numpy as np
import torch
from fairseq import checkpoint_utils, options, scoring, tasks, utils
+from fairseq.data import encoders
from fairseq.dataclass.utils import convert_namespace_to_omegaconf
from fairseq.logging import progress_bar
from fairseq.logging.meters import StopwatchMeter, TimeMeter
......@@ -172,7 +173,8 @@ def _main(cfg: DictConfig, output_file, translation_path=None):
)
# Handle tokenization and BPE
-tokenizer = task.build_tokenizer(cfg.tokenizer)
+# tokenizer = task.build_tokenizer(cfg.tokenizer)
+tokenizer = encoders.build_tokenizer(cfg.tokenizer)
bpe = task.build_bpe(cfg.bpe)
def decode_fn(x):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论