Commit 42ea101c by xuchen

load the tokenizer during decoding

parent 2ba93df9
@@ -18,6 +18,7 @@ from itertools import chain
 import numpy as np
 import torch
 from fairseq import checkpoint_utils, options, scoring, tasks, utils
+from fairseq.data import encoders
 from fairseq.dataclass.utils import convert_namespace_to_omegaconf
 from fairseq.logging import progress_bar
 from fairseq.logging.meters import StopwatchMeter, TimeMeter
@@ -172,7 +173,8 @@ def _main(cfg: DictConfig, output_file, translation_path=None):
     )

     # Handle tokenization and BPE
-    tokenizer = task.build_tokenizer(cfg.tokenizer)
+    # tokenizer = task.build_tokenizer(cfg.tokenizer)
+    tokenizer = encoders.build_tokenizer(cfg.tokenizer)
     bpe = task.build_bpe(cfg.bpe)

     def decode_fn(x):
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论