#!/usr/bin/env python3
#
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import sys

import sacremoses


def main(args):
    """Tokenizes, preserving tabs

    Reads lines from standard input, splits each line on tab characters,
    runs every resulting field through a Moses tokenizer, and writes the
    tokenized fields back to standard output joined by tabs, one output
    line per input line.

    Args:
        args: Parsed command-line namespace. Only ``args.lang`` is read
            here; it is passed as the ``lang`` argument of
            ``sacremoses.MosesTokenizer``.
    """
    mt = sacremoses.MosesTokenizer(lang=args.lang)

    def tok(s):
        # return_str=True asks the tokenizer for a single string rather
        # than a list of tokens, so the field can be printed directly.
        return mt.tokenize(s, return_str=True)

    for line in sys.stdin:
        # Lines read from stdin keep their trailing newline, so the last
        # field is handed to the tokenizer with it still attached.
        # NOTE(review): presumably the tokenizer strips it -- confirm.
        parts = list(map(tok, line.split("\t")))
        # Flush after every line so output is available to a downstream
        # consumer as soon as each input line has been processed.
        print(*parts, sep="\t", flush=True)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--lang", "-l", default="en")
    # NOTE: --penn and --fields are accepted on the command line but are
    # not read by main(); they currently have no effect.
    parser.add_argument("--penn", "-p", action="store_true")
    parser.add_argument("--fields", "-f", help="fields to tokenize")
    args = parser.parse_args()

    main(args)