Commit 51395037 by xuchen

accu update

parent e7625a34
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
#! /usr/bin/env python3
import unicodedata
import jiwer
import jiwer.transforms as tr
import sys
import re
# Command line: <script> <ref_file> <hyp_file>
# Exit with a usage message rather than a bare IndexError traceback when the
# two required paths are missing. Extra arguments are still tolerated.
if len(sys.argv) < 3:
    sys.exit("Usage: %s <ref_file> <hyp_file>" % sys.argv[0])
ref_file = sys.argv[1]  # path of the reference transcript file
hyp_file = sys.argv[2]  # path of the hypothesis transcript file
# Normalisation pipeline applied to both reference and hypothesis text before
# the word-level comparison; it ends by reducing each sentence to a list of
# words.
wer_standardize = tr.Compose(
    [
        # Rewrite the "<<unk>>" marker to a single "@" character.
        # NOTE(review): "@" is itself punctuation, so RemovePunctuation below
        # presumably drops it again -- confirm that is the intended handling.
        tr.SubstituteRegexes({r"<<unk>>": r"@"}),
        tr.ToLowerCase(),
        # Contractions must be expanded while the apostrophe is still there:
        # the expansion rules match on "n't", "'s", "'ll", ... and can never
        # fire once RemovePunctuation has stripped the apostrophes.
        tr.ExpandCommonEnglishContractions(),
        tr.RemovePunctuation(),
        # NOTE(review): RemoveKaldiNonWords matches bracketed tokens; whether
        # the brackets survive RemovePunctuation depends on the installed
        # jiwer version -- confirm whether this step should move up as well.
        tr.RemoveKaldiNonWords(),
        tr.RemoveWhiteSpace(replace_by_space=True),
        tr.ReduceToListOfListOfWords(),
    ]
)
# Character-level normalisation pipeline, applied to both reference and
# hypothesis text before the character comparison. The steps run in list
# order and finish by reducing each sentence to a list of characters.
_cer_pipeline = [
    # Rewrite the "<<unk>>" marker to a single "@" character.
    tr.SubstituteRegexes({r"<<unk>>": r"@"}),
    # Delete every space character so spacing does not count as an error.
    tr.SubstituteRegexes({r" ": r""}),
    tr.ToLowerCase(),
    tr.RemovePunctuation(),
    tr.Strip(),
    tr.ReduceToListOfListOfChars(),
]
cer_standardize = tr.Compose(_cer_pipeline)
def process_text(text):
    """Split mixed Chinese/alphanumeric text into space-separated tokens.

    Every character in the range U+4E00..U+9FA5 becomes a token of its own,
    while runs of ASCII letters and digits stay together. Whitespace runs are
    collapsed to a single space and the result is stripped at both ends.
    """
    spacing_rules = (
        # Chinese character directly followed by an ASCII letter/digit.
        (r'([\u4e00-\u9fa5])([a-zA-Z0-9])', r'\1 \2'),
        # ASCII letter/digit directly followed by a Chinese character.
        (r'([a-zA-Z0-9])([\u4e00-\u9fa5])', r'\1 \2'),
        # Put a space after every Chinese character.
        (r'([\u4e00-\u9fa5])', r'\1 '),
    )
    for pattern, replacement in spacing_rules:
        text = re.sub(pattern, replacement, text)
    # Squeeze the surplus whitespace produced above (and any in the input).
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
# Read both transcript files. The `with` blocks close the handles
# deterministically, and UTF-8 is requested explicitly so that reading
# Chinese text does not depend on the locale's default encoding.
with open(ref_file, "r", encoding="utf-8") as ref_handle:
    ref_lines = ref_handle.readlines()
with open(hyp_file, "r", encoding="utf-8") as hyp_handle:
    hyp_lines = hyp_handle.readlines()

# Copies of the transcripts with each Chinese character split into its own
# token; these feed the MER figure below.
mer_ref_lines = [process_text(line) for line in ref_lines]
mer_hyp_lines = [process_text(line) for line in hyp_lines]

# Word error rate on the raw lines.
wer = jiwer.wer(
    ref_lines,
    hyp_lines,
    truth_transform=wer_standardize,
    hypothesis_transform=wer_standardize,
)
# Character error rate on the raw lines.
cer = jiwer.cer(
    ref_lines,
    hyp_lines,
    truth_transform=cer_standardize,
    hypothesis_transform=cer_standardize,
)
print("WER = %.4f" % wer)
print("CER = %.4f" % cer)

# MER: the word-level metric computed on the character-split Chinese text.
mer = jiwer.wer(
    mer_ref_lines,
    mer_hyp_lines,
    truth_transform=wer_standardize,
    hypothesis_transform=wer_standardize,
)
print("MER = %.4f" % mer)
#! /usr/bin/env bash
# Score a tab-separated inference result file.
#
# Usage: <this script> <infer_file>
#
# Writes <infer_file>.ref and <infer_file>.hyp next to the input and passes
# them to cal_wer.py, which is invoked by bare name and so must be on PATH.

# Abort with a usage message when no file is given; otherwise `cut` would
# read from stdin and the outputs would be written to ".ref" / ".hyp".
infer_file=${1:?"usage: $0 <infer_file>"}

# Field 3 goes to the reference file and field 4 to the hypothesis file.
# Expansions are quoted so paths with spaces or glob characters survive.
cut -f 3 "$infer_file" > "${infer_file}.ref"
cut -f 4 "$infer_file" > "${infer_file}.hyp"

cal_wer.py "${infer_file}.ref" "${infer_file}.hyp"
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
#! /usr/bin/env python3
import sys import sys
import csv import csv
......
#! /usr/bin/env python3
import string import string
import sys import sys
......
...@@ -6,6 +6,6 @@ item=$3 ...@@ -6,6 +6,6 @@ item=$3
tmp=$(mktemp -t temp.record.XXXXXX) tmp=$(mktemp -t temp.record.XXXXXX)
python3 extract_txt_from_tsv.py $in_tsv $tmp $item extract_txt_from_tsv.py $in_tsv $tmp $item
cat $tmp | python3 lcrm.py > $tmp.lcrm cat $tmp | lcrm.py > $tmp.lcrm
python3 replace_txt_from_tsv.py $in_tsv $out_tsv $tmp.lcrm $item replace_txt_from_tsv.py $in_tsv $out_tsv $tmp.lcrm $item
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
#! /usr/bin/env python3
import sys import sys
import csv import csv
import pandas as pd import pandas as pd
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论