update the shell scripts

9452b069 · xuchen · a598692d · 9452b069 · 9452b069 · 9452b069
Commit 9452b069 authored Jul 19, 2022 by xuchen
--- a/egs/aishell/asr/local/cal_ctc_bleu.sh
+++ b/egs/aishell/asr/local/cal_ctc_bleu.sh
+set -e
+
+infer_dir=$1
+tag=$2
+s2s_infer_file=${infer_dir}/$3
+org_ctc_infer_file=${infer_dir}/$4
+ref=$5
+tokenizer=$6
+lang=$7
+
+idx=${infer_dir}/${tag}_idx
+ctc_infer=${infer_dir}/${tag}_ctc_infer
+ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort
+
+if [[ ! -f ${ctc_infer_sort} ]]; then
+    cut -f1 ${s2s_infer_file} > ${idx}
+    paste ${idx} ${org_ctc_infer_file} > ${ctc_infer}
+    sort -n -t $'\t' ${ctc_infer} | cut -f2 > ${ctc_infer_sort}
+fi
+
+gen=${ctc_infer_sort}
+./cal_bleu.sh ${ref} ${gen} ${tokenizer} ${lang}
\ No newline at end of file
--- a/egs/aishell/asr/local/cal_wer.py
+++ b/egs/aishell/asr/local/cal_wer.py
+
+
+import unicodedata
+import jiwer
+import jiwer.transforms as tr
+import sys
+
+ref_file = sys.argv[1]
+hyp_file = sys.argv[2]
+
+
+wer_standardize = tr.Compose(
+    [
+        tr.SubstituteRegexes({r"<<unk>>": r"@"}),
+        tr.ToLowerCase(),
+        tr.RemovePunctuation(),
+        tr.ExpandCommonEnglishContractions(),
+        tr.RemoveKaldiNonWords(),
+        tr.RemoveWhiteSpace(replace_by_space=True),
+        tr.ReduceToListOfListOfWords(),
+    ]
+)
+cer_standardize = tr.Compose(
+    [
+        tr.SubstituteRegexes({r"<<unk>>": r"@"}),
+        tr.ToLowerCase(),
+        tr.RemovePunctuation(),
+        tr.Strip(),
+        tr.ReduceToListOfListOfChars(),
+    ]
+)
+
+
+ref_lines = open(ref_file, "r").readlines()
+hyp_lines = open(hyp_file, "r").readlines()
+
+wer = jiwer.wer(ref_lines, hyp_lines,
+                truth_transform=wer_standardize,
+                hypothesis_transform=wer_standardize,
+                )
+cer = jiwer.cer(ref_lines, hyp_lines,
+                truth_transform=cer_standardize,
+                hypothesis_transform=cer_standardize,
+                )
+
+print("WER: %.4f" % wer)
+print("CER: %.4f" % cer)
--- a/egs/aishell/asr/local/cal_wer.sh
+++ b/egs/aishell/asr/local/cal_wer.sh
+set -e
+
+infer_dir=$1
+tag=$2
+s2s_infer_file=${infer_dir}/$3
+org_ctc_infer_file=${infer_dir}/$4
+ref=$5
+
+idx=${infer_dir}/${tag}_idx
+ctc_infer=${infer_dir}/${tag}_ctc_infer
+ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort
+
+cut -f1 ${s2s_infer_file} > ${idx}
+paste ${idx} ${org_ctc_infer_file} > ${ctc_infer}
+sort -n -t $'\t' ${ctc_infer} | cut -f2 > ${ctc_infer_sort}
+python3 ./cal_wer.py ${ref} ${ctc_infer_sort}
\ No newline at end of file