Commit cf64b587 by xuchen

script

parent f7a9b1a0
......@@ -145,6 +145,9 @@ def save_checkpoint(cfg: CheckpointConfig, trainer, epoch_itr, val_loss):
for old_chk in checkpoints[cfg.keep_best_checkpoints:]:
if os.path.lexists(old_chk):
os.remove(old_chk)
if os.path.exists(os.path.join(cfg.save_dir, "stop")):
logger.error("Encounter Stop Flag. Exit.")
exit()
def load_checkpoint(cfg: CheckpointConfig, trainer, **passthrough_args):
......
import string
import sys
# Every ASCII punctuation mark except the apostrophe, which is kept so that
# contractions such as "it's" survive normalization.
punctuation_str = string.punctuation.replace("'", "")

# Translation table that deletes all characters in punctuation_str in a
# single pass (instead of one str.replace call per punctuation character).
PUNCTUATION_TABLE = str.maketrans("", "", punctuation_str)


def normalize_line(line):
    """Return *line* lower-cased, without punctuation, single-spaced.

    Steps: strip surrounding whitespace, lower-case, delete every character
    of ``punctuation_str`` (apostrophes are kept), then collapse each run of
    whitespace into one space.

    The original code used ``" ".join(line.split(" "))``, which returns its
    input unchanged, so gaps left behind by removed punctuation
    (``"a - b"`` -> ``"a  b"``) were never collapsed.  ``split()`` without an
    argument performs the collapse.
    """
    cleaned = line.strip().lower().translate(PUNCTUATION_TABLE)
    return " ".join(cleaned.split())


def main():
    """Normalize every line read from stdin and print it to stdout."""
    # Iterate the stream lazily rather than loading all lines up front.
    for line in sys.stdin:
        print(normalize_line(line))


if __name__ == "__main__":
    main()
set -e

# Rewrite one column of a TSV with the lcrm.py-normalized version of its text.
#
# Usage: <this script> IN_TSV OUT_TSV ITEM
#   IN_TSV   input TSV file
#   OUT_TSV  path the rewritten TSV is written to
#   ITEM     name of the column to normalize
in_tsv=$1
out_tsv=$2
item=$3

tmp=$(mktemp -t temp.record.XXXXXX)
# Remove both scratch files on every exit path, including a failure under
# `set -e`; previously they were left behind in the temp directory.
trap 'rm -f "$tmp" "$tmp.lcrm"' EXIT

# All expansions are quoted so paths containing whitespace or glob
# characters are passed through as single arguments.
# NOTE(review): extract_txt_from_tsv.py is not visible here; presumably it
# writes the ITEM column of IN_TSV to "$tmp", one line per row.
python3 extract_txt_from_tsv.py "$in_tsv" "$tmp" "$item"
python3 lcrm.py < "$tmp" > "$tmp.lcrm"
python3 replace_txt_from_tsv.py "$in_tsv" "$out_tsv" "$tmp.lcrm" "$item"
# Refresh the st_tok/ directory of one MuST-C en-<X> data set: copy the spm*
# and config* files from the st_tok.bak/ backup, rewrite the storage prefix
# inside the copied config files, then run replace_tsv.sh on every split.
#
# Usage: <this script> X      (X is the suffix of the en-X data directory)

# Abort with a usage message instead of silently operating on ".../en-".
dir=/xuchen/st/data/must_c/en-${1:?usage: $0 <lang>}
org_dir=$dir/st_tok.bak/
# replace_dir and out_dir are the same directory, so each TSV in st_tok/ is
# both the source of the replacement column and the file being overwritten.
replace_dir=$dir/st_tok/
out_dir=$dir/st_tok/
item=audio

# Variables are quoted; the glob suffixes stay outside the quotes so they
# still expand.
cp "$org_dir"/spm* "$org_dir"/config* "$out_dir"
sed -i "s#/mnt/bn/nas-xc-1#/xuchen/st#g" "$out_dir"/config*

# Same replace_tsv.sh call for every split, in the original order.
for tsv in train.tsv dev.tsv tst-COMMON.tsv; do
    ./replace_tsv.sh "$org_dir/$tsv" "$replace_dir/$tsv" "$out_dir/$tsv" "$item"
done
set -e

# Write OUT_TSV as ORG_TSV with its ITEM column replaced by text taken from
# REPLACE_TSV.
#
# Usage: replace_tsv.sh ORG_TSV REPLACE_TSV OUT_TSV ITEM
org_tsv=$1
replace_tsv=$2
out_tsv=$3
item=$4

tmp=$(mktemp -t temp.record.XXXXXX)
# Remove the scratch file on every exit path, including a failure under
# `set -e`; previously it was left behind in the temp directory.
trap 'rm -f "$tmp"' EXIT

# All expansions are quoted so paths containing whitespace or glob
# characters are passed through as single arguments.
# NOTE(review): extract_txt_from_tsv.py is not visible here; presumably it
# writes the ITEM column of REPLACE_TSV to "$tmp", one line per row.
python3 extract_txt_from_tsv.py "$replace_tsv" "$tmp" "$item"
python3 replace_txt_from_tsv.py "$org_tsv" "$out_tsv" "$tmp" "$item"
import sys
import csv
import pandas as pd
def read_tsv(path):
    """Load the tab-separated file at *path* as a list of row dicts.

    Quoting is disabled, so quote characters in fields are kept verbatim.
    The file is decoded as UTF-8 to match the replacement file and the
    output, instead of depending on the locale's default encoding.
    """
    with open(path, encoding="utf-8") as f:
        reader = csv.DictReader(
            f,
            delimiter="\t",
            quotechar=None,
            doublequote=False,
            lineterminator="\n",
            quoting=csv.QUOTE_NONE,
        )
        return [dict(row) for row in reader]


def replace_column(samples, replace_item, replace_lines):
    """Overwrite column *replace_item* of each sample, in place.

    The i-th sample receives the i-th entry of *replace_lines* with
    surrounding whitespace stripped.  Entries beyond ``len(samples)`` are
    ignored.

    Args:
        samples: list of row dicts (modified in place).
        replace_item: name of the column to overwrite.
        replace_lines: replacement values, one per sample, in row order.

    Returns:
        The same *samples* list.

    Raises:
        ValueError: if there are fewer replacement lines than samples, or a
            sample does not contain *replace_item*.
    """
    if len(replace_lines) < len(samples):
        raise ValueError(
            "Only %d replacement lines for %d samples"
            % (len(replace_lines), len(samples))
        )
    for sample, line in zip(samples, replace_lines):
        if replace_item not in sample:
            raise ValueError(
                "Item %s Error in sample: %s" % (replace_item, sample)
            )
        sample[replace_item] = line.strip()
    return samples


def main():
    """Rewrite one column of a TSV from a line-per-row text file.

    Command line: TSV_FILE OUT_FILE REPLACE_FILE REPLACE_ITEM
    """
    tsv_file = sys.argv[1]
    out_file = sys.argv[2]
    replace_file = sys.argv[3]
    replace_item = sys.argv[4]

    # Context manager so the handle is closed (it used to be left open).
    with open(replace_file, "r", encoding="utf-8") as fr:
        replace_lines = fr.readlines()

    samples = read_tsv(tsv_file)

    if len(replace_lines) > len(samples):
        # Extra lines were always ignored; keep that, but make it visible
        # because it usually means the two files are misaligned.
        print(
            "Warning: %d replacement lines for %d samples; extra lines ignored"
            % (len(replace_lines), len(samples)),
            file=sys.stderr,
        )

    try:
        replace_column(samples, replace_item, replace_lines)
    except ValueError as err:
        # Exit non-zero so a calling shell script running under `set -e`
        # stops; the previous bare exit() reported success (status 0).
        print(err, file=sys.stderr)
        sys.exit(1)

    df = pd.DataFrame.from_dict(samples)
    df.to_csv(
        out_file,
        sep="\t",
        header=True,
        index=False,
        encoding="utf-8",
        escapechar="\\",
        quoting=csv.QUOTE_NONE,
    )


if __name__ == "__main__":
    main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论