Merge pull request #148 from yt605155624/add_typehint
add sclite test for text_frontend
commit c4615e3bba
@ -11,6 +11,10 @@ For text normalization, the test data is `data/textnorm_test_cases.txt`, we use
 We use `CER` as evaluation criterion.

 ## Start
+If you want to use sclite to get more detailed information about WER, you should run the command below to build sclite first.
+```bash
+./make_sclite.sh
+```
 Run the command below to get the test results.
 ```bash
 ./run.sh
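For context, `CER` here is the character error rate: the character-level edit distance between the system output and the reference, divided by the reference length. The snippet below is a minimal, self-contained illustration of that arithmetic; it is not the `char_errors` helper that `test_textnorm.py` calls, just a sketch of the same idea.

```python
# Minimal sketch of the CER arithmetic (illustration only, not the project's
# char_errors helper): CER = edit_distance(ref, hyp) / len(ref).
def levenshtein(ref: str, hyp: str) -> int:
    """Character-level edit distance via a rolling DP row."""
    dp = list(range(len(hyp) + 1))
    for i in range(1, len(ref) + 1):
        prev, dp[0] = dp[0], i
        for j in range(1, len(hyp) + 1):
            cur = dp[j]
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            dp[j] = min(dp[j] + 1,      # deletion
                        dp[j - 1] + 1,  # insertion
                        prev + cost)    # substitution or match
            prev = cur
    return dp[-1]

def char_error_rate(ref: str, hyp: str) -> float:
    return levenshtein(ref, hyp) / max(len(ref), 1)

print(char_error_rate("一九九零年", "1990年"))  # 4 substitutions / 5 chars -> 0.8
```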
make_sclite.sh
@ -0,0 +1,13 @@
+#!/bin/bash
+
+if [ ! -d "./SCTK" ];then
+    echo "Clone SCTK ..."
+    git clone https://github.com/usnistgov/SCTK
+    echo "Clone SCTK done!"
+fi
+
+if [ ! -d "./SCTK/bin" ];then
+    echo "Start make SCTK ..."
+    pushd SCTK && make config && make all && make check && make install && make doc && popd
+    echo "SCTK make done!"
+fi
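Before running the tests, it can be worth confirming that the build actually produced the binary `run.sh` expects. The check below is my own addition, not part of the PR; the path is the one hard-coded in `run.sh`.

```python
# Hedged sanity check (not part of the PR): verify make_sclite.sh produced the
# sclite binary at the path run.sh invokes.
import os

SCLITE = "./SCTK/bin/sclite"

if os.path.isfile(SCLITE) and os.access(SCLITE, os.X_OK):
    print(f"sclite is ready: {SCLITE}")
else:
    print("sclite not found or not executable; run ./make_sclite.sh first")
```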
run.sh
@ -1,14 +1,25 @@
 #!/bin/bash
 
+USE_SCLITE=true
+
 # test g2p
-echo "Start get g2p test data."
+echo "Start get g2p test data ..."
 python3 get_g2p_data.py --root-dir=~/datasets/BZNSYP --output-dir=data/g2p
-echo "Start test g2p."
+echo "Start test g2p ..."
 python3 test_g2p.py --input-dir=data/g2p --output-dir=exp/g2p
 
 # test text normalization
-echo "Start get text normalization test data."
+echo "Start get text normalization test data ..."
 python3 get_textnorm_data.py --test-file=data/textnorm_test_cases.txt --output-dir=data/textnorm
-echo "Start test text normalization."
+echo "Start test text normalization ..."
 python3 test_textnorm.py --input-dir=data/textnorm --output-dir=exp/textnorm
 
+# whether to use sclite to get more detailed information about WER
+if [ "$USE_SCLITE" = true ];then
+    echo "Start sclite g2p ..."
+    ./SCTK/bin/sclite -i wsj -r ./exp/g2p/text.ref.clean trn -h ./exp/g2p/text.g2p trn -e utf-8 -o all
+    echo
+
+    echo "Start sclite textnorm ..."
+    ./SCTK/bin/sclite -i wsj -r ./exp/textnorm/text.ref.clean trn -h ./exp/textnorm/text.tn trn -e utf-8 -o all
+fi
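The two sclite calls added at the end of `run.sh` share one flag set: `-i wsj` selects the utterance-id convention, `-r`/`-h` point at the reference and hypothesis transcripts in `trn` format, `-e utf-8` sets the input encoding, and `-o all` writes every report type. A small wrapper like the hypothetical one below (not part of the PR) keeps those flags in one place if you prefer to drive sclite from Python instead of the shell; only the file paths are parameters.

```python
# Hypothetical wrapper (illustration, not part of the PR) around the exact
# sclite invocation used in run.sh.
import subprocess

def run_sclite(ref_trn: str, hyp_trn: str, sclite_bin: str = "./SCTK/bin/sclite"):
    cmd = [
        sclite_bin,
        "-i", "wsj",           # utterance-id convention, as in run.sh
        "-r", ref_trn, "trn",  # reference transcripts, trn format
        "-h", hyp_trn, "trn",  # hypothesis transcripts, trn format
        "-e", "utf-8",         # input encoding
        "-o", "all",           # write all report types
    ]
    subprocess.run(cmd, check=True)

# Mirrors the g2p call in run.sh:
# run_sclite("./exp/g2p/text.ref.clean", "./exp/g2p/text.g2p")
```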
test_g2p.py
@ -47,8 +47,8 @@ def get_avg_wer(raw_dict, ref_dict, frontend, output_dir):
         gt_phones = [phn for phn in gt_phones if phn not in SILENCE_TOKENS]
         gt_phones = " ".join(gt_phones)
         g2p_phones = " ".join(g2p_phones)
-        wf_ref.write(utt_id + " " + gt_phones + "\n")
-        wf_g2p.write(utt_id + " " + g2p_phones + "\n")
+        wf_ref.write(gt_phones + "(baker_" + utt_id + ")" + "\n")
+        wf_g2p.write(g2p_phones + "(baker_" + utt_id + ")" + "\n")
         edit_distance, ref_len = word_errors(gt_phones, g2p_phones)
         edit_distances.append(edit_distance)
         ref_lens.append(ref_len)
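The only behavioural change here is the output line format: instead of `utt_id` followed by the phone string, each line is now written in sclite's trn style, with the token sequence first and the utterance label in parentheses at the end, prefixed with `baker_`. A hypothetical helper mirroring the concatenation in the diff:

```python
# Hypothetical helper (illustration only) mirroring the trn-style line built
# above: "<tokens>(baker_<utt_id>)", the shape sclite's trn input expects.
def to_trn_line(tokens: str, utt_id: str, speaker: str = "baker") -> str:
    return tokens + "(" + speaker + "_" + utt_id + ")"

# e.g. to_trn_line("ni3 hao3", "000001") -> "ni3 hao3(baker_000001)"
```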
test_textnorm.py
@ -43,8 +43,8 @@ def get_avg_cer(raw_dict, ref_dict, text_normalizer, output_dir):
 
         gt_text = del_en_add_space(gt_text)
         textnorm_text = del_en_add_space(textnorm_text)
-        wf_ref.write(text_id + " " + gt_text + "\n")
-        wf_tn.write(text_id + " " + textnorm_text + "\n")
+        wf_ref.write(gt_text + "(" + text_id + ")" + "\n")
+        wf_tn.write(textnorm_text + "(" + text_id + ")" + "\n")
         edit_distance, ref_len = char_errors(gt_text, textnorm_text)
         edit_distances.append(edit_distance)
         ref_lens.append(ref_len)
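The per-utterance `edit_distance` and `ref_len` values collected above are presumably reduced to a single corpus-level CER further down in `get_avg_cer`; that reduction is outside this hunk, but under that assumption it would be total edits over total reference characters rather than a mean of per-sentence rates:

```python
# Assumed reduction (not shown in the diff): corpus-level CER from the lists
# populated above in get_avg_cer.
def corpus_cer(edit_distances, ref_lens):
    return float(sum(edit_distances)) / float(sum(ref_lens))
```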