提交 25ff2242 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!2664 [MD]move mindrecord transform scripts to utils directory of model zoo.

Merge pull request !2664 from liyong126/mindrecord_model_zoo
......@@ -42,7 +42,7 @@ MINDRECORD_PATH=`pwd`/data_mr
rm -f $MINDRECORD_PATH/*
cd ../../../example/graph_to_mindrecord || exit
cd ../../utils/graph_to_mindrecord || exit
python writer.py --mindrecord_script $DATASET_NAME \
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \
......
......@@ -43,7 +43,7 @@ MINDRECORD_PATH=`pwd`/data_mr
rm -f $MINDRECORD_PATH/$DATASET_NAME
rm -f $MINDRECORD_PATH/$DATASET_NAME.db
cd ../../../example/graph_to_mindrecord || exit
cd ../../utils/graph_to_mindrecord || exit
python writer.py --mindrecord_script $DATASET_NAME \
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \
......
......@@ -17,24 +17,24 @@
rm -f output/train.mindrecord*
rm -f output/dev.mindrecord*
if [ ! -d "../../../third_party/to_mindrecord/CLUERNER2020" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/CLUERNER2020 is not exist."
if [ ! -d "../../../../third_party/to_mindrecord/CLUERNER2020" ]; then
echo "The patch base dir ../../../../third_party/to_mindrecord/CLUERNER2020 is not exist."
exit 1
fi
if [ ! -f "../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch is not exist."
if [ ! -f "../../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch" ]; then
echo "The patch file ../../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch is not exist."
exit 1
fi
# patch for data_processor_seq.py
patch -p0 -d ../../../third_party/to_mindrecord/CLUERNER2020/ -o data_processor_seq_patched.py < ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch
patch -p0 -d ../../../../third_party/to_mindrecord/CLUERNER2020/ -o data_processor_seq_patched.py < ../../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch
if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq.py failed"
echo "Patch ../../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq.py failed"
exit 1
fi
# use patched script
python ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq_patched.py \
--vocab_file=../../../third_party/to_mindrecord/CLUERNER2020/vocab.txt \
--label2id_file=../../../third_party/to_mindrecord/CLUERNER2020/label2id.json
python ../../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq_patched.py \
--vocab_file=../../../../third_party/to_mindrecord/CLUERNER2020/vocab.txt \
--label2id_file=../../../../third_party/to_mindrecord/CLUERNER2020/label2id.json
......@@ -66,20 +66,20 @@ getdir "${data_dir}"
# echo "The input files: "${file_list[@]}
# echo "The output files: "${output_filename[@]}
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1
fi
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1
fi
# patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1
fi
......@@ -94,7 +94,7 @@ file_list_len=`expr ${#file_list[*]} - 1`
for index in $(seq 0 $file_list_len); do
echo "Begin preprocess input file: ${file_list[$index]}"
echo "Begin output file: ${output_filename[$index]}"
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=${file_list[$index]} \
--output_file=${output_dir}/${output_filename[$index]} \
--partition_number=1 \
......
......@@ -26,7 +26,7 @@ This example is based on [zhwiki](https://dumps.wikimedia.org/zhwiki) training d
Follow the step:
```bash
bash run_simple.sh # generate output/simple.mindrecord* by ../../../third_party/to_mindrecord/zhwiki/sample_text.txt
bash run_simple.sh # generate output/simple.mindrecord* by ../../../../third_party/to_mindrecord/zhwiki/sample_text.txt
bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord*
```
......
......@@ -45,20 +45,20 @@ getdir "${data_dir}"
# echo "The input files: "${file_list[@]}
# echo "The output files: "${output_filename[@]}
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1
fi
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1
fi
# patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1
fi
......@@ -73,11 +73,11 @@ file_list_len=`expr ${#file_list[*]} - 1`
for index in $(seq 0 $file_list_len); do
echo "Begin preprocess input file: ${file_list[$index]}"
echo "Begin output file: ${output_filename[$index]}"
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=${file_list[$index]} \
--output_file=output/${output_filename[$index]} \
--partition_number=1 \
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--do_lower_case=True \
--max_seq_length=128 \
--max_predictions_per_seq=20 \
......
......@@ -15,4 +15,4 @@
# ============================================================================
# create dataset for train
python create_dataset.py --input_file=output/simple.mindrecord0
python create_dataset.py --input_file=output/simple.mindrecord
......@@ -16,29 +16,29 @@
rm -f output/simple.mindrecord*
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1
fi
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1
fi
# patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1
fi
# using patched script to generate mindrecord
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=../../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
--output_file=output/simple.mindrecord \
--partition_number=4 \
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--partition_number=1 \
--vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--do_lower_case=True \
--max_seq_length=128 \
--max_predictions_per_seq=20 \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册