diff --git a/examples/esc50/README.md b/examples/esc50/README.md index aa2838452847bfeab52cd51fa210c09a975c0ee0..66409754d464b7a674bc28050350fef6c77ec48a 100644 --- a/examples/esc50/README.md +++ b/examples/esc50/README.md @@ -28,7 +28,7 @@ PaddleAudio提供了PANNs的CNN14、CNN10和CNN6的预训练模型,可供用 $ CUDA_VISIBLE_DEVICES=0 ./run.sh 1 ``` -`paddlespeech/cls/exps/PANNs/train.py` 脚本中可支持配置的参数: +`paddlespeech/cls/exps/panns/train.py` 脚本中可支持配置的参数: - `device`: 指定模型预测时使用的设备。 - `feat_backend`: 选择提取特征的后端,可选`'numpy'`或`'paddle'`,默认为`'numpy'`。 @@ -65,7 +65,7 @@ model = SoundClassifier(backbone, num_class=len(ESC50.label_list)) $ CUDA_VISIBLE_DEVICES=0 ./run.sh 2 ``` -`paddlespeech/cls/exps/PANNs/predict.py` 脚本中可支持配置的参数: +`paddlespeech/cls/exps/panns/predict.py` 脚本中可支持配置的参数: - `device`: 指定模型预测时使用的设备。 - `wav`: 指定预测的音频文件。 @@ -91,7 +91,7 @@ Cat: 6.579841738130199e-06 $ CUDA_VISIBLE_DEVICES=0 ./run.sh 3 ``` -`paddlespeech/cls/exps/PANNs/export_model.py` 脚本中可支持配置的参数: +`paddlespeech/cls/exps/panns/export_model.py` 脚本中可支持配置的参数: - `checkpoint`: 模型参数checkpoint文件。 - `output_dir`: 导出静态图模型和参数文件的保存目录。 @@ -106,16 +106,13 @@ export #### 2. 模型部署和预测 -`paddlespeech/cls/exps/PANNs/deploy/predict.py` 脚本使用了`paddle.inference`模块下的api,提供了python端部署的示例: +`paddlespeech/cls/exps/panns/deploy/predict.py` 脚本使用了`paddle.inference`模块下的api,提供了python端部署的示例: ```shell -$ CUDA_VISIBLE_DEVICES=0 ./run.sh 3 -``` -```sh -python paddlespeech/cls/exps/PANNs/deploy/predict.py --model_dir ./export --device gpu +$ CUDA_VISIBLE_DEVICES=0 ./run.sh 4 ``` -`paddlespeech/cls/exps/PANNs/deploy/predict.py` 脚本中可支持配置的主要参数: +`paddlespeech/cls/exps/panns/deploy/predict.py` 脚本中可支持配置的主要参数: - `device`: 指定模型预测时使用的设备。 - `model_dir`: 导出静态图模型和参数文件的保存目录。 - `wav`: 指定预测的音频文件。 diff --git a/examples/esc50/cls0/local/infer.sh b/examples/esc50/cls0/local/infer.sh index 57fc157a45222a8a9350ab098ec7ed9b5efe2c63..bc03d68106e6123e0f7b4dfff45bb0d9182f28c1 100755 --- a/examples/esc50/cls0/local/infer.sh +++ b/examples/esc50/cls0/local/infer.sh @@ -1,13 +1,11 @@ #!/bin/bash -device=$1 -audio_file=$2 -ckpt_dir=$3 -feat_backend=$4 +audio_file=$1 +ckpt_dir=$2 +feat_backend=$3 python3 ${BIN_DIR}/predict.py \ ---device ${device} \ --wav ${audio_file} \ --feat_backend ${feat_backend} \ --top_k 10 \ ---checkpoint ${ckpt_dir}/model.pdparams \ No newline at end of file +--checkpoint ${ckpt_dir}/model.pdparams diff --git a/examples/esc50/cls0/local/static_model_infer.sh b/examples/esc50/cls0/local/static_model_infer.sh index ba4eeda47c11a09644fbc7ca5db1ed8745cb7f12..9b3abb5d75344d1ce19b029eae574e06704656c1 100755 --- a/examples/esc50/cls0/local/static_model_infer.sh +++ b/examples/esc50/cls0/local/static_model_infer.sh @@ -8,4 +8,3 @@ python3 ${BIN_DIR}/deploy/predict.py \ --device ${device} \ --model_dir ${model_dir} \ --wav ${audio_file} - diff --git a/examples/esc50/cls0/local/train.sh b/examples/esc50/cls0/local/train.sh index 194904723a0a14e8b4218ef43ed18ec8a11f6eb5..0f0f3d091a314fdadf39c27d4c35e9debaf0984c 100755 --- a/examples/esc50/cls0/local/train.sh +++ b/examples/esc50/cls0/local/train.sh @@ -1,15 +1,14 @@ #!/bin/bash ngpu=$1 -device=$2 -feat_backend=$3 +feat_backend=$2 num_epochs=50 batch_size=16 ckpt_dir=./checkpoint save_freq=10 -if [ ${ngpu} -gt 1 ]; then +if [ ${ngpu} -gt 0 ]; then python3 -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES ${BIN_DIR}/train.py \ --epochs ${num_epochs} \ --feat_backend ${feat_backend} \ @@ -18,7 +17,6 @@ if [ ${ngpu} -gt 1 ]; then --save_freq ${save_freq} else python3 ${BIN_DIR}/train.py \ - --device ${device} \ --epochs ${num_epochs} \ --feat_backend ${feat_backend} \ --batch_size ${batch_size} \ diff --git a/examples/esc50/cls0/path.sh b/examples/esc50/cls0/path.sh index 2cc73e27ae5b77947547ade1c700e8591e665975..3eff28e48caca0cfa18173639c4ecc905b73d4fc 100644 --- a/examples/esc50/cls0/path.sh +++ b/examples/esc50/cls0/path.sh @@ -9,5 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1 export PYTHONIOENCODING=UTF-8 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} -MODEL=PANNs +MODEL=panns export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL} \ No newline at end of file diff --git a/examples/esc50/cls0/run.sh b/examples/esc50/cls0/run.sh index 63ba99f42f04be6a74f1ca6d771972cb2843e808..7283aa8d7cf85c2acd5bede3eaf77ffc03f848f3 100755 --- a/examples/esc50/cls0/run.sh +++ b/examples/esc50/cls0/run.sh @@ -3,35 +3,31 @@ set -e source path.sh ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') -if [ ${ngpu} == 0 ];then - device=cpu -else - device=gpu -fi stage=$1 stop_stage=100 feat_backend=numpy +audio_file=~/cat.wav +ckpt_dir=./checkpoint/epoch_50 +output_dir=./export +infer_device=cpu if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - ./local/train.sh ${ngpu} ${device} ${feat_backend} || exit -1 + ./local/train.sh ${ngpu} ${feat_backend} || exit -1 exit 0 fi -audio_file=~/cat.wav -ckpt_dir=./checkpoint/epoch_50 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ./local/infer.sh ${device} ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1 + ./local/infer.sh ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1 exit 0 fi -output_dir=./export if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then ./local/export.sh ${ckpt_dir} ${output_dir} || exit -1 exit 0 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - ./local/static_model_infer.sh ${device} ${output_dir} ${audio_file} || exit -1 + ./local/static_model_infer.sh ${infer_device} ${output_dir} ${audio_file} || exit -1 exit 0 fi diff --git a/paddlespeech/cls/exps/PANNs/__init__.py b/paddlespeech/cls/exps/panns/__init__.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/__init__.py rename to paddlespeech/cls/exps/panns/__init__.py diff --git a/paddlespeech/cls/exps/PANNs/deploy/__init__.py b/paddlespeech/cls/exps/panns/deploy/__init__.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/deploy/__init__.py rename to paddlespeech/cls/exps/panns/deploy/__init__.py diff --git a/paddlespeech/cls/exps/PANNs/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/deploy/predict.py rename to paddlespeech/cls/exps/panns/deploy/predict.py diff --git a/paddlespeech/cls/exps/PANNs/export_model.py b/paddlespeech/cls/exps/panns/export_model.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/export_model.py rename to paddlespeech/cls/exps/panns/export_model.py diff --git a/paddlespeech/cls/exps/PANNs/predict.py b/paddlespeech/cls/exps/panns/predict.py similarity index 94% rename from paddlespeech/cls/exps/PANNs/predict.py rename to paddlespeech/cls/exps/panns/predict.py index 717b35edb7e8a6791c0643b1f651a468487d9264..9cfd8b6ce44ab7318a218a52bd1c2eaee28d680a 100644 --- a/paddlespeech/cls/exps/PANNs/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -26,7 +26,6 @@ from paddlespeech.cls.models import SoundClassifier # yapf: disable parser = argparse.ArgumentParser(__doc__) -parser.add_argument('--device', choices=['cpu', 'gpu'], default="gpu", help="Select which device to predict, defaults to gpu.") parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.") parser.add_argument("--feat_backend", type=str, choices=['numpy', 'paddle'], default='numpy', help="Choose backend to extract features from audio files.") parser.add_argument("--top_k", type=int, default=1, help="Show top k predicted results") @@ -51,7 +50,6 @@ def extract_features(file: str, feat_backend: str='numpy', if __name__ == '__main__': - paddle.set_device(args.device) model = SoundClassifier( backbone=cnn14(pretrained=False, extract_embedding=True), diff --git a/paddlespeech/cls/exps/PANNs/train.py b/paddlespeech/cls/exps/panns/train.py similarity index 97% rename from paddlespeech/cls/exps/PANNs/train.py rename to paddlespeech/cls/exps/panns/train.py index e66724b8a482faebc7e50c05973dfb3c2f9f2e4b..1213097899cee2594b5ecf4e91310c16b5f46841 100644 --- a/paddlespeech/cls/exps/PANNs/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -25,7 +25,6 @@ from paddlespeech.cls.models import SoundClassifier # yapf: disable parser = argparse.ArgumentParser(__doc__) -parser.add_argument('--device', choices=['cpu', 'gpu'], default="gpu", help="Select which device to train model, defaults to gpu.") parser.add_argument("--epochs", type=int, default=50, help="Number of epoches for fine-tuning.") parser.add_argument("--feat_backend", type=str, choices=['numpy', 'paddle'], default='numpy', help="Choose backend to extract features from audio files.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") @@ -38,7 +37,6 @@ args = parser.parse_args() # yapf: enable if __name__ == "__main__": - paddle.set_device(args.device) nranks = paddle.distributed.get_world_size() if paddle.distributed.get_world_size() > 1: paddle.distributed.init_parallel_env() diff --git a/paddlespeech/cls/models/__init__.py b/paddlespeech/cls/models/__init__.py index 66030b727590f95358bb3f93039085edc9f134bd..4bfadda11968aa2262d810941761dca6838a0d79 100644 --- a/paddlespeech/cls/models/__init__.py +++ b/paddlespeech/cls/models/__init__.py @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .PANNs import * +from .panns import * diff --git a/paddlespeech/cls/models/PANNs/__init__.py b/paddlespeech/cls/models/panns/__init__.py similarity index 100% rename from paddlespeech/cls/models/PANNs/__init__.py rename to paddlespeech/cls/models/panns/__init__.py diff --git a/paddlespeech/cls/models/PANNs/classifier.py b/paddlespeech/cls/models/panns/classifier.py similarity index 100% rename from paddlespeech/cls/models/PANNs/classifier.py rename to paddlespeech/cls/models/panns/classifier.py diff --git a/paddlespeech/cls/models/PANNs/panns.py b/paddlespeech/cls/models/panns/panns.py similarity index 100% rename from paddlespeech/cls/models/PANNs/panns.py rename to paddlespeech/cls/models/panns/panns.py