From 7afbdbefadd6b249e0560d21afa47c7a33a9ab6f Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Tue, 24 May 2022 21:29:36 +0800 Subject: [PATCH] update the vector model, test=doc --- demos/audio_content_search/requirements.txt | 1 + .../streaming_asr_server.py | 38 +++++ demos/speaker_verification/README.md | 159 +++++++++--------- demos/speaker_verification/README_cn.md | 158 ++++++++--------- docs/source/released_model.md | 2 +- examples/voxceleb/sv0/README.md | 6 +- examples/voxceleb/sv0/RESULT.md | 1 + examples/voxceleb/sv0/conf/ecapa_tdnn.yaml | 8 + .../voxceleb/sv0/conf/ecapa_tdnn_small.yaml | 7 + paddlespeech/cli/vector/pretrained_models.py | 4 +- .../server/engine/acs/python/acs_engine.py | 56 ++++-- .../server/tests/vector/http_client.py | 59 +++++++ 12 files changed, 324 insertions(+), 175 deletions(-) create mode 100644 demos/audio_content_search/requirements.txt create mode 100644 demos/audio_content_search/streaming_asr_server.py create mode 100644 paddlespeech/server/tests/vector/http_client.py diff --git a/demos/audio_content_search/requirements.txt b/demos/audio_content_search/requirements.txt new file mode 100644 index 00000000..4126a486 --- /dev/null +++ b/demos/audio_content_search/requirements.txt @@ -0,0 +1 @@ +websocket-client \ No newline at end of file diff --git a/demos/audio_content_search/streaming_asr_server.py b/demos/audio_content_search/streaming_asr_server.py new file mode 100644 index 00000000..011b009a --- /dev/null +++ b/demos/audio_content_search/streaming_asr_server.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from paddlespeech.cli.log import logger +from paddlespeech.server.bin.paddlespeech_server import ServerExecutor +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog='paddlespeech_server.start', add_help=True) + parser.add_argument( + "--config_file", + action="store", + help="yaml file of the app", + default=None, + required=True) + + parser.add_argument( + "--log_file", + action="store", + help="log file", + default="./log/paddlespeech.log") + logger.info("start to parse the args") + args = parser.parse_args() + + logger.info("start to launch the streaming asr server") + streaming_asr_server = ServerExecutor() + streaming_asr_server(config_file=args.config_file, log_file=args.log_file) diff --git a/demos/speaker_verification/README.md b/demos/speaker_verification/README.md index b6a1d9bc..a7d0f819 100644 --- a/demos/speaker_verification/README.md +++ b/demos/speaker_verification/README.md @@ -53,50 +53,49 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav Output: ```bash - demo [ 1.4217498 5.626253 -5.342073 1.1773866 3.308055 - 1.756596 5.167894 10.80636 -3.8226728 -5.6141334 - 2.623845 -0.8072968 1.9635103 -7.3128724 0.01103897 - -9.723131 0.6619743 -6.976803 10.213478 7.494748 - 2.9105635 3.8949256 3.7999806 7.1061673 16.905321 - -7.1493764 8.733103 3.4230042 -4.831653 -11.403367 - 11.232214 7.1274667 -4.2828417 2.452362 -5.130748 - -18.177666 -2.6116815 -11.000337 -6.7314315 1.6564683 - 0.7618269 1.1253023 -2.083836 4.725744 -8.782597 - -3.539873 3.814236 5.1420674 2.162061 4.096431 - 
-6.4162116 12.747448 1.9429878 -15.152943 6.417416 - 16.097002 -9.716668 -1.9920526 -3.3649497 -1.871939 - 11.567354 3.69788 11.258265 7.442363 9.183411 - 4.5281515 -1.2417862 4.3959084 6.6727695 5.8898783 - 7.627124 -0.66919386 -11.889693 -9.208865 -7.4274073 - -3.7776625 6.917234 -9.848748 -2.0944717 -5.135116 - 0.49563864 9.317534 -5.9141874 -1.8098574 -0.11738578 - -7.169265 -1.0578263 -5.7216787 -5.1173844 16.137651 - -4.473626 7.6624317 -0.55381083 9.631587 -6.4704556 - -8.548508 4.3716145 -0.79702514 4.478997 -2.9758704 - 3.272176 2.8382776 5.134597 -9.190781 -0.5657382 - -4.8745747 2.3165567 -5.984303 -2.1798875 0.35541576 - -0.31784213 9.493548 2.1144536 4.358092 -12.089823 - 8.451689 -7.925461 4.6242585 4.4289427 18.692003 - -2.6204622 -5.149185 -0.35821092 8.488551 4.981496 - -9.32683 -2.2544234 6.6417594 1.2119585 10.977129 - 16.555033 3.3238444 9.551863 -1.6676947 -0.79539716 - -8.605674 -0.47356385 2.6741948 -5.359179 -2.6673796 - 0.66607 15.443222 4.740594 -3.4725387 11.592567 - -2.054497 1.7361217 -8.265324 -9.30447 5.4068313 - -1.5180256 -7.746615 -6.089606 0.07112726 -0.34904733 - -8.649895 -9.998958 -2.564841 -0.53999114 2.601808 - -0.31927416 -1.8815292 -2.07215 -3.4105783 -8.2998085 - 1.483641 -15.365992 -8.288208 3.8847756 -3.4876456 - 7.3629923 0.4657332 3.132599 12.438889 -1.8337058 - 4.532936 2.7264361 10.145339 -6.521951 2.897153 - -3.3925855 5.079156 7.759716 4.677565 5.8457737 - 2.402413 7.7071047 3.9711342 -6.390043 6.1268735 - -3.7760346 -11.118123 ] + demo [ -1.3251206 7.8606825 -4.620626 0.3000721 2.2648535 + -1.1931441 3.0647137 7.673595 -6.0044727 -12.02426 + -1.9496069 3.1269536 1.618838 -7.6383104 -1.2299773 + -12.338331 2.1373026 -5.3957124 9.717328 5.6752305 + 3.7805123 3.0597172 3.429692 8.97601 13.174125 + -0.53132284 8.9424715 4.46511 -4.4262476 -9.726503 + 8.399328 7.2239175 -7.435854 2.9441683 -4.3430395 + -13.886965 -1.6346735 -10.9027405 -5.311245 3.8007221 + 3.8976038 -2.1230774 -2.3521194 4.151031 -7.4048667 + 
0.13911647 2.4626107 4.9664545 0.9897574 5.4839754 + -3.3574002 10.1340065 -0.6120171 -10.403095 4.6007543 + 16.00935 -7.7836914 -4.1945305 -6.9368606 1.1789556 + 11.490801 4.2380238 9.550931 8.375046 7.5089145 + -0.65707296 -0.30051577 2.8406055 3.0828028 0.730817 + 6.148354 0.13766119 -13.424735 -7.7461405 -2.3227983 + -8.305252 2.9879124 -10.995229 0.15211068 -2.3820348 + -1.7984174 8.495629 -5.8522367 -3.755498 0.6989711 + -5.2702994 -2.6188622 -1.8828466 -4.64665 14.078544 + -0.5495333 10.579158 -3.2160501 9.349004 -4.381078 + -11.675817 -2.8630207 4.5721755 2.246612 -4.574342 + 1.8610188 2.3767874 5.6257877 -9.784078 0.64967257 + -1.4579505 0.4263264 -4.9211264 -2.454784 3.4869802 + -0.42654222 8.341269 1.356552 7.0966883 -13.102829 + 8.016734 -7.1159344 1.8699781 0.208721 14.699384 + -1.025278 -2.6107233 -2.5082312 8.427193 6.9138527 + -6.2912464 0.6157366 2.489688 -3.4668267 9.921763 + 11.200815 -0.1966403 7.4916005 -0.62312716 -0.25848144 + -9.947997 -0.9611041 1.1649219 -2.1907122 -1.5028487 + -0.51926106 15.165954 2.4649463 -0.9980445 7.4416637 + -2.0768049 3.5896823 -7.3055434 -7.5620847 4.323335 + 0.0804418 -6.56401 -2.3148053 -1.7642345 -2.4708817 + -7.675618 -9.548878 -1.0177554 0.16986446 2.5877135 + -1.8752296 -0.36614323 -6.0493784 -2.3965611 -5.9453387 + 0.9424033 -13.155974 -7.457801 0.14658108 -3.742797 + 5.8414927 -1.2872906 5.5694313 12.57059 1.0939219 + 2.2142086 1.9181576 6.9914207 -5.888139 3.1409824 + -2.003628 2.4434285 9.973139 5.03668 2.0051203 + 2.8615603 5.860224 2.9176188 -1.6311141 2.0292206 + -4.070415 -6.831437 ] ``` - Python API ```python - import paddle from paddlespeech.cli import VectorExecutor vector_executor = VectorExecutor() @@ -169,47 +168,47 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -3.7760346 -11.118123 ] # get the test embedding Test embedding Result: - [ -1.902964 2.0690894 -8.034194 3.5472693 0.18089125 - 6.9085927 1.4097427 -1.9487704 -10.021278 -0.20755845 - -8.04332 4.344489 
2.3200977 -14.306299 5.184692 - -11.55602 -3.8497238 0.6444722 1.2833948 2.6766639 - 0.5878921 0.7946299 1.7207596 2.5791872 14.998469 - -1.3385371 15.031221 -0.8006958 1.99287 -9.52007 - 2.435466 4.003221 -4.33817 -4.898601 -5.304714 - -18.033886 10.790787 -12.784645 -5.641755 2.9761686 - -10.566622 1.4839455 6.152458 -5.7195854 2.8603241 - 6.112133 8.489869 5.5958056 1.2836679 -1.2293907 - 0.89927405 7.0288725 -2.854029 -0.9782962 5.8255906 - 14.905906 -5.025907 0.7866458 -4.2444224 -16.354029 - 10.521315 0.9604709 -3.3257897 7.144871 -13.592733 - -8.568869 -1.7953678 0.26313916 10.916714 -6.9374123 - 1.857403 -6.2746415 2.8154466 -7.2338667 -2.293357 - -0.05452765 5.4287076 5.0849075 -6.690375 -1.6183422 - 3.654291 0.94352573 -9.200294 -5.4749465 -3.5235846 - 1.3420814 4.240421 -2.772944 -2.8451524 16.311104 - 4.2969875 -1.762936 -12.5758915 8.595198 -0.8835239 - -1.5708797 1.568961 1.1413603 3.5032008 -0.45251232 - -6.786333 16.89443 5.3366146 -8.789056 0.6355629 - 3.2579517 -3.328322 7.5969577 0.66025066 -6.550468 - -9.148656 2.020372 -0.4615173 1.1965656 -3.8764873 - 11.6562195 -6.0750933 12.182899 3.2218833 0.81969476 - 5.570001 -3.8459578 -7.205299 7.9262037 -7.6611166 - -5.249467 -2.2671914 7.2658715 -13.298164 4.821147 - -2.7263982 11.691089 -3.8918593 -2.838112 -1.0336838 - -3.8034165 2.8536487 -5.60398 -1.1972581 1.3455094 - -3.4903061 2.2408795 5.5010734 -3.970756 11.99696 - -7.8858757 0.43160373 -5.5059714 4.3426995 16.322706 - 11.635366 0.72157705 -9.245714 -3.91465 -4.449838 - -1.5716927 7.713747 -2.2430465 -6.198303 -13.481864 - 2.8156567 -5.7812386 5.1456156 2.7289324 -14.505571 - 13.270688 3.448231 -7.0659585 4.5886116 -4.466099 - -0.296428 -11.463529 -2.6076477 14.110243 -6.9725137 - -1.9962958 2.7119343 19.391657 0.01961198 14.607133 - -1.6695905 -4.391516 1.3131028 -6.670972 -5.888604 - 12.0612335 5.9285784 3.3715196 1.492534 10.723728 - -0.95514804 -12.085431 ] + [ 2.5247195 5.119042 -4.335273 4.4583654 5.047907 + 3.5059214 1.6159848 
0.49364898 -11.6899185 -3.1014526 + -5.6589785 -0.42684984 2.674276 -11.937654 6.2248464 + -10.776924 -5.694543 1.112041 1.5709964 1.0961034 + 1.3976512 2.324352 1.339981 5.279319 13.734659 + -2.5753925 13.651442 -2.2357535 5.1575427 -3.251567 + 1.4023279 6.1191974 -6.0845175 -1.3646189 -2.6789894 + -15.220778 9.779349 -9.411551 -6.388947 6.8313975 + -9.245996 0.31196198 2.5509644 -4.413065 6.1649427 + 6.793837 2.6328635 8.620976 3.4832475 0.52491665 + 2.9115407 5.8392377 0.6702376 -3.2726715 2.6694255 + 16.91701 -5.5811176 0.23362345 -4.5573606 -11.801059 + 14.728292 -0.5198082 -3.999922 7.0927105 -7.0459595 + -5.4389 -0.46420583 -5.1085467 10.376568 -8.889225 + -0.37705845 -1.659806 2.6731026 -7.1909504 1.4608804 + -2.163136 -0.17949677 4.0241547 0.11319201 0.601279 + 2.039692 3.1910992 -11.649526 -8.121584 -4.8707457 + 0.3851982 1.4231744 -2.3321972 0.99332285 14.121717 + 5.899413 0.7384519 -17.760096 10.555021 4.1366534 + -0.3391071 -0.20792882 3.208204 0.8847948 -8.721497 + -6.432868 13.006379 4.8956 -9.155822 -1.9441519 + 5.7815638 -2.066733 10.425042 -0.8802383 -2.4314315 + -9.869258 0.35095334 -5.3549943 2.1076174 -8.290468 + 8.4433365 -4.689333 9.334139 -2.172678 -3.0250976 + 8.394216 -3.2110903 -7.93868 2.3960824 -2.3213403 + -1.4963245 -3.476059 4.132903 -10.893354 4.362673 + -0.45456508 10.258634 -1.1655927 -6.7799754 0.22885278 + -4.399287 2.333433 -4.84745 -4.2752337 -1.3577863 + -1.0685898 9.505196 7.3062205 0.08708266 12.927811 + -9.57974 1.3936648 -1.9444873 5.776769 15.251903 + 10.6118355 -1.4903594 -9.535318 -3.6553776 -1.6699586 + -0.5933151 7.600357 -4.8815503 -8.698617 -15.855757 + 0.25632986 -7.2235737 0.9506656 0.7128582 -9.051738 + 8.74869 -1.6426028 -6.5762258 2.506905 -6.7431564 + 5.129912 -12.189555 -3.6435068 12.068113 -6.0059533 + -2.3535995 2.9014351 22.3082 -1.5563312 13.193291 + 2.7583609 -7.468798 1.3407065 -4.599617 -6.2345777 + 10.7689295 7.137627 5.099476 0.3473359 9.647881 + -2.0484571 -5.8549366 ] # get the score between 
enroll and test - Eembeddings Score: 0.4292638301849365 + Eembeddings Score: 0.45332613587379456 ``` ### 4.Pretrained Models diff --git a/demos/speaker_verification/README_cn.md b/demos/speaker_verification/README_cn.md index 90bba38a..04e1aeec 100644 --- a/demos/speaker_verification/README_cn.md +++ b/demos/speaker_verification/README_cn.md @@ -51,45 +51,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav 输出: ```bash - demo [ 1.4217498 5.626253 -5.342073 1.1773866 3.308055 - 1.756596 5.167894 10.80636 -3.8226728 -5.6141334 - 2.623845 -0.8072968 1.9635103 -7.3128724 0.01103897 - -9.723131 0.6619743 -6.976803 10.213478 7.494748 - 2.9105635 3.8949256 3.7999806 7.1061673 16.905321 - -7.1493764 8.733103 3.4230042 -4.831653 -11.403367 - 11.232214 7.1274667 -4.2828417 2.452362 -5.130748 - -18.177666 -2.6116815 -11.000337 -6.7314315 1.6564683 - 0.7618269 1.1253023 -2.083836 4.725744 -8.782597 - -3.539873 3.814236 5.1420674 2.162061 4.096431 - -6.4162116 12.747448 1.9429878 -15.152943 6.417416 - 16.097002 -9.716668 -1.9920526 -3.3649497 -1.871939 - 11.567354 3.69788 11.258265 7.442363 9.183411 - 4.5281515 -1.2417862 4.3959084 6.6727695 5.8898783 - 7.627124 -0.66919386 -11.889693 -9.208865 -7.4274073 - -3.7776625 6.917234 -9.848748 -2.0944717 -5.135116 - 0.49563864 9.317534 -5.9141874 -1.8098574 -0.11738578 - -7.169265 -1.0578263 -5.7216787 -5.1173844 16.137651 - -4.473626 7.6624317 -0.55381083 9.631587 -6.4704556 - -8.548508 4.3716145 -0.79702514 4.478997 -2.9758704 - 3.272176 2.8382776 5.134597 -9.190781 -0.5657382 - -4.8745747 2.3165567 -5.984303 -2.1798875 0.35541576 - -0.31784213 9.493548 2.1144536 4.358092 -12.089823 - 8.451689 -7.925461 4.6242585 4.4289427 18.692003 - -2.6204622 -5.149185 -0.35821092 8.488551 4.981496 - -9.32683 -2.2544234 6.6417594 1.2119585 10.977129 - 16.555033 3.3238444 9.551863 -1.6676947 -0.79539716 - -8.605674 -0.47356385 2.6741948 -5.359179 -2.6673796 - 0.66607 15.443222 4.740594 -3.4725387 11.592567 - -2.054497 
1.7361217 -8.265324 -9.30447 5.4068313 - -1.5180256 -7.746615 -6.089606 0.07112726 -0.34904733 - -8.649895 -9.998958 -2.564841 -0.53999114 2.601808 - -0.31927416 -1.8815292 -2.07215 -3.4105783 -8.2998085 - 1.483641 -15.365992 -8.288208 3.8847756 -3.4876456 - 7.3629923 0.4657332 3.132599 12.438889 -1.8337058 - 4.532936 2.7264361 10.145339 -6.521951 2.897153 - -3.3925855 5.079156 7.759716 4.677565 5.8457737 - 2.402413 7.7071047 3.9711342 -6.390043 6.1268735 - -3.7760346 -11.118123 ] + [ -1.3251206 7.8606825 -4.620626 0.3000721 2.2648535 + -1.1931441 3.0647137 7.673595 -6.0044727 -12.02426 + -1.9496069 3.1269536 1.618838 -7.6383104 -1.2299773 + -12.338331 2.1373026 -5.3957124 9.717328 5.6752305 + 3.7805123 3.0597172 3.429692 8.97601 13.174125 + -0.53132284 8.9424715 4.46511 -4.4262476 -9.726503 + 8.399328 7.2239175 -7.435854 2.9441683 -4.3430395 + -13.886965 -1.6346735 -10.9027405 -5.311245 3.8007221 + 3.8976038 -2.1230774 -2.3521194 4.151031 -7.4048667 + 0.13911647 2.4626107 4.9664545 0.9897574 5.4839754 + -3.3574002 10.1340065 -0.6120171 -10.403095 4.6007543 + 16.00935 -7.7836914 -4.1945305 -6.9368606 1.1789556 + 11.490801 4.2380238 9.550931 8.375046 7.5089145 + -0.65707296 -0.30051577 2.8406055 3.0828028 0.730817 + 6.148354 0.13766119 -13.424735 -7.7461405 -2.3227983 + -8.305252 2.9879124 -10.995229 0.15211068 -2.3820348 + -1.7984174 8.495629 -5.8522367 -3.755498 0.6989711 + -5.2702994 -2.6188622 -1.8828466 -4.64665 14.078544 + -0.5495333 10.579158 -3.2160501 9.349004 -4.381078 + -11.675817 -2.8630207 4.5721755 2.246612 -4.574342 + 1.8610188 2.3767874 5.6257877 -9.784078 0.64967257 + -1.4579505 0.4263264 -4.9211264 -2.454784 3.4869802 + -0.42654222 8.341269 1.356552 7.0966883 -13.102829 + 8.016734 -7.1159344 1.8699781 0.208721 14.699384 + -1.025278 -2.6107233 -2.5082312 8.427193 6.9138527 + -6.2912464 0.6157366 2.489688 -3.4668267 9.921763 + 11.200815 -0.1966403 7.4916005 -0.62312716 -0.25848144 + -9.947997 -0.9611041 1.1649219 -2.1907122 -1.5028487 + -0.51926106 
15.165954 2.4649463 -0.9980445 7.4416637 + -2.0768049 3.5896823 -7.3055434 -7.5620847 4.323335 + 0.0804418 -6.56401 -2.3148053 -1.7642345 -2.4708817 + -7.675618 -9.548878 -1.0177554 0.16986446 2.5877135 + -1.8752296 -0.36614323 -6.0493784 -2.3965611 -5.9453387 + 0.9424033 -13.155974 -7.457801 0.14658108 -3.742797 + 5.8414927 -1.2872906 5.5694313 12.57059 1.0939219 + 2.2142086 1.9181576 6.9914207 -5.888139 3.1409824 + -2.003628 2.4434285 9.973139 5.03668 2.0051203 + 2.8615603 5.860224 2.9176188 -1.6311141 2.0292206 + -4.070415 -6.831437 ] ``` - Python API @@ -166,47 +166,47 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -3.7760346 -11.118123 ] # get the test embedding Test embedding Result: - [ -1.902964 2.0690894 -8.034194 3.5472693 0.18089125 - 6.9085927 1.4097427 -1.9487704 -10.021278 -0.20755845 - -8.04332 4.344489 2.3200977 -14.306299 5.184692 - -11.55602 -3.8497238 0.6444722 1.2833948 2.6766639 - 0.5878921 0.7946299 1.7207596 2.5791872 14.998469 - -1.3385371 15.031221 -0.8006958 1.99287 -9.52007 - 2.435466 4.003221 -4.33817 -4.898601 -5.304714 - -18.033886 10.790787 -12.784645 -5.641755 2.9761686 - -10.566622 1.4839455 6.152458 -5.7195854 2.8603241 - 6.112133 8.489869 5.5958056 1.2836679 -1.2293907 - 0.89927405 7.0288725 -2.854029 -0.9782962 5.8255906 - 14.905906 -5.025907 0.7866458 -4.2444224 -16.354029 - 10.521315 0.9604709 -3.3257897 7.144871 -13.592733 - -8.568869 -1.7953678 0.26313916 10.916714 -6.9374123 - 1.857403 -6.2746415 2.8154466 -7.2338667 -2.293357 - -0.05452765 5.4287076 5.0849075 -6.690375 -1.6183422 - 3.654291 0.94352573 -9.200294 -5.4749465 -3.5235846 - 1.3420814 4.240421 -2.772944 -2.8451524 16.311104 - 4.2969875 -1.762936 -12.5758915 8.595198 -0.8835239 - -1.5708797 1.568961 1.1413603 3.5032008 -0.45251232 - -6.786333 16.89443 5.3366146 -8.789056 0.6355629 - 3.2579517 -3.328322 7.5969577 0.66025066 -6.550468 - -9.148656 2.020372 -0.4615173 1.1965656 -3.8764873 - 11.6562195 -6.0750933 12.182899 3.2218833 
0.81969476 - 5.570001 -3.8459578 -7.205299 7.9262037 -7.6611166 - -5.249467 -2.2671914 7.2658715 -13.298164 4.821147 - -2.7263982 11.691089 -3.8918593 -2.838112 -1.0336838 - -3.8034165 2.8536487 -5.60398 -1.1972581 1.3455094 - -3.4903061 2.2408795 5.5010734 -3.970756 11.99696 - -7.8858757 0.43160373 -5.5059714 4.3426995 16.322706 - 11.635366 0.72157705 -9.245714 -3.91465 -4.449838 - -1.5716927 7.713747 -2.2430465 -6.198303 -13.481864 - 2.8156567 -5.7812386 5.1456156 2.7289324 -14.505571 - 13.270688 3.448231 -7.0659585 4.5886116 -4.466099 - -0.296428 -11.463529 -2.6076477 14.110243 -6.9725137 - -1.9962958 2.7119343 19.391657 0.01961198 14.607133 - -1.6695905 -4.391516 1.3131028 -6.670972 -5.888604 - 12.0612335 5.9285784 3.3715196 1.492534 10.723728 - -0.95514804 -12.085431 ] + [ 2.5247195 5.119042 -4.335273 4.4583654 5.047907 + 3.5059214 1.6159848 0.49364898 -11.6899185 -3.1014526 + -5.6589785 -0.42684984 2.674276 -11.937654 6.2248464 + -10.776924 -5.694543 1.112041 1.5709964 1.0961034 + 1.3976512 2.324352 1.339981 5.279319 13.734659 + -2.5753925 13.651442 -2.2357535 5.1575427 -3.251567 + 1.4023279 6.1191974 -6.0845175 -1.3646189 -2.6789894 + -15.220778 9.779349 -9.411551 -6.388947 6.8313975 + -9.245996 0.31196198 2.5509644 -4.413065 6.1649427 + 6.793837 2.6328635 8.620976 3.4832475 0.52491665 + 2.9115407 5.8392377 0.6702376 -3.2726715 2.6694255 + 16.91701 -5.5811176 0.23362345 -4.5573606 -11.801059 + 14.728292 -0.5198082 -3.999922 7.0927105 -7.0459595 + -5.4389 -0.46420583 -5.1085467 10.376568 -8.889225 + -0.37705845 -1.659806 2.6731026 -7.1909504 1.4608804 + -2.163136 -0.17949677 4.0241547 0.11319201 0.601279 + 2.039692 3.1910992 -11.649526 -8.121584 -4.8707457 + 0.3851982 1.4231744 -2.3321972 0.99332285 14.121717 + 5.899413 0.7384519 -17.760096 10.555021 4.1366534 + -0.3391071 -0.20792882 3.208204 0.8847948 -8.721497 + -6.432868 13.006379 4.8956 -9.155822 -1.9441519 + 5.7815638 -2.066733 10.425042 -0.8802383 -2.4314315 + -9.869258 0.35095334 -5.3549943 2.1076174 
-8.290468 + 8.4433365 -4.689333 9.334139 -2.172678 -3.0250976 + 8.394216 -3.2110903 -7.93868 2.3960824 -2.3213403 + -1.4963245 -3.476059 4.132903 -10.893354 4.362673 + -0.45456508 10.258634 -1.1655927 -6.7799754 0.22885278 + -4.399287 2.333433 -4.84745 -4.2752337 -1.3577863 + -1.0685898 9.505196 7.3062205 0.08708266 12.927811 + -9.57974 1.3936648 -1.9444873 5.776769 15.251903 + 10.6118355 -1.4903594 -9.535318 -3.6553776 -1.6699586 + -0.5933151 7.600357 -4.8815503 -8.698617 -15.855757 + 0.25632986 -7.2235737 0.9506656 0.7128582 -9.051738 + 8.74869 -1.6426028 -6.5762258 2.506905 -6.7431564 + 5.129912 -12.189555 -3.6435068 12.068113 -6.0059533 + -2.3535995 2.9014351 22.3082 -1.5563312 13.193291 + 2.7583609 -7.468798 1.3407065 -4.599617 -6.2345777 + 10.7689295 7.137627 5.099476 0.3473359 9.647881 + -2.0484571 -5.8549366 ] # get the score between enroll and test - Eembeddings Score: 0.4292638301849365 + Eembeddings Score: 0.45332613587379456 ``` ### 4.预训练模型 diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 74435ae1..3231fecd 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -82,7 +82,7 @@ PANN | ESC-50 |[pann-esc50](../../examples/esc50/cls0)|[esc50_cnn6.tar.gz](https Model Type | Dataset| Example Link | Pretrained Models | Static Models :-------------:| :------------:| :-----: | :-----: | :-----: -PANN | VoxCeleb| [voxceleb_ecapatdnn](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0) | [ecapatdnn.tar.gz](https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz) | - +PANN | VoxCeleb| [voxceleb_ecapatdnn](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0) | [ecapatdnn.tar.gz](https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz) | - ## Punctuation Restoration Models Model Type | Dataset| Example Link | Pretrained Models diff --git a/examples/voxceleb/sv0/README.md 
b/examples/voxceleb/sv0/README.md index 418102b4..26c95aca 100644 --- a/examples/voxceleb/sv0/README.md +++ b/examples/voxceleb/sv0/README.md @@ -141,11 +141,11 @@ using the `tar` scripts to unpack the model and then you can use the script to t For example: ``` -wget https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz -tar -xvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz +wget https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz +tar -xvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz source path.sh # If you have processed the data and get the manifest file, you can skip the following 2 steps -CUDA_VISIBLE_DEVICES= bash ./local/test.sh ./data sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_2/model/ conf/ecapa_tdnn.yaml +CUDA_VISIBLE_DEVICES= bash ./local/test.sh ./data sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1/model/ conf/ecapa_tdnn.yaml ``` The performance of the released models are shown in [this](./RESULTS.md) diff --git a/examples/voxceleb/sv0/RESULT.md b/examples/voxceleb/sv0/RESULT.md index 3a3f67d0..a1d2a181 100644 --- a/examples/voxceleb/sv0/RESULT.md +++ b/examples/voxceleb/sv0/RESULT.md @@ -5,3 +5,4 @@ | Model | Number of Params | Release | Config | dim | Test set | Cosine | Cosine + S-Norm | | --- | --- | --- | --- | --- | --- | --- | ---- | | ECAPA-TDNN | 85M | 0.2.0 | conf/ecapa_tdnn.yaml |192 | test | 1.02 | 0.95 | +| ECAPA-TDNN | 85M | 0.2.1 | conf/ecapa_tdnn.yaml | 192 | test | 0.8188 | 0.7815| diff --git a/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml b/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml index 3e3a1307..b7b71d77 100644 --- a/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml +++ b/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml @@ -59,3 +59,11 @@ global_embedding_norm: True embedding_mean_norm: True embedding_std_norm: False +########################################### +# score-norm # +########################################### +score_norm: s-norm +cohort_size: 20000 # amount of imposter 
utterances in normalization cohort +n_train_snts: 400000 # used for normalization stats + + diff --git a/examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml b/examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml index 5925e573..40498c87 100644 --- a/examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml +++ b/examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml @@ -58,3 +58,10 @@ global_embedding_norm: True embedding_mean_norm: True embedding_std_norm: False +########################################### +# score-norm # +########################################### +score_norm: s-norm +cohort_size: 20000 # amount of imposter utterances in normalization cohort +n_train_snts: 400000 # used for normalization stats + diff --git a/paddlespeech/cli/vector/pretrained_models.py b/paddlespeech/cli/vector/pretrained_models.py index 686a22d8..4d1d3a04 100644 --- a/paddlespeech/cli/vector/pretrained_models.py +++ b/paddlespeech/cli/vector/pretrained_models.py @@ -19,9 +19,9 @@ pretrained_models = { # "paddlespeech vector --task spk --model ecapatdnn_voxceleb12-16k --sr 16000 --input ./input.wav" "ecapatdnn_voxceleb12-16k": { 'url': - 'https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz', + 'https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz', 'md5': - 'cc33023c54ab346cd318408f43fcaf95', + '67c7ff8885d5246bd16e0f5ac1cba99f', 'cfg_path': 'conf/model.yaml', # the yaml config path 'ckpt_path': diff --git a/paddlespeech/server/engine/acs/python/acs_engine.py b/paddlespeech/server/engine/acs/python/acs_engine.py index 30deeeb5..d52852dc 100644 --- a/paddlespeech/server/engine/acs/python/acs_engine.py +++ b/paddlespeech/server/engine/acs/python/acs_engine.py @@ -16,6 +16,7 @@ import json import os import re +import numpy as np import paddle import soundfile import websocket @@ -44,11 +45,7 @@ class ACSEngine(BaseEngine): logger.info("Init the acs engine") try: self.config = config - if self.config.device: - 
self.device = self.config.device - else: - self.device = paddle.get_device() - + self.device = self.config.get("device", paddle.get_device()) paddle.set_device(self.device) logger.info(f"ACS Engine set the device: {self.device}") @@ -116,11 +113,17 @@ class ACSEngine(BaseEngine): logger.info("client receive msg={}".format(msg)) # send the total audio data - samples, sample_rate = soundfile.read(audio_data, dtype='int16') - ws.send_binary(samples.tobytes()) - msg = ws.recv() - msg = json.loads(msg) - logger.info(f"audio result: {msg}") + for chunk_data in self.read_wave(audio_data): + ws.send_binary(chunk_data.tobytes()) + msg = ws.recv() + msg = json.loads(msg) + logger.info(f"audio result: {msg}") + # samples, sample_rate = soundfile.read(audio_data, dtype='int16') + + # ws.send_binary(samples.tobytes()) + # msg = ws.recv() + # msg = json.loads(msg) + # logger.info(f"audio result: {msg}") # 3. send chunk audio data to engine logger.info("send the end signal") @@ -142,6 +145,39 @@ class ACSEngine(BaseEngine): return msg + def read_wave(self, audio_data: str): + """read the audio file from specific wavfile path + + Args: + audio_data (str): the audio data, + we assume that audio sample rate matches the model + + Yields: + numpy.array: the small package audio pcm data + """ + samples, sample_rate = soundfile.read(audio_data, dtype='int16') + x_len = len(samples) + assert sample_rate == 16000 + + chunk_size = int(85 * sample_rate / 1000) # 85ms, sample_rate = 16kHz + + if x_len % chunk_size != 0: + padding_len_x = chunk_size - x_len % chunk_size + else: + padding_len_x = 0 + + padding = np.zeros((padding_len_x), dtype=samples.dtype) + padded_x = np.concatenate([samples, padding], axis=0) + + assert (x_len + padding_len_x) % chunk_size == 0 + num_chunk = (x_len + padding_len_x) / chunk_size + num_chunk = int(num_chunk) + for i in range(0, num_chunk): + start = i * chunk_size + end = start + chunk_size + x_chunk = padded_x[start:end] + yield x_chunk + def 
get_macthed_word(self, msg): """Get the matched info in msg diff --git a/paddlespeech/server/tests/vector/http_client.py b/paddlespeech/server/tests/vector/http_client.py new file mode 100644 index 00000000..49f2adf7 --- /dev/null +++ b/paddlespeech/server/tests/vector/http_client.py @@ -0,0 +1,59 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the +import base64 +import json +import time + +import requests + + +def readwav2base64(wav_file): + """ + read wave file and convert to base64 string + """ + with open(wav_file, 'rb') as f: + base64_bytes = base64.b64encode(f.read()) + base64_string = base64_bytes.decode('utf-8') + return base64_string + + +def main(): + """ + main func + """ + url = "http://127.0.0.1:8090/paddlespeech/asr" + + # start Timestamp + time_start = time.time() + + test_audio_dir = "./16_audio.wav" + audio = readwav2base64(test_audio_dir) + + data = { + "audio": audio, + "audio_format": "wav", + "sample_rate": 16000, + "lang": "zh_cn", + } + + r = requests.post(url=url, data=json.dumps(data)) + + # ending Timestamp + time_end = time.time() + print('time cost', time_end - time_start, 's') + + print(r.json()) + + +if __name__ == "__main__": + main() -- GitLab