提交 6abc5d9f 编写于 作者: H Hui Zhang

format

上级 854b63b5
......@@ -61,6 +61,7 @@ def remove_version_py(filename='paddleaudio/__init__.py'):
if "__version__" not in line:
f.write(line)
remove_version_py()
write_version_py()
......
......@@ -192,7 +192,7 @@ class ConfigCache:
try:
cfg = yaml.load(file, Loader=yaml.FullLoader)
self._data.update(cfg)
except:
except Exception as e:
self.flush()
@property
......
......@@ -174,7 +174,7 @@ class ServerStatsExecutor():
"Failed to get the table of TTS pretrained models supported in the service."
)
return False
elif self.task == 'cls':
try:
from paddlespeech.cli.cls.infer import pretrained_models
......
......@@ -13,4 +13,4 @@ Example to play `decoder`:
```
pushd decoder
bash run.sh
```
\ No newline at end of file
```
......@@ -164,8 +164,8 @@ int main(int argc, char* argv[]) {
// test feature linear_spectrogram: wave --> decibel_normalizer --> hanning
// window --> linear_spectrogram --> cmvn
int32 num_done = 0, num_err = 0;
//std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
//ppspeech::RawDataCache());
// std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
// ppspeech::RawDataCache());
std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(
new ppspeech::RawAudioCache());
......
......@@ -52,14 +52,14 @@ CTCBeamSearch::CTCBeamSearch(const CTCBeamSearchOptions& opts)
}
void CTCBeamSearch::Reset() {
//num_frame_decoded_ = 0;
//ResetPrefixes();
// num_frame_decoded_ = 0;
// ResetPrefixes();
InitDecoder();
}
void CTCBeamSearch::InitDecoder() {
num_frame_decoded_ = 0;
//ResetPrefixes();
// ResetPrefixes();
prefixes_.clear();
root_ = std::make_shared<PathTrie>();
......
......@@ -29,8 +29,7 @@ FeatureCache::FeatureCache(
base_extractor_ = std::move(base_extractor);
}
void FeatureCache::Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
void FeatureCache::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
base_extractor_->Accept(inputs);
// feed current data
bool result = false;
......
......@@ -24,8 +24,7 @@ class FeatureCache : public FeatureExtractorInterface {
explicit FeatureCache(
int32 max_size = kint16max,
std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL);
virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
// feats dim = num_frames * feature_dim
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
// the feature cache only caches features that come from the base extractor
......
......@@ -11,4 +11,3 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
......@@ -11,4 +11,3 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
......@@ -21,10 +21,9 @@ namespace ppspeech {
class FeatureExtractorInterface {
public:
// accept input data, accept feature or raw waves which decided
// accept input data, accept feature or raw waves which decided
// by the base_extractor
virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0;
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0;
// get the processed result
// the length of output = feature_row * feature_dim,
// the Matrix is squashed into Vector
......
......@@ -57,8 +57,9 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
if (flag == false || input_feats.Dim() == 0) return false;
vector<BaseFloat> input_feats_vec(input_feats.Dim());
std::memcpy(input_feats_vec.data(), input_feats.Data(),
input_feats.Dim()*sizeof(BaseFloat));
std::memcpy(input_feats_vec.data(),
input_feats.Data(),
input_feats.Dim() * sizeof(BaseFloat));
vector<vector<BaseFloat>> result;
Compute(input_feats_vec, result);
int32 feat_size = 0;
......@@ -86,10 +87,10 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
vector<BaseFloat>* img) const {
Vector<BaseFloat> v_tmp;
v_tmp.Resize(v->size());
std::memcpy(v_tmp.Data(), v->data(), sizeof(BaseFloat)*(v->size()));
std::memcpy(v_tmp.Data(), v->data(), sizeof(BaseFloat) * (v->size()));
RealFft(&v_tmp, true);
v->resize(v_tmp.Dim());
std::memcpy(v->data(), v_tmp.Data(), sizeof(BaseFloat)*(v->size()));
std::memcpy(v->data(), v_tmp.Data(), sizeof(BaseFloat) * (v->size()));
real->push_back(v->at(0));
img->push_back(0);
......
......@@ -38,16 +38,13 @@ class LinearSpectrogram : public FeatureExtractorInterface {
explicit LinearSpectrogram(
const LinearSpectrogramOptions& opts,
std::unique_ptr<FeatureExtractorInterface> base_extractor);
virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
// the dim_ is the dim of single frame feature
virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
virtual void Reset() {
base_extractor_->Reset();
}
virtual void Reset() { base_extractor_->Reset(); }
private:
void Hanning(std::vector<kaldi::BaseFloat>* data) const;
......
......@@ -34,14 +34,12 @@ DecibelNormalizer::DecibelNormalizer(
dim_ = 1;
}
void DecibelNormalizer::Accept(
const kaldi::VectorBase<BaseFloat>& waves) {
void DecibelNormalizer::Accept(const kaldi::VectorBase<BaseFloat>& waves) {
base_extractor_->Accept(waves);
}
bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* waves) {
if (base_extractor_->Read(waves) == false ||
waves->Dim() == 0) {
if (base_extractor_->Read(waves) == false || waves->Dim() == 0) {
return false;
}
Compute(waves);
......@@ -88,7 +86,8 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
item *= std::pow(10.0, gain / 20.0);
}
std::memcpy(waves->Data(), samples.data(), sizeof(BaseFloat)*samples.size());
std::memcpy(
waves->Data(), samples.data(), sizeof(BaseFloat) * samples.size());
return true;
}
......
......@@ -45,16 +45,13 @@ class DecibelNormalizer : public FeatureExtractorInterface {
explicit DecibelNormalizer(
const DecibelNormalizerOptions& opts,
std::unique_ptr<FeatureExtractorInterface> base_extractor);
virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& waves);
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
// normalize audio, the dim is 1.
virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
virtual void Reset() {
base_extractor_->Reset();
}
virtual void Reset() { base_extractor_->Reset(); }
private:
bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
......@@ -69,8 +66,7 @@ class CMVN : public FeatureExtractorInterface {
public:
explicit CMVN(std::string cmvn_file,
std::unique_ptr<FeatureExtractorInterface> base_extractor);
virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
// the length of feats = feature_row * feature_dim,
// the Matrix is squashed into Vector
......@@ -79,9 +75,7 @@ class CMVN : public FeatureExtractorInterface {
virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
virtual void Reset() {
base_extractor_->Reset();
}
virtual void Reset() { base_extractor_->Reset(); }
private:
void Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const;
......
......@@ -32,7 +32,7 @@ void RawAudioCache::Accept(const VectorBase<BaseFloat>& waves) {
ready_feed_condition_.wait(lock);
}
for (size_t idx = 0; idx < waves.Dim(); ++idx) {
int32 buffer_idx = (idx + start_) % ring_buffer_.size();
int32 buffer_idx = (idx + start_) % ring_buffer_.size();
ring_buffer_[buffer_idx] = waves(idx);
}
data_length_ += waves.Dim();
......@@ -44,7 +44,8 @@ bool RawAudioCache::Read(Vector<BaseFloat>* waves) {
std::unique_lock<std::mutex> lock(mutex_);
while (chunk_size > data_length_) {
// when audio is empty and no more data feed
// ready_read_condition will block in dead lock. so replace with timeout_
// ready_read_condition will block in dead lock. so replace with
// timeout_
// ready_read_condition_.wait(lock);
int32 elapsed = static_cast<int32>(timer.Elapsed() * 1000);
if (elapsed > timeout_) {
......
......@@ -35,9 +35,9 @@ class RawAudioCache : public FeatureExtractorInterface {
}
virtual bool IsFinished() const { return finished_; }
virtual void Reset() {
start_ = 0;
data_length_ = 0;
finished_ = false;
start_ = 0;
data_length_ = 0;
finished_ = false;
}
private:
......@@ -72,9 +72,7 @@ class RawDataCache : public FeatureExtractorInterface {
virtual void SetFinished() { finished_ = true; }
virtual bool IsFinished() const { return finished_; }
void SetDim(int32 dim) { dim_ = dim; }
virtual void Reset() {
finished_ = true;
}
virtual void Reset() { finished_ = true; }
private:
kaldi::Vector<kaldi::BaseFloat> data_;
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// itf/decodable-itf.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
......@@ -42,8 +56,10 @@ namespace kaldi {
For online decoding, where the features are coming in in real time, it is
important to understand the IsLastFrame() and NumFramesReady() functions.
There are two ways these are used: the old online-decoding code, in ../online/,
and the new online-decoding code, in ../online2/. In the old online-decoding
There are two ways these are used: the old online-decoding code, in
../online/,
and the new online-decoding code, in ../online2/. In the old
online-decoding
code, the decoder would do:
\code{.cc}
for (int frame = 0; !decodable.IsLastFrame(frame); frame++) {
......@@ -52,13 +68,16 @@ namespace kaldi {
\endcode
and the call to IsLastFrame would block if the features had not arrived yet.
The decodable object would have to know when to terminate the decoding. This
online-decoding mode is still supported, it is what happens when you call, for
online-decoding mode is still supported, it is what happens when you call,
for
example, LatticeFasterDecoder::Decode().
We realized that this "blocking" mode of decoding is not very convenient
because it forces the program to be multi-threaded and makes it complex to
control endpointing. In the "new" decoding code, you don't call (for example)
LatticeFasterDecoder::Decode(), you call LatticeFasterDecoder::InitDecoding(),
control endpointing. In the "new" decoding code, you don't call (for
example)
LatticeFasterDecoder::Decode(), you call
LatticeFasterDecoder::InitDecoding(),
and then each time you get more features, you provide them to the decodable
object, and you call LatticeFasterDecoder::AdvanceDecoding(), which does
something like this:
......@@ -68,7 +87,8 @@ namespace kaldi {
}
\endcode
So the decodable object never has IsLastFrame() called. For decoding where
you are starting with a matrix of features, the NumFramesReady() function will
you are starting with a matrix of features, the NumFramesReady() function
will
always just return the number of frames in the file, and IsLastFrame() will
return true for the last frame.
......@@ -80,45 +100,54 @@ namespace kaldi {
frame of the file once we've decided to terminate decoding.
*/
class DecodableInterface {
public:
/// Returns the log likelihood, which will be negated in the decoder.
/// The "frame" starts from zero. You should verify that NumFramesReady() > frame
/// before calling this.
virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;
/// Returns true if this is the last frame. Frames are zero-based, so the
/// first frame is zero. IsLastFrame(-1) will return false, unless the file
/// is empty (which is a case that I'm not sure all the code will handle, so
/// be careful). Caution: the behavior of this function in an online setting
/// is being changed somewhat. In future it may return false in cases where
/// we haven't yet decided to terminate decoding, but later true if we decide
/// to terminate decoding. The plan in future is to rely more on
/// NumFramesReady(), and in future, IsLastFrame() would always return false
/// in an online-decoding setting, and would only return true in a
/// decoding-from-matrix setting where we want to allow the last delta or LDA
/// features to be flushed out for compatibility with the baseline setup.
virtual bool IsLastFrame(int32 frame) const = 0;
/// The call NumFramesReady() will return the number of frames currently available
/// for this decodable object. This is for use in setups where you don't want the
/// decoder to block while waiting for input. This is newly added as of Jan 2014,
/// and I hope, going forward, to rely on this mechanism more than IsLastFrame to
/// know when to stop decoding.
virtual int32 NumFramesReady() const {
KALDI_ERR << "NumFramesReady() not implemented for this decodable type.";
return -1;
}
/// Returns the number of states in the acoustic model
/// (they will be indexed one-based, i.e. from 1 to NumIndices();
/// this is for compatibility with OpenFst).
virtual int32 NumIndices() const = 0;
virtual bool FrameLogLikelihood(int32 frame,
std::vector<kaldi::BaseFloat>* likelihood) = 0;
virtual ~DecodableInterface() {}
public:
/// Returns the log likelihood, which will be negated in the decoder.
/// The "frame" starts from zero. You should verify that NumFramesReady() >
/// frame
/// before calling this.
virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;
/// Returns true if this is the last frame. Frames are zero-based, so the
/// first frame is zero. IsLastFrame(-1) will return false, unless the file
/// is empty (which is a case that I'm not sure all the code will handle, so
/// be careful). Caution: the behavior of this function in an online
/// setting
/// is being changed somewhat. In future it may return false in cases where
/// we haven't yet decided to terminate decoding, but later true if we
/// decide
/// to terminate decoding. The plan in future is to rely more on
/// NumFramesReady(), and in future, IsLastFrame() would always return false
/// in an online-decoding setting, and would only return true in a
/// decoding-from-matrix setting where we want to allow the last delta or
/// LDA
/// features to be flushed out for compatibility with the baseline setup.
virtual bool IsLastFrame(int32 frame) const = 0;
/// The call NumFramesReady() will return the number of frames currently
/// available
/// for this decodable object. This is for use in setups where you don't
/// want the
/// decoder to block while waiting for input. This is newly added as of Jan
/// 2014,
/// and I hope, going forward, to rely on this mechanism more than
/// IsLastFrame to
/// know when to stop decoding.
virtual int32 NumFramesReady() const {
KALDI_ERR
<< "NumFramesReady() not implemented for this decodable type.";
return -1;
}
/// Returns the number of states in the acoustic model
/// (they will be indexed one-based, i.e. from 1 to NumIndices();
/// this is for compatibility with OpenFst).
virtual int32 NumIndices() const = 0;
virtual bool FrameLogLikelihood(
int32 frame, std::vector<kaldi::BaseFloat>* likelihood) = 0;
virtual ~DecodableInterface() {}
};
/// @}
} // namespace kaldi
......
......@@ -23,10 +23,7 @@ using kaldi::Vector;
Decodable::Decodable(const std::shared_ptr<NnetInterface>& nnet,
const std::shared_ptr<FeatureExtractorInterface>& frontend)
: frontend_(frontend),
nnet_(nnet),
frame_offset_(0),
frames_ready_(0) {}
: frontend_(frontend), nnet_(nnet), frame_offset_(0), frames_ready_(0) {}
void Decodable::Acceptlikelihood(const Matrix<BaseFloat>& likelihood) {
frames_ready_ += likelihood.NumRows();
......@@ -83,7 +80,7 @@ void Decodable::Reset() {
frontend_->Reset();
nnet_->Reset();
frame_offset_ = 0;
frames_ready_ = 0;
frames_ready_ = 0;
}
} // namespace ppspeech
\ No newline at end of file
......@@ -148,7 +148,7 @@ def merge_configs(
for item in remove_train_list:
try:
remove_config_part(config, [item])
except:
except Exception as e:
print(item + " " + "can not be removed")
# Save the config
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册