Unverified Commit ee7c266f, authored by Y YangZhou, committed by GitHub

[speechx] rm openblas && refactor kaldi-matrix, kaldi-vector (#2824)

* rm openblas && refactor kaldi-matrix kaldi-vector
Parent c1b1ae05
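Most of this commit is a mechanical include-path migration: headers under `frontend/audio/` move up to `frontend/`, the trimmed matrix code is included as `matrix/` instead of `kaldi/matrix/`, and the WAV reader moves from `kaldi/feat/` into the frontend library. A minimal before/after sketch of a consumer translation unit (the file itself is hypothetical; the paths are taken from the hunks below):

```cpp
// Before this commit:
//   #include "frontend/audio/data_cache.h"
//   #include "kaldi/matrix/kaldi-matrix.h"
//   #include "kaldi/feat/wave-reader.h"

// After this commit:
#include "frontend/data_cache.h"   // frontend/audio/* -> frontend/*
#include "matrix/kaldi-matrix.h"   // kaldi/matrix/* -> matrix/* (refactored copy)
#include "frontend/wave-reader.h"  // wave reader now ships with the frontend lib
```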
@@ -53,9 +53,6 @@ include(gflags)
 include(glog)
-#openblas
-include(openblas)
 # openfst
 include(openfst)
 add_dependencies(openfst gflags glog)
......
@@ -14,7 +14,7 @@
 #include "decoder/ctc_prefix_beam_search_decoder.h"
 #include "base/common.h"
-#include "frontend/audio/data_cache.h"
+#include "frontend/data_cache.h"
 #include "fst/symbol-table.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
......
@@ -14,7 +14,7 @@
 #include "base/common.h"
 #include "kaldi/decoder/decodable-itf.h"
-#include "kaldi/matrix/kaldi-matrix.h"
+#include "matrix/kaldi-matrix.h"
 #include "nnet/nnet_itf.h"
 #include "nnet/nnet_producer.h"
......
@@ -15,7 +15,6 @@
 #include "base/basic_types.h"
 #include "kaldi/base/kaldi-types.h"
-#include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 DECLARE_int32(subsampling_rate);
......
@@ -13,10 +13,10 @@
 // limitations under the License.
 #include "nnet/nnet_producer.h"
+#include "matrix/kaldi-matrix.h"
 namespace ppspeech {
-using kaldi::Vector;
 using std::vector;
 using kaldi::BaseFloat;
......
@@ -16,7 +16,7 @@
 #include "base/common.h"
 #include "base/safe_queue.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"
 #include "nnet/nnet_itf.h"
 namespace ppspeech {
......
@@ -18,7 +18,7 @@
 #pragma once
 #include "base/common.h"
-#include "kaldi/matrix/kaldi-matrix.h"
+#include "matrix/kaldi-matrix.h"
 #include "nnet/nnet_itf.h"
 #include "paddle/extension.h"
 #include "paddle/jit/all.h"
......
@@ -15,8 +15,8 @@
 #include "base/common.h"
 #include "decoder/param.h"
-#include "frontend/audio/assembler.h"
-#include "frontend/audio/data_cache.h"
+#include "frontend/assembler.h"
+#include "frontend/data_cache.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
 #include "nnet/u2_nnet.h"
......
@@ -15,7 +15,7 @@ set(TEST_BINS
 foreach(bin_name IN LISTS TEST_BINS)
 add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
 target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(${bin_name} recognizer nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util kaldi-feat-common)
+target_link_libraries(${bin_name} recognizer nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util)
 target_compile_options(${bin_name} PRIVATE ${PADDLE_COMPILE_FLAGS})
 target_include_directories(${bin_name} PRIVATE ${pybind11_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR})
 target_link_libraries(${bin_name} ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS})
......
@@ -18,7 +18,7 @@
 #include "decoder/ctc_beam_search_opt.h"
 #include "decoder/ctc_prefix_beam_search_decoder.h"
 #include "decoder/decoder_itf.h"
-#include "frontend/audio/feature_pipeline.h"
+#include "frontend/feature_pipeline.h"
 #include "fst/fstlib.h"
 #include "fst/symbol-table.h"
 #include "nnet/decodable.h"
......
@@ -13,7 +13,7 @@
 // limitations under the License.
 #include "decoder/param.h"
-#include "kaldi/feat/wave-reader.h"
+#include "frontend/wave-reader.h"
 #include "kaldi/util/table-types.h"
 #include "recognizer/u2_recognizer.h"
......
@@ -14,7 +14,7 @@
 #include "recognizer/u2_recognizer.h"
 #include "decoder/param.h"
-#include "kaldi/feat/wave-reader.h"
+#include "frontend/wave-reader.h"
 #include "kaldi/util/table-types.h"
 DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
......
@@ -4,6 +4,8 @@ ${CMAKE_CURRENT_SOURCE_DIR}/../
 )
 add_subdirectory(utils)
+add_subdirectory(matrix)
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}/frontend
 )
......
+add_library(kaldi-native-fbank-core
+feature-fbank.cc
+feature-functions.cc
+feature-window.cc
+fftsg.c
+mel-computations.cc
+rfft.cc
+)
-add_subdirectory(audio)
\ No newline at end of file
+add_library(frontend STATIC
+cmvn.cc
+audio_cache.cc
+feature_cache.cc
+feature_pipeline.cc
+assembler.cc
+wave-reader.cc
+)
+target_link_libraries(frontend PUBLIC kaldi-native-fbank-core utils)
+set(BINS
+compute_fbank_main
+)
+foreach(bin_name IN LISTS BINS)
+add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
+target_link_libraries(${bin_name} PUBLIC frontend utils kaldi-util gflags glog)
+endforeach()
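Because `wave-reader.cc` is now compiled into the new `frontend` target, binaries can read WAV files without linking `kaldi-feat-common`. A minimal sketch of reading a file through the relocated header, assuming the usual kaldi `WaveData` interface is unchanged by the move (the file name is hypothetical):

```cpp
#include <fstream>
#include "frontend/wave-reader.h"  // was kaldi/feat/wave-reader.h

int main() {
    std::ifstream is("sample.wav", std::ios::binary);  // hypothetical input
    kaldi::WaveData wave;
    wave.Read(is);  // parse the RIFF header and samples
    // wave.Data() is a Matrix<BaseFloat> with one row per channel.
    kaldi::BaseFloat sample_rate = wave.SampFreq();
    (void)sample_rate;
    return 0;
}
```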
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "frontend/audio/assembler.h"
+#include "frontend/assembler.h"
 namespace ppspeech {
......
@@ -15,7 +15,7 @@
 #pragma once
 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"
 namespace ppspeech {
......
-add_library(kaldi-native-fbank-core
-feature-fbank.cc
-feature-functions.cc
-feature-window.cc
-fftsg.c
-mel-computations.cc
-rfft.cc
-)
-add_library(frontend STATIC
-cmvn.cc
-audio_cache.cc
-feature_cache.cc
-feature_pipeline.cc
-assembler.cc
-)
-target_link_libraries(frontend PUBLIC kaldi-native-fbank-core utils)
-set(BINS
-compute_fbank_main
-)
-foreach(bin_name IN LISTS BINS)
-add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
-target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(${bin_name} PUBLIC frontend utils kaldi-util gflags glog kaldi-feat-common)
-endforeach()
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "frontend/audio/audio_cache.h"
+#include "frontend/audio_cache.h"
 #include "kaldi/base/timer.h"
......
@@ -16,7 +16,7 @@
 #pragma once
 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"
 namespace ppspeech {
......
@@ -13,7 +13,7 @@
 // limitations under the License.
-#include "frontend/audio/cmvn.h"
+#include "frontend/cmvn.h"
 #include "utils/file_utils.h"
 #include "utils/picojson.h"
......
@@ -15,8 +15,7 @@
 #pragma once
 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
-#include "kaldi/matrix/kaldi-matrix.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/util/options-itf.h"
 namespace ppspeech {
......
@@ -16,13 +16,13 @@
 #include "base/flags.h"
 #include "base/log.h"
-#include "frontend/audio/audio_cache.h"
-#include "frontend/audio/data_cache.h"
-#include "frontend/audio/fbank.h"
-#include "frontend/audio/feature_cache.h"
-#include "frontend/audio/frontend_itf.h"
-#include "frontend/audio/normalizer.h"
-#include "kaldi/feat/wave-reader.h"
+#include "frontend/audio_cache.h"
+#include "frontend/data_cache.h"
+#include "frontend/fbank.h"
+#include "frontend/feature_cache.h"
+#include "frontend/frontend_itf.h"
+#include "frontend/normalizer.h"
+#include "frontend/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
 #include "kaldi/util/table-types.h"
......
@@ -16,7 +16,7 @@
 #pragma once
 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"
 using std::vector;
......
@@ -15,8 +15,8 @@
 #pragma once
 #include "base/common.h"
-#include "frontend/audio/feature_common.h"
-#include "frontend/audio/feature-fbank.h"
+#include "frontend/feature_common.h"
+#include "frontend/feature-fbank.h"
 namespace ppspeech {
......
@@ -18,11 +18,11 @@
 // This file is copied/modified from kaldi/src/feat/feature-fbank.cc
 //
-#include "frontend/audio/feature-fbank.h"
+#include "frontend/feature-fbank.h"
 #include <cmath>
-#include "frontend/audio/feature-functions.h"
+#include "frontend/feature-functions.h"
 namespace knf {
......
@@ -23,9 +23,9 @@
 #include <map>
-#include "frontend/audio/feature-window.h"
-#include "frontend/audio/mel-computations.h"
-#include "frontend/audio/rfft.h"
+#include "frontend/feature-window.h"
+#include "frontend/mel-computations.h"
+#include "frontend/rfft.h"
 namespace knf {
......
@@ -18,7 +18,7 @@
 // This file is copied/modified from kaldi/src/feat/feature-functions.cc
-#include "frontend/audio/feature-functions.h"
+#include "frontend/feature-functions.h"
 #include <cstdint>
 #include <vector>
......
@@ -4,7 +4,7 @@
 // This file is copied/modified from kaldi/src/feat/feature-window.cc
-#include "frontend/audio/feature-window.h"
+#include "frontend/feature-window.h"
 #include <cmath>
 #include <vector>
......
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "frontend/audio/feature_cache.h"
+#include "frontend/feature_cache.h"
 namespace ppspeech {
......
@@ -15,7 +15,7 @@
 #pragma once
 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"
 namespace ppspeech {
......
@@ -15,7 +15,7 @@
 #pragma once
 #include "frontend_itf.h"
-#include "frontend/audio/feature-window.h"
+#include "frontend/feature-window.h"
 namespace ppspeech {
@@ -52,4 +52,4 @@ class StreamingFeatureTpl : public FrontendInterface {
 } // namespace ppspeech
-#include "frontend/audio/feature_common_inl.h"
+#include "frontend/feature_common_inl.h"
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "frontend/audio/feature_pipeline.h"
+#include "frontend/feature_pipeline.h"
 namespace ppspeech {
......
@@ -16,13 +16,13 @@
 #pragma once
-#include "frontend/audio/assembler.h"
-#include "frontend/audio/audio_cache.h"
-#include "frontend/audio/data_cache.h"
-#include "frontend/audio/fbank.h"
-#include "frontend/audio/feature_cache.h"
-#include "frontend/audio/frontend_itf.h"
-#include "frontend/audio/normalizer.h"
+#include "frontend/assembler.h"
+#include "frontend/audio_cache.h"
+#include "frontend/data_cache.h"
+#include "frontend/fbank.h"
+#include "frontend/feature_cache.h"
+#include "frontend/frontend_itf.h"
+#include "frontend/cmvn.h"
 // feature
 DECLARE_bool(fill_zero);
......
@@ -15,7 +15,7 @@
 #pragma once
 #include "base/basic_types.h"
-#include "kaldi/matrix/kaldi-vector.h"
+#include "matrix/kaldi-vector.h"
 namespace ppspeech {
......
@@ -18,12 +18,12 @@
 // This file is copied/modified from kaldi/src/feat/mel-computations.cc
-#include "frontend/audio/mel-computations.h"
+#include "frontend/mel-computations.h"
 #include <algorithm>
 #include <sstream>
-#include "frontend/audio/feature-window.h"
+#include "frontend/feature-window.h"
 namespace knf {
......
@@ -22,7 +22,7 @@
 #include <cmath>
 #include <string>
-#include "frontend/audio/feature-window.h"
+#include "frontend/feature-window.h"
 namespace knf {
......
@@ -14,5 +14,4 @@
 #pragma once
-#include "frontend/audio/cmvn.h"
-#include "frontend/audio/db_norm.h"
\ No newline at end of file
+#include "frontend/cmvn.h"
\ No newline at end of file
@@ -16,7 +16,7 @@
 * limitations under the License.
 */
-#include "frontend/audio/rfft.h"
+#include "frontend/rfft.h"
 #include <cmath>
 #include <vector>
......
@@ -25,7 +25,7 @@
 #include <sstream>
 #include <vector>
-#include "feat/wave-reader.h"
+#include "frontend/wave-reader.h"
 #include "base/kaldi-error.h"
 #include "base/kaldi-utils.h"
......
+add_library(kaldi-matrix
+kaldi-matrix.cc
+kaldi-vector.cc
+)
+target_link_libraries(kaldi-matrix kaldi-base)
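The new `matrix/` target builds just the two refactored sources, so anything that links `kaldi-matrix` gets the plain containers plus the copy and I/O routines, while the BLAS/LAPACK-backed operations are commented out in the hunks below. A minimal sketch against the retained API (method names are taken from the surviving declarations; the function itself is hypothetical):

```cpp
#include "matrix/kaldi-matrix.h"
#include "matrix/kaldi-vector.h"

void FillRows() {
    kaldi::Vector<kaldi::BaseFloat> row(4);
    row.Set(1.0f);  // plain element-wise ops survive the refactor
    kaldi::Matrix<kaldi::BaseFloat> m(2, 4);
    m.CopyRowFromVec(row, 0);  // CopyRowFromVec is kept (see kaldi-matrix.cc below)
    m.CopyRowFromVec(row, 1);
    // BLAS-backed calls such as AddMatMat are gone from this build.
}
```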
@@ -28,7 +28,7 @@ namespace kaldi {
 template<typename Real>
 Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
+/*
 template<>
 template<>
 void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
@@ -36,6 +36,7 @@ void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra
 template<>
 template<>
 void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
+*/
 template<typename Real>
 inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
......
@@ -23,17 +23,9 @@
 // limitations under the License.
 #include "matrix/kaldi-matrix.h"
-#include "matrix/sp-matrix.h"
-#include "matrix/jama-svd.h"
-#include "matrix/jama-eig.h"
-#include "matrix/compressed-matrix.h"
-#include "matrix/sparse-matrix.h"
-static_assert(int(kaldi::kNoTrans) == int(CblasNoTrans) && int(kaldi::kTrans) == int(CblasTrans),
-"kaldi::kNoTrans and kaldi::kTrans must be equal to the appropriate CBLAS library constants!");
 namespace kaldi {
+/*
 template<typename Real>
 void MatrixBase<Real>::Invert(Real *log_det, Real *det_sign,
 bool inverse_needed) {
@@ -206,29 +198,30 @@ void MatrixBase<Real>::SetMatMatDivMat(const MatrixBase<Real>& A,
 }
 }
 }
+*/
-//template<typename Real>
-//void MatrixBase<Real>::CopyLowerToUpper() {
-//KALDI_ASSERT(num_rows_ == num_cols_);
-//Real *data = data_;
-//MatrixIndexT num_rows = num_rows_, stride = stride_;
-//for (int32 i = 0; i < num_rows; i++)
-//for (int32 j = 0; j < i; j++)
-//data[j * stride + i ] = data[i * stride + j];
-//}
+template<typename Real>
+void MatrixBase<Real>::CopyLowerToUpper() {
+KALDI_ASSERT(num_rows_ == num_cols_);
+Real *data = data_;
+MatrixIndexT num_rows = num_rows_, stride = stride_;
+for (int32 i = 0; i < num_rows; i++)
+for (int32 j = 0; j < i; j++)
+data[j * stride + i ] = data[i * stride + j];
+}
+//template<typename Real>
+//void MatrixBase<Real>::CopyUpperToLower() {
+//KALDI_ASSERT(num_rows_ == num_cols_);
+//Real *data = data_;
+//MatrixIndexT num_rows = num_rows_, stride = stride_;
+//for (int32 i = 0; i < num_rows; i++)
+//for (int32 j = 0; j < i; j++)
+//data[i * stride + j] = data[j * stride + i];
+//}
-template<typename Real>
-void MatrixBase<Real>::CopyUpperToLower() {
-KALDI_ASSERT(num_rows_ == num_cols_);
-Real *data = data_;
-MatrixIndexT num_rows = num_rows_, stride = stride_;
-for (int32 i = 0; i < num_rows; i++)
-for (int32 j = 0; j < i; j++)
-data[i * stride + j] = data[j * stride + i];
-}
+/*
 template<typename Real>
 void MatrixBase<Real>::SymAddMat2(const Real alpha,
 const MatrixBase<Real> &A,
@@ -734,7 +727,7 @@ void MatrixBase<Real>::LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U_in,
 }
 #endif
+*/
 // Copy constructor. Copies data to newly allocated memory.
 template<typename Real>
 Matrix<Real>::Matrix (const MatrixBase<Real> & M,
@@ -898,6 +891,7 @@ template
 void MatrixBase<double>::CopyFromMat(const MatrixBase<double> & M,
 MatrixTransposeType Trans);
+/*
 // Specialize the template for CopyFromSp for float, float.
 template<>
 template<>
@@ -992,7 +986,7 @@ template
 void MatrixBase<double>::CopyFromTp(const TpMatrix<double> & M,
 MatrixTransposeType trans);
+*/
 template<typename Real>
 void MatrixBase<Real>::CopyRowsFromVec(const VectorBase<Real> &rv) {
 if (rv.Dim() == num_rows_*num_cols_) {
@@ -1076,7 +1070,6 @@ void MatrixBase<Real>::CopyColsFromVec(const VectorBase<Real> &rv) {
 }
 }
 template<typename Real>
 void MatrixBase<Real>::CopyRowFromVec(const VectorBase<Real> &rv, const MatrixIndexT row) {
 KALDI_ASSERT(rv.Dim() == num_cols_ &&
@@ -1088,7 +1081,7 @@ void MatrixBase<Real>::CopyRowFromVec(const VectorBase<Real> &rv, const MatrixIn
 std::memcpy(row_data, rv_data, num_cols_ * sizeof(Real));
 }
+/*
 template<typename Real>
 void MatrixBase<Real>::CopyDiagFromVec(const VectorBase<Real> &rv) {
 KALDI_ASSERT(rv.Dim() == std::min(num_cols_, num_rows_));
@@ -1096,7 +1089,7 @@ void MatrixBase<Real>::CopyDiagFromVec(const VectorBase<Real> &rv) {
 Real *my_data = this->Data();
 for (; rv_data != rv_end; rv_data++, my_data += (this->stride_+1))
 *my_data = *rv_data;
-}
+}*/
 template<typename Real>
 void MatrixBase<Real>::CopyColFromVec(const VectorBase<Real> &rv,
@@ -1135,7 +1128,7 @@ void Matrix<Real>::Destroy() {
 }
+/*
 template<typename Real>
 void MatrixBase<Real>::MulElements(const MatrixBase<Real> &a) {
 KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_);
@@ -1325,6 +1318,7 @@ void MatrixBase<Real>::MulColsVec(const VectorBase<Real> &scale) {
 }
 }
 }
+*/
 template<typename Real>
 void MatrixBase<Real>::SetZero() {
@@ -1344,6 +1338,7 @@ void MatrixBase<Real>::Set(Real value) {
 }
 }
+/*
 template<typename Real>
 void MatrixBase<Real>::SetUnit() {
 SetZero();
@@ -1374,6 +1369,7 @@ void MatrixBase<Real>::SetRandUniform() {
 }
 }
 }
+*/
 template<typename Real>
 void MatrixBase<Real>::Write(std::ostream &os, bool binary) const {
@@ -1420,23 +1416,11 @@ void MatrixBase<Real>::Write(std::ostream &os, bool binary) const {
 template<typename Real>
-void MatrixBase<Real>::Read(std::istream & is, bool binary, bool add) {
-if (add) {
-Matrix<Real> tmp(num_rows_, num_cols_);
-tmp.Read(is, binary, false); // read without adding.
-if (tmp.num_rows_ != this->num_rows_ || tmp.num_cols_ != this->num_cols_)
-KALDI_ERR << "MatrixBase::Read, size mismatch "
-<< this->num_rows_ << ", " << this->num_cols_
-<< " vs. " << tmp.num_rows_ << ", " << tmp.num_cols_;
-this->AddMat(1.0, tmp);
-return;
-}
-// now assume add == false.
+void MatrixBase<Real>::Read(std::istream & is, bool binary) {
 // In order to avoid rewriting this, we just declare a Matrix and
 // use it to read the data, then copy.
 Matrix<Real> tmp;
-tmp.Read(is, binary, false);
+tmp.Read(is, binary);
 if (tmp.NumRows() != NumRows() || tmp.NumCols() != NumCols()) {
 KALDI_ERR << "MatrixBase<Real>::Read, size mismatch "
 << NumRows() << " x " << NumCols() << " versus "
@@ -1447,23 +1431,7 @@ void MatrixBase<Real>::Read(std::istream & is, bool binary, bool add) {
 template<typename Real>
-void Matrix<Real>::Read(std::istream & is, bool binary, bool add) {
-if (add) {
-Matrix<Real> tmp;
-tmp.Read(is, binary, false); // read without adding.
-if (this->num_rows_ == 0) this->Resize(tmp.num_rows_, tmp.num_cols_);
-else {
-if (this->num_rows_ != tmp.num_rows_ || this->num_cols_ != tmp.num_cols_) {
-if (tmp.num_rows_ == 0) return; // do nothing in this case.
-else KALDI_ERR << "Matrix::Read, size mismatch "
-<< this->num_rows_ << ", " << this->num_cols_
-<< " vs. " << tmp.num_rows_ << ", " << tmp.num_cols_;
-}
-}
-this->AddMat(1.0, tmp);
-return;
-}
+void Matrix<Real>::Read(std::istream & is, bool binary) {
 // now assume add == false.
 MatrixIndexT pos_at_start = is.tellg();
 std::ostringstream specific_error;
@@ -1472,10 +1440,10 @@
 int peekval = Peek(is, binary);
 if (peekval == 'C') {
 // This code enables us to read CompressedMatrix as a regular matrix.
-CompressedMatrix compressed_mat;
-compressed_mat.Read(is, binary); // at this point, add == false.
-this->Resize(compressed_mat.NumRows(), compressed_mat.NumCols());
-compressed_mat.CopyToMat(this);
+//CompressedMatrix compressed_mat;
+//compressed_mat.Read(is, binary); // at this point, add == false.
+//this->Resize(compressed_mat.NumRows(), compressed_mat.NumCols());
+//compressed_mat.CopyToMat(this);
 return;
 }
 const char *my_token = (sizeof(Real) == 4 ? "FM" : "DM");
@@ -1483,7 +1451,7 @@
 if (peekval == other_token_start) { // need to instantiate the other type to read it.
 typedef typename OtherReal<Real>::Real OtherType; // if Real == float, OtherType == double, and vice versa.
 Matrix<OtherType> other(this->num_rows_, this->num_cols_);
-other.Read(is, binary, false); // add is false at this point anyway.
+other.Read(is, binary); // add is false at this point anyway.
 this->Resize(other.NumRows(), other.NumCols());
 this->CopyFromMat(other);
 return;
@@ -1672,7 +1640,7 @@ SubMatrix<Real>::SubMatrix(Real *data,
 }
 }
+/*
 template<typename Real>
 void MatrixBase<Real>::Add(const Real alpha) {
 Real *data = data_;
@@ -1812,15 +1780,15 @@ void MatrixBase<Real>::DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
 for(int32 i = 0; i < NumRows(); i++)
 (*this)(i, i) *= 1.00001;
 }*/
-bool ans = JamaSvd(s, U, Vt);
-if (Vt != NULL) Vt->Transpose(); // possibly to do: change this and also the transpose inside the JamaSvd routine. note, Vt is square.
-if (!ans) {
-KALDI_ERR << "Error doing Svd"; // This one will be caught.
-}
-#endif
-if (prescale != 1.0) s->Scale(1.0/prescale);
-}
+// bool ans = JamaSvd(s, U, Vt);
+//if (Vt != NULL) Vt->Transpose(); // possibly to do: change this and also the transpose inside the JamaSvd routine. note, Vt is square.
+//if (!ans) {
+//KALDI_ERR << "Error doing Svd"; // This one will be caught.
+//}
+//#endif
+//if (prescale != 1.0) s->Scale(1.0/prescale);
+//}
+/*
 template<typename Real>
 void MatrixBase<Real>::Svd(VectorBase<Real> *s, MatrixBase<Real> *U, MatrixBase<Real> *Vt) const {
 try {
@@ -2052,17 +2020,18 @@ void MatrixBase<Real>::InvertDouble(Real *log_det, Real *det_sign,
 if (log_det) *log_det = log_det_tmp;
 if (det_sign) *det_sign = det_sign_tmp;
 }
+*/
-template<class Real>
-void MatrixBase<Real>::CopyFromMat(const CompressedMatrix &mat) {
-mat.CopyToMat(this);
-}
-template<class Real>
-Matrix<Real>::Matrix(const CompressedMatrix &M): MatrixBase<Real>() {
-Resize(M.NumRows(), M.NumCols(), kUndefined);
-M.CopyToMat(this);
-}
+//template<class Real>
+//void MatrixBase<Real>::CopyFromMat(const CompressedMatrix &mat) {
+//mat.CopyToMat(this);
+//}
+//template<class Real>
+//Matrix<Real>::Matrix(const CompressedMatrix &M): MatrixBase<Real>() {
+//Resize(M.NumRows(), M.NumCols(), kUndefined);
+//M.CopyToMat(this);
+//}
@@ -2074,7 +2043,7 @@ void MatrixBase<Real>::InvertElements() {
 }
 }
 }
+/*
 template<typename Real>
 void MatrixBase<Real>::Transpose() {
 KALDI_ASSERT(num_rows_ == num_cols_);
@@ -2250,7 +2219,7 @@ bool MatrixBase<Real>::Power(Real power) {
 (*this).AddMatMat(1.0, tmp, kNoTrans, P, kNoTrans, 0.0);
 return true;
 }
+*/
 template<typename Real>
 void Matrix<Real>::Swap(Matrix<Real> *other) {
 std::swap(this->data_, other->data_);
@@ -2258,7 +2227,7 @@ void Matrix<Real>::Swap(Matrix<Real> *other) {
 std::swap(this->num_rows_, other->num_rows_);
 std::swap(this->stride_, other->stride_);
 }
+/*
 // Repeating this comment that appeared in the header:
 // Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
 // P^{-1}. Be careful: the relationship of D to the eigenvalues we output is
@@ -2298,7 +2267,7 @@ void MatrixBase<Real>::Eig(MatrixBase<Real> *P,
 // INT_32 mVersion;
 // INT_32 mSampSize;
 // };
+/*
 template<typename Real>
 bool ReadHtk(std::istream &is, Matrix<Real> *M_ptr, HtkHeader *header_ptr)
 {
@@ -2821,7 +2790,7 @@ void MatrixBase<Real>::GroupMax(const MatrixBase<Real> &src) {
 }
 }
 }
+*/
 template<typename Real>
 void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
 const MatrixIndexT *indices) {
@@ -2847,7 +2816,7 @@ void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
 }
 }
+/*
 template<typename Real>
 void MatrixBase<Real>::AddCols(const MatrixBase<Real> &src,
 const MatrixIndexT *indices) {
@@ -2871,8 +2840,9 @@ void MatrixBase<Real>::AddCols(const MatrixBase<Real> &src,
 this_data[c] += src_data[*index_ptr];
 }
 }
-}
+}*/
+/*
 template<typename Real>
 void MatrixBase<Real>::CopyRows(const MatrixBase<Real> &src,
 const MatrixIndexT *indices) {
@@ -3022,9 +2992,9 @@ void MatrixBase<Real>::DiffTanh(const MatrixBase<Real> &value,
 value_data += value_stride;
 diff_data += diff_stride;
 }
-}
+}*/
+/*
 template<typename Real>
 template<typename OtherReal>
 void MatrixBase<Real>::AddVecToRows(const Real alpha, const VectorBase<OtherReal> &v) {
@@ -3087,7 +3057,7 @@ template void MatrixBase<double>::AddVecToCols(const double alpha,
 const VectorBase<float> &v);
 template void MatrixBase<double>::AddVecToCols(const double alpha,
 const VectorBase<double> &v);
+*/
 //Explicit instantiation of the classes
 //Apparently, it seems to be necessary that the instantiation
 //happens at the end of the file. Otherwise, not all the member
......
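The most visible interface change in the matrix code is that `Read()` drops its trailing `add` parameter, along with the accumulate-on-read path and `CompressedMatrix` support. A round-trip sketch against the new two-argument signature:

```cpp
#include <sstream>
#include "matrix/kaldi-matrix.h"

void RoundTrip(const kaldi::Matrix<kaldi::BaseFloat> &m) {
    std::stringstream ss;
    m.Write(ss, /*binary=*/true);
    kaldi::Matrix<kaldi::BaseFloat> m2;
    m2.Read(ss, /*binary=*/true);  // previously Read(is, binary, add = false)
}
```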
...@@ -32,13 +32,6 @@ namespace kaldi { ...@@ -32,13 +32,6 @@ namespace kaldi {
/// @{ \addtogroup matrix_funcs_scalar /// @{ \addtogroup matrix_funcs_scalar
/// We need to declare this here as it will be a friend function.
/// tr(A B), or tr(A B^T).
template<typename Real>
Real TraceMatMat(const MatrixBase<Real> &A, const MatrixBase<Real> &B,
MatrixTransposeType trans = kNoTrans);
/// @}
/// \addtogroup matrix_group /// \addtogroup matrix_group
/// @{ /// @{
...@@ -50,15 +43,8 @@ class MatrixBase { ...@@ -50,15 +43,8 @@ class MatrixBase {
public: public:
// so this child can access protected members of other instances. // so this child can access protected members of other instances.
friend class Matrix<Real>; friend class Matrix<Real>;
friend class SubMatrix<Real>;
// friend declarations for CUDA matrices (see ../cudamatrix/) // friend declarations for CUDA matrices (see ../cudamatrix/)
friend class CuMatrixBase<Real>;
friend class CuMatrix<Real>;
friend class CuSubMatrix<Real>;
friend class CuPackedMatrix<Real>;
friend class PackedMatrix<Real>;
friend class SparseMatrix<Real>;
friend class SparseMatrix<float>;
friend class SparseMatrix<double>;
/// Returns number of rows (or zero for empty matrix). /// Returns number of rows (or zero for empty matrix).
inline MatrixIndexT NumRows() const { return num_rows_; } inline MatrixIndexT NumRows() const { return num_rows_; }
...@@ -127,14 +113,6 @@ class MatrixBase { ...@@ -127,14 +113,6 @@ class MatrixBase {
/// Sets all elements to a specific value. /// Sets all elements to a specific value.
void Set(Real); void Set(Real);
/// Sets to zero, except ones along diagonal [for non-square matrices too] /// Sets to zero, except ones along diagonal [for non-square matrices too]
void SetUnit();
/// Sets to random values of a normal distribution
void SetRandn();
/// Sets to numbers uniformly distributed on (0, 1)
void SetRandUniform();
/* Copying functions. These do not resize the matrix! */
/// Copy given matrix. (no resize is done). /// Copy given matrix. (no resize is done).
template<typename OtherReal> template<typename OtherReal>
...@@ -142,21 +120,17 @@ class MatrixBase { ...@@ -142,21 +120,17 @@ class MatrixBase {
MatrixTransposeType trans = kNoTrans); MatrixTransposeType trans = kNoTrans);
/// Copy from compressed matrix. /// Copy from compressed matrix.
void CopyFromMat(const CompressedMatrix &M); //void CopyFromMat(const CompressedMatrix &M);
/// Copy given spmatrix. (no resize is done).
template<typename OtherReal>
void CopyFromSp(const SpMatrix<OtherReal> &M);
/// Copy given tpmatrix. (no resize is done). /// Copy given tpmatrix. (no resize is done).
template<typename OtherReal> //template<typename OtherReal>
void CopyFromTp(const TpMatrix<OtherReal> &M, //void CopyFromTp(const TpMatrix<OtherReal> &M,
MatrixTransposeType trans = kNoTrans); //MatrixTransposeType trans = kNoTrans);
/// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h /// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h
template<typename OtherReal> //template<typename OtherReal>
void CopyFromMat(const CuMatrixBase<OtherReal> &M, //void CopyFromMat(const CuMatrixBase<OtherReal> &M,
MatrixTransposeType trans = kNoTrans); //MatrixTransposeType trans = kNoTrans);
/// This function has two modes of operation. If v.Dim() == NumRows() * /// This function has two modes of operation. If v.Dim() == NumRows() *
/// NumCols(), then treats the vector as a row-by-row concatenation of a /// NumCols(), then treats the vector as a row-by-row concatenation of a
...@@ -165,7 +139,7 @@ class MatrixBase { ...@@ -165,7 +139,7 @@ class MatrixBase {
void CopyRowsFromVec(const VectorBase<Real> &v); void CopyRowsFromVec(const VectorBase<Real> &v);
/// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc /// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc
void CopyRowsFromVec(const CuVectorBase<Real> &v); //void CopyRowsFromVec(const CuVectorBase<Real> &v);
template<typename OtherReal> template<typename OtherReal>
void CopyRowsFromVec(const VectorBase<OtherReal> &v); void CopyRowsFromVec(const VectorBase<OtherReal> &v);
...@@ -215,7 +189,7 @@ class MatrixBase { ...@@ -215,7 +189,7 @@ class MatrixBase {
return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols); return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols);
} }
/* Various special functions. */ /*
/// Returns sum of all elements in matrix. /// Returns sum of all elements in matrix.
Real Sum() const; Real Sum() const;
/// Returns trace of matrix. /// Returns trace of matrix.
...@@ -268,15 +242,16 @@ class MatrixBase { ...@@ -268,15 +242,16 @@ class MatrixBase {
/// Does inversion in double precision even if matrix was not double. /// Does inversion in double precision even if matrix was not double.
void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL, void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL,
bool inverse_needed = true); bool inverse_needed = true);
*/
/// Inverts all the elements of the matrix /// Inverts all the elements of the matrix
void InvertElements(); void InvertElements();
/*
/// Transpose the matrix. This one is only /// Transpose the matrix. This one is only
/// applicable to square matrices (the one in the /// applicable to square matrices (the one in the
/// Matrix child class works also for non-square. /// Matrix child class works also for non-square.
void Transpose(); void Transpose();
*/
/// Copies column r from column indices[r] of src. /// Copies column r from column indices[r] of src.
/// As a special case, if indexes[i] == -1, sets column i to zero. /// As a special case, if indexes[i] == -1, sets column i to zero.
/// all elements of "indices" must be in [-1, src.NumCols()-1], /// all elements of "indices" must be in [-1, src.NumCols()-1],
...@@ -296,8 +271,8 @@ class MatrixBase { ...@@ -296,8 +271,8 @@ class MatrixBase {
/// indices.size() must equal this->NumCols(), /// indices.size() must equal this->NumCols(),
/// all elements of "reorder" must be in [-1, src.NumCols()-1], /// all elements of "reorder" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this.NumRows() /// and src.NumRows() must equal this.NumRows()
void AddCols(const MatrixBase<Real> &src, //void AddCols(const MatrixBase<Real> &src,
const MatrixIndexT *indices); // const MatrixIndexT *indices);
/// Copies row r of this matrix from an array of floats at the location given /// Copies row r of this matrix from an array of floats at the location given
/// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero. /// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero.
...@@ -314,30 +289,30 @@ class MatrixBase { ...@@ -314,30 +289,30 @@ class MatrixBase {
/// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]). /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
/// If indexes[r] < 0, does not add anything. all elements of "indexes" must /// If indexes[r] < 0, does not add anything. all elements of "indexes" must
/// be in [-1, src.NumRows()-1], and src.NumCols() must equal this.NumCols(). /// be in [-1, src.NumRows()-1], and src.NumCols() must equal this.NumCols().
void AddRows(Real alpha, // void AddRows(Real alpha,
const MatrixBase<Real> &src, // const MatrixBase<Real> &src,
const MatrixIndexT *indexes); // const MatrixIndexT *indexes);
/// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as the /// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as the
/// beginning of a region of memory representing a vector of floats, of the /// beginning of a region of memory representing a vector of floats, of the
/// same length as this.NumCols(). If src[r] is NULL, does not add anything. /// same length as this.NumCols(). If src[r] is NULL, does not add anything.
void AddRows(Real alpha, const Real *const *src); //void AddRows(Real alpha, const Real *const *src);
/// For each row r of this matrix, adds it (times alpha) to the array of /// For each row r of this matrix, adds it (times alpha) to the array of
/// floats at the location given by dst[r]. If dst[r] is NULL, does not do /// floats at the location given by dst[r]. If dst[r] is NULL, does not do
/// anything for that row. Requires that none of the memory regions pointed /// anything for that row. Requires that none of the memory regions pointed
/// to by the pointers in "dst" overlap (e.g. none of the pointers should be /// to by the pointers in "dst" overlap (e.g. none of the pointers should be
/// the same). /// the same).
void AddToRows(Real alpha, Real *const *dst) const; //void AddToRows(Real alpha, Real *const *dst) const;
/// For each row i of *this, adds this->Row(i) to /// For each row i of *this, adds this->Row(i) to
/// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing. /// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing.
/// Requires that all the indexes[i] that are >= 0 /// Requires that all the indexes[i] that are >= 0
/// be distinct, otherwise the behavior is undefined. /// be distinct, otherwise the behavior is undefined.
void AddToRows(Real alpha, //void AddToRows(Real alpha,
const MatrixIndexT *indexes, // const MatrixIndexT *indexes,
MatrixBase<Real> *dst) const; // MatrixBase<Real> *dst) const;
/*
inline void ApplyPow(Real power) { inline void ApplyPow(Real power) {
this -> Pow(*this, power); this -> Pow(*this, power);
} }
...@@ -374,7 +349,7 @@ class MatrixBase { ...@@ -374,7 +349,7 @@ class MatrixBase {
inline void ApplyLog() { inline void ApplyLog() {
this -> Log(*this); this -> Log(*this);
} }
*/
/// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D /// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
/// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is /// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is
/// slightly complicated, due to the need for P to be real. In the symmetric /// slightly complicated, due to the need for P to be real. In the symmetric
...@@ -389,9 +364,9 @@ class MatrixBase { ...@@ -389,9 +364,9 @@ class MatrixBase {
/// instead (*this) P = P D. /// instead (*this) P = P D.
/// ///
/// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag. /// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag.
void Eig(MatrixBase<Real> *P, //void Eig(MatrixBase<Real> *P,
VectorBase<Real> *eigs_real, // VectorBase<Real> *eigs_real,
VectorBase<Real> *eigs_imag) const; // VectorBase<Real> *eigs_imag) const;
/// The Power method attempts to take the matrix to a power using a method that /// The Power method attempts to take the matrix to a power using a method that
/// works in general for fractional and negative powers. The input matrix must /// works in general for fractional and negative powers. The input matrix must
...@@ -400,7 +375,7 @@ class MatrixBase { ...@@ -400,7 +375,7 @@ class MatrixBase {
/// return false and leave the matrix unchanged, if at entry the matrix had /// return false and leave the matrix unchanged, if at entry the matrix had
/// real negative eigenvalues (or if it had zero eigenvalues and the power was /// real negative eigenvalues (or if it had zero eigenvalues and the power was
/// negative). /// negative).
bool Power(Real pow); // bool Power(Real pow);
/** Singular value decomposition /** Singular value decomposition
Major limitations: Major limitations:
...@@ -413,31 +388,32 @@ class MatrixBase { ...@@ -413,31 +388,32 @@ class MatrixBase {
expect that S.Dim() == m, U is either NULL or m by n, expect that S.Dim() == m, U is either NULL or m by n,
and v is either NULL or n by n. and v is either NULL or n by n.
The singular values are not sorted (use SortSvd for that). */ The singular values are not sorted (use SortSvd for that). */
void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U, //void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt); // Destroys calling matrix. // MatrixBase<Real> *Vt); // Destroys calling matrix.
/// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already /// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already
/// transposed; the normal formulation is U diag(s) V^T. /// transposed; the normal formulation is U diag(s) V^T.
/// Null pointers for U or V mean we don't want that output (this saves /// Null pointers for U or V mean we don't want that output (this saves
/// compute). The singular values are not sorted (use SortSvd for that). /// compute). The singular values are not sorted (use SortSvd for that).
void Svd(VectorBase<Real> *s, MatrixBase<Real> *U, //void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt) const; // MatrixBase<Real> *Vt) const;
/// Compute SVD but only retain the singular values. /// Compute SVD but only retain the singular values.
void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); } //void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }
/// Returns smallest singular value. /// Returns smallest singular value.
Real MinSingularValue() const { //Real MinSingularValue() const {
Vector<Real> tmp(std::min(NumRows(), NumCols())); // Vector<Real> tmp(std::min(NumRows(), NumCols()));
Svd(&tmp); //Svd(&tmp);
return tmp.Min(); //return tmp.Min();
} //}
void TestUninitialized() const; // This function is designed so that if any element //void TestUninitialized() const; // This function is designed so that if any element
// if the matrix is uninitialized memory, valgrind will complain. // if the matrix is uninitialized memory, valgrind will complain.
/// Returns condition number by computing Svd. Works even if cols > rows. /// Returns condition number by computing Svd. Works even if cols > rows.
/// Returns infinity if all singular values are zero. /// Returns infinity if all singular values are zero.
/*
Real Cond() const; Real Cond() const;
/// Returns true if matrix is Symmetric. /// Returns true if matrix is Symmetric.
...@@ -559,7 +535,7 @@ class MatrixBase { ...@@ -559,7 +535,7 @@ class MatrixBase {
// element-by-element, set *this = diff * (1.0 - value^2). // element-by-element, set *this = diff * (1.0 - value^2).
void DiffTanh(const MatrixBase<Real> &value, void DiffTanh(const MatrixBase<Real> &value,
const MatrixBase<Real> &diff); const MatrixBase<Real> &diff);
*/
/** Uses Svd to compute the eigenvalue decomposition of a symmetric positive /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
* semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
* orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not * orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not
...@@ -571,208 +547,15 @@ class MatrixBase { ...@@ -571,208 +547,15 @@ class MatrixBase {
* SpMatrix and use Eig() function there, which uses eigenvalue decomposition * SpMatrix and use Eig() function there, which uses eigenvalue decomposition
* directly rather than SVD. * directly rather than SVD.
*/ */
void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P,
Real check_thresh = 0.001);
friend Real kaldi::TraceMatMat<Real>(const MatrixBase<Real> &A,
const MatrixBase<Real> &B, MatrixTransposeType trans); // tr (A B)
// so it can get around const restrictions on the pointer to data_.
friend class SubMatrix<Real>;
/// Add a scalar to each element
void Add(const Real alpha);
/// Add a scalar to each diagonal element.
void AddToDiag(const Real alpha);
/// *this += alpha * a * b^T
template<typename OtherReal>
void AddVecVec(const Real alpha, const VectorBase<OtherReal> &a,
const VectorBase<OtherReal> &b);
/// [each row of *this] += alpha * v
template<typename OtherReal>
void AddVecToRows(const Real alpha, const VectorBase<OtherReal> &v);
/// [each col of *this] += alpha * v
template<typename OtherReal>
void AddVecToCols(const Real alpha, const VectorBase<OtherReal> &v);
/// *this += alpha * M [or M^T]
void AddMat(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transA = kNoTrans);
/// *this += alpha * A [or A^T].
void AddSmat(Real alpha, const SparseMatrix<Real> &A,
MatrixTransposeType trans = kNoTrans);
/// (*this) = alpha * op(A) * B + beta * (*this), where A is sparse.
/// Multiplication of sparse with dense matrix. See also AddMatSmat.
void AddSmatMat(Real alpha, const SparseMatrix<Real> &A,
MatrixTransposeType transA, const MatrixBase<Real> &B,
Real beta);
/// (*this) = alpha * A * op(B) + beta * (*this), where B is sparse
/// and op(B) is either B or trans(B) depending on the 'transB' argument.
/// This is multiplication of a dense by a sparse matrix. See also
/// AddSmatMat.
void AddMatSmat(Real alpha, const MatrixBase<Real> &A,
const SparseMatrix<Real> &B, MatrixTransposeType transB,
Real beta);
/// *this = beta * *this + alpha * M M^T, for symmetric matrices. It only
/// updates the lower triangle of *this. It will leave the matrix asymmetric;
/// if you need it symmetric as a regular matrix, do CopyLowerToUpper().
void SymAddMat2(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transA, Real beta);
/// *this = beta * *this + alpha * diag(v) * M [or M^T].
/// The same as adding M but scaling each row M_i by v(i).
void AddDiagVecMat(const Real alpha, const VectorBase<Real> &v,
const MatrixBase<Real> &M, MatrixTransposeType transM,
Real beta = 1.0);
/// *this = beta * *this + alpha * M [or M^T] * diag(v)
/// The same as adding M but scaling each column M_j by v(j).
void AddMatDiagVec(const Real alpha,
const MatrixBase<Real> &M, MatrixTransposeType transM,
VectorBase<Real> &v,
Real beta = 1.0);
/// *this = beta * *this + alpha * A .* B (.* element by element multiplication)
void AddMatMatElements(const Real alpha,
const MatrixBase<Real>& A,
const MatrixBase<Real>& B,
const Real beta);
/// *this += alpha * S
template<typename OtherReal>
void AddSp(const Real alpha, const SpMatrix<OtherReal> &S);
void AddMatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// *this = a * b / c (by element; when c = 0, *this = a)
void SetMatMatDivMat(const MatrixBase<Real>& A,
const MatrixBase<Real>& B,
const MatrixBase<Real>& C);
/// A version of AddMatMat specialized for when the second argument
/// contains a lot of zeroes.
void AddMatSmat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// A version of AddMatMat specialized for when the first argument
/// contains a lot of zeroes.
void AddSmatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta);
/// this <-- beta*this + alpha*A*B*C.
void AddMatMatMat(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const MatrixBase<Real>& C, MatrixTransposeType transC,
const Real beta);
/// this <-- beta*this + alpha*SpA*B.
// This and the routines below are really
// stubs that need to be made more efficient.
void AddSpMat(const Real alpha,
const SpMatrix<Real>& A,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A);
return AddMatMat(alpha, M, kNoTrans, B, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddTpMat(const Real alpha,
const TpMatrix<Real>& A, MatrixTransposeType transA,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A);
return AddMatMat(alpha, M, transA, B, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddMatSp(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const SpMatrix<Real>& B,
const Real beta) {
Matrix<Real> M(B);
return AddMatMat(alpha, A, transA, M, kNoTrans, beta);
}
/// this <-- beta*this + alpha*A*B*C.
void AddSpMatSp(const Real alpha,
const SpMatrix<Real> &A,
const MatrixBase<Real>& B, MatrixTransposeType transB,
const SpMatrix<Real>& C,
const Real beta) {
Matrix<Real> M(A), N(C);
return AddMatMatMat(alpha, M, kNoTrans, B, transB, N, kNoTrans, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddMatTp(const Real alpha,
const MatrixBase<Real>& A, MatrixTransposeType transA,
const TpMatrix<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(B);
return AddMatMat(alpha, A, transA, M, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
void AddTpTp(const Real alpha,
const TpMatrix<Real>& A, MatrixTransposeType transA,
const TpMatrix<Real>& B, MatrixTransposeType transB,
const Real beta) {
Matrix<Real> M(A), N(B);
return AddMatMat(alpha, M, transA, N, transB, beta);
}
/// this <-- beta*this + alpha*A*B.
// This one is more efficient, not like the others above.
void AddSpSp(const Real alpha,
const SpMatrix<Real>& A, const SpMatrix<Real>& B,
const Real beta);
/// Copy lower triangle to upper triangle (symmetrize)
void CopyLowerToUpper();
/// Copy upper triangle to lower triangle (symmetrize)
void CopyUpperToLower();
/// This function orthogonalizes the rows of a matrix using the Gram-Schmidt
/// process. It is only applicable if NumRows() <= NumCols(). It will use
/// random number generation to fill in rows with something nonzero, in cases
/// where the original matrix was of deficient row rank.
void OrthogonalizeRows();
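// A classical Gram-Schmidt sketch of the process named above, written as a
// hypothetical standalone helper over a raw row-major array (assumes <cmath>;
// the real routine additionally re-randomizes rows that cancel to near zero
// when the input is row-rank-deficient):
//
//   void GramSchmidtRowsSketch(double *m, int rows, int cols) {
//     for (int i = 0; i < rows; i++) {
//       double *ri = m + i * cols;
//       for (int j = 0; j < i; j++) {         // remove projection onto row j,
//         const double *rj = m + j * cols;    // which is already unit-norm
//         double p = 0.0;
//         for (int k = 0; k < cols; k++) p += ri[k] * rj[k];
//         for (int k = 0; k < cols; k++) ri[k] -= p * rj[k];
//       }
//       double n2 = 0.0;                      // then normalize row i
//       for (int k = 0; k < cols; k++) n2 += ri[k] * ri[k];
//       double inv = 1.0 / std::sqrt(n2);     // assumes a nonzero row here
//       for (int k = 0; k < cols; k++) ri[k] *= inv;
//     }
//   }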
/// stream read. /// stream read.
/// Use instead of stream<<*this, if you want to add to existing contents. /// Use instead of stream<<*this, if you want to add to existing contents.
// Will throw exception on failure. // Will throw exception on failure.
void Read(std::istream & in, bool binary, bool add = false); void Read(std::istream & in, bool binary);
/// write to stream. /// write to stream.
void Write(std::ostream & out, bool binary) const; void Write(std::ostream & out, bool binary) const;
// Below are internal methods for Svd; the user does not need to know about them. // Below are internal methods for Svd; the user does not need to know about them.
#if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD)
// protected:
// Should be protected but used directly in testing routine.
// destroys *this!
void LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt);
#else
protected:
// destroys *this!
bool JamaSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *V);
#endif
protected: protected:
/// Initializer, callable only from child. /// Initializer, callable only from child.
...@@ -827,19 +610,9 @@ class Matrix : public MatrixBase<Real> { ...@@ -827,19 +610,9 @@ class Matrix : public MatrixBase<Real> {
MatrixStrideType stride_type = kDefaultStride): MatrixStrideType stride_type = kDefaultStride):
MatrixBase<Real>() { Resize(r, c, resize_type, stride_type); } MatrixBase<Real>() { Resize(r, c, resize_type, stride_type); }
/// Copy constructor from CUDA matrix
/// This is defined in ../cudamatrix/cu-matrix.h
template<typename OtherReal>
explicit Matrix(const CuMatrixBase<OtherReal> &cu,
MatrixTransposeType trans = kNoTrans);
/// Swaps the contents of *this and *other. Shallow swap. /// Swaps the contents of *this and *other. Shallow swap.
void Swap(Matrix<Real> *other); void Swap(Matrix<Real> *other);
/// Defined in ../cudamatrix/cu-matrix.cc
void Swap(CuMatrix<Real> *mat);
/// Constructor from any MatrixBase. Can also copy with transpose. /// Constructor from any MatrixBase. Can also copy with transpose.
/// Allocates new memory. /// Allocates new memory.
explicit Matrix(const MatrixBase<Real> & M, explicit Matrix(const MatrixBase<Real> & M,
...@@ -853,40 +626,29 @@ class Matrix : public MatrixBase<Real> { ...@@ -853,40 +626,29 @@ class Matrix : public MatrixBase<Real> {
explicit Matrix(const MatrixBase<OtherReal> & M, explicit Matrix(const MatrixBase<OtherReal> & M,
MatrixTransposeType trans = kNoTrans); MatrixTransposeType trans = kNoTrans);
/// Copy constructor taking SpMatrix...
/// It is symmetric, so no option for transpose, and NumRows == Cols
template<typename OtherReal>
explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() {
Resize(M.NumRows(), M.NumRows(), kUndefined);
this->CopyFromSp(M);
}
/// Constructor from CompressedMatrix
explicit Matrix(const CompressedMatrix &C);
/// Copy constructor taking TpMatrix... /// Copy constructor taking TpMatrix...
template <typename OtherReal> //template <typename OtherReal>
explicit Matrix(const TpMatrix<OtherReal> & M, //explicit Matrix(const TpMatrix<OtherReal> & M,
MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() { //MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
if (trans == kNoTrans) { //if (trans == kNoTrans) {
Resize(M.NumRows(), M.NumCols(), kUndefined); //Resize(M.NumRows(), M.NumCols(), kUndefined);
this->CopyFromTp(M); //this->CopyFromTp(M);
} else { //} else {
Resize(M.NumCols(), M.NumRows(), kUndefined); //Resize(M.NumCols(), M.NumRows(), kUndefined);
this->CopyFromTp(M, kTrans); //this->CopyFromTp(M, kTrans);
} //}
} //}
/// read from stream. /// read from stream.
// Unlike one in base, allows resizing. // Unlike one in base, allows resizing.
void Read(std::istream & in, bool binary, bool add = false); void Read(std::istream & in, bool binary);
/// Remove a specified row. /// Remove a specified row.
void RemoveRow(MatrixIndexT i); void RemoveRow(MatrixIndexT i);
/// Transpose the matrix. Works for non-square /// Transpose the matrix. Works for non-square
/// matrices as well as square ones. /// matrices as well as square ones.
void Transpose(); //void Transpose();
/// Destructor to free matrices. /// Destructor to free matrices.
~Matrix() { Destroy(); } ~Matrix() { Destroy(); }
...@@ -947,37 +709,6 @@ class Matrix : public MatrixBase<Real> { ...@@ -947,37 +709,6 @@ class Matrix : public MatrixBase<Real> {
/// A structure containing the HTK header. /// A structure containing the HTK header.
/// [TODO: change the style of the variables to Kaldi-compliant] /// [TODO: change the style of the variables to Kaldi-compliant]
struct HtkHeader {
/// Number of samples.
int32 mNSamples;
/// Sample period.
int32 mSamplePeriod;
/// Sample size
int16 mSampleSize;
/// Sample kind.
uint16 mSampleKind;
};
// Read HTK formatted features from file into matrix.
template<typename Real>
bool ReadHtk(std::istream &is, Matrix<Real> *M, HtkHeader *header_ptr);
// Write (HTK format) features to file from matrix.
template<typename Real>
bool WriteHtk(std::ostream &os, const MatrixBase<Real> &M, HtkHeader htk_hdr);
// Write (CMUSphinx format) features to file from matrix.
template<typename Real>
bool WriteSphinx(std::ostream &os, const MatrixBase<Real> &M);
/// @} end of "addtogroup matrix_funcs_io"
/**
Sub-matrix representation.
Can work with sub-parts of a matrix using this class.
Note that SubMatrix is not very const-correct-- it allows you to
change the contents of a const Matrix. Be careful!
*/
template<typename Real> template<typename Real>
class SubMatrix : public MatrixBase<Real> { class SubMatrix : public MatrixBase<Real> {
...@@ -1012,6 +743,7 @@ class SubMatrix : public MatrixBase<Real> { ...@@ -1012,6 +743,7 @@ class SubMatrix : public MatrixBase<Real> {
/// Disallow assignment. /// Disallow assignment.
SubMatrix<Real> &operator = (const SubMatrix<Real> &other); SubMatrix<Real> &operator = (const SubMatrix<Real> &other);
}; };
/// @} End of "addtogroup matrix_funcs_io". /// @} End of "addtogroup matrix_funcs_io".
/// \addtogroup matrix_funcs_scalar /// \addtogroup matrix_funcs_scalar
...@@ -1019,7 +751,7 @@ class SubMatrix : public MatrixBase<Real> { ...@@ -1019,7 +751,7 @@ class SubMatrix : public MatrixBase<Real> {
// Some declarations. These are traces of products. // Some declarations. These are traces of products.
/************************
template<typename Real> template<typename Real>
bool ApproxEqual(const MatrixBase<Real> &A, bool ApproxEqual(const MatrixBase<Real> &A,
const MatrixBase<Real> &B, Real tol = 0.01) { const MatrixBase<Real> &B, Real tol = 0.01) {
...@@ -1085,7 +817,7 @@ void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real> ...@@ -1085,7 +817,7 @@ void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real>
template<typename Real> template<typename Real>
bool AttemptComplexPower(Real *x_re, Real *x_im, Real power); bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
**********/
/// @} end of addtogroup matrix_funcs_misc /// @} end of addtogroup matrix_funcs_misc
...@@ -1101,7 +833,6 @@ std::istream & operator >> (std::istream & In, MatrixBase<Real> & M); ...@@ -1101,7 +833,6 @@ std::istream & operator >> (std::istream & In, MatrixBase<Real> & M);
template<typename Real> template<typename Real>
std::istream & operator >> (std::istream & In, Matrix<Real> & M); std::istream & operator >> (std::istream & In, Matrix<Real> & M);
template<typename Real> template<typename Real>
bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) { bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols()); return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols());
......
...@@ -44,14 +44,14 @@ std::istream &operator >> (std::istream &is, Vector<Real> &rv) { ...@@ -44,14 +44,14 @@ std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
return is; return is;
} }
template<> //template<>
template<> //template<>
void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv); //void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
template<> //template<>
template<> //template<>
void VectorBase<double>::AddVec<double>(const double alpha, //void VectorBase<double>::AddVec<double>(const double alpha,
const VectorBase<double> &rv); //const VectorBase<double> &rv);
} // namespace kaldi } // namespace kaldi
......
...@@ -25,144 +25,11 @@ ...@@ -25,144 +25,11 @@
#include <algorithm> #include <algorithm>
#include <string> #include <string>
#include "matrix/cblas-wrappers.h"
#include "matrix/kaldi-vector.h" #include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h" #include "matrix/kaldi-matrix.h"
#include "matrix/sp-matrix.h"
#include "matrix/sparse-matrix.h"
namespace kaldi { namespace kaldi {
template<typename Real>
Real VecVec(const VectorBase<Real> &a,
const VectorBase<Real> &b) {
MatrixIndexT adim = a.Dim();
KALDI_ASSERT(adim == b.Dim());
return cblas_Xdot(adim, a.Data(), 1, b.Data(), 1);
}
template
float VecVec<>(const VectorBase<float> &a,
const VectorBase<float> &b);
template
double VecVec<>(const VectorBase<double> &a,
const VectorBase<double> &b);
template<typename Real, typename OtherReal>
Real VecVec(const VectorBase<Real> &ra,
const VectorBase<OtherReal> &rb) {
MatrixIndexT adim = ra.Dim();
KALDI_ASSERT(adim == rb.Dim());
const Real *a_data = ra.Data();
const OtherReal *b_data = rb.Data();
Real sum = 0.0;
for (MatrixIndexT i = 0; i < adim; i++)
sum += a_data[i]*b_data[i];
return sum;
}
// instantiate the template above.
template
float VecVec<>(const VectorBase<float> &ra,
const VectorBase<double> &rb);
template
double VecVec<>(const VectorBase<double> &ra,
const VectorBase<float> &rb);
template<>
template<>
void VectorBase<float>::AddVec(const float alpha,
const VectorBase<float> &v) {
KALDI_ASSERT(dim_ == v.dim_);
KALDI_ASSERT(&v != this);
cblas_Xaxpy(dim_, alpha, v.Data(), 1, data_, 1);
}
template<>
template<>
void VectorBase<double>::AddVec(const double alpha,
const VectorBase<double> &v) {
KALDI_ASSERT(dim_ == v.dim_);
KALDI_ASSERT(&v != this);
cblas_Xaxpy(dim_, alpha, v.Data(), 1, data_, 1);
}
template<typename Real>
void VectorBase<Real>::AddMatVec(const Real alpha,
const MatrixBase<Real> &M,
MatrixTransposeType trans,
const VectorBase<Real> &v,
const Real beta) {
KALDI_ASSERT((trans == kNoTrans && M.NumCols() == v.dim_ && M.NumRows() == dim_)
|| (trans == kTrans && M.NumRows() == v.dim_ && M.NumCols() == dim_));
KALDI_ASSERT(&v != this);
cblas_Xgemv(trans, M.NumRows(), M.NumCols(), alpha, M.Data(), M.Stride(),
v.Data(), 1, beta, data_, 1);
}
template<typename Real>
void VectorBase<Real>::AddMatSvec(const Real alpha,
const MatrixBase<Real> &M,
MatrixTransposeType trans,
const VectorBase<Real> &v,
const Real beta) {
KALDI_ASSERT((trans == kNoTrans && M.NumCols() == v.dim_ && M.NumRows() == dim_)
|| (trans == kTrans && M.NumRows() == v.dim_ && M.NumCols() == dim_));
KALDI_ASSERT(&v != this);
Xgemv_sparsevec(trans, M.NumRows(), M.NumCols(), alpha, M.Data(), M.Stride(),
v.Data(), 1, beta, data_, 1);
return;
/*
MatrixIndexT this_dim = this->dim_, v_dim = v.dim_,
M_stride = M.Stride();
Real *this_data = this->data_;
const Real *M_data = M.Data(), *v_data = v.data_;
if (beta != 1.0) this->Scale(beta);
if (trans == kNoTrans) {
for (MatrixIndexT i = 0; i < v_dim; i++) {
Real v_i = v_data[i];
if (v_i == 0.0) continue;
// Add to *this, the i'th column of the Matrix, times v_i.
cblas_Xaxpy(this_dim, v_i * alpha, M_data + i, M_stride, this_data, 1);
}
} else { // The transposed case is slightly more efficient, I guess.
for (MatrixIndexT i = 0; i < v_dim; i++) {
Real v_i = v.data_[i];
if (v_i == 0.0) continue;
// Add to *this, the i'th row of the Matrix, times v_i.
cblas_Xaxpy(this_dim, v_i * alpha,
M_data + (i * M_stride), 1, this_data, 1);
}
}*/
}
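// Design note on AddMatSvec: a dense GEMV costs O(rows * cols) no matter
// what v contains, while the sparse path (see the commented reference
// implementation above) issues one axpy per nonzero v(i) and skips zero
// entries entirely, so it wins whenever v is mostly zeros, e.g. one-hot
// indicator vectors or heavily pruned posteriors.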
template<typename Real>
void VectorBase<Real>::AddSpVec(const Real alpha,
const SpMatrix<Real> &M,
const VectorBase<Real> &v,
const Real beta) {
KALDI_ASSERT(M.NumRows() == v.dim_ && dim_ == v.dim_);
KALDI_ASSERT(&v != this);
cblas_Xspmv(alpha, M.NumRows(), M.Data(), v.Data(), 1, beta, data_, 1);
}
template<typename Real>
void VectorBase<Real>::MulTp(const TpMatrix<Real> &M,
const MatrixTransposeType trans) {
KALDI_ASSERT(M.NumRows() == dim_);
cblas_Xtpmv(trans,M.Data(),M.NumRows(),data_,1);
}
template<typename Real>
void VectorBase<Real>::Solve(const TpMatrix<Real> &M,
const MatrixTransposeType trans) {
KALDI_ASSERT(M.NumRows() == dim_);
cblas_Xtpsv(trans, M.Data(), M.NumRows(), data_, 1);
}
template<typename Real> template<typename Real>
inline void Vector<Real>::Init(const MatrixIndexT dim) { inline void Vector<Real>::Init(const MatrixIndexT dim) {
KALDI_ASSERT(dim >= 0); KALDI_ASSERT(dim >= 0);
...@@ -232,6 +99,7 @@ void VectorBase<Real>::CopyFromVec(const VectorBase<Real> &v) { ...@@ -232,6 +99,7 @@ void VectorBase<Real>::CopyFromVec(const VectorBase<Real> &v) {
} }
} }
/*
template<typename Real> template<typename Real>
template<typename OtherReal> template<typename OtherReal>
void VectorBase<Real>::CopyFromPacked(const PackedMatrix<OtherReal>& M) { void VectorBase<Real>::CopyFromPacked(const PackedMatrix<OtherReal>& M) {
...@@ -249,7 +117,7 @@ template<typename Real> ...@@ -249,7 +117,7 @@ template<typename Real>
void VectorBase<Real>::CopyFromPtr(const Real *data, MatrixIndexT sz) { void VectorBase<Real>::CopyFromPtr(const Real *data, MatrixIndexT sz) {
KALDI_ASSERT(dim_ == sz); KALDI_ASSERT(dim_ == sz);
std::memcpy(this->data_, data, Dim() * sizeof(Real)); std::memcpy(this->data_, data, Dim() * sizeof(Real));
} }*/
template<typename Real> template<typename Real>
template<typename OtherReal> template<typename OtherReal>
...@@ -297,6 +165,7 @@ bool VectorBase<Real>::IsZero(Real cutoff) const { ...@@ -297,6 +165,7 @@ bool VectorBase<Real>::IsZero(Real cutoff) const {
return (abs_max <= cutoff); return (abs_max <= cutoff);
} }
/*
template<typename Real> template<typename Real>
void VectorBase<Real>::SetRandn() { void VectorBase<Real>::SetRandn() {
kaldi::RandomState rstate; kaldi::RandomState rstate;
...@@ -330,7 +199,7 @@ MatrixIndexT VectorBase<Real>::RandCategorical() const { ...@@ -330,7 +199,7 @@ MatrixIndexT VectorBase<Real>::RandCategorical() const {
} }
return dim_ - 1; // Should only happen if RandUniform() return dim_ - 1; // Should only happen if RandUniform()
// returns exactly 1, or due to roundoff. // returns exactly 1, or due to roundoff.
} }*/
template<typename Real> template<typename Real>
void VectorBase<Real>::Set(Real f) { void VectorBase<Real>::Set(Real f) {
...@@ -426,6 +295,7 @@ void VectorBase<float>::CopyRowFromMat(const MatrixBase<double> &mat, MatrixInde ...@@ -426,6 +295,7 @@ void VectorBase<float>::CopyRowFromMat(const MatrixBase<double> &mat, MatrixInde
template template
void VectorBase<double>::CopyRowFromMat(const MatrixBase<float> &mat, MatrixIndexT row); void VectorBase<double>::CopyRowFromMat(const MatrixBase<float> &mat, MatrixIndexT row);
/*
template<typename Real> template<typename Real>
template<typename OtherReal> template<typename OtherReal>
void VectorBase<Real>::CopyRowFromSp(const SpMatrix<OtherReal> &sp, MatrixIndexT row) { void VectorBase<Real>::CopyRowFromSp(const SpMatrix<OtherReal> &sp, MatrixIndexT row) {
...@@ -451,28 +321,6 @@ void VectorBase<float>::CopyRowFromSp(const SpMatrix<float> &mat, MatrixIndexT r ...@@ -451,28 +321,6 @@ void VectorBase<float>::CopyRowFromSp(const SpMatrix<float> &mat, MatrixIndexT r
template template
void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT row); void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT row);
#ifdef HAVE_MKL
template<>
void VectorBase<float>::Pow(const VectorBase<float> &v, float power) {
vsPowx(dim_, data_, power, v.data_);
}
template<>
void VectorBase<double>::Pow(const VectorBase<double> &v, double power) {
vdPowx(dim_, data_, power, v.data_);
}
#else
// takes elements to a power. Does not check output.
template<typename Real>
void VectorBase<Real>::Pow(const VectorBase<Real> &v, Real power) {
KALDI_ASSERT(dim_ == v.dim_);
for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = pow(v.data_[i], power);
}
}
#endif
// takes absolute value of the elements to a power. // takes absolute value of the elements to a power.
// Throws an exception if it cannot (but only for power != 1 and power != 2). // Throws an exception if it cannot (but only for power != 1 and power != 2).
template<typename Real> template<typename Real>
...@@ -648,7 +496,7 @@ Real VectorBase<Real>::Min(MatrixIndexT *index_out) const { ...@@ -648,7 +496,7 @@ Real VectorBase<Real>::Min(MatrixIndexT *index_out) const {
if (data[i] < ans) { ans = data[i]; index = i; } if (data[i] < ans) { ans = data[i]; index = i; }
*index_out = index; *index_out = index;
return ans; return ans;
} }*/
template<typename Real> template<typename Real>
...@@ -670,434 +518,424 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<float> &mat, MatrixInde ...@@ -670,434 +518,424 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<float> &mat, MatrixInde
template template
void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixIndexT col); void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixIndexT col);
template<typename Real> //template<typename Real>
void VectorBase<Real>::CopyDiagFromMat(const MatrixBase<Real> &M) { //void VectorBase<Real>::CopyDiagFromMat(const MatrixBase<Real> &M) {
KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); //KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols()));
cblas_Xcopy(dim_, M.Data(), M.Stride() + 1, data_, 1); //cblas_Xcopy(dim_, M.Data(), M.Stride() + 1, data_, 1);
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::CopyDiagFromPacked(const PackedMatrix<Real> &M) { //void VectorBase<Real>::CopyDiagFromPacked(const PackedMatrix<Real> &M) {
KALDI_ASSERT(dim_ == M.NumCols()); //KALDI_ASSERT(dim_ == M.NumCols());
for (MatrixIndexT i = 0; i < dim_; i++) //for (MatrixIndexT i = 0; i < dim_; i++)
data_[i] = M(i, i); //data_[i] = M(i, i);
// could make this more efficient. //// could make this more efficient.
} //}
template<typename Real> //template<typename Real>
Real VectorBase<Real>::Sum() const { //Real VectorBase<Real>::Sum() const {
// Do a dot-product with a size-1 array with a stride of 0 to //// Do a dot-product with a size-1 array with a stride of 0 to
// implement sum. This allows us to access SIMD operations in a //// implement sum. This allows us to access SIMD operations in a
// cross-platform way via your BLAS library. //// cross-platform way via your BLAS library.
Real one(1); //Real one(1);
return cblas_Xdot(dim_, data_, 1, &one, 0); //return cblas_Xdot(dim_, data_, 1, &one, 0);
} //}
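// A plain-C++ sketch of the stride-0 trick used above (hypothetical helper,
// not part of the library): passing increment 0 for the second operand makes
// the BLAS dot product re-read the single value `one` for every element, so
// cblas_sdot(n, x, 1, &one, 0) degenerates into a SIMD-accelerated sum of x.
// Scalar equivalent:
static float SumViaDotSketch(const float *x, int n) {
  const float one = 1.0f;
  float sum = 0.0f;
  for (int i = 0; i < n; i++) sum += x[i] * one;  // second stride is 0
  return sum;
}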
template<typename Real> //template<typename Real>
Real VectorBase<Real>::SumLog() const { //Real VectorBase<Real>::SumLog() const {
double sum_log = 0.0; //double sum_log = 0.0;
double prod = 1.0; //double prod = 1.0;
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
prod *= data_[i]; //prod *= data_[i];
// Possible future work (arnab): change these magic values to pre-defined //// Possible future work (arnab): change these magic values to pre-defined
// constants //// constants
if (prod < 1.0e-10 || prod > 1.0e+10) { //if (prod < 1.0e-10 || prod > 1.0e+10) {
sum_log += Log(prod); //sum_log += Log(prod);
prod = 1.0; //prod = 1.0;
} //}
} //}
if (prod != 1.0) sum_log += Log(prod); //if (prod != 1.0) sum_log += Log(prod);
return sum_log; //return sum_log;
} //}
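// Why SumLog() flushes the running product at 1e-10 / 1e+10: a raw product
// of many small (or large) values hits zero or infinity in double precision
// long before the sum of their logs loses accuracy; a few hundred factors of
// 0.1 already drive a raw product to zero, while the flushed version keeps
// each partial product inside [1e-10, 1e+10] and accumulates the rest in
// sum_log.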
template<typename Real> //template<typename Real>
void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) { //void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) {
KALDI_ASSERT(dim_ == M.NumCols()); //KALDI_ASSERT(dim_ == M.NumCols());
MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_; //MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_;
Real *data = data_; //Real *data = data_;
// implement the function according to a dimension cutoff for computation efficiency //// implement the function according to a dimension cutoff for computation efficiency
if (num_rows <= 64) { //if (num_rows <= 64) {
cblas_Xscal(dim, beta, data, 1); //cblas_Xscal(dim, beta, data, 1);
const Real *m_data = M.Data(); //const Real *m_data = M.Data();
for (MatrixIndexT i = 0; i < num_rows; i++, m_data += stride) //for (MatrixIndexT i = 0; i < num_rows; i++, m_data += stride)
cblas_Xaxpy(dim, alpha, m_data, 1, data, 1); //cblas_Xaxpy(dim, alpha, m_data, 1, data, 1);
} else { //} else {
Vector<Real> ones(M.NumRows()); //Vector<Real> ones(M.NumRows());
ones.Set(1.0); //ones.Set(1.0);
this->AddMatVec(alpha, M, kTrans, ones, beta); //this->AddMatVec(alpha, M, kTrans, ones, beta);
} //}
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) { //void VectorBase<Real>::AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) {
KALDI_ASSERT(dim_ == M.NumRows()); //KALDI_ASSERT(dim_ == M.NumRows());
MatrixIndexT num_cols = M.NumCols(); //MatrixIndexT num_cols = M.NumCols();
// implement the function according to a dimension cutoff for computation efficiency //// implement the function according to a dimension cutoff for computation efficiency
if (num_cols <= 64) { //if (num_cols <= 64) {
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
double sum = 0.0; //double sum = 0.0;
const Real *src = M.RowData(i); //const Real *src = M.RowData(i);
for (MatrixIndexT j = 0; j < num_cols; j++) //for (MatrixIndexT j = 0; j < num_cols; j++)
sum += src[j]; //sum += src[j];
data_[i] = alpha * sum + beta * data_[i]; //data_[i] = alpha * sum + beta * data_[i];
} //}
} else { //} else {
Vector<Real> ones(M.NumCols()); //Vector<Real> ones(M.NumCols());
ones.Set(1.0); //ones.Set(1.0);
this->AddMatVec(alpha, M, kNoTrans, ones, beta); //this->AddMatVec(alpha, M, kNoTrans, ones, beta);
} //}
} //}
template<typename Real> //template<typename Real>
Real VectorBase<Real>::LogSumExp(Real prune) const { //Real VectorBase<Real>::LogSumExp(Real prune) const {
Real sum; //Real sum;
if (sizeof(sum) == 8) sum = kLogZeroDouble; //if (sizeof(sum) == 8) sum = kLogZeroDouble;
else sum = kLogZeroFloat; //else sum = kLogZeroFloat;
Real max_elem = Max(), cutoff; //Real max_elem = Max(), cutoff;
if (sizeof(Real) == 4) cutoff = max_elem + kMinLogDiffFloat; //if (sizeof(Real) == 4) cutoff = max_elem + kMinLogDiffFloat;
else cutoff = max_elem + kMinLogDiffDouble; //else cutoff = max_elem + kMinLogDiffDouble;
if (prune > 0.0 && max_elem - prune > cutoff) // explicit pruning... //if (prune > 0.0 && max_elem - prune > cutoff) // explicit pruning...
cutoff = max_elem - prune; //cutoff = max_elem - prune;
double sum_relto_max_elem = 0.0; //double sum_relto_max_elem = 0.0;
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
BaseFloat f = data_[i]; //BaseFloat f = data_[i];
if (f >= cutoff) //if (f >= cutoff)
sum_relto_max_elem += Exp(f - max_elem); //sum_relto_max_elem += Exp(f - max_elem);
} //}
return max_elem + Log(sum_relto_max_elem); //return max_elem + Log(sum_relto_max_elem);
} //}
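// A standalone sketch of the max-shift used above (hypothetical helper,
// assumes <cmath> and n >= 1). Subtracting the max keeps every argument to
// exp() at or below zero, so nothing overflows and the largest term
// contributes exactly 1.0; the real routine can additionally drop terms more
// than `prune` below the max:
static double LogSumExpSketch(const double *x, int n) {
  double max = x[0];
  for (int i = 1; i < n; i++)
    if (x[i] > max) max = x[i];
  double sum = 0.0;
  for (int i = 0; i < n; i++) sum += std::exp(x[i] - max);  // each term <= 1
  return max + std::log(sum);
}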
template<typename Real> //template<typename Real>
void VectorBase<Real>::InvertElements() { //void VectorBase<Real>::InvertElements() {
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = static_cast<Real>(1 / data_[i]); //data_[i] = static_cast<Real>(1 / data_[i]);
} //}
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::ApplyLog() { //void VectorBase<Real>::ApplyLog() {
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
if (data_[i] < 0.0) //if (data_[i] < 0.0)
KALDI_ERR << "Trying to take log of a negative number."; //KALDI_ERR << "Trying to take log of a negative number.";
data_[i] = Log(data_[i]); //data_[i] = Log(data_[i]);
} //}
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::ApplyLogAndCopy(const VectorBase<Real> &v) { //void VectorBase<Real>::ApplyLogAndCopy(const VectorBase<Real> &v) {
KALDI_ASSERT(dim_ == v.Dim()); //KALDI_ASSERT(dim_ == v.Dim());
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = Log(v(i)); //data_[i] = Log(v(i));
} //}
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::ApplyExp() { //void VectorBase<Real>::ApplyExp() {
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = Exp(data_[i]); //data_[i] = Exp(data_[i]);
} //}
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::ApplyAbs() { //void VectorBase<Real>::ApplyAbs() {
for (MatrixIndexT i = 0; i < dim_; i++) { data_[i] = std::abs(data_[i]); } //for (MatrixIndexT i = 0; i < dim_; i++) { data_[i] = std::abs(data_[i]); }
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::Floor(const VectorBase<Real> &v, Real floor_val, MatrixIndexT *floored_count) { //void VectorBase<Real>::Floor(const VectorBase<Real> &v, Real floor_val, MatrixIndexT *floored_count) {
KALDI_ASSERT(dim_ == v.dim_); //KALDI_ASSERT(dim_ == v.dim_);
if (floored_count == nullptr) { //if (floored_count == nullptr) {
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = std::max(v.data_[i], floor_val); //data_[i] = std::max(v.data_[i], floor_val);
} //}
} else { //} else {
MatrixIndexT num_floored = 0; //MatrixIndexT num_floored = 0;
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
if (v.data_[i] < floor_val) { //if (v.data_[i] < floor_val) {
data_[i] = floor_val; //data_[i] = floor_val;
num_floored++; //num_floored++;
} else { //} else {
data_[i] = v.data_[i]; //data_[i] = v.data_[i];
} //}
} //}
*floored_count = num_floored; //*floored_count = num_floored;
} //}
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::Ceiling(const VectorBase<Real> &v, Real ceil_val, MatrixIndexT *ceiled_count) { //void VectorBase<Real>::Ceiling(const VectorBase<Real> &v, Real ceil_val, MatrixIndexT *ceiled_count) {
KALDI_ASSERT(dim_ == v.dim_); //KALDI_ASSERT(dim_ == v.dim_);
if (ceiled_count == nullptr) { //if (ceiled_count == nullptr) {
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = std::min(v.data_[i], ceil_val); //data_[i] = std::min(v.data_[i], ceil_val);
} //}
} else { //} else {
MatrixIndexT num_changed = 0; //MatrixIndexT num_changed = 0;
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
if (v.data_[i] > ceil_val) { //if (v.data_[i] > ceil_val) {
data_[i] = ceil_val; //data_[i] = ceil_val;
num_changed++; //num_changed++;
} else { //} else {
data_[i] = v.data_[i]; //data_[i] = v.data_[i];
} //}
} //}
*ceiled_count = num_changed; //*ceiled_count = num_changed;
} //}
} //}
template<typename Real> //template<typename Real>
MatrixIndexT VectorBase<Real>::ApplyFloor(const VectorBase<Real> &floor_vec) { //MatrixIndexT VectorBase<Real>::ApplyFloor(const VectorBase<Real> &floor_vec) {
KALDI_ASSERT(floor_vec.Dim() == dim_); //KALDI_ASSERT(floor_vec.Dim() == dim_);
MatrixIndexT num_floored = 0; //MatrixIndexT num_floored = 0;
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
if (data_[i] < floor_vec(i)) { //if (data_[i] < floor_vec(i)) {
data_[i] = floor_vec(i); //data_[i] = floor_vec(i);
num_floored++; //num_floored++;
} //}
} //}
return num_floored; //return num_floored;
} //}
template<typename Real> //template<typename Real>
Real VectorBase<Real>::ApplySoftMax() { //Real VectorBase<Real>::ApplySoftMax() {
Real max = this->Max(), sum = 0.0; //Real max = this->Max(), sum = 0.0;
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
sum += (data_[i] = Exp(data_[i] - max)); //sum += (data_[i] = Exp(data_[i] - max));
} //}
this->Scale(1.0 / sum); //this->Scale(1.0 / sum);
return max + Log(sum); //return max + Log(sum);
} //}
template<typename Real> //template<typename Real>
Real VectorBase<Real>::ApplyLogSoftMax() { //Real VectorBase<Real>::ApplyLogSoftMax() {
Real max = this->Max(), sum = 0.0; //Real max = this->Max(), sum = 0.0;
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
sum += Exp((data_[i] -= max)); //sum += Exp((data_[i] -= max));
} //}
sum = Log(sum); //sum = Log(sum);
this->Add(-1.0 * sum); //this->Add(-1.0 * sum);
return max + sum; //return max + sum;
} //}
#ifdef HAVE_MKL //#ifdef HAVE_MKL
template<> //template<>
void VectorBase<float>::Tanh(const VectorBase<float> &src) { //void VectorBase<float>::Tanh(const VectorBase<float> &src) {
KALDI_ASSERT(dim_ == src.dim_); //KALDI_ASSERT(dim_ == src.dim_);
vsTanh(dim_, src.data_, data_); //vsTanh(dim_, src.data_, data_);
} //}
template<> //template<>
void VectorBase<double>::Tanh(const VectorBase<double> &src) { //void VectorBase<double>::Tanh(const VectorBase<double> &src) {
KALDI_ASSERT(dim_ == src.dim_); //KALDI_ASSERT(dim_ == src.dim_);
vdTanh(dim_, src.data_, data_); //vdTanh(dim_, src.data_, data_);
} //}
#else //#else
template<typename Real> //template<typename Real>
void VectorBase<Real>::Tanh(const VectorBase<Real> &src) { //void VectorBase<Real>::Tanh(const VectorBase<Real> &src) {
KALDI_ASSERT(dim_ == src.dim_); //KALDI_ASSERT(dim_ == src.dim_);
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
Real x = src.data_[i]; //Real x = src.data_[i];
if (x > 0.0) { //if (x > 0.0) {
Real inv_expx = Exp(-x); //Real inv_expx = Exp(-x);
x = -1.0 + 2.0 / (1.0 + inv_expx * inv_expx); //x = -1.0 + 2.0 / (1.0 + inv_expx * inv_expx);
} else { //} else {
Real expx = Exp(x); //Real expx = Exp(x);
x = 1.0 - 2.0 / (1.0 + expx * expx); //x = 1.0 - 2.0 / (1.0 + expx * expx);
} //}
data_[i] = x; //data_[i] = x;
} //}
} //}
#endif //#endif
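// A standalone sketch of the overflow-safe tanh above (hypothetical helper,
// assumes <cmath>). Both branches feed exp() a non-positive argument only,
// via the identities
//   tanh(x) = -1 + 2 / (1 + e^{-2x})   for x > 0
//   tanh(x) =  1 - 2 / (1 + e^{+2x})   for x <= 0
static double TanhSketch(double x) {
  if (x > 0.0) {
    double e = std::exp(-x);            // e * e == exp(-2x), cannot overflow
    return -1.0 + 2.0 / (1.0 + e * e);
  } else {
    double e = std::exp(x);             // x <= 0, so exp(x) <= 1
    return 1.0 - 2.0 / (1.0 + e * e);
  }
}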
#ifdef HAVE_MKL //#ifdef HAVE_MKL
// Implementing sigmoid based on tanh. //// Implementing sigmoid based on tanh.
template<> //template<>
void VectorBase<float>::Sigmoid(const VectorBase<float> &src) { //void VectorBase<float>::Sigmoid(const VectorBase<float> &src) {
KALDI_ASSERT(dim_ == src.dim_); //KALDI_ASSERT(dim_ == src.dim_);
this->CopyFromVec(src); //this->CopyFromVec(src);
this->Scale(0.5); //this->Scale(0.5);
vsTanh(dim_, data_, data_); //vsTanh(dim_, data_, data_);
this->Add(1.0); //this->Add(1.0);
this->Scale(0.5); //this->Scale(0.5);
} //}
template<> //template<>
void VectorBase<double>::Sigmoid(const VectorBase<double> &src) { //void VectorBase<double>::Sigmoid(const VectorBase<double> &src) {
KALDI_ASSERT(dim_ == src.dim_); //KALDI_ASSERT(dim_ == src.dim_);
this->CopyFromVec(src); //this->CopyFromVec(src);
this->Scale(0.5); //this->Scale(0.5);
vdTanh(dim_, data_, data_); //vdTanh(dim_, data_, data_);
this->Add(1.0); //this->Add(1.0);
this->Scale(0.5); //this->Scale(0.5);
} //}
#else //#else
template<typename Real> //template<typename Real>
void VectorBase<Real>::Sigmoid(const VectorBase<Real> &src) { //void VectorBase<Real>::Sigmoid(const VectorBase<Real> &src) {
KALDI_ASSERT(dim_ == src.dim_); //KALDI_ASSERT(dim_ == src.dim_);
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
Real x = src.data_[i]; //Real x = src.data_[i];
// We aim to avoid floating-point overflow here. //// We aim to avoid floating-point overflow here.
if (x > 0.0) { //if (x > 0.0) {
x = 1.0 / (1.0 + Exp(-x)); //x = 1.0 / (1.0 + Exp(-x));
} else { //} else {
Real ex = Exp(x); //Real ex = Exp(x);
x = ex / (ex + 1.0); //x = ex / (ex + 1.0);
} //}
data_[i] = x; //data_[i] = x;
} //}
} //}
#endif //#endif
template<typename Real> //template<typename Real>
void VectorBase<Real>::Add(Real c) { //void VectorBase<Real>::Add(Real c) {
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] += c; //data_[i] += c;
} //}
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::Scale(Real alpha) { //void VectorBase<Real>::Scale(Real alpha) {
cblas_Xscal(dim_, alpha, data_, 1); //cblas_Xscal(dim_, alpha, data_, 1);
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::MulElements(const VectorBase<Real> &v) { //void VectorBase<Real>::MulElements(const VectorBase<Real> &v) {
KALDI_ASSERT(dim_ == v.dim_); //KALDI_ASSERT(dim_ == v.dim_);
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] *= v.data_[i]; //data_[i] *= v.data_[i];
} //}
} //}
template<typename Real> // Set each element to y = (x == orig ? changed : x). //template<typename Real> // Set each element to y = (x == orig ? changed : x).
void VectorBase<Real>::ReplaceValue(Real orig, Real changed) { //void VectorBase<Real>::ReplaceValue(Real orig, Real changed) {
Real *data = data_; //Real *data = data_;
for (MatrixIndexT i = 0; i < dim_; i++) //for (MatrixIndexT i = 0; i < dim_; i++)
if (data[i] == orig) data[i] = changed; //if (data[i] == orig) data[i] = changed;
} //}
template<typename Real> //template<typename Real>
template<typename OtherReal> //template<typename OtherReal>
void VectorBase<Real>::MulElements(const VectorBase<OtherReal> &v) { //void VectorBase<Real>::MulElements(const VectorBase<OtherReal> &v) {
KALDI_ASSERT(dim_ == v.Dim()); //KALDI_ASSERT(dim_ == v.Dim());
const OtherReal *other_ptr = v.Data(); //const OtherReal *other_ptr = v.Data();
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] *= other_ptr[i]; //data_[i] *= other_ptr[i];
} //}
} //}
// instantiate template. //// instantiate template.
template //template
void VectorBase<float>::MulElements(const VectorBase<double> &v); //void VectorBase<float>::MulElements(const VectorBase<double> &v);
template //template
void VectorBase<double>::MulElements(const VectorBase<float> &v); //void VectorBase<double>::MulElements(const VectorBase<float> &v);
template<typename Real> //template<typename Real>
void VectorBase<Real>::AddVecVec(Real alpha, const VectorBase<Real> &v, //void VectorBase<Real>::AddVecVec(Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &r, Real beta) { //const VectorBase<Real> &r, Real beta) {
KALDI_ASSERT(v.data_ != this->data_ && r.data_ != this->data_); //KALDI_ASSERT(v.data_ != this->data_ && r.data_ != this->data_);
// We pretend that v is a band-diagonal matrix. //// We pretend that v is a band-diagonal matrix.
KALDI_ASSERT(dim_ == v.dim_ && dim_ == r.dim_); //KALDI_ASSERT(dim_ == v.dim_ && dim_ == r.dim_);
cblas_Xgbmv(kNoTrans, dim_, dim_, 0, 0, alpha, v.data_, 1, //cblas_Xgbmv(kNoTrans, dim_, dim_, 0, 0, alpha, v.data_, 1,
r.data_, 1, beta, this->data_, 1); //r.data_, 1, beta, this->data_, 1);
} //}
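// Design note: cblas_?gbmv with zero sub- and super-diagonal bandwidth (the
// two 0 arguments above) treats v as the band storage of the n x n diagonal
// matrix diag(v), so the single BLAS call computes
//   this = beta * this + alpha * diag(v) * r,
// which is exactly the element-wise product v .* r being added here.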
template<typename Real> //template<typename Real>
void VectorBase<Real>::DivElements(const VectorBase<Real> &v) { //void VectorBase<Real>::DivElements(const VectorBase<Real> &v) {
KALDI_ASSERT(dim_ == v.dim_); //KALDI_ASSERT(dim_ == v.dim_);
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] /= v.data_[i]; //data_[i] /= v.data_[i];
} //}
} //}
template<typename Real> //template<typename Real>
template<typename OtherReal> //template<typename OtherReal>
void VectorBase<Real>::DivElements(const VectorBase<OtherReal> &v) { //void VectorBase<Real>::DivElements(const VectorBase<OtherReal> &v) {
KALDI_ASSERT(dim_ == v.Dim()); //KALDI_ASSERT(dim_ == v.Dim());
const OtherReal *other_ptr = v.Data(); //const OtherReal *other_ptr = v.Data();
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] /= other_ptr[i]; //data_[i] /= other_ptr[i];
} //}
} //}
// instantiate template. //// instantiate template.
template //template
void VectorBase<float>::DivElements(const VectorBase<double> &v); //void VectorBase<float>::DivElements(const VectorBase<double> &v);
template //template
void VectorBase<double>::DivElements(const VectorBase<float> &v); //void VectorBase<double>::DivElements(const VectorBase<float> &v);
template<typename Real> //template<typename Real>
void VectorBase<Real>::AddVecDivVec(Real alpha, const VectorBase<Real> &v, //void VectorBase<Real>::AddVecDivVec(Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &rr, Real beta) { //const VectorBase<Real> &rr, Real beta) {
KALDI_ASSERT((dim_ == v.dim_ && dim_ == rr.dim_)); //KALDI_ASSERT((dim_ == v.dim_ && dim_ == rr.dim_));
for (MatrixIndexT i = 0; i < dim_; i++) { //for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = alpha * v.data_[i]/rr.data_[i] + beta * data_[i] ; //data_[i] = alpha * v.data_[i]/rr.data_[i] + beta * data_[i] ;
} //}
} //}
template<typename Real> //template<typename Real>
template<typename OtherReal> //template<typename OtherReal>
void VectorBase<Real>::AddVec(const Real alpha, const VectorBase<OtherReal> &v) { //void VectorBase<Real>::AddVec(const Real alpha, const VectorBase<OtherReal> &v) {
KALDI_ASSERT(dim_ == v.dim_); //KALDI_ASSERT(dim_ == v.dim_);
// remove __restrict__ if it causes compilation problems. //// remove __restrict__ if it causes compilation problems.
Real *__restrict__ data = data_; //Real *__restrict__ data = data_;
OtherReal *__restrict__ other_data = v.data_; //OtherReal *__restrict__ other_data = v.data_;
MatrixIndexT dim = dim_; //MatrixIndexT dim = dim_;
if (alpha != 1.0) //if (alpha != 1.0)
for (MatrixIndexT i = 0; i < dim; i++) //for (MatrixIndexT i = 0; i < dim; i++)
data[i] += alpha * other_data[i]; //data[i] += alpha * other_data[i];
else //else
for (MatrixIndexT i = 0; i < dim; i++) //for (MatrixIndexT i = 0; i < dim; i++)
data[i] += other_data[i]; //data[i] += other_data[i];
} //}
template //template
void VectorBase<float>::AddVec(const float alpha, const VectorBase<double> &v); //void VectorBase<float>::AddVec(const float alpha, const VectorBase<double> &v);
template //template
void VectorBase<double>::AddVec(const double alpha, const VectorBase<float> &v); //void VectorBase<double>::AddVec(const double alpha, const VectorBase<float> &v);
template<typename Real> //template<typename Real>
template<typename OtherReal> //template<typename OtherReal>
void VectorBase<Real>::AddVec2(const Real alpha, const VectorBase<OtherReal> &v) { //void VectorBase<Real>::AddVec2(const Real alpha, const VectorBase<OtherReal> &v) {
KALDI_ASSERT(dim_ == v.dim_); //KALDI_ASSERT(dim_ == v.dim_);
// remove __restrict__ if it causes compilation problems. //// remove __restrict__ if it causes compilation problems.
Real *__restrict__ data = data_; //Real *__restrict__ data = data_;
OtherReal *__restrict__ other_data = v.data_; //OtherReal *__restrict__ other_data = v.data_;
MatrixIndexT dim = dim_; //MatrixIndexT dim = dim_;
if (alpha != 1.0) //if (alpha != 1.0)
for (MatrixIndexT i = 0; i < dim; i++) //for (MatrixIndexT i = 0; i < dim; i++)
data[i] += alpha * other_data[i] * other_data[i]; //data[i] += alpha * other_data[i] * other_data[i];
else //else
for (MatrixIndexT i = 0; i < dim; i++) //for (MatrixIndexT i = 0; i < dim; i++)
data[i] += other_data[i] * other_data[i]; //data[i] += other_data[i] * other_data[i];
} //}
template //template
void VectorBase<float>::AddVec2(const float alpha, const VectorBase<double> &v); //void VectorBase<float>::AddVec2(const float alpha, const VectorBase<double> &v);
template //template
void VectorBase<double>::AddVec2(const double alpha, const VectorBase<float> &v); //void VectorBase<double>::AddVec2(const double alpha, const VectorBase<float> &v);
template<typename Real> template<typename Real>
void VectorBase<Real>::Read(std::istream &is, bool binary, bool add) { void VectorBase<Real>::Read(std::istream &is, bool binary) {
if (add) {
Vector<Real> tmp(Dim());
tmp.Read(is, binary, false); // read without adding.
if (this->Dim() != tmp.Dim()) {
KALDI_ERR << "VectorBase::Read, size mismatch " << this->Dim()<<" vs. "<<tmp.Dim();
}
this->AddVec(1.0, tmp);
return;
} // now assume add == false.
// In order to avoid rewriting this, we just declare a Vector and // In order to avoid rewriting this, we just declare a Vector and
// use it to read the data, then copy. // use it to read the data, then copy.
Vector<Real> tmp; Vector<Real> tmp;
tmp.Read(is, binary, false); tmp.Read(is, binary);
if (tmp.Dim() != Dim()) if (tmp.Dim() != Dim())
KALDI_ERR << "VectorBase<Real>::Read, size mismatch " KALDI_ERR << "VectorBase<Real>::Read, size mismatch "
<< Dim() << " vs. " << tmp.Dim(); << Dim() << " vs. " << tmp.Dim();
...@@ -1106,19 +944,7 @@ void VectorBase<Real>::Read(std::istream &is, bool binary, bool add) { ...@@ -1106,19 +944,7 @@ void VectorBase<Real>::Read(std::istream &is, bool binary, bool add) {
template<typename Real> template<typename Real>
void Vector<Real>::Read(std::istream &is, bool binary, bool add) { void Vector<Real>::Read(std::istream &is, bool binary) {
if (add) {
Vector<Real> tmp(this->Dim());
tmp.Read(is, binary, false); // read without adding.
if (this->Dim() == 0) this->Resize(tmp.Dim());
if (this->Dim() != tmp.Dim()) {
KALDI_ERR << "Vector<Real>::Read, adding but dimensions mismatch "
<< this->Dim() << " vs. " << tmp.Dim();
}
this->AddVec(1.0, tmp);
return;
} // now assume add == false.
std::ostringstream specific_error; std::ostringstream specific_error;
MatrixIndexT pos_at_start = is.tellg(); MatrixIndexT pos_at_start = is.tellg();
...@@ -1129,7 +955,7 @@ void Vector<Real>::Read(std::istream &is, bool binary, bool add) { ...@@ -1129,7 +955,7 @@ void Vector<Real>::Read(std::istream &is, bool binary, bool add) {
if (peekval == other_token_start) { // need to instantiate the other type to read it. if (peekval == other_token_start) { // need to instantiate the other type to read it.
typedef typename OtherReal<Real>::Real OtherType; // if Real == float, OtherType == double, and vice versa. typedef typename OtherReal<Real>::Real OtherType; // if Real == float, OtherType == double, and vice versa.
Vector<OtherType> other(this->Dim()); Vector<OtherType> other(this->Dim());
other.Read(is, binary, false); // add is false at this point. other.Read(is, binary);
if (this->Dim() != other.Dim()) this->Resize(other.Dim()); if (this->Dim() != other.Dim()) this->Resize(other.Dim());
this->CopyFromVec(other); this->CopyFromVec(other);
return; return;
...@@ -1251,47 +1077,47 @@ void VectorBase<Real>::Write(std::ostream & os, bool binary) const { ...@@ -1251,47 +1077,47 @@ void VectorBase<Real>::Write(std::ostream & os, bool binary) const {
} }
template<typename Real> //template<typename Real>
void VectorBase<Real>::AddVec2(const Real alpha, const VectorBase<Real> &v) { //void VectorBase<Real>::AddVec2(const Real alpha, const VectorBase<Real> &v) {
KALDI_ASSERT(dim_ == v.dim_); //KALDI_ASSERT(dim_ == v.dim_);
for (MatrixIndexT i = 0; i < dim_; i++) //for (MatrixIndexT i = 0; i < dim_; i++)
data_[i] += alpha * v.data_[i] * v.data_[i]; //data_[i] += alpha * v.data_[i] * v.data_[i];
} //}
// this <-- beta*this + alpha*M*v. //// this <-- beta*this + alpha*M*v.
template<typename Real> //template<typename Real>
void VectorBase<Real>::AddTpVec(const Real alpha, const TpMatrix<Real> &M, //void VectorBase<Real>::AddTpVec(const Real alpha, const TpMatrix<Real> &M,
const MatrixTransposeType trans, //const MatrixTransposeType trans,
const VectorBase<Real> &v, //const VectorBase<Real> &v,
const Real beta) { //const Real beta) {
KALDI_ASSERT(dim_ == v.dim_ && dim_ == M.NumRows()); //KALDI_ASSERT(dim_ == v.dim_ && dim_ == M.NumRows());
if (beta == 0.0) { //if (beta == 0.0) {
if (&v != this) CopyFromVec(v); //if (&v != this) CopyFromVec(v);
MulTp(M, trans); //MulTp(M, trans);
if (alpha != 1.0) Scale(alpha); //if (alpha != 1.0) Scale(alpha);
} else { //} else {
Vector<Real> tmp(v); //Vector<Real> tmp(v);
tmp.MulTp(M, trans); //tmp.MulTp(M, trans);
if (beta != 1.0) Scale(beta); // *this <-- beta * *this //if (beta != 1.0) Scale(beta); // *this <-- beta * *this
AddVec(alpha, tmp); // *this += alpha * M * v //AddVec(alpha, tmp); // *this += alpha * M * v
} //}
} //}
template<typename Real> //template<typename Real>
Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M, //Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M,
const VectorBase<Real> &v2) { //const VectorBase<Real> &v2) {
KALDI_ASSERT(v1.Dim() == M.NumRows() && v2.Dim() == M.NumCols()); //KALDI_ASSERT(v1.Dim() == M.NumRows() && v2.Dim() == M.NumCols());
Vector<Real> vtmp(M.NumRows()); //Vector<Real> vtmp(M.NumRows());
vtmp.AddMatVec(1.0, M, kNoTrans, v2, 0.0); //vtmp.AddMatVec(1.0, M, kNoTrans, v2, 0.0);
return VecVec(v1, vtmp); //return VecVec(v1, vtmp);
} //}
template //template
float VecMatVec(const VectorBase<float> &v1, const MatrixBase<float> &M, //float VecMatVec(const VectorBase<float> &v1, const MatrixBase<float> &M,
const VectorBase<float> &v2); //const VectorBase<float> &v2);
template //template
double VecMatVec(const VectorBase<double> &v1, const MatrixBase<double> &M, //double VecMatVec(const VectorBase<double> &v1, const MatrixBase<double> &M,
const VectorBase<double> &v2); //const VectorBase<double> &v2);
template<typename Real> template<typename Real>
void Vector<Real>::Swap(Vector<Real> *other) { void Vector<Real>::Swap(Vector<Real> *other) {
...@@ -1300,51 +1126,51 @@ void Vector<Real>::Swap(Vector<Real> *other) { ...@@ -1300,51 +1126,51 @@ void Vector<Real>::Swap(Vector<Real> *other) {
} }
template<typename Real> //template<typename Real>
void VectorBase<Real>::AddDiagMat2( //void VectorBase<Real>::AddDiagMat2(
Real alpha, const MatrixBase<Real> &M, //Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType trans, Real beta) { //MatrixTransposeType trans, Real beta) {
if (trans == kNoTrans) { //if (trans == kNoTrans) {
KALDI_ASSERT(this->dim_ == M.NumRows()); //KALDI_ASSERT(this->dim_ == M.NumRows());
MatrixIndexT rows = this->dim_, cols = M.NumCols(), //MatrixIndexT rows = this->dim_, cols = M.NumCols(),
mat_stride = M.Stride(); //mat_stride = M.Stride();
Real *data = this->data_; //Real *data = this->data_;
const Real *mat_data = M.Data(); //const Real *mat_data = M.Data();
for (MatrixIndexT i = 0; i < rows; i++, mat_data += mat_stride, data++) //for (MatrixIndexT i = 0; i < rows; i++, mat_data += mat_stride, data++)
*data = beta * *data + alpha * cblas_Xdot(cols,mat_data,1,mat_data,1); //*data = beta * *data + alpha * cblas_Xdot(cols,mat_data,1,mat_data,1);
} else { //} else {
KALDI_ASSERT(this->dim_ == M.NumCols()); //KALDI_ASSERT(this->dim_ == M.NumCols());
MatrixIndexT rows = M.NumRows(), cols = this->dim_, //MatrixIndexT rows = M.NumRows(), cols = this->dim_,
mat_stride = M.Stride(); //mat_stride = M.Stride();
Real *data = this->data_; //Real *data = this->data_;
const Real *mat_data = M.Data(); //const Real *mat_data = M.Data();
for (MatrixIndexT i = 0; i < cols; i++, mat_data++, data++) //for (MatrixIndexT i = 0; i < cols; i++, mat_data++, data++)
*data = beta * *data + alpha * cblas_Xdot(rows, mat_data, mat_stride, //*data = beta * *data + alpha * cblas_Xdot(rows, mat_data, mat_stride,
mat_data, mat_stride); //mat_data, mat_stride);
} //}
} //}
template<typename Real> //template<typename Real>
void VectorBase<Real>::AddDiagMatMat( //void VectorBase<Real>::AddDiagMatMat(
Real alpha, //Real alpha,
const MatrixBase<Real> &M, MatrixTransposeType transM, //const MatrixBase<Real> &M, MatrixTransposeType transM,
const MatrixBase<Real> &N, MatrixTransposeType transN, //const MatrixBase<Real> &N, MatrixTransposeType transN,
Real beta) { //Real beta) {
MatrixIndexT dim = this->dim_, //MatrixIndexT dim = this->dim_,
M_col_dim = (transM == kTrans ? M.NumRows() : M.NumCols()), //M_col_dim = (transM == kTrans ? M.NumRows() : M.NumCols()),
N_row_dim = (transN == kTrans ? N.NumCols() : N.NumRows()); //N_row_dim = (transN == kTrans ? N.NumCols() : N.NumRows());
KALDI_ASSERT(M_col_dim == N_row_dim); // this is the dimension we sum over //KALDI_ASSERT(M_col_dim == N_row_dim); // this is the dimension we sum over
MatrixIndexT M_row_stride = M.Stride(), M_col_stride = 1; //MatrixIndexT M_row_stride = M.Stride(), M_col_stride = 1;
if (transM == kTrans) std::swap(M_row_stride, M_col_stride); //if (transM == kTrans) std::swap(M_row_stride, M_col_stride);
MatrixIndexT N_row_stride = N.Stride(), N_col_stride = 1; //MatrixIndexT N_row_stride = N.Stride(), N_col_stride = 1;
if (transN == kTrans) std::swap(N_row_stride, N_col_stride); //if (transN == kTrans) std::swap(N_row_stride, N_col_stride);
Real *data = this->data_; //Real *data = this->data_;
const Real *Mdata = M.Data(), *Ndata = N.Data(); //const Real *Mdata = M.Data(), *Ndata = N.Data();
for (MatrixIndexT i = 0; i < dim; i++, Mdata += M_row_stride, Ndata += N_col_stride, data++) { //for (MatrixIndexT i = 0; i < dim; i++, Mdata += M_row_stride, Ndata += N_col_stride, data++) {
*data = beta * *data + alpha * cblas_Xdot(M_col_dim, Mdata, M_col_stride, Ndata, N_row_stride); //*data = beta * *data + alpha * cblas_Xdot(M_col_dim, Mdata, M_col_stride, Ndata, N_row_stride);
} //}
} //}
template class Vector<float>; template class Vector<float>;
......
...@@ -49,17 +49,6 @@ class VectorBase { ...@@ -49,17 +49,6 @@ class VectorBase {
/// Set all members of a vector to a specified value. /// Set all members of a vector to a specified value.
void Set(Real f); void Set(Real f);
/// Set vector to random normally-distributed noise.
void SetRandn();
/// Sets to numbers uniformly distributed on (0,1)
void SetRandUniform();
/// This function returns a random index into this vector,
/// chosen with probability proportional to the corresponding
/// element. Requires that this->Min() >= 0 and this->Sum() > 0.
MatrixIndexT RandCategorical() const;
/// Returns the dimension of the vector. /// Returns the dimension of the vector.
inline MatrixIndexT Dim() const { return dim_; } inline MatrixIndexT Dim() const { return dim_; }
...@@ -108,178 +97,15 @@ class VectorBase { ...@@ -108,178 +97,15 @@ class VectorBase {
/// Copy data from another vector (must match own size). /// Copy data from another vector (must match own size).
void CopyFromVec(const VectorBase<Real> &v); void CopyFromVec(const VectorBase<Real> &v);
/// Copy data from a SpMatrix or TpMatrix (must match own size).
template<typename OtherReal>
void CopyFromPacked(const PackedMatrix<OtherReal> &M);
/// Copy data from another vector of different type (double vs. float) /// Copy data from another vector of different type (double vs. float)
template<typename OtherReal> template<typename OtherReal>
void CopyFromVec(const VectorBase<OtherReal> &v); void CopyFromVec(const VectorBase<OtherReal> &v);
/// Copy from CuVector. This is defined in ../cudamatrix/cu-vector.h
template<typename OtherReal>
void CopyFromVec(const CuVectorBase<OtherReal> &v);
/// Applies floor to all elements. Returns number of elements
/// floored in floored_count if it is non-null.
void Floor(const VectorBase<Real> &v, Real floor_val, MatrixIndexT *floored_count = nullptr);
/// Applies ceiling to all elements. Returns number of elements
/// changed in ceiled_count if it is non-null.
void Ceiling(const VectorBase<Real> &v, Real ceil_val, MatrixIndexT *ceiled_count = nullptr);
void Pow(const VectorBase<Real> &v, Real power);
/// Applies natural log to all elements. Throws if any element of
/// the vector is negative (but doesn't complain about zero; the
/// log will be -infinity).
void ApplyLog();
/// Apply natural log to another vector and put result in *this.
void ApplyLogAndCopy(const VectorBase<Real> &v);
/// Apply exponential to each value in vector.
void ApplyExp();
/// Take absolute value of each of the elements
void ApplyAbs();
/// Applies floor to all elements. Returns number of elements
/// floored in floored_count if it is non-null.
inline void ApplyFloor(Real floor_val, MatrixIndexT *floored_count = nullptr) {
this->Floor(*this, floor_val, floored_count);
};
/// Applies ceiling to all elements. Returns number of elements
/// changed in ceiled_count if it is non-null.
inline void ApplyCeiling(Real ceil_val, MatrixIndexT *ceiled_count = nullptr) {
this->Ceiling(*this, ceil_val, ceiled_count);
};
/// Applies floor to all elements. Returns number of elements floored.
MatrixIndexT ApplyFloor(const VectorBase<Real> &floor_vec);
/// Apply soft-max to vector and return normalizer (log sum of exponentials).
/// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$
Real ApplySoftMax();
/// Applies log soft-max to vector and returns normalizer (log sum of
/// exponentials).
/// This is the same as: \f$ x(i) = x(i) - log(\sum_i exp(x(i))) \f$
Real ApplyLogSoftMax();
/// Sets each element of *this to the tanh of the corresponding element of "src".
void Tanh(const VectorBase<Real> &src);
/// Sets each element of *this to the sigmoid function of the corresponding
/// element of "src".
void Sigmoid(const VectorBase<Real> &src);
/// Take all elements of vector to a power.
inline void ApplyPow(Real power) {
this->Pow(*this, power);
};
/// Take the absolute value of all elements of a vector to a power.
/// Include the sign of the input element if include_sign == true.
/// If power is negative and the input value is zero, the output is set to zero.
void ApplyPowAbs(Real power, bool include_sign=false);
/// Compute the p-th norm of the vector.
Real Norm(Real p) const;
/// Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0).
bool ApproxEqual(const VectorBase<Real> &other, float tol = 0.01) const;
/// Invert all elements.
void InvertElements();
/// Add vector : *this = *this + alpha * rv (with casting between floats and
/// doubles)
template<typename OtherReal>
void AddVec(const Real alpha, const VectorBase<OtherReal> &v);
/// Add vector : *this = *this + alpha * rv^2 [element-wise squaring].
void AddVec2(const Real alpha, const VectorBase<Real> &v);
/// Add vector : *this = *this + alpha * rv^2 [element-wise squaring],
/// with casting between floats and doubles.
template<typename OtherReal>
void AddVec2(const Real alpha, const VectorBase<OtherReal> &v);
/// Add matrix times vector : this <-- beta*this + alpha*M*v.
/// Calls BLAS GEMV.
void AddMatVec(const Real alpha, const MatrixBase<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
/// This is as AddMatVec, except optimized for where v contains a lot
/// of zeros.
void AddMatSvec(const Real alpha, const MatrixBase<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
/// Add symmetric positive definite matrix times vector:
/// this <-- beta*this + alpha*M*v. Calls BLAS SPMV.
void AddSpVec(const Real alpha, const SpMatrix<Real> &M,
const VectorBase<Real> &v, const Real beta); // **beta previously defaulted to 0.0**
/// Add triangular matrix times vector: this <-- beta*this + alpha*M*v.
/// Works even if rv == *this.
void AddTpVec(const Real alpha, const TpMatrix<Real> &M,
const MatrixTransposeType trans, const VectorBase<Real> &v,
const Real beta); // **beta previously defaulted to 0.0**
/// Set each element to y = (x == orig ? changed : x).
void ReplaceValue(Real orig, Real changed);
/// Multiply element-by-element by another vector.
void MulElements(const VectorBase<Real> &v);
/// Multiply element-by-element by another vector of different type.
template<typename OtherReal>
void MulElements(const VectorBase<OtherReal> &v);
/// Divide element-by-element by a vector.
void DivElements(const VectorBase<Real> &v);
/// Divide element-by-element by a vector of different type.
template<typename OtherReal>
void DivElements(const VectorBase<OtherReal> &v);
/// Add a constant to each element of a vector.
void Add(Real c);
/// Add element-by-element product of vectors:
// this <-- alpha * v .* r + beta * this.
void AddVecVec(Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &r, Real beta);
/// Add element-by-element quotient of two vectors.
/// this <---- alpha*v/r + beta*this
void AddVecDivVec(Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &r, Real beta);
/// Multiplies all elements by this constant.
void Scale(Real alpha);
/// Multiplies this vector by lower-triangular matrix: *this <-- *this *M
void MulTp(const TpMatrix<Real> &M, const MatrixTransposeType trans);
/// If trans == kNoTrans, solves M x = b, where b is the value of *this at input
/// and x is the value of *this at output.
/// If trans == kTrans, solves M' x = b.
/// Does not test for M being singular or near-singular, so test it before
/// calling this routine.
void Solve(const TpMatrix<Real> &M, const MatrixTransposeType trans);
/// Performs a row stack of the matrix M
void CopyRowsFromMat(const MatrixBase<Real> &M);
template<typename OtherReal>
void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
/// The following is implemented in ../cudamatrix/cu-matrix.cc
void CopyRowsFromMat(const CuMatrixBase<Real> &M);
/// Performs a column stack of the matrix M
void CopyColsFromMat(const MatrixBase<Real> &M);
...@@ -290,85 +116,19 @@ class VectorBase {
template<typename OtherReal>
void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
/// Extracts a row of the symmetric matrix S.
template<typename OtherReal>
void CopyRowFromSp(const SpMatrix<OtherReal> &S, MatrixIndexT row);
/// Extracts a column of the matrix M.
template<typename OtherReal>
void CopyColFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT col);
/// Extracts the diagonal of the matrix M.
void CopyDiagFromMat(const MatrixBase<Real> &M);
/// Extracts the diagonal of a packed matrix M; works for Sp or Tp.
void CopyDiagFromPacked(const PackedMatrix<Real> &M);
/// Extracts the diagonal of a symmetric matrix.
inline void CopyDiagFromSp(const SpMatrix<Real> &M) { CopyDiagFromPacked(M); }
/// Extracts the diagonal of a triangular matrix.
inline void CopyDiagFromTp(const TpMatrix<Real> &M) { CopyDiagFromPacked(M); }
/// Returns the maximum value of any element, or -infinity for the empty vector.
Real Max() const;
/// Returns the maximum value of any element, and the associated index.
/// Error if vector is empty.
Real Max(MatrixIndexT *index) const;
/// Returns the minimum value of any element, or +infinity for the empty vector.
Real Min() const;
/// Returns the minimum value of any element, and the associated index.
/// Error if vector is empty.
Real Min(MatrixIndexT *index) const;
/// Returns sum of the elements
Real Sum() const;
/// Returns sum of the logs of the elements. More efficient than
/// just taking log of each. Will return NaN if any elements are
/// negative.
Real SumLog() const;
/// Does *this = alpha * (sum of rows of M) + beta * *this.
void AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
/// Does *this = alpha * (sum of columns of M) + beta * *this.
void AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
/// Add the diagonal of a matrix times itself:
/// *this = diag(M M^T) + beta * *this (if trans == kNoTrans), or
/// *this = diag(M^T M) + beta * *this (if trans == kTrans).
void AddDiagMat2(Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType trans = kNoTrans, Real beta = 1.0);
/// Add the diagonal of a matrix product: *this = diag(M N), assuming the
/// "trans" arguments are both kNoTrans; for transpose arguments, it behaves
/// as you would expect.
void AddDiagMatMat(Real alpha, const MatrixBase<Real> &M, MatrixTransposeType transM,
const MatrixBase<Real> &N, MatrixTransposeType transN,
Real beta = 1.0);
/// Returns log(sum(exp())) without exp overflow
/// If prune > 0.0, ignores terms less than the max - prune.
/// [Note: in future, if prune = 0.0, it will take the max.
/// For now, use -1 if you don't want it to prune.]
Real LogSumExp(Real prune = -1.0) const;
/// Reads from C++ stream.
/// Throws exception on failure
void Read(std::istream &in, bool binary);
/// Writes to C++ stream (option to write in binary).
void Write(std::ostream &Out, bool binary) const;
friend class VectorBase<double>;
friend class VectorBase<float>;
friend class CuVectorBase<Real>;
friend class CuVector<Real>;
protected:
/// Destructor; does not deallocate memory, this is handled by child classes.
/// This destructor is protected so this object can only be
...@@ -380,17 +140,6 @@ class VectorBase {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
// Took this out since it is not currently used, and it is possible to create
// objects where the allocated memory is not the same size as dim_ : Arnab
// /// Initializer from a pointer and a size; keeps the pointer internally
// /// (ownership or non-ownership depends on the child class).
// explicit VectorBase(Real* data, MatrixIndexT dim)
// : data_(data), dim_(dim) {}
// Arnab : made this protected since it is unsafe too.
/// Load data into the vector: sz must match own size.
void CopyFromPtr(const Real* Data, MatrixIndexT sz);
/// data memory area
Real* data_;
/// dimension of vector
...@@ -416,8 +165,8 @@ class Vector: public VectorBase<Real> {
/// Copy constructor from CUDA vector
/// This is defined in ../cudamatrix/cu-vector.h
//template<typename OtherReal>
//explicit Vector(const CuVectorBase<OtherReal> &cu);
/// Copy constructor. The need for this is controversial.
Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit)
...@@ -455,7 +204,7 @@ class Vector: public VectorBase<Real> {
/// Read function using C++ streams.
void Read(std::istream &in, bool binary);
/// Set vector to a specified size (can be zero).
/// The value of the new data depends on resize_type:
...@@ -516,10 +265,10 @@ class SubVector : public VectorBase<Real> {
/// This constructor initializes the vector to point at the contents
/// of this packed matrix (SpMatrix or TpMatrix).
// SubVector(const PackedMatrix<Real> &M) {
//VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
//VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2;
//}
/// Copy constructor
SubVector(const SubVector &other) : VectorBase<Real> () {
...@@ -572,34 +321,18 @@ std::istream & operator >> (std::istream & in, Vector<Real> & v);
/// @{
//template<typename Real>
//bool ApproxEqual(const VectorBase<Real> &a,
//const VectorBase<Real> &b, Real tol = 0.01) {
//return a.ApproxEqual(b, tol);
//}
//template<typename Real>
//inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
//float tol = 0.01) {
//KALDI_ASSERT(a.ApproxEqual(b, tol));
//}
/// Returns dot product between v1 and v2.
template<typename Real>
Real VecVec(const VectorBase<Real> &v1, const VectorBase<Real> &v2);
template<typename Real, typename OtherReal>
Real VecVec(const VectorBase<Real> &v1, const VectorBase<OtherReal> &v2);
/// Returns \f$ v_1^T M v_2 \f$ .
/// Not as efficient as it could be where v1 == v2.
template<typename Real>
Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M,
const VectorBase<Real> &v2);
/// @} End of "addtogroup matrix_funcs_scalar"
} // namespace kaldi
......
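Since beta no longer defaults to 0.0 in AddMatVec/AddMatSvec/AddSpVec/AddTpVec (see the "beta previously defaulted to 0.0" annotations above), every caller now has to spell it out. A minimal usage sketch, not part of this diff, assuming the refactored matrix/ headers:

#include "matrix/kaldi-matrix.h"
#include "matrix/kaldi-vector.h"

void AddMatVecExample() {
  kaldi::Matrix<float> M(3, 4);
  M.Set(1.0);                    // fill with ones
  kaldi::Vector<float> v(4), y(3);
  v.Set(2.0);
  // y = 0.0 * y + 1.0 * M * v; beta must now be written out by the caller.
  y.AddMatVec(1.0, M, kaldi::kNoTrans, v, 0.0);
}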
...@@ -59,26 +59,7 @@ template<typename Real> class SubVector;
template<typename Real> class MatrixBase;
template<typename Real> class SubMatrix;
template<typename Real> class Matrix;
template<typename Real> class SpMatrix;
template<typename Real> class TpMatrix;
template<typename Real> class PackedMatrix;
template<typename Real> class SparseMatrix;
// these are classes that won't be defined in this
// directory; they're mostly needed for friend declarations.
template<typename Real> class CuMatrixBase;
template<typename Real> class CuSubMatrix;
template<typename Real> class CuMatrix;
template<typename Real> class CuVectorBase;
template<typename Real> class CuSubVector;
template<typename Real> class CuVector;
template<typename Real> class CuPackedMatrix;
template<typename Real> class CuSpMatrix;
template<typename Real> class CuTpMatrix;
template<typename Real> class CuSparseMatrix;
class CompressedMatrix;
class GeneralMatrix;
/// This class provides a way for switching between double and float types.
template<typename T> class OtherReal { }; // useful in reading+writing routines
......
...@@ -5,8 +5,6 @@ ${CMAKE_CURRENT_SOURCE_DIR}
add_subdirectory(base)
add_subdirectory(util)
add_subdirectory(feat)
add_subdirectory(matrix)
add_subdirectory(lat)
add_subdirectory(fstext)
add_subdirectory(decoder)
......
add_library(kaldi-mfcc
feature-mfcc.cc
)
target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)
add_library(kaldi-fbank
feature-fbank.cc
)
target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)
add_library(kaldi-feat-common
wave-reader.cc
signal.cc
feature-functions.cc
feature-window.cc
resample.cc
mel-computations.cc
cmvn.cc
)
target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util)
// transform/cmvn.cc
// Copyright 2009-2013 Microsoft Corporation
// Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/cmvn.h"
namespace kaldi {
void InitCmvnStats(int32 dim, Matrix<double> *stats) {
KALDI_ASSERT(dim > 0);
stats->Resize(2, dim+1);
}
void AccCmvnStats(const VectorBase<BaseFloat> &feats, BaseFloat weight, MatrixBase<double> *stats) {
int32 dim = feats.Dim();
KALDI_ASSERT(stats != NULL);
KALDI_ASSERT(stats->NumRows() == 2 && stats->NumCols() == dim + 1);
// Remove these __restrict__ modifiers if they cause compilation problems.
// It's just an optimization.
double *__restrict__ mean_ptr = stats->RowData(0),
*__restrict__ var_ptr = stats->RowData(1),
*__restrict__ count_ptr = mean_ptr + dim;
const BaseFloat * __restrict__ feats_ptr = feats.Data();
*count_ptr += weight;
// Careful-- if we change the format of the matrix, the "mean_ptr < count_ptr"
// statement below might become wrong.
for (; mean_ptr < count_ptr; mean_ptr++, var_ptr++, feats_ptr++) {
*mean_ptr += *feats_ptr * weight;
*var_ptr += *feats_ptr * *feats_ptr * weight;
}
}
void AccCmvnStats(const MatrixBase<BaseFloat> &feats,
const VectorBase<BaseFloat> *weights,
MatrixBase<double> *stats) {
int32 num_frames = feats.NumRows();
if (weights != NULL) {
KALDI_ASSERT(weights->Dim() == num_frames);
}
for (int32 i = 0; i < num_frames; i++) {
SubVector<BaseFloat> this_frame = feats.Row(i);
BaseFloat weight = (weights == NULL ? 1.0 : (*weights)(i));
if (weight != 0.0)
AccCmvnStats(this_frame, weight, stats);
}
}
void ApplyCmvn(const MatrixBase<double> &stats,
bool var_norm,
MatrixBase<BaseFloat> *feats) {
KALDI_ASSERT(feats != NULL);
int32 dim = stats.NumCols() - 1;
if (stats.NumRows() > 2 || stats.NumRows() < 1 || feats->NumCols() != dim) {
KALDI_ERR << "Dim mismatch: cmvn "
<< stats.NumRows() << 'x' << stats.NumCols()
<< ", feats " << feats->NumRows() << 'x' << feats->NumCols();
}
if (stats.NumRows() == 1 && var_norm)
KALDI_ERR << "You requested variance normalization but no variance stats "
<< "are supplied.";
double count = stats(0, dim);
// Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
// computing an offset and representing it as stats, we use a count of one.
if (count < 1.0)
KALDI_ERR << "Insufficient stats for cepstral mean and variance normalization: "
<< "count = " << count;
if (!var_norm) {
Vector<BaseFloat> offset(dim);
SubVector<double> mean_stats(stats.RowData(0), dim);
offset.AddVec(-1.0 / count, mean_stats);
feats->AddVecToRows(1.0, offset);
return;
}
// norm(0, d) = mean offset;
// norm(1, d) = scale, e.g. x(d) <-- x(d)*norm(1, d) + norm(0, d).
Matrix<BaseFloat> norm(2, dim);
for (int32 d = 0; d < dim; d++) {
double mean, offset, scale;
mean = stats(0, d)/count;
double var = (stats(1, d)/count) - mean*mean,
floor = 1.0e-20;
if (var < floor) {
KALDI_WARN << "Flooring cepstral variance from " << var << " to "
<< floor;
var = floor;
}
scale = 1.0 / sqrt(var);
if (scale != scale || 1/scale == 0.0)
KALDI_ERR << "NaN or infinity in cepstral mean/variance computation";
offset = -(mean*scale);
norm(0, d) = offset;
norm(1, d) = scale;
}
// Apply the normalization.
feats->MulColsVec(norm.Row(1));
feats->AddVecToRows(1.0, norm.Row(0));
}
void ApplyCmvnReverse(const MatrixBase<double> &stats,
bool var_norm,
MatrixBase<BaseFloat> *feats) {
KALDI_ASSERT(feats != NULL);
int32 dim = stats.NumCols() - 1;
if (stats.NumRows() > 2 || stats.NumRows() < 1 || feats->NumCols() != dim) {
KALDI_ERR << "Dim mismatch: cmvn "
<< stats.NumRows() << 'x' << stats.NumCols()
<< ", feats " << feats->NumRows() << 'x' << feats->NumCols();
}
if (stats.NumRows() == 1 && var_norm)
KALDI_ERR << "You requested variance normalization but no variance stats "
<< "are supplied.";
double count = stats(0, dim);
// Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
// computing an offset and representing it as stats, we use a count of one.
if (count < 1.0)
KALDI_ERR << "Insufficient stats for cepstral mean and variance normalization: "
<< "count = " << count;
Matrix<BaseFloat> norm(2, dim); // norm(0, d) = mean offset
// norm(1, d) = scale, e.g. x(d) <-- x(d)*norm(1, d) + norm(0, d).
for (int32 d = 0; d < dim; d++) {
double mean, offset, scale;
mean = stats(0, d) / count;
if (!var_norm) {
scale = 1.0;
offset = mean;
} else {
double var = (stats(1, d)/count) - mean*mean,
floor = 1.0e-20;
if (var < floor) {
KALDI_WARN << "Flooring cepstral variance from " << var << " to "
<< floor;
var = floor;
}
// we aim to transform zero-mean, unit-variance input into data
// with the given mean and variance.
scale = sqrt(var);
offset = mean;
}
norm(0, d) = offset;
norm(1, d) = scale;
}
if (var_norm)
feats->MulColsVec(norm.Row(1));
feats->AddVecToRows(1.0, norm.Row(0));
}
void FakeStatsForSomeDims(const std::vector<int32> &dims,
MatrixBase<double> *stats) {
KALDI_ASSERT(stats->NumRows() == 2 && stats->NumCols() > 1);
int32 dim = stats->NumCols() - 1;
double count = (*stats)(0, dim);
for (size_t i = 0; i < dims.size(); i++) {
int32 d = dims[i];
KALDI_ASSERT(d >= 0 && d < dim);
(*stats)(0, d) = 0.0;
(*stats)(1, d) = count;
}
}
} // namespace kaldi
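For orientation, a minimal end-to-end sketch (a hypothetical helper, not from this diff) of the accumulate-then-apply flow defined above; `feats` is assumed to be a populated num_frames x dim feature matrix:

#include "feat/cmvn.h"

void NormalizeUtterance(kaldi::Matrix<kaldi::BaseFloat> *feats) {
  kaldi::Matrix<double> stats;
  kaldi::InitCmvnStats(feats->NumCols(), &stats);  // 2 x (dim+1), zeroed
  kaldi::AccCmvnStats(*feats, NULL, &stats);       // NULL weights: every frame weight 1.0
  kaldi::ApplyCmvn(stats, false /*var_norm*/, feats);  // mean normalization only
}

Passing NULL for the weights accumulates every frame with weight 1.0, matching the loop in the matrix overload of AccCmvnStats.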
// transform/cmvn.h
// Copyright 2009-2013 Microsoft Corporation
// Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_TRANSFORM_CMVN_H_
#define KALDI_TRANSFORM_CMVN_H_
#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"
namespace kaldi {
/// This function initializes the matrix to dimension 2 by (dim+1);
/// 1st "dim" elements of 1st row are mean stats, 1st "dim" elements
/// of 2nd row are var stats, last element of 1st row is count,
/// last element of 2nd row is zero.
void InitCmvnStats(int32 dim, Matrix<double> *stats);
/// Accumulation from a single frame (weighted).
void AccCmvnStats(const VectorBase<BaseFloat> &feat,
BaseFloat weight,
MatrixBase<double> *stats);
/// Accumulation from a feature file (possibly weighted-- useful in excluding silence).
void AccCmvnStats(const MatrixBase<BaseFloat> &feats,
const VectorBase<BaseFloat> *weights, // or NULL
MatrixBase<double> *stats);
/// Apply cepstral mean and variance normalization to a matrix of features.
/// If norm_vars == true, expects stats to be of dimension 2 by (dim+1), but
/// if norm_vars == false, will accept stats of dimension 1 by (dim+1); these
/// are produced by the balanced-cmvn code when it computes an offset and
/// represents it as "fake stats".
void ApplyCmvn(const MatrixBase<double> &stats,
bool norm_vars,
MatrixBase<BaseFloat> *feats);
/// This is as ApplyCmvn, but does so in the reverse sense, i.e. applies a transform
/// that would take zero-mean, unit-variance input and turn it into output with the
/// stats of "stats". This can be useful if you trained without CMVN but later want
/// to correct a mismatch, so you would first apply CMVN and then do the "reverse"
/// CMVN with the summed stats of your training data.
void ApplyCmvnReverse(const MatrixBase<double> &stats,
bool norm_vars,
MatrixBase<BaseFloat> *feats);
/// Modify the stats so that for some dimensions (specified in "dims"), we
/// replace them with "fake" stats that have zero mean and unit variance; this
/// is done to disable CMVN for those dimensions.
void FakeStatsForSomeDims(const std::vector<int32> &dims,
MatrixBase<double> *stats);
} // namespace kaldi
#endif // KALDI_TRANSFORM_CMVN_H_
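The ApplyCmvnReverse comment above describes correcting a train/test CMVN mismatch; a hedged sketch of that recipe (hypothetical helper, not from the source), assuming valid 2 x (dim+1) stats for both the utterance and the training set:

void MatchTrainingStats(const kaldi::MatrixBase<double> &utt_stats,
                        const kaldi::MatrixBase<double> &train_stats,
                        kaldi::MatrixBase<kaldi::BaseFloat> *feats) {
  kaldi::ApplyCmvn(utt_stats, true, feats);           // to zero mean, unit variance
  kaldi::ApplyCmvnReverse(train_stats, true, feats);  // re-impose training-set stats
}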
// feat/feature-common-inl.h
// Copyright 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_
#define KALDI_FEAT_FEATURE_COMMON_INL_H_
#include "feat/resample.h"
// Do not include this file directly. It is included by feat/feature-common.h
namespace kaldi {
template <class F>
void OfflineFeatureTpl<F>::ComputeFeatures(
const VectorBase<BaseFloat> &wave,
BaseFloat sample_freq,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output) {
KALDI_ASSERT(output != NULL);
BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
if (sample_freq == new_sample_freq) {
Compute(wave, vtln_warp, output);
} else {
if (new_sample_freq < sample_freq &&
! computer_.GetFrameOptions().allow_downsample)
KALDI_ERR << "Waveform and config sample Frequency mismatch: "
<< sample_freq << " vs. " << new_sample_freq
<< " (use --allow-downsample=true to allow "
<< " downsampling the waveform).";
else if (new_sample_freq > sample_freq &&
! computer_.GetFrameOptions().allow_upsample)
KALDI_ERR << "Waveform and config sample Frequency mismatch: "
<< sample_freq << " vs. " << new_sample_freq
<< " (use --allow-upsample=true option to allow "
<< " upsampling the waveform).";
// Resample the waveform.
Vector<BaseFloat> resampled_wave(wave);
ResampleWaveform(sample_freq, wave,
new_sample_freq, &resampled_wave);
Compute(resampled_wave, vtln_warp, output);
}
}
template <class F>
void OfflineFeatureTpl<F>::Compute(
const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output) {
KALDI_ASSERT(output != NULL);
int32 rows_out = NumFrames(wave.Dim(), computer_.GetFrameOptions()),
cols_out = computer_.Dim();
if (rows_out == 0) {
output->Resize(0, 0);
return;
}
output->Resize(rows_out, cols_out);
Vector<BaseFloat> window; // windowed waveform.
bool use_raw_log_energy = computer_.NeedRawLogEnergy();
for (int32 r = 0; r < rows_out; r++) { // r is frame index.
BaseFloat raw_log_energy = 0.0;
ExtractWindow(0, wave, r, computer_.GetFrameOptions(),
feature_window_function_, &window,
(use_raw_log_energy ? &raw_log_energy : NULL));
SubVector<BaseFloat> output_row(*output, r);
computer_.Compute(raw_log_energy, vtln_warp, &window, &output_row);
}
}
template <class F>
void OfflineFeatureTpl<F>::Compute(
const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output) const {
OfflineFeatureTpl<F> temp(*this);
// call the non-const version of Compute() on a temporary copy of this object.
// This is a workaround for const-ness that may sometimes be useful in
// multi-threaded code, although it's not optimally efficient.
temp.Compute(wave, vtln_warp, output);
}
} // end namespace kaldi
#endif
// feat/feature-common.h
// Copyright 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_COMMON_H_
#define KALDI_FEAT_FEATURE_COMMON_H_
#include <map>
#include <string>
#include "feat/feature-window.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// This class is only added for documentation, it is not intended to ever be
/// used.
struct ExampleFeatureComputerOptions {
FrameExtractionOptions frame_opts;
// .. more would go here.
};
/// This class is only added for documentation, it is not intended to ever be
/// used. It documents the interface of the *Computer classes which wrap the
/// low-level feature extraction. The template argument F of OfflineFeatureTpl must
/// follow this interface. This interface is intended for features such as
/// MFCCs and PLPs which can be computed frame by frame.
class ExampleFeatureComputer {
public:
typedef ExampleFeatureComputerOptions Options;
/// Returns a reference to the frame-extraction options class, which
/// will be part of our own options class.
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
/// Returns the feature dimension
int32 Dim() const;
/// Returns true if this function may inspect the raw log-energy of the signal
/// (before windowing and pre-emphasis); it's safe to always return true, but
/// setting it to false enables an optimization.
bool NeedRawLogEnergy() const { return true; }
/// constructor from options class; it should not store a reference or pointer
/// to the options class but should copy it.
explicit ExampleFeatureComputer(const ExampleFeatureComputerOptions &opts):
opts_(opts) { }
/// Copy constructor; all of these classes must have one.
ExampleFeatureComputer(const ExampleFeatureComputer &other);
/**
Function that computes one frame of features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater. Must be
ignored by this function if this class returns false from
this->NeedRawLogEnergy().
@param [in] vtln_warp The VTLN warping factor that the user wants
to be applied when computing features for this utterance. Will
normally be 1.0, meaning no warping is to be done. The value will
be ignored for feature types that don't support VTLN, such as
spectrogram features.
@param [in] signal_frame One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions(). The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
private:
// disallow assignment.
ExampleFeatureComputer &operator = (const ExampleFeatureComputer &in);
Options opts_;
};
/// This templated class is intended for offline feature extraction, i.e. where
/// you have access to the entire signal at the start. It exists mainly to be
/// drop-in replacement for the old (pre-2016) classes Mfcc, Plp and so on, for
/// use in the offline case. In April 2016 we reorganized the online
/// feature-computation code for greater modularity and to have correct support
/// for the snip-edges=false option.
template <class F>
class OfflineFeatureTpl {
public:
typedef typename F::Options Options;
// Note: feature_window_function_ is the windowing function, which initialized
// using the options class, that we cache at this level.
OfflineFeatureTpl(const Options &opts):
computer_(opts),
feature_window_function_(computer_.GetFrameOptions()) { }
// Internal (and back-compatibility) interface for computing features, which
// requires that the user has already checked that the sampling frequency
// of the waveform is equal to the sampling frequency specified in
// the frame-extraction options.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output);
// This const version of Compute() is a wrapper that
// calls the non-const version on a temporary object.
// It's less efficient than the non-const version.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output) const;
/**
Computes the features for one file (one sequence of features).
This is the newer interface where you specify the sample frequency
of the input waveform.
@param [in] wave The input waveform
@param [in] sample_freq The sampling frequency with which
'wave' was sampled.
if sample_freq is higher than the frequency
specified in the config, we will downsample
the waveform, but if lower, it's an error.
@param [in] vtln_warp The VTLN warping factor (will normally
be 1.0)
@param [out] output The matrix of features, where the row-index
is the frame index.
*/
void ComputeFeatures(const VectorBase<BaseFloat> &wave,
BaseFloat sample_freq,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output);
int32 Dim() const { return computer_.Dim(); }
// Copy constructor.
OfflineFeatureTpl(const OfflineFeatureTpl<F> &other):
computer_(other.computer_),
feature_window_function_(other.feature_window_function_) { }
private:
// Disallow assignment.
OfflineFeatureTpl<F> &operator =(const OfflineFeatureTpl<F> &other);
F computer_;
FeatureWindowFunction feature_window_function_;
};
/// @} End of "addtogroup feat"
} // namespace kaldi
#include "feat/feature-common-inl.h"
#endif // KALDI_FEAT_FEATURE_COMMON_H_
// feat/feature-fbank.cc
// Copyright 2009-2012 Karel Vesely
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-fbank.h"
namespace kaldi {
FbankComputer::FbankComputer(const FbankOptions &opts):
opts_(opts), srfft_(NULL) {
if (opts.energy_floor > 0.0)
log_energy_floor_ = Log(opts.energy_floor);
int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two...
srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
// We'll definitely need the filterbanks info for VTLN warping factor 1.0.
// [note: this call caches it.]
GetMelBanks(1.0);
}
FbankComputer::FbankComputer(const FbankComputer &other):
opts_(other.opts_), log_energy_floor_(other.log_energy_floor_),
mel_banks_(other.mel_banks_), srfft_(NULL) {
for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
iter != mel_banks_.end();
++iter)
iter->second = new MelBanks(*(iter->second));
if (other.srfft_)
srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
}
FbankComputer::~FbankComputer() {
for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
iter != mel_banks_.end(); ++iter)
delete iter->second;
delete srfft_;
}
const MelBanks* FbankComputer::GetMelBanks(BaseFloat vtln_warp) {
MelBanks *this_mel_banks = NULL;
std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.find(vtln_warp);
if (iter == mel_banks_.end()) {
this_mel_banks = new MelBanks(opts_.mel_opts,
opts_.frame_opts,
vtln_warp);
mel_banks_[vtln_warp] = this_mel_banks;
} else {
this_mel_banks = iter->second;
}
return this_mel_banks;
}
void FbankComputer::Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature) {
const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
feature->Dim() == this->Dim());
// Compute energy after window function (not the raw one).
if (opts_.use_energy && !opts_.raw_energy)
signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
std::numeric_limits<float>::epsilon()));
if (srfft_ != NULL) // Compute FFT using split-radix algorithm.
srfft_->Compute(signal_frame->Data(), true);
else // An alternative algorithm that works for non-powers-of-two.
RealFft(signal_frame, true);
// Convert the FFT into a power spectrum.
ComputePowerSpectrum(signal_frame);
SubVector<BaseFloat> power_spectrum(*signal_frame, 0,
signal_frame->Dim() / 2 + 1);
// Use magnitude instead of power if requested.
if (!opts_.use_power)
power_spectrum.ApplyPow(0.5);
int32 mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
SubVector<BaseFloat> mel_energies(*feature,
mel_offset,
opts_.mel_opts.num_bins);
// Sum with mel filterbanks over the power spectrum
mel_banks.Compute(power_spectrum, &mel_energies);
if (opts_.use_log_fbank) {
// Avoid log of zero (which should be prevented anyway by dithering).
mel_energies.ApplyFloor(std::numeric_limits<float>::epsilon());
mel_energies.ApplyLog(); // take the log.
}
// Copy energy as first value (or the last, if htk_compat == true).
if (opts_.use_energy) {
if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
signal_raw_log_energy = log_energy_floor_;
}
int32 energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
(*feature)(energy_index) = signal_raw_log_energy;
}
}
} // namespace kaldi
// feat/feature-fbank.h
// Copyright 2009-2012 Karel Vesely
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_FBANK_H_
#define KALDI_FEAT_FEATURE_FBANK_H_
#include <map>
#include <string>
#include "feat/feature-common.h"
#include "feat/feature-functions.h"
#include "feat/feature-window.h"
#include "feat/mel-computations.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// FbankOptions contains basic options for computing filterbank features.
/// It only includes things that can be done in a "stateless" way, i.e.
/// it does not include energy max-normalization.
/// It does not include delta computation.
struct FbankOptions {
FrameExtractionOptions frame_opts;
MelBanksOptions mel_opts;
bool use_energy; // append an extra dimension with energy to the filter banks
BaseFloat energy_floor;
bool raw_energy; // If true, compute energy before preemphasis and windowing
bool htk_compat; // If true, put energy last (if using energy)
bool use_log_fbank; // if true (default), produce log-filterbank, else linear
bool use_power; // if true (default), use power in filterbank analysis, else magnitude.
FbankOptions(): mel_opts(23),
// defaults the #mel-banks to 23 for the FBANK computations.
// this seems to be common for 16khz-sampled data,
// but for 8khz-sampled data, 15 may be better.
use_energy(false),
energy_floor(0.0),
raw_energy(true),
htk_compat(false),
use_log_fbank(true),
use_power(true) {}
void Register(OptionsItf *opts) {
frame_opts.Register(opts);
mel_opts.Register(opts);
opts->Register("use-energy", &use_energy,
"Add an extra dimension with energy to the FBANK output.");
opts->Register("energy-floor", &energy_floor,
"Floor on energy (absolute, not relative) in FBANK computation. "
"Only makes a difference if --use-energy=true; only necessary if "
"--dither=0.0. Suggested values: 0.1 or 1.0");
opts->Register("raw-energy", &raw_energy,
"If true, compute energy before preemphasis and windowing");
opts->Register("htk-compat", &htk_compat, "If true, put energy last. "
"Warning: not sufficient to get HTK compatible features (need "
"to change other parameters).");
opts->Register("use-log-fbank", &use_log_fbank,
"If true, produce log-filterbank, else produce linear.");
opts->Register("use-power", &use_power,
"If true, use power, else use magnitude.");
}
};
/// Class for computing mel-filterbank features; see \ref feat_mfcc for more
/// information.
class FbankComputer {
public:
typedef FbankOptions Options;
explicit FbankComputer(const FbankOptions &opts);
FbankComputer(const FbankComputer &other);
int32 Dim() const {
return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
}
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
/**
Function that computes one frame of features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater. Must be
ignored by this function if this class returns false from
this->NeedRawLogEnergy().
@param [in] vtln_warp The VTLN warping factor that the user wants
to be applied when computing features for this utterance. Will
normally be 1.0, meaning no warping is to be done. The value will
be ignored for feature types that don't support VTLN, such as
spectrogram features.
@param [in] signal_frame One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions(). The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
~FbankComputer();
const MelBanks *GetMelBanks(BaseFloat vtln_warp);
private:
FbankOptions opts_;
BaseFloat log_energy_floor_;
std::map<BaseFloat, MelBanks*> mel_banks_; // BaseFloat is VTLN coefficient.
SplitRadixRealFft<BaseFloat> *srfft_;
// Disallow assignment.
FbankComputer &operator =(const FbankComputer &other);
};
typedef OfflineFeatureTpl<FbankComputer> Fbank;
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_FBANK_H_
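A minimal usage sketch of the Fbank typedef above (hypothetical helper, not from this diff); the wave is assumed to be mono and 16 kHz, and the 80-bin setting is an illustrative choice rather than the 23-bin default:

#include "feat/feature-fbank.h"

kaldi::Matrix<kaldi::BaseFloat> ComputeFbankFeats(
    const kaldi::VectorBase<kaldi::BaseFloat> &wave) {
  kaldi::FbankOptions opts;
  opts.frame_opts.samp_freq = 16000;  // must match (or be resampled to) the wave
  opts.mel_opts.num_bins = 80;        // illustrative; default is 23
  kaldi::Fbank fbank(opts);
  kaldi::Matrix<kaldi::BaseFloat> feats;
  fbank.ComputeFeatures(wave, 16000.0, 1.0 /*vtln_warp*/, &feats);
  return feats;  // one row of log-mel energies per frame
}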
// feat/feature-functions.cc
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Microsoft Corporation
// 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-functions.h"
#include "matrix/matrix-functions.h"
namespace kaldi {
void ComputePowerSpectrum(VectorBase<BaseFloat> *waveform) {
int32 dim = waveform->Dim();
// no, letting it be non-power-of-two for now.
// KALDI_ASSERT(dim > 0 && (dim & (dim-1) == 0)); // make sure a power of two.. actually my FFT code
// does not require this (dan) but this is better in case we use different code [dan].
// RealFft(waveform, true); // true == forward (not inverse) FFT; makes no difference here,
// as we just want power spectrum.
// now we have in waveform, first half of complex spectrum
// it's stored as [real0, realN/2, real1, im1, real2, im2, ...]
int32 half_dim = dim/2;
BaseFloat first_energy = (*waveform)(0) * (*waveform)(0),
last_energy = (*waveform)(1) * (*waveform)(1); // handle this special case
for (int32 i = 1; i < half_dim; i++) {
BaseFloat real = (*waveform)(i*2), im = (*waveform)(i*2 + 1);
(*waveform)(i) = real*real + im*im;
}
(*waveform)(0) = first_energy;
(*waveform)(half_dim) = last_energy; // Will actually never be used, and anyway
// if the signal has been bandlimited sensibly this should be zero.
}
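The packed layout described in the comments above ([real0, realN/2, real1, im1, real2, im2, ...]) can be read back per bin; a small sketch (hypothetical helper, not from the source):

kaldi::BaseFloat PackedBinEnergy(const kaldi::VectorBase<kaldi::BaseFloat> &fft,
                                 kaldi::int32 k) {
  kaldi::int32 n = fft.Dim();                // the (even) FFT size
  if (k == 0) return fft(0) * fft(0);        // DC term, purely real, slot 0
  if (k == n / 2) return fft(1) * fft(1);    // Nyquist term, purely real, slot 1
  kaldi::BaseFloat re = fft(2 * k), im = fft(2 * k + 1);
  return re * re + im * im;                  // interior bins: (real, imag) pair
}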
DeltaFeatures::DeltaFeatures(const DeltaFeaturesOptions &opts): opts_(opts) {
KALDI_ASSERT(opts.order >= 0 && opts.order < 1000); // just make sure we don't get binary junk.
// opts will normally be 2 or 3.
KALDI_ASSERT(opts.window > 0 && opts.window < 1000); // again, basic sanity check.
// normally the window size will be two.
scales_.resize(opts.order+1);
scales_[0].Resize(1);
scales_[0](0) = 1.0; // trivial window for 0th order delta [i.e. baseline feats]
for (int32 i = 1; i <= opts.order; i++) {
Vector<BaseFloat> &prev_scales = scales_[i-1],
&cur_scales = scales_[i];
int32 window = opts.window; // this code is designed to still
// work if instead we later make it an array and do opts.window[i-1],
// or something like that. "window" is a parameter specifying delta-window
// width which is actually 2*window + 1.
KALDI_ASSERT(window != 0);
int32 prev_offset = (static_cast<int32>(prev_scales.Dim()-1))/2,
cur_offset = prev_offset + window;
cur_scales.Resize(prev_scales.Dim() + 2*window); // also zeros it.
BaseFloat normalizer = 0.0;
for (int32 j = -window; j <= window; j++) {
normalizer += j*j;
for (int32 k = -prev_offset; k <= prev_offset; k++) {
cur_scales(j+k+cur_offset) +=
static_cast<BaseFloat>(j) * prev_scales(k+prev_offset);
}
}
cur_scales.Scale(1.0 / normalizer);
}
}
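Concretely, with the default DeltaFeaturesOptions (order = 2, window = 2), the normalizer is the sum of j^2 for j in [-2, 2], i.e. 10, so the first-order window works out to [-0.2, -0.1, 0, 0.1, 0.2]; the second-order window is that 5-tap filter convolved with [-2, -1, 0, 1, 2] and scaled by 1/10 again, yielding a 9-tap delta-delta filter.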
void DeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
VectorBase<BaseFloat> *output_frame) const {
KALDI_ASSERT(frame < input_feats.NumRows());
int32 num_frames = input_feats.NumRows(),
feat_dim = input_feats.NumCols();
KALDI_ASSERT(static_cast<int32>(output_frame->Dim()) == feat_dim * (opts_.order+1));
output_frame->SetZero();
for (int32 i = 0; i <= opts_.order; i++) {
const Vector<BaseFloat> &scales = scales_[i];
int32 max_offset = (scales.Dim() - 1) / 2;
SubVector<BaseFloat> output(*output_frame, i*feat_dim, feat_dim);
for (int32 j = -max_offset; j <= max_offset; j++) {
// if asked to read a frame outside the matrix, replicate the first/last frame.
int32 offset_frame = frame + j;
if (offset_frame < 0) offset_frame = 0;
else if (offset_frame >= num_frames)
offset_frame = num_frames - 1;
BaseFloat scale = scales(j + max_offset);
if (scale != 0.0)
output.AddVec(scale, input_feats.Row(offset_frame));
}
}
}
ShiftedDeltaFeatures::ShiftedDeltaFeatures(
const ShiftedDeltaFeaturesOptions &opts): opts_(opts) {
KALDI_ASSERT(opts.window > 0 && opts.window < 1000);
// Default window is 1.
int32 window = opts.window;
KALDI_ASSERT(window != 0);
scales_.Resize(1 + 2*window); // also zeros it.
BaseFloat normalizer = 0.0;
for (int32 j = -window; j <= window; j++) {
normalizer += j*j;
scales_(j + window) += static_cast<BaseFloat>(j);
}
scales_.Scale(1.0 / normalizer);
}
void ShiftedDeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
SubVector<BaseFloat> *output_frame) const {
KALDI_ASSERT(frame < input_feats.NumRows());
int32 num_frames = input_feats.NumRows(),
feat_dim = input_feats.NumCols();
KALDI_ASSERT(static_cast<int32>(output_frame->Dim())
== feat_dim * (opts_.num_blocks + 1));
output_frame->SetZero();
// The original features
SubVector<BaseFloat> output(*output_frame, 0, feat_dim);
output.AddVec(1.0, input_feats.Row(frame));
// Concatenate the delta-blocks. Each block is block_shift
// (usually 3) frames apart.
for (int32 i = 0; i < opts_.num_blocks; i++) {
int32 max_offset = (scales_.Dim() - 1) / 2;
SubVector<BaseFloat> output(*output_frame, (i + 1) * feat_dim, feat_dim);
for (int32 j = -max_offset; j <= max_offset; j++) {
int32 offset_frame = frame + j + i * opts_.block_shift;
if (offset_frame < 0) offset_frame = 0;
else if (offset_frame >= num_frames)
offset_frame = num_frames - 1;
BaseFloat scale = scales_(j + max_offset);
if (scale != 0.0)
output.AddVec(scale, input_feats.Row(offset_frame));
}
}
}
void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features) {
output_features->Resize(input_features.NumRows(),
input_features.NumCols()
*(delta_opts.order + 1));
DeltaFeatures delta(delta_opts);
for (int32 r = 0; r < static_cast<int32>(input_features.NumRows()); r++) {
SubVector<BaseFloat> row(*output_features, r);
delta.Process(input_features, r, &row);
}
}
void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts,
const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features) {
output_features->Resize(input_features.NumRows(),
input_features.NumCols()
* (delta_opts.num_blocks + 1));
ShiftedDeltaFeatures delta(delta_opts);
for (int32 r = 0; r < static_cast<int32>(input_features.NumRows()); r++) {
SubVector<BaseFloat> row(*output_features, r);
delta.Process(input_features, r, &row);
}
}
void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out) {
BaseFloat angle = M_PI / static_cast<BaseFloat>(dimension - 1);
BaseFloat scale = 1.0f / (2.0 * static_cast<BaseFloat>(dimension - 1));
mat_out->Resize(n_bases, dimension);
for (int32 i = 0; i < n_bases; i++) {
(*mat_out)(i, 0) = 1.0 * scale;
BaseFloat i_fl = static_cast<BaseFloat>(i);
for (int32 j = 1; j < dimension - 1; j++) {
BaseFloat j_fl = static_cast<BaseFloat>(j);
(*mat_out)(i, j) = 2.0 * scale * cos(angle * i_fl * j_fl);
}
(*mat_out)(i, dimension -1)
= scale * cos(angle * i_fl * static_cast<BaseFloat>(dimension-1));
}
}
void SpliceFrames(const MatrixBase<BaseFloat> &input_features,
int32 left_context,
int32 right_context,
Matrix<BaseFloat> *output_features) {
int32 T = input_features.NumRows(), D = input_features.NumCols();
if (T == 0 || D == 0)
KALDI_ERR << "SpliceFrames: empty input";
KALDI_ASSERT(left_context >= 0 && right_context >= 0);
int32 N = 1 + left_context + right_context;
output_features->Resize(T, D*N);
for (int32 t = 0; t < T; t++) {
SubVector<BaseFloat> dst_row(*output_features, t);
for (int32 j = 0; j < N; j++) {
int32 t2 = t + j - left_context;
if (t2 < 0) t2 = 0;
if (t2 >= T) t2 = T-1;
SubVector<BaseFloat> dst(dst_row, j*D, D),
src(input_features, t2);
dst.CopyFromVec(src);
}
}
}
void ReverseFrames(const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features) {
int32 T = input_features.NumRows(), D = input_features.NumCols();
if (T == 0 || D == 0)
KALDI_ERR << "ReverseFrames: empty input";
output_features->Resize(T, D);
for (int32 t = 0; t < T; t++) {
SubVector<BaseFloat> dst_row(*output_features, t);
SubVector<BaseFloat> src_row(input_features, T-1-t);
dst_row.CopyFromVec(src_row);
}
}
void SlidingWindowCmnOptions::Check() const {
KALDI_ASSERT(cmn_window > 0);
if (center)
KALDI_ASSERT(min_window > 0 && min_window <= cmn_window);
// else ignored so value doesn't matter.
}
// Internal version of SlidingWindowCmn with double-precision arguments.
void SlidingWindowCmnInternal(const SlidingWindowCmnOptions &opts,
const MatrixBase<double> &input,
MatrixBase<double> *output) {
opts.Check();
int32 num_frames = input.NumRows(), dim = input.NumCols(),
last_window_start = -1, last_window_end = -1,
warning_count = 0;
Vector<double> cur_sum(dim), cur_sumsq(dim);
for (int32 t = 0; t < num_frames; t++) {
int32 window_start, window_end; // note: window_end will be one
// past the end of the window we use for normalization.
if (opts.center) {
window_start = t - (opts.cmn_window / 2);
window_end = window_start + opts.cmn_window;
} else {
window_start = t - opts.cmn_window;
window_end = t + 1;
}
if (window_start < 0) { // shift window right if starts <0.
window_end -= window_start;
window_start = 0; // or: window_start -= window_start
}
if (!opts.center) {
if (window_end > t)
window_end = std::max(t + 1, opts.min_window);
}
if (window_end > num_frames) {
window_start -= (window_end - num_frames);
window_end = num_frames;
if (window_start < 0) window_start = 0;
}
if (last_window_start == -1) {
SubMatrix<double> input_part(input,
window_start, window_end - window_start,
0, dim);
cur_sum.AddRowSumMat(1.0, input_part, 0.0);
if (opts.normalize_variance)
cur_sumsq.AddDiagMat2(1.0, input_part, kTrans, 0.0);
} else {
if (window_start > last_window_start) {
KALDI_ASSERT(window_start == last_window_start + 1);
SubVector<double> frame_to_remove(input, last_window_start);
cur_sum.AddVec(-1.0, frame_to_remove);
if (opts.normalize_variance)
cur_sumsq.AddVec2(-1.0, frame_to_remove);
}
if (window_end > last_window_end) {
KALDI_ASSERT(window_end == last_window_end + 1);
SubVector<double> frame_to_add(input, last_window_end);
cur_sum.AddVec(1.0, frame_to_add);
if (opts.normalize_variance)
cur_sumsq.AddVec2(1.0, frame_to_add);
}
}
int32 window_frames = window_end - window_start;
last_window_start = window_start;
last_window_end = window_end;
KALDI_ASSERT(window_frames > 0);
SubVector<double> input_frame(input, t),
output_frame(*output, t);
output_frame.CopyFromVec(input_frame);
output_frame.AddVec(-1.0 / window_frames, cur_sum);
if (opts.normalize_variance) {
if (window_frames == 1) {
output_frame.Set(0.0);
} else {
Vector<double> variance(cur_sumsq);
variance.Scale(1.0 / window_frames);
variance.AddVec2(-1.0 / (window_frames * window_frames), cur_sum);
// now "variance" is the variance of the features in the window,
// around their own mean.
int32 num_floored;
variance.ApplyFloor(1.0e-10, &num_floored);
if (num_floored > 0 && num_frames > 1) {
if (opts.max_warnings == warning_count) {
KALDI_WARN << "Suppressing the remaining variance flooring "
<< "warnings. Run program with --max-warnings=-1 to "
<< "see all warnings.";
}
// If opts.max_warnings is a negative number, we won't restrict the
// number of times that the warning is printed out.
else if (opts.max_warnings < 0
|| opts.max_warnings > warning_count) {
KALDI_WARN << "Flooring when normalizing variance, floored "
<< num_floored << " elements; num-frames was "
<< window_frames;
}
warning_count++;
}
variance.ApplyPow(-0.5); // get inverse standard deviation.
output_frame.MulElements(variance);
}
}
}
}
void SlidingWindowCmn(const SlidingWindowCmnOptions &opts,
const MatrixBase<BaseFloat> &input,
MatrixBase<BaseFloat> *output) {
KALDI_ASSERT(SameDim(input, *output) && input.NumRows() > 0);
Matrix<double> input_dbl(input), output_dbl(input.NumRows(), input.NumCols());
// call double-precision version
SlidingWindowCmnInternal(opts, input_dbl, &output_dbl);
output->CopyFromMat(output_dbl);
}
} // namespace kaldi
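A minimal sketch (hypothetical helper, not from this diff) wiring SlidingWindowCmn into a frontend; the output matrix must be pre-sized to match the input, since SlidingWindowCmn asserts SameDim:

#include "feat/feature-functions.h"

void ApplySlidingCmn(const kaldi::MatrixBase<kaldi::BaseFloat> &in,
                     kaldi::Matrix<kaldi::BaseFloat> *out) {
  kaldi::SlidingWindowCmnOptions opts;  // defaults: 600-frame window, left-only
  opts.normalize_variance = false;      // mean subtraction only
  out->Resize(in.NumRows(), in.NumCols());
  kaldi::SlidingWindowCmn(opts, in, out);
}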
// feat/feature-functions.h
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Microsoft Corporation
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_FUNCTIONS_H_
#define KALDI_FEAT_FEATURE_FUNCTIONS_H_
#include <string>
#include <vector>
#include "matrix/matrix-lib.h"
#include "util/common-utils.h"
#include "base/kaldi-error.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
// ComputePowerSpectrum converts a complex FFT (as produced by the FFT
// functions in matrix/matrix-functions.h), and converts it into
// a power spectrum. If the complex FFT is a vector of size n (representing
// half the complex FFT of a real signal of size n, as described there),
// this function computes in the first (n/2) + 1 elements of it, the
// energies of the fft bins from zero to the Nyquist frequency. Contents of the
// remaining (n/2) - 1 elements are undefined at output.
void ComputePowerSpectrum(VectorBase<BaseFloat> *complex_fft);
struct DeltaFeaturesOptions {
int32 order;
int32 window; // e.g. 2; controls window size (window size is 2*window + 1)
// the behavior at the edges is to replicate the first or last frame.
// this is not configurable.
DeltaFeaturesOptions(int32 order = 2, int32 window = 2):
order(order), window(window) { }
void Register(OptionsItf *opts) {
opts->Register("delta-order", &order, "Order of delta computation");
opts->Register("delta-window", &window,
"Parameter controlling window for delta computation (actual window"
" size for each delta order is 1 + 2*delta-window-size)");
}
};
class DeltaFeatures {
public:
// This class provides a low-level function to compute delta features.
// The function takes as input a matrix of features and a frame index
// that it should compute the deltas on. It puts its output in an object
// of type VectorBase, of size (original-feature-dimension) * (opts.order+1).
// This is not the most efficient way to do the computation, but it's
// state-free and thus easier to understand
explicit DeltaFeatures(const DeltaFeaturesOptions &opts);
void Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
VectorBase<BaseFloat> *output_frame) const;
private:
DeltaFeaturesOptions opts_;
std::vector<Vector<BaseFloat> > scales_; // a scaling window for each
// of the orders, including zero: multiply the features for each
// dimension by this window.
};
struct ShiftedDeltaFeaturesOptions {
int32 window, // The time delay and advance
num_blocks,
block_shift; // Distance between consecutive blocks
ShiftedDeltaFeaturesOptions():
window(1), num_blocks(7), block_shift(3) { }
void Register(OptionsItf *opts) {
opts->Register("delta-window", &window, "Size of delta advance and delay.");
opts->Register("num-blocks", &num_blocks, "Number of delta blocks in advance"
" of each frame to be concatenated");
opts->Register("block-shift", &block_shift, "Distance between each block");
}
};
class ShiftedDeltaFeatures {
public:
// This class provides a low-level function to compute shifted
// delta cesptra (SDC).
// The function takes as input a matrix of features and a frame index
// that it should compute the deltas on. It puts its output in an object
// of type VectorBase, of size (original-feature-dimension) * (num_blocks + 1).
explicit ShiftedDeltaFeatures(const ShiftedDeltaFeaturesOptions &opts);
void Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
SubVector<BaseFloat> *output_frame) const;
private:
ShiftedDeltaFeaturesOptions opts_;
Vector<BaseFloat> scales_; // the single scaling window shared by all delta blocks.
};
// ComputeDeltas is a convenience function that computes deltas on a feature
// file. If you want to deal with features coming in bit by bit you would have
// to use the DeltaFeatures class directly, and do the computation frame by
// frame. Later we will have to come up with a nice mechanism to do this for
// features coming in.
void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features);
// ComputeShiftedDeltas computes deltas from a feature file by applying
// ShiftedDeltaFeatures over the frames. This function is provided for
// convenience, however, ShiftedDeltaFeatures can be used directly.
void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts,
const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features);
// SpliceFrames will normally be used together with LDA.
// It splices frames together to make a window. At the
// start and end of an utterance, it duplicates the first
// and last frames.
// Will throw if input features are empty.
// left_context and right_context must be nonnegative.
// these both represent a number of frames (e.g. 4, 4 is
// a good choice).
void SpliceFrames(const MatrixBase<BaseFloat> &input_features,
int32 left_context,
int32 right_context,
Matrix<BaseFloat> *output_features);
// ReverseFrames reverses the frames in time (used for backwards decoding)
void ReverseFrames(const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features);
void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out);
// This is used for speaker-id. Also see OnlineCmnOptions in ../online2/, which
// is online CMN with no latency, for online speech recognition.
struct SlidingWindowCmnOptions {
int32 cmn_window;
int32 min_window;
int32 max_warnings;
bool normalize_variance;
bool center;
SlidingWindowCmnOptions():
cmn_window(600),
min_window(100),
max_warnings(5),
normalize_variance(false),
center(false) { }
void Register(OptionsItf *opts) {
opts->Register("cmn-window", &cmn_window, "Window in frames for running "
"average CMN computation");
opts->Register("min-cmn-window", &min_window, "Minimum CMN window "
"used at start of decoding (adds latency only at start). "
"Only applicable if center == false, ignored if center==true");
opts->Register("max-warnings", &max_warnings, "Maximum warnings to report "
"per utterance. 0 to disable, -1 to show all.");
opts->Register("norm-vars", &normalize_variance, "If true, normalize "
"variance to one."); // naming this as in apply-cmvn.cc
opts->Register("center", &center, "If true, use a window centered on the "
"current frame (to the extent possible, modulo end effects). "
"If false, window is to the left.");
}
void Check() const;
};
/// Applies sliding-window cepstral mean and/or variance normalization. See the
/// strings registering the options in the options class for information on how
/// this works and what the options are. input and output must have the same
/// dimension.
void SlidingWindowCmn(const SlidingWindowCmnOptions &opts,
const MatrixBase<BaseFloat> &input,
MatrixBase<BaseFloat> *output);
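// Example (illustrative sketch): sliding-window CMN with the defaults above;
// 'feats' is assumed given, and the output matrix must be pre-sized to match.
//
//   SlidingWindowCmnOptions cmvn_opts;  // cmn_window = 600, center = false
//   cmvn_opts.Check();
//   Matrix<BaseFloat> normalized(feats.NumRows(), feats.NumCols());
//   SlidingWindowCmn(cmvn_opts, feats, &normalized);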
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_FUNCTIONS_H_
// feat/feature-mfcc.cc
// Copyright 2009-2011 Karel Vesely; Petr Motlicek
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-mfcc.h"
namespace kaldi {
void MfccComputer::Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature) {
KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
feature->Dim() == this->Dim());
const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
if (opts_.use_energy && !opts_.raw_energy)
signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
std::numeric_limits<float>::epsilon()));
if (srfft_ != NULL) // Compute FFT using the split-radix algorithm.
srfft_->Compute(signal_frame->Data(), true);
else // An alternative algorithm that works for non-powers-of-two.
RealFft(signal_frame, true);
// Convert the FFT into a power spectrum.
ComputePowerSpectrum(signal_frame);
SubVector<BaseFloat> power_spectrum(*signal_frame, 0,
signal_frame->Dim() / 2 + 1);
mel_banks.Compute(power_spectrum, &mel_energies_);
// avoid log of zero (which should be prevented anyway by dithering).
mel_energies_.ApplyFloor(std::numeric_limits<float>::epsilon());
mel_energies_.ApplyLog(); // take the log.
feature->SetZero(); // in case there were NaNs.
// feature = dct_matrix_ * mel_energies [which now have log]
feature->AddMatVec(1.0, dct_matrix_, kNoTrans, mel_energies_, 0.0);
if (opts_.cepstral_lifter != 0.0)
feature->MulElements(lifter_coeffs_);
if (opts_.use_energy) {
if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
signal_raw_log_energy = log_energy_floor_;
(*feature)(0) = signal_raw_log_energy;
}
if (opts_.htk_compat) {
BaseFloat energy = (*feature)(0);
for (int32 i = 0; i < opts_.num_ceps - 1; i++)
(*feature)(i) = (*feature)(i+1);
if (!opts_.use_energy)
energy *= M_SQRT2; // scale on C0 (actually removing a scale
// we previously added that's part of one common definition of
// the cosine transform.)
(*feature)(opts_.num_ceps - 1) = energy;
}
}
MfccComputer::MfccComputer(const MfccOptions &opts):
opts_(opts), srfft_(NULL),
mel_energies_(opts.mel_opts.num_bins) {
int32 num_bins = opts.mel_opts.num_bins;
if (opts.num_ceps > num_bins)
KALDI_ERR << "num-ceps cannot be larger than num-mel-bins."
<< " It should be smaller or equal. You provided num-ceps: "
<< opts.num_ceps << " and num-mel-bins: "
<< num_bins;
Matrix<BaseFloat> dct_matrix(num_bins, num_bins);
ComputeDctMatrix(&dct_matrix);
// Note that we include zeroth dct in either case. If using the
// energy we replace this with the energy. This means a different
// ordering of features than HTK.
SubMatrix<BaseFloat> dct_rows(dct_matrix, 0, opts.num_ceps, 0, num_bins);
dct_matrix_.Resize(opts.num_ceps, num_bins);
dct_matrix_.CopyFromMat(dct_rows); // subset of rows.
if (opts.cepstral_lifter != 0.0) {
lifter_coeffs_.Resize(opts.num_ceps);
ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_);
}
if (opts.energy_floor > 0.0)
log_energy_floor_ = Log(opts.energy_floor);
int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two...
srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
// We'll definitely need the filterbanks info for VTLN warping factor 1.0.
// [note: this call caches it.]
GetMelBanks(1.0);
}
MfccComputer::MfccComputer(const MfccComputer &other):
opts_(other.opts_), lifter_coeffs_(other.lifter_coeffs_),
dct_matrix_(other.dct_matrix_),
log_energy_floor_(other.log_energy_floor_),
mel_banks_(other.mel_banks_),
srfft_(NULL),
mel_energies_(other.mel_energies_.Dim(), kUndefined) {
for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
iter != mel_banks_.end(); ++iter)
iter->second = new MelBanks(*(iter->second));
if (other.srfft_ != NULL)
srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
}
MfccComputer::~MfccComputer() {
for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
iter != mel_banks_.end();
++iter)
delete iter->second;
delete srfft_;
}
const MelBanks *MfccComputer::GetMelBanks(BaseFloat vtln_warp) {
MelBanks *this_mel_banks = NULL;
std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.find(vtln_warp);
if (iter == mel_banks_.end()) {
this_mel_banks = new MelBanks(opts_.mel_opts,
opts_.frame_opts,
vtln_warp);
mel_banks_[vtln_warp] = this_mel_banks;
} else {
this_mel_banks = iter->second;
}
return this_mel_banks;
}
} // namespace kaldi
// feat/feature-mfcc.h
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Saarland University
// 2014-2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_MFCC_H_
#define KALDI_FEAT_FEATURE_MFCC_H_
#include <map>
#include <string>
#include "feat/feature-common.h"
#include "feat/feature-functions.h"
#include "feat/feature-window.h"
#include "feat/mel-computations.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// MfccOptions contains basic options for computing MFCC features.
struct MfccOptions {
FrameExtractionOptions frame_opts;
MelBanksOptions mel_opts;
int32 num_ceps; // e.g. 13: num cepstral coeffs, counting zero.
bool use_energy; // use energy; else C0
BaseFloat energy_floor; // 0 by default; set to a value like 1.0 or 0.1 if
// you disable dithering.
bool raw_energy; // If true, compute energy before preemphasis and windowing
BaseFloat cepstral_lifter; // Scaling factor on cepstra for HTK compatibility.
// if 0.0, no liftering is done.
bool htk_compat; // if true, put energy/C0 last and introduce a factor of
// sqrt(2) on C0 to be the same as HTK.
MfccOptions() : mel_opts(23),
// defaults the #mel-banks to 23 for the MFCC computations.
// This seems to be common for 16khz-sampled data,
// but for 8khz-sampled data, 15 may be better.
num_ceps(13),
use_energy(true),
energy_floor(0.0),
raw_energy(true),
cepstral_lifter(22.0),
htk_compat(false) {}
void Register(OptionsItf *opts) {
frame_opts.Register(opts);
mel_opts.Register(opts);
opts->Register("num-ceps", &num_ceps,
"Number of cepstra in MFCC computation (including C0)");
opts->Register("use-energy", &use_energy,
"Use energy (not C0) in MFCC computation");
opts->Register("energy-floor", &energy_floor,
"Floor on energy (absolute, not relative) in MFCC computation. "
"Only makes a difference if --use-energy=true; only necessary if "
"--dither=0.0. Suggested values: 0.1 or 1.0");
opts->Register("raw-energy", &raw_energy,
"If true, compute energy before preemphasis and windowing");
opts->Register("cepstral-lifter", &cepstral_lifter,
"Constant that controls scaling of MFCCs");
opts->Register("htk-compat", &htk_compat,
"If true, put energy or C0 last and use a factor of sqrt(2) on "
"C0. Warning: not sufficient to get HTK compatible features "
"(need to change other parameters).");
}
};
// This is the new-style interface to the MFCC computation.
class MfccComputer {
public:
typedef MfccOptions Options;
explicit MfccComputer(const MfccOptions &opts);
MfccComputer(const MfccComputer &other);
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
int32 Dim() const { return opts_.num_ceps; }
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
/**
Function that computes one frame of features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater. Must be
ignored by this function if this class returns false from
this->NeedRawLogEnergy().
@param [in] vtln_warp The VTLN warping factor that the user wants
to be applied when computing features for this utterance. Will
normally be 1.0, meaning no warping is to be done. The value will
be ignored for feature types that don't support VTLN, such as
spectrogram features.
@param [in] signal_frame One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions(). The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
~MfccComputer();
private:
// disallow assignment.
MfccComputer &operator = (const MfccComputer &in);
protected:
const MelBanks *GetMelBanks(BaseFloat vtln_warp);
MfccOptions opts_;
Vector<BaseFloat> lifter_coeffs_;
Matrix<BaseFloat> dct_matrix_; // matrix we left-multiply by to perform DCT.
BaseFloat log_energy_floor_;
std::map<BaseFloat, MelBanks*> mel_banks_; // BaseFloat is VTLN coefficient.
SplitRadixRealFft<BaseFloat> *srfft_;
// note: mel_energies_ is specific to the frame we're processing, it's
// just a temporary workspace.
Vector<BaseFloat> mel_energies_;
};
typedef OfflineFeatureTpl<MfccComputer> Mfcc;
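// Example (illustrative sketch): offline MFCC extraction through the Mfcc
// typedef above; OfflineFeatureTpl<>::Compute() runs MfccComputer::Compute()
// over every frame. 'waveform' (a Vector<BaseFloat> of samples at
// frame_opts.samp_freq) is assumed given.
//
//   MfccOptions mfcc_opts;  // defaults: 13 cepstra, 23 mel bins
//   Mfcc mfcc(mfcc_opts);
//   Matrix<BaseFloat> features;
//   mfcc.Compute(waveform, 1.0 /* vtln_warp */, &features);
//   // features: one row per frame, mfcc.Dim() == num_ceps columns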
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_MFCC_H_
// feat/feature-plp.cc
// Copyright 2009-2011 Petr Motlicek; Karel Vesely
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-plp.h"
namespace kaldi {
PlpComputer::PlpComputer(const PlpOptions &opts):
opts_(opts), srfft_(NULL),
mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined),
autocorr_coeffs_(opts_.lpc_order + 1, kUndefined),
lpc_coeffs_(opts_.lpc_order, kUndefined),
raw_cepstrum_(opts_.lpc_order, kUndefined) {
if (opts.cepstral_lifter != 0.0) {
lifter_coeffs_.Resize(opts.num_ceps);
ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_);
}
InitIdftBases(opts_.lpc_order + 1, opts_.mel_opts.num_bins + 2,
&idft_bases_);
if (opts.energy_floor > 0.0)
log_energy_floor_ = Log(opts.energy_floor);
int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two...
srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
// We'll definitely need the filterbanks info for VTLN warping factor 1.0.
// [note: this call caches it.]
GetMelBanks(1.0);
}
PlpComputer::PlpComputer(const PlpComputer &other):
opts_(other.opts_), lifter_coeffs_(other.lifter_coeffs_),
idft_bases_(other.idft_bases_), log_energy_floor_(other.log_energy_floor_),
mel_banks_(other.mel_banks_), equal_loudness_(other.equal_loudness_),
srfft_(NULL),
mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined),
autocorr_coeffs_(opts_.lpc_order + 1, kUndefined),
lpc_coeffs_(opts_.lpc_order, kUndefined),
raw_cepstrum_(opts_.lpc_order, kUndefined) {
for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
iter != mel_banks_.end(); ++iter)
iter->second = new MelBanks(*(iter->second));
for (std::map<BaseFloat, Vector<BaseFloat>*>::iterator
iter = equal_loudness_.begin();
iter != equal_loudness_.end(); ++iter)
iter->second = new Vector<BaseFloat>(*(iter->second));
if (other.srfft_ != NULL)
srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
}
PlpComputer::~PlpComputer() {
for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
iter != mel_banks_.end(); ++iter)
delete iter->second;
for (std::map<BaseFloat, Vector<BaseFloat>* >::iterator
iter = equal_loudness_.begin();
iter != equal_loudness_.end(); ++iter)
delete iter->second;
delete srfft_;
}
const MelBanks *PlpComputer::GetMelBanks(BaseFloat vtln_warp) {
MelBanks *this_mel_banks = NULL;
std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.find(vtln_warp);
if (iter == mel_banks_.end()) {
this_mel_banks = new MelBanks(opts_.mel_opts,
opts_.frame_opts,
vtln_warp);
mel_banks_[vtln_warp] = this_mel_banks;
} else {
this_mel_banks = iter->second;
}
return this_mel_banks;
}
const Vector<BaseFloat> *PlpComputer::GetEqualLoudness(BaseFloat vtln_warp) {
const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
Vector<BaseFloat> *ans = NULL;
std::map<BaseFloat, Vector<BaseFloat>*>::iterator iter
= equal_loudness_.find(vtln_warp);
if (iter == equal_loudness_.end()) {
ans = new Vector<BaseFloat>;
GetEqualLoudnessVector(*this_mel_banks, ans);
equal_loudness_[vtln_warp] = ans;
} else {
ans = iter->second;
}
return ans;
}
void PlpComputer::Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature) {
KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
feature->Dim() == this->Dim());
const MelBanks &mel_banks = *GetMelBanks(vtln_warp);
const Vector<BaseFloat> &equal_loudness = *GetEqualLoudness(vtln_warp);
KALDI_ASSERT(opts_.num_ceps <= opts_.lpc_order+1); // our num-ceps includes C0.
if (opts_.use_energy && !opts_.raw_energy)
signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
std::numeric_limits<float>::min()));
if (srfft_ != NULL) // Compute FFT using split-radix algorithm.
srfft_->Compute(signal_frame->Data(), true);
else // An alternative algorithm that works for non-powers-of-two.
RealFft(signal_frame, true);
// Convert the FFT into a power spectrum.
ComputePowerSpectrum(signal_frame); // elements 0 ... signal_frame->Dim()/2
SubVector<BaseFloat> power_spectrum(*signal_frame,
0, signal_frame->Dim() / 2 + 1);
int32 num_mel_bins = opts_.mel_opts.num_bins;
SubVector<BaseFloat> mel_energies(mel_energies_duplicated_, 1, num_mel_bins);
mel_banks.Compute(power_spectrum, &mel_energies);
mel_energies.MulElements(equal_loudness);
mel_energies.ApplyPow(opts_.compress_factor);
// duplicate first and last elements
mel_energies_duplicated_(0) = mel_energies_duplicated_(1);
mel_energies_duplicated_(num_mel_bins + 1) =
mel_energies_duplicated_(num_mel_bins);
autocorr_coeffs_.SetZero(); // In case of NaNs or infs
autocorr_coeffs_.AddMatVec(1.0, idft_bases_, kNoTrans,
mel_energies_duplicated_, 0.0);
BaseFloat residual_log_energy = ComputeLpc(autocorr_coeffs_, &lpc_coeffs_);
residual_log_energy = std::max<BaseFloat>(residual_log_energy,
std::numeric_limits<float>::min());
Lpc2Cepstrum(opts_.lpc_order, lpc_coeffs_.Data(), raw_cepstrum_.Data());
feature->Range(1, opts_.num_ceps - 1).CopyFromVec(
raw_cepstrum_.Range(0, opts_.num_ceps - 1));
(*feature)(0) = residual_log_energy;
if (opts_.cepstral_lifter != 0.0)
feature->MulElements(lifter_coeffs_);
if (opts_.cepstral_scale != 1.0)
feature->Scale(opts_.cepstral_scale);
if (opts_.use_energy) {
if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
signal_raw_log_energy = log_energy_floor_;
(*feature)(0) = signal_raw_log_energy;
}
if (opts_.htk_compat) { // reorder the features.
BaseFloat log_energy = (*feature)(0);
for (int32 i = 0; i < opts_.num_ceps-1; i++)
(*feature)(i) = (*feature)(i+1);
(*feature)(opts_.num_ceps-1) = log_energy;
}
}
} // namespace kaldi
// feat/feature-plp.h
// Copyright 2009-2011 Petr Motlicek; Karel Vesely
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_PLP_H_
#define KALDI_FEAT_FEATURE_PLP_H_
#include <map>
#include <string>
#include "feat/feature-common.h"
#include "feat/feature-functions.h"
#include "feat/feature-window.h"
#include "feat/mel-computations.h"
#include "util/options-itf.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// PlpOptions contains basic options for computing PLP features.
/// It only includes things that can be done in a "stateless" way, i.e.
/// it does not include energy max-normalization.
/// It does not include delta computation.
struct PlpOptions {
FrameExtractionOptions frame_opts;
MelBanksOptions mel_opts;
int32 lpc_order;
int32 num_ceps; // num cepstra including zero
bool use_energy; // use energy; else C0
BaseFloat energy_floor;
bool raw_energy; // If true, compute energy before preemphasis and windowing
BaseFloat compress_factor;
int32 cepstral_lifter;
BaseFloat cepstral_scale;
bool htk_compat; // if true, put energy/C0 last and introduce a factor of
// sqrt(2) on C0 to be the same as HTK.
PlpOptions() : mel_opts(23),
// default number of mel-banks for the PLP computation; this
// seems to be common for 16kHz-sampled data. For 8kHz-sampled
// data, 15 may be better.
lpc_order(12),
num_ceps(13),
use_energy(true),
energy_floor(0.0),
raw_energy(true),
compress_factor(0.33333),
cepstral_lifter(22),
cepstral_scale(1.0),
htk_compat(false) {}
void Register(OptionsItf *opts) {
frame_opts.Register(opts);
mel_opts.Register(opts);
opts->Register("lpc-order", &lpc_order,
"Order of LPC analysis in PLP computation");
opts->Register("num-ceps", &num_ceps,
"Number of cepstra in PLP computation (including C0)");
opts->Register("use-energy", &use_energy,
"Use energy (not C0) for zeroth PLP feature");
opts->Register("energy-floor", &energy_floor,
"Floor on energy (absolute, not relative) in PLP computation. "
"Only makes a difference if --use-energy=true; only necessary if "
"--dither=0.0. Suggested values: 0.1 or 1.0");
opts->Register("raw-energy", &raw_energy,
"If true, compute energy before preemphasis and windowing");
opts->Register("compress-factor", &compress_factor,
"Compression factor in PLP computation");
opts->Register("cepstral-lifter", &cepstral_lifter,
"Constant that controls scaling of PLPs");
opts->Register("cepstral-scale", &cepstral_scale,
"Scaling constant in PLP computation");
opts->Register("htk-compat", &htk_compat,
"If true, put energy or C0 last. Warning: not sufficient "
"to get HTK compatible features (need to change other "
"parameters).");
}
};
/// This is the new-style interface to the PLP computation.
class PlpComputer {
public:
typedef PlpOptions Options;
explicit PlpComputer(const PlpOptions &opts);
PlpComputer(const PlpComputer &other);
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
int32 Dim() const { return opts_.num_ceps; }
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
/**
Function that computes one frame of features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater. Must be
ignored by this function if this class returns false from
this->NeedRawLogEnergy().
@param [in] vtln_warp The VTLN warping factor that the user wants
to be applied when computing features for this utterance. Will
normally be 1.0, meaning no warping is to be done. The value will
be ignored for feature types that don't support VTLN, such as
spectrogram features.
@param [in] signal_frame One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions(). The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
~PlpComputer();
private:
const MelBanks *GetMelBanks(BaseFloat vtln_warp);
const Vector<BaseFloat> *GetEqualLoudness(BaseFloat vtln_warp);
PlpOptions opts_;
Vector<BaseFloat> lifter_coeffs_;
Matrix<BaseFloat> idft_bases_;
BaseFloat log_energy_floor_;
std::map<BaseFloat, MelBanks*> mel_banks_; // BaseFloat is VTLN coefficient.
std::map<BaseFloat, Vector<BaseFloat>* > equal_loudness_;
SplitRadixRealFft<BaseFloat> *srfft_;
// temporary vector used inside Compute; size is opts_.mel_opts.num_bins + 2
Vector<BaseFloat> mel_energies_duplicated_;
// temporary vector used inside Compute; size is opts_.lpc_order + 1
Vector<BaseFloat> autocorr_coeffs_;
// temporary vector used inside Compute; size is opts_.lpc_order
Vector<BaseFloat> lpc_coeffs_;
// temporary vector used inside Compute; size is opts_.lpc_order
Vector<BaseFloat> raw_cepstrum_;
// Disallow assignment.
PlpComputer &operator =(const PlpComputer &other);
};
typedef OfflineFeatureTpl<PlpComputer> Plp;
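// Example (illustrative sketch): PLP extraction mirrors the MFCC usage, only
// the options type differs; note the constraint checked inside Compute():
// num_ceps <= lpc_order + 1.
//
//   PlpOptions plp_opts;  // defaults: lpc_order = 12, num_ceps = 13
//   Plp plp(plp_opts);
//   Matrix<BaseFloat> features;
//   plp.Compute(waveform, 1.0 /* vtln_warp */, &features);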
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_PLP_H_
// feat/feature-spectrogram.cc
// Copyright 2009-2012 Karel Vesely
// Copyright 2012 Navdeep Jaitly
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-spectrogram.h"
namespace kaldi {
SpectrogramComputer::SpectrogramComputer(const SpectrogramOptions &opts)
: opts_(opts), srfft_(NULL) {
if (opts.energy_floor > 0.0)
log_energy_floor_ = Log(opts.energy_floor);
int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two
srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
}
SpectrogramComputer::SpectrogramComputer(const SpectrogramComputer &other):
opts_(other.opts_), log_energy_floor_(other.log_energy_floor_), srfft_(NULL) {
if (other.srfft_ != NULL)
srfft_ = new SplitRadixRealFft<BaseFloat>(*other.srfft_);
}
SpectrogramComputer::~SpectrogramComputer() {
delete srfft_;
}
void SpectrogramComputer::Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature) {
KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
feature->Dim() == this->Dim());
// Compute energy after window function (not the raw one)
if (!opts_.raw_energy)
signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
std::numeric_limits<float>::epsilon()));
if (srfft_ != NULL) // Compute FFT using split-radix algorithm.
srfft_->Compute(signal_frame->Data(), true);
else // An alternative algorithm that works for non-powers-of-two
RealFft(signal_frame, true);
// Convert the FFT into a power spectrum.
ComputePowerSpectrum(signal_frame);
SubVector<BaseFloat> power_spectrum(*signal_frame,
0, signal_frame->Dim() / 2 + 1);
power_spectrum.ApplyFloor(std::numeric_limits<float>::epsilon());
power_spectrum.ApplyLog();
feature->CopyFromVec(power_spectrum);
if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
signal_raw_log_energy = log_energy_floor_;
// The zeroth spectrogram component is always set to the signal energy,
// instead of the square of the constant component of the signal.
(*feature)(0) = signal_raw_log_energy;
}
} // namespace kaldi
// feat/feature-spectrogram.h
// Copyright 2009-2012 Karel Vesely
// Copyright 2012 Navdeep Jaitly
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_SPECTROGRAM_H_
#define KALDI_FEAT_FEATURE_SPECTROGRAM_H_
#include <string>
#include "feat/feature-common.h"
#include "feat/feature-functions.h"
#include "feat/feature-window.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// SpectrogramOptions contains basic options for computing spectrogram
/// features.
struct SpectrogramOptions {
FrameExtractionOptions frame_opts;
BaseFloat energy_floor;
bool raw_energy; // If true, compute energy before preemphasis and windowing
SpectrogramOptions() :
energy_floor(0.0),
raw_energy(true) {}
void Register(OptionsItf *opts) {
frame_opts.Register(opts);
opts->Register("energy-floor", &energy_floor,
"Floor on energy (absolute, not relative) in Spectrogram "
"computation. Caution: this floor is applied to the zeroth "
"component, representing the total signal energy. The "
"floor on the individual spectrogram elements is fixed at "
"std::numeric_limits<float>::epsilon().");
opts->Register("raw-energy", &raw_energy,
"If true, compute energy before preemphasis and windowing");
}
};
/// Class for computing spectrogram features.
class SpectrogramComputer {
public:
typedef SpectrogramOptions Options;
explicit SpectrogramComputer(const SpectrogramOptions &opts);
SpectrogramComputer(const SpectrogramComputer &other);
const FrameExtractionOptions& GetFrameOptions() const {
return opts_.frame_opts;
}
int32 Dim() const { return opts_.frame_opts.PaddedWindowSize() / 2 + 1; }
bool NeedRawLogEnergy() const { return opts_.raw_energy; }
/**
Function that computes one frame of spectrogram features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater. Must be
ignored by this function if this class returns false from
this->NeedRawLogEnergy().
@param [in] vtln_warp This is ignored by this function; it's only
needed for interface compatibility.
@param [in] signal_frame One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions(). The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
~SpectrogramComputer();
private:
SpectrogramOptions opts_;
BaseFloat log_energy_floor_;
SplitRadixRealFft<BaseFloat> *srfft_;
// Disallow assignment.
SpectrogramComputer &operator=(const SpectrogramComputer &other);
};
typedef OfflineFeatureTpl<SpectrogramComputer> Spectrogram;
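// Worked numbers: with the default frame options (16 kHz sampling, 25 ms
// frames, rounded up to a 512-sample padded window), SpectrogramComputer::Dim()
// is 512 / 2 + 1 == 257 log-spectrum values per frame, with component 0
// holding the (possibly floored) log energy.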
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_SPECTROGRAM_H_
// feat/feature-window.cc
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Microsoft Corporation
// 2013-2016 Johns Hopkins University (author: Daniel Povey)
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-window.h"
#include "matrix/matrix-functions.h"
namespace kaldi {
int64 FirstSampleOfFrame(int32 frame,
const FrameExtractionOptions &opts) {
int64 frame_shift = opts.WindowShift();
if (opts.snip_edges) {
return frame * frame_shift;
} else {
int64 midpoint_of_frame = frame_shift * frame + frame_shift / 2,
beginning_of_frame = midpoint_of_frame - opts.WindowSize() / 2;
return beginning_of_frame;
}
}
int32 NumFrames(int64 num_samples,
const FrameExtractionOptions &opts,
bool flush) {
int64 frame_shift = opts.WindowShift();
int64 frame_length = opts.WindowSize();
if (opts.snip_edges) {
// with --snip-edges=true (the default), we use a HTK-like approach to
// determining the number of frames-- all frames have to fit completely into
// the waveform, and the first frame begins at sample zero.
if (num_samples < frame_length)
return 0;
else
return (1 + ((num_samples - frame_length) / frame_shift));
// You can understand the expression above as follows: 'num_samples -
// frame_length' is how much room we have to shift the frame within the
// waveform; 'frame_shift' is how much we shift it each time; and the ratio
// is how many times we can shift it (integer arithmetic rounds down).
} else {
// if --snip-edges=false, the number of frames is determined by rounding the
// (file-length / frame-shift) to the nearest integer. The point of this
// formula is to make the number of frames an obvious and predictable
// function of the frame shift and signal length, which makes many
// segmentation-related questions simpler.
//
// Because integer division in C++ rounds toward zero, we add (half the
// frame-shift minus epsilon) before dividing, to have the effect of
// rounding towards the closest integer.
int32 num_frames = (num_samples + (frame_shift / 2)) / frame_shift;
if (flush)
return num_frames;
// note: 'end' always means the last plus one, i.e. one past the last.
int64 end_sample_of_last_frame = FirstSampleOfFrame(num_frames - 1, opts)
+ frame_length;
// the following code is optimized more for clarity than efficiency.
// If flush == false, we can't output frames that extend past the end
// of the signal.
while (num_frames > 0 && end_sample_of_last_frame > num_samples) {
num_frames--;
end_sample_of_last_frame -= frame_shift;
}
return num_frames;
}
}
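// Worked example for NumFrames(): at 16 kHz with 25 ms frames and 10 ms shift,
// frame_length == 400 and frame_shift == 160 samples. For num_samples == 16000:
//   snip_edges == true:  1 + (16000 - 400) / 160 == 98 frames;
//   snip_edges == false, flush == true:  (16000 + 80) / 160 == 100 frames.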
void Dither(VectorBase<BaseFloat> *waveform, BaseFloat dither_value) {
if (dither_value == 0.0)
return;
int32 dim = waveform->Dim();
BaseFloat *data = waveform->Data();
RandomState rstate;
for (int32 i = 0; i < dim; i++)
data[i] += RandGauss(&rstate) * dither_value;
}
void Preemphasize(VectorBase<BaseFloat> *waveform, BaseFloat preemph_coeff) {
if (preemph_coeff == 0.0) return;
KALDI_ASSERT(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);
for (int32 i = waveform->Dim()-1; i > 0; i--)
(*waveform)(i) -= preemph_coeff * (*waveform)(i-1);
(*waveform)(0) -= preemph_coeff * (*waveform)(0);
}
FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts) {
int32 frame_length = opts.WindowSize();
KALDI_ASSERT(frame_length > 0);
window.Resize(frame_length);
double a = M_2PI / (frame_length-1);
for (int32 i = 0; i < frame_length; i++) {
double i_fl = static_cast<double>(i);
if (opts.window_type == "hanning") {
window(i) = 0.5 - 0.5*cos(a * i_fl);
} else if (opts.window_type == "hamming") {
window(i) = 0.54 - 0.46*cos(a * i_fl);
} else if (opts.window_type == "povey") { // like hamming but goes to zero at edges.
window(i) = pow(0.5 - 0.5*cos(a * i_fl), 0.85);
} else if (opts.window_type == "rectangular") {
window(i) = 1.0;
} else if (opts.window_type == "blackman") {
window(i) = opts.blackman_coeff - 0.5*cos(a * i_fl) +
(0.5 - opts.blackman_coeff) * cos(2 * a * i_fl);
} else {
KALDI_ERR << "Invalid window type " << opts.window_type;
}
}
}
void ProcessWindow(const FrameExtractionOptions &opts,
const FeatureWindowFunction &window_function,
VectorBase<BaseFloat> *window,
BaseFloat *log_energy_pre_window) {
int32 frame_length = opts.WindowSize();
KALDI_ASSERT(window->Dim() == frame_length);
if (opts.dither != 0.0)
Dither(window, opts.dither);
if (opts.remove_dc_offset)
window->Add(-window->Sum() / frame_length);
if (log_energy_pre_window != NULL) {
BaseFloat energy = std::max<BaseFloat>(VecVec(*window, *window),
std::numeric_limits<float>::epsilon());
*log_energy_pre_window = Log(energy);
}
if (opts.preemph_coeff != 0.0)
Preemphasize(window, opts.preemph_coeff);
window->MulElements(window_function.window);
}
// ExtractWindow extracts a windowed frame of waveform with a power-of-two,
// padded size. It does mean subtraction, pre-emphasis and dithering as
// requested.
void ExtractWindow(int64 sample_offset,
const VectorBase<BaseFloat> &wave,
int32 f, // with 0 <= f < NumFrames(sample_offset + wave.Dim(), opts)
const FrameExtractionOptions &opts,
const FeatureWindowFunction &window_function,
Vector<BaseFloat> *window,
BaseFloat *log_energy_pre_window) {
KALDI_ASSERT(sample_offset >= 0 && wave.Dim() != 0);
int32 frame_length = opts.WindowSize(),
frame_length_padded = opts.PaddedWindowSize();
int64 num_samples = sample_offset + wave.Dim(),
start_sample = FirstSampleOfFrame(f, opts),
end_sample = start_sample + frame_length;
if (opts.snip_edges) {
KALDI_ASSERT(start_sample >= sample_offset &&
end_sample <= num_samples);
} else {
KALDI_ASSERT(sample_offset == 0 || start_sample >= sample_offset);
}
if (window->Dim() != frame_length_padded)
window->Resize(frame_length_padded, kUndefined);
// wave_start and wave_end are start and end indexes into 'wave', for the
// piece of wave that we're trying to extract.
int32 wave_start = int32(start_sample - sample_offset),
wave_end = wave_start + frame_length;
if (wave_start >= 0 && wave_end <= wave.Dim()) {
// the normal case-- no edge effects to consider.
window->Range(0, frame_length).CopyFromVec(
wave.Range(wave_start, frame_length));
} else {
// Deal with any end effects by reflection, if needed. This code will only
// be reached for about two frames per utterance, so we don't concern
// ourselves excessively with efficiency.
int32 wave_dim = wave.Dim();
for (int32 s = 0; s < frame_length; s++) {
int32 s_in_wave = s + wave_start;
while (s_in_wave < 0 || s_in_wave >= wave_dim) {
// reflect around the beginning or end of the wave.
// e.g. -1 -> 0, -2 -> 1.
// dim -> dim - 1, dim + 1 -> dim - 2.
// the code supports repeated reflections, although this
// would only be needed in pathological cases.
if (s_in_wave < 0) s_in_wave = - s_in_wave - 1;
else s_in_wave = 2 * wave_dim - 1 - s_in_wave;
}
(*window)(s) = wave(s_in_wave);
}
}
if (frame_length_padded > frame_length)
window->Range(frame_length, frame_length_padded - frame_length).SetZero();
SubVector<BaseFloat> frame(*window, 0, frame_length);
ProcessWindow(opts, window_function, &frame, log_energy_pre_window);
}
} // namespace kaldi
// feat/feature-window.h
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Saarland University
// 2014-2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_WINDOW_H_
#define KALDI_FEAT_FEATURE_WINDOW_H_
#include <map>
#include <string>
#include "matrix/matrix-lib.h"
#include "util/common-utils.h"
#include "base/kaldi-error.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
struct FrameExtractionOptions {
BaseFloat samp_freq;
BaseFloat frame_shift_ms; // in milliseconds.
BaseFloat frame_length_ms; // in milliseconds.
BaseFloat dither; // Amount of dithering, 0.0 means no dither.
BaseFloat preemph_coeff; // Preemphasis coefficient.
bool remove_dc_offset; // Subtract mean of wave before FFT.
std::string window_type; // e.g. Hamming window
// May be "hamming", "rectangular", "povey", "hanning", "blackman"
// "povey" is a window I made to be similar to Hamming but to go to zero at the
// edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
// I just don't think the Hamming window makes sense as a windowing function.
bool round_to_power_of_two;
BaseFloat blackman_coeff;
bool snip_edges;
bool allow_downsample;
bool allow_upsample;
int max_feature_vectors;
FrameExtractionOptions():
samp_freq(16000),
frame_shift_ms(10.0),
frame_length_ms(25.0),
dither(1.0),
preemph_coeff(0.97),
remove_dc_offset(true),
window_type("povey"),
round_to_power_of_two(true),
blackman_coeff(0.42),
snip_edges(true),
allow_downsample(false),
allow_upsample(false),
max_feature_vectors(-1)
{ }
void Register(OptionsItf *opts) {
opts->Register("sample-frequency", &samp_freq,
"Waveform data sample frequency (must match the waveform file, "
"if specified there)");
opts->Register("frame-length", &frame_length_ms, "Frame length in milliseconds");
opts->Register("frame-shift", &frame_shift_ms, "Frame shift in milliseconds");
opts->Register("preemphasis-coefficient", &preemph_coeff,
"Coefficient for use in signal preemphasis");
opts->Register("remove-dc-offset", &remove_dc_offset,
"Subtract mean from waveform on each frame");
opts->Register("dither", &dither, "Dithering constant (0.0 means no dither). "
"If you turn this off, you should set the --energy-floor "
"option, e.g. to 1.0 or 0.1");
opts->Register("window-type", &window_type, "Type of window "
"(\"hamming\"|\"hanning\"|\"povey\"|\"rectangular\""
"|\"blackmann\")");
opts->Register("blackman-coeff", &blackman_coeff,
"Constant coefficient for generalized Blackman window.");
opts->Register("round-to-power-of-two", &round_to_power_of_two,
"If true, round window size to power of two by zero-padding "
"input to FFT.");
opts->Register("snip-edges", &snip_edges,
"If true, end effects will be handled by outputting only frames that "
"completely fit in the file, and the number of frames depends on the "
"frame-length. If false, the number of frames depends only on the "
"frame-shift, and we reflect the data at the ends.");
opts->Register("allow-downsample", &allow_downsample,
"If true, allow the input waveform to have a higher frequency than "
"the specified --sample-frequency (and we'll downsample).");
opts->Register("max-feature-vectors", &max_feature_vectors,
"Memory optimization. If larger than 0, periodically remove feature "
"vectors so that only this number of the latest feature vectors is "
"retained.");
opts->Register("allow-upsample", &allow_upsample,
"If true, allow the input waveform to have a lower frequency than "
"the specified --sample-frequency (and we'll upsample).");
}
int32 WindowShift() const {
return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);
}
int32 WindowSize() const {
return static_cast<int32>(samp_freq * 0.001 * frame_length_ms);
}
int32 PaddedWindowSize() const {
return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize()) :
WindowSize());
}
};
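// Worked numbers for the accessors above: with the defaults (samp_freq 16000,
// frame_shift_ms 10, frame_length_ms 25, round_to_power_of_two true),
// WindowShift() == 160, WindowSize() == 400 and PaddedWindowSize() == 512.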
struct FeatureWindowFunction {
FeatureWindowFunction() {}
explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
FeatureWindowFunction(const FeatureWindowFunction &other):
window(other.window) { }
Vector<BaseFloat> window;
};
/**
This function returns the number of frames that we can extract from a wave
file with the given number of samples in it (assumed to have the same
sampling rate as specified in 'opts').
@param [in] num_samples The number of samples in the wave file.
@param [in] opts The frame-extraction options class
@param [in] flush True if we are asserting that this number of samples is
'all there is', false if we are expecting more data to possibly come
in. This only makes a difference to the answer if opts.snip_edges
== false. For offline feature extraction you always want flush ==
true. In an online-decoding context, once you know (or decide) that
no more data is coming in, you'd call it with flush == true at the
end to flush out any remaining data.
*/
int32 NumFrames(int64 num_samples,
const FrameExtractionOptions &opts,
bool flush = true);
/*
This function returns the index of the first sample of the frame indexed
'frame'. If snip-edges=true, it just returns frame * opts.WindowShift(); if
snip-edges=false, the formula is a little more complicated and the result may
be negative.
*/
int64 FirstSampleOfFrame(int32 frame,
const FrameExtractionOptions &opts);
void Dither(VectorBase<BaseFloat> *waveform, BaseFloat dither_value);
void Preemphasize(VectorBase<BaseFloat> *waveform, BaseFloat preemph_coeff);
/**
This function does all the windowing steps after actually
extracting the windowed signal: depending on the
configuration, it does dithering, dc offset removal,
preemphasis, and multiplication by the windowing function.
@param [in] opts The options class to be used
@param [in] window_function The windowing function-- should have
been initialized using 'opts'.
@param [in,out] window A vector of size opts.WindowSize(). Note:
it will typically be a sub-vector of a larger vector of size
opts.PaddedWindowSize(), with the remaining samples zero,
as the FFT code is more efficient if it operates on data with
power-of-two size.
@param [out] log_energy_pre_window If non-NULL, then after dithering and
DC offset removal, this function will write to this pointer the log of
the total energy (i.e. sum-squared) of the frame.
*/
void ProcessWindow(const FrameExtractionOptions &opts,
const FeatureWindowFunction &window_function,
VectorBase<BaseFloat> *window,
BaseFloat *log_energy_pre_window = NULL);
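// Example (illustrative sketch): windowing a single frame by hand; normally
// ExtractWindow() below does this for you. The raw samples are assumed to have
// been copied into the first WindowSize() entries of 'window' already.
//
//   FrameExtractionOptions frame_opts;
//   FeatureWindowFunction window_fn(frame_opts);
//   Vector<BaseFloat> window(frame_opts.PaddedWindowSize());
//   SubVector<BaseFloat> frame(window, 0, frame_opts.WindowSize());
//   BaseFloat raw_log_energy;
//   ProcessWindow(frame_opts, window_fn, &frame, &raw_log_energy);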
/*
ExtractWindow() extracts a windowed frame of waveform (possibly with a
power-of-two, padded size, depending on the config), including all the
processing done by ProcessWindow().
@param [in] sample_offset If 'wave' is not the entire waveform, but
part of it to the left has been discarded, then the
number of samples prior to 'wave' that we have
already discarded. Set this to zero if you are
processing the entire waveform in one piece, or
if you get 'no matching function' compilation
errors when updating the code.
@param [in] wave The waveform
@param [in] f The frame index to be extracted, with
0 <= f < NumFrames(sample_offset + wave.Dim(), opts, true)
@param [in] opts The options class to be used
@param [in] window_function The windowing function, as derived from the
options class.
@param [out] window The windowed, possibly-padded waveform to be
extracted. Will be resized as needed.
@param [out] log_energy_pre_window If non-NULL, the log-energy of
the signal prior to pre-emphasis and multiplying by
the windowing function will be written to here.
*/
void ExtractWindow(int64 sample_offset,
const VectorBase<BaseFloat> &wave,
int32 f,
const FrameExtractionOptions &opts,
const FeatureWindowFunction &window_function,
Vector<BaseFloat> *window,
BaseFloat *log_energy_pre_window = NULL);
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_WINDOW_H_
// feat/mel-computations.cc
// Copyright 2009-2011 Phonexia s.r.o.; Karel Vesely; Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#include <algorithm>
#include <iostream>
#include "feat/feature-functions.h"
#include "feat/feature-window.h"
#include "feat/mel-computations.h"
namespace kaldi {
MelBanks::MelBanks(const MelBanksOptions &opts,
const FrameExtractionOptions &frame_opts,
BaseFloat vtln_warp_factor):
htk_mode_(opts.htk_mode) {
int32 num_bins = opts.num_bins;
if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins";
BaseFloat sample_freq = frame_opts.samp_freq;
int32 window_length_padded = frame_opts.PaddedWindowSize();
KALDI_ASSERT(window_length_padded % 2 == 0);
int32 num_fft_bins = window_length_padded / 2;
BaseFloat nyquist = 0.5 * sample_freq;
BaseFloat low_freq = opts.low_freq, high_freq;
if (opts.high_freq > 0.0)
high_freq = opts.high_freq;
else
high_freq = nyquist + opts.high_freq;
if (low_freq < 0.0 || low_freq >= nyquist
|| high_freq <= 0.0 || high_freq > nyquist
|| high_freq <= low_freq)
KALDI_ERR << "Bad values in options: low-freq " << low_freq
<< " and high-freq " << high_freq << " vs. nyquist "
<< nyquist;
BaseFloat fft_bin_width = sample_freq / window_length_padded;
// fft-bin width [think of it as Nyquist-freq / half-window-length]
BaseFloat mel_low_freq = MelScale(low_freq);
BaseFloat mel_high_freq = MelScale(high_freq);
debug_ = opts.debug_mel;
// divide by num_bins+1 in next line because of end-effects where the bins
// spread out to the sides.
BaseFloat mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins+1);
BaseFloat vtln_low = opts.vtln_low,
vtln_high = opts.vtln_high;
if (vtln_high < 0.0) {
vtln_high += nyquist;
}
if (vtln_warp_factor != 1.0 &&
(vtln_low < 0.0 || vtln_low <= low_freq
|| vtln_low >= high_freq
|| vtln_high <= 0.0 || vtln_high >= high_freq
|| vtln_high <= vtln_low))
KALDI_ERR << "Bad values in options: vtln-low " << vtln_low
<< " and vtln-high " << vtln_high << ", versus "
<< "low-freq " << low_freq << " and high-freq "
<< high_freq;
bins_.resize(num_bins);
center_freqs_.Resize(num_bins);
for (int32 bin = 0; bin < num_bins; bin++) {
BaseFloat left_mel = mel_low_freq + bin * mel_freq_delta,
center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
if (vtln_warp_factor != 1.0) {
left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
vtln_warp_factor, left_mel);
center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
vtln_warp_factor, center_mel);
right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
vtln_warp_factor, right_mel);
}
center_freqs_(bin) = InverseMelScale(center_mel);
// this_bin will be a vector of coefficients that is only
// nonzero where this mel bin is active.
Vector<BaseFloat> this_bin(num_fft_bins);
int32 first_index = -1, last_index = -1;
for (int32 i = 0; i < num_fft_bins; i++) {
BaseFloat freq = (fft_bin_width * i); // Center frequency of this fft
// bin.
BaseFloat mel = MelScale(freq);
if (mel > left_mel && mel < right_mel) {
BaseFloat weight;
if (mel <= center_mel)
weight = (mel - left_mel) / (center_mel - left_mel);
else
weight = (right_mel-mel) / (right_mel-center_mel);
this_bin(i) = weight;
if (first_index == -1)
first_index = i;
last_index = i;
}
}
//KALDI_ASSERT(first_index != -1 && last_index >= first_index
// && "You may have set --num-mel-bins too large.");
bins_[bin].first = first_index;
int32 size = last_index + 1 - first_index;
bins_[bin].second.Resize(size);
bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size));
// Replicate a bug in HTK, for testing purposes.
if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0)
bins_[bin].second(0) = 0.0;
}
if (debug_) {
for (size_t i = 0; i < bins_.size(); i++) {
KALDI_LOG << "bin " << i << ", offset = " << bins_[i].first
<< ", vec = " << bins_[i].second;
}
}
}
MelBanks::MelBanks(const MelBanks &other):
center_freqs_(other.center_freqs_),
bins_(other.bins_),
debug_(other.debug_),
htk_mode_(other.htk_mode_) { }
BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN.
BaseFloat vtln_high_cutoff,
BaseFloat low_freq, // upper+lower frequency cutoffs in mel computation
BaseFloat high_freq,
BaseFloat vtln_warp_factor,
BaseFloat freq) {
/// This computes a VTLN warping function that is not the same as HTK's one,
/// but has similar inputs (this function has the advantage of never producing
/// empty bins).
/// This function computes a warp function F(freq), defined between low_freq and
/// high_freq inclusive, with the following properties:
/// F(low_freq) == low_freq
/// F(high_freq) == high_freq
/// The function is continuous and piecewise linear with two inflection
/// points.
/// The lower inflection point (measured in terms of the unwarped
/// frequency) is at frequency l, determined as described below.
/// The higher inflection point is at a frequency h, determined as
/// described below.
/// If l <= f <= h, then F(f) = f/vtln_warp_factor.
/// If the higher inflection point (measured in terms of the unwarped
/// frequency) is at h, then max(h, F(h)) == vtln_high_cutoff.
/// Since (by the last point) F(h) == h/vtln_warp_factor, then
/// max(h, h/vtln_warp_factor) == vtln_high_cutoff, so
/// h = vtln_high_cutoff / max(1, 1/vtln_warp_factor).
/// = vtln_high_cutoff * min(1, vtln_warp_factor).
/// If the lower inflection point (measured in terms of the unwarped
/// frequency) is at l, then min(l, F(l)) == vtln_low_cutoff
/// This implies that l = vtln_low_cutoff / min(1, 1/vtln_warp_factor)
/// = vtln_low_cutoff * max(1, vtln_warp_factor)
if (freq < low_freq || freq > high_freq) return freq; // in case this gets called
// for out-of-range frequencies, just return the freq.
KALDI_ASSERT(vtln_low_cutoff > low_freq &&
"be sure to set the --vtln-low option higher than --low-freq");
KALDI_ASSERT(vtln_high_cutoff < high_freq &&
"be sure to set the --vtln-high option lower than --high-freq [or negative]");
BaseFloat one = 1.0;
BaseFloat l = vtln_low_cutoff * std::max(one, vtln_warp_factor);
BaseFloat h = vtln_high_cutoff * std::min(one, vtln_warp_factor);
BaseFloat scale = 1.0 / vtln_warp_factor;
BaseFloat Fl = scale * l; // F(l);
BaseFloat Fh = scale * h; // F(h);
KALDI_ASSERT(l > low_freq && h < high_freq);
// slope of left part of the 3-piece linear function
BaseFloat scale_left = (Fl - low_freq) / (l - low_freq);
// [slope of center part is just "scale"]
// slope of right part of the 3-piece linear function
BaseFloat scale_right = (high_freq - Fh) / (high_freq - h);
if (freq < l) {
return low_freq + scale_left * (freq - low_freq);
} else if (freq < h) {
return scale * freq;
} else { // freq >= h
return high_freq + scale_right * (freq - high_freq);
}
}
BaseFloat MelBanks::VtlnWarpMelFreq(BaseFloat vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN.
BaseFloat vtln_high_cutoff,
BaseFloat low_freq, // upper+lower frequency cutoffs in mel computation
BaseFloat high_freq,
BaseFloat vtln_warp_factor,
BaseFloat mel_freq) {
return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
low_freq, high_freq,
vtln_warp_factor, InverseMelScale(mel_freq)));
}
// "power_spectrum" contains fft energies.
void MelBanks::Compute(const VectorBase<BaseFloat> &power_spectrum,
VectorBase<BaseFloat> *mel_energies_out) const {
int32 num_bins = bins_.size();
KALDI_ASSERT(mel_energies_out->Dim() == num_bins);
for (int32 i = 0; i < num_bins; i++) {
int32 offset = bins_[i].first;
const Vector<BaseFloat> &v(bins_[i].second);
BaseFloat energy = VecVec(v, power_spectrum.Range(offset, v.Dim()));
// HTK-like flooring- for testing purposes (we prefer dither)
if (htk_mode_ && energy < 1.0) energy = 1.0;
(*mel_energies_out)(i) = energy;
// The following assert was added due to a problem with OpenBlas that
// we had at one point (it was a bug in that library). Just to detect
// it early.
KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(i)));
}
if (debug_) {
fprintf(stderr, "MEL BANKS:\n");
for (int32 i = 0; i < num_bins; i++)
fprintf(stderr, " %f", (*mel_energies_out)(i));
fprintf(stderr, "\n");
}
}
void ComputeLifterCoeffs(BaseFloat Q, VectorBase<BaseFloat> *coeffs) {
// Compute liftering coefficients (scaling on cepstral coeffs)
// coeffs are numbered slightly differently from HTK: the zeroth
// index is C0, which is not affected.
for (int32 i = 0; i < coeffs->Dim(); i++)
(*coeffs)(i) = 1.0 + 0.5 * Q * sin (M_PI * i / Q);
}
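// Worked numbers for the liftering formula above: with the default Q == 22
// (--cepstral-lifter), coefficient 0 is 1.0 (so C0 is unaffected) and the
// peak is at i == 11, where 1 + 0.5 * 22 * sin(M_PI / 2) == 12.0.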
// Durbin's recursion - converts autocorrelation coefficients to LPC coefficients
// pTmp - temporary workspace [n]
// pAC - autocorrelation coefficients [n + 1]
// pLP - linear prediction coefficients [n] (predicted s_n = sum_{i=1}^P a[i-1] * s[n-i])
// F(z) = 1 / (1 - A(z)); the leading 1 is not stored in the denominator
BaseFloat Durbin(int n, const BaseFloat *pAC, BaseFloat *pLP, BaseFloat *pTmp) {
BaseFloat ki; // reflection coefficient
int i;
int j;
BaseFloat E = pAC[0];
for (i = 0; i < n; i++) {
// next reflection coefficient
ki = pAC[i + 1];
for (j = 0; j < i; j++)
ki += pLP[j] * pAC[i - j];
ki = ki / E;
// new error
BaseFloat c = 1 - ki * ki;
if (c < 1.0e-5) // guard against NaNs for a constant signal
c = 1.0e-5;
E *= c;
// new LP coefficients
pTmp[i] = -ki;
for (j = 0; j < i; j++)
pTmp[j] = pLP[j] - ki * pLP[i - j - 1];
for (j = 0; j <= i; j++)
pLP[j] = pTmp[j];
}
return E;
}
void Lpc2Cepstrum(int n, const BaseFloat *pLPC, BaseFloat *pCepst) {
for (int32 i = 0; i < n; i++) {
double sum = 0.0;
int j;
for (j = 0; j < i; j++) {
sum += static_cast<BaseFloat>(i - j) * pLPC[j] * pCepst[i - j - 1];
}
pCepst[i] = -pLPC[i] - sum / static_cast<BaseFloat>(i + 1);
}
}
void GetEqualLoudnessVector(const MelBanks &mel_banks,
Vector<BaseFloat> *ans) {
int32 n = mel_banks.NumBins();
// Central frequency of each mel bin.
const Vector<BaseFloat> &f0 = mel_banks.GetCenterFreqs();
ans->Resize(n);
for (int32 i = 0; i < n; i++) {
BaseFloat fsq = f0(i) * f0(i);
BaseFloat fsub = fsq / (fsq + 1.6e5);
(*ans)(i) = fsub * fsub * ((fsq + 1.44e6) / (fsq + 9.61e6));
}
}
// Compute LP coefficients from autocorrelation coefficients.
BaseFloat ComputeLpc(const VectorBase<BaseFloat> &autocorr_in,
Vector<BaseFloat> *lpc_out) {
int32 n = autocorr_in.Dim() - 1;
KALDI_ASSERT(lpc_out->Dim() == n);
Vector<BaseFloat> tmp(n);
BaseFloat ans = Durbin(n, autocorr_in.Data(),
lpc_out->Data(),
tmp.Data());
if (ans <= 0.0)
KALDI_WARN << "Zero energy in LPC computation";
return -Log(1.0 / ans); // forms the C0 value
}
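// A minimal usage sketch (illustrative; "autocorr" is assumed to hold the
// p + 1 autocorrelation coefficients r[0..p] of one frame):
//
//   int32 p = autocorr.Dim() - 1;
//   Vector<BaseFloat> lpc(p), cepstrum(p);
//   BaseFloat c0 = ComputeLpc(autocorr, &lpc);     // residual log-energy -> C0.
//   Lpc2Cepstrum(p, lpc.Data(), cepstrum.Data());  // LPC -> LPC-cepstra.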
} // namespace kaldi
// feat/mel-computations.h
// Copyright 2009-2011 Phonexia s.r.o.; Microsoft Corporation
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_MEL_COMPUTATIONS_H_
#define KALDI_FEAT_MEL_COMPUTATIONS_H_
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <complex>
#include <utility>
#include <vector>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "matrix/matrix-lib.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
struct FrameExtractionOptions; // defined in feature-window.h
struct MelBanksOptions {
int32 num_bins; // e.g. 25; number of triangular bins
BaseFloat low_freq; // e.g. 20; lower frequency cutoff
BaseFloat high_freq; // an upper frequency cutoff; 0 -> no cutoff, negative
// ->added to the Nyquist frequency to get the cutoff.
BaseFloat vtln_low; // vtln lower cutoff of warping function.
BaseFloat vtln_high; // vtln upper cutoff of warping function: if negative, added
// to the Nyquist frequency to get the cutoff.
bool debug_mel;
// htk_mode is a "hidden" config, it does not show up on command line.
// Enables more exact compatibility with HTK, for testing purposes. Affects
// mel-energy flooring and reproduces a bug in HTK.
bool htk_mode;
explicit MelBanksOptions(int num_bins = 25)
: num_bins(num_bins), low_freq(20), high_freq(0), vtln_low(100),
vtln_high(-500), debug_mel(false), htk_mode(false) {}
void Register(OptionsItf *opts) {
opts->Register("num-mel-bins", &num_bins,
"Number of triangular mel-frequency bins");
opts->Register("low-freq", &low_freq,
"Low cutoff frequency for mel bins");
opts->Register("high-freq", &high_freq,
"High cutoff frequency for mel bins (if <= 0, offset from Nyquist)");
opts->Register("vtln-low", &vtln_low,
"Low inflection point in piecewise linear VTLN warping function");
opts->Register("vtln-high", &vtln_high,
"High inflection point in piecewise linear VTLN warping function"
" (if negative, offset from high-mel-freq");
opts->Register("debug-mel", &debug_mel,
"Print out debugging information for mel bin computation");
}
};
class MelBanks {
public:
static inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f);
}
static inline BaseFloat MelScale(BaseFloat freq) {
return 1127.0f * logf (1.0f + freq / 700.0f);
}
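// Worked numbers for the two functions above (approximate):
// MelScale(1000.0f) = 1127 * log(1 + 1000/700) ~= 1000.0 mel, and
// InverseMelScale(MelScale(f)) recovers f, so the two are inverses
// up to rounding.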
static BaseFloat VtlnWarpFreq(BaseFloat vtln_low_cutoff,
BaseFloat vtln_high_cutoff, // discontinuities in warp func
BaseFloat low_freq,
BaseFloat high_freq, // upper+lower frequency cutoffs in
// the mel computation
BaseFloat vtln_warp_factor,
BaseFloat freq);
static BaseFloat VtlnWarpMelFreq(BaseFloat vtln_low_cutoff,
BaseFloat vtln_high_cutoff,
BaseFloat low_freq,
BaseFloat high_freq,
BaseFloat vtln_warp_factor,
BaseFloat mel_freq);
MelBanks(const MelBanksOptions &opts,
const FrameExtractionOptions &frame_opts,
BaseFloat vtln_warp_factor);
/// Compute Mel energies (note: not log energies).
/// At input, "fft_energies" contains the FFT energies (not log).
void Compute(const VectorBase<BaseFloat> &fft_energies,
VectorBase<BaseFloat> *mel_energies_out) const;
int32 NumBins() const { return bins_.size(); }
// returns vector of central freq of each bin; needed by plp code.
const Vector<BaseFloat> &GetCenterFreqs() const { return center_freqs_; }
const std::vector<std::pair<int32, Vector<BaseFloat> > >& GetBins() const {
return bins_;
}
// Copy constructor
MelBanks(const MelBanks &other);
private:
// Disallow assignment
MelBanks &operator = (const MelBanks &other);
// center frequencies of bins, numbered from 0 ... num_bins-1.
// Needed by GetCenterFreqs().
Vector<BaseFloat> center_freqs_;
// the "bins_" vector is a vector, one for each bin, of a pair:
// (the first nonzero fft-bin), (the vector of weights).
std::vector<std::pair<int32, Vector<BaseFloat> > > bins_;
bool debug_;
bool htk_mode_;
};
// Compute liftering coefficients (scaling on cepstral coeffs)
// coeffs are numbered slightly differently from HTK: the zeroth
// index is C0, which is not affected.
void ComputeLifterCoeffs(BaseFloat Q, VectorBase<BaseFloat> *coeffs);
// Durbin's recursion - converts autocorrelation coefficients to the LPC
// pTmp - temporary workspace [n]
// pAC - autocorrelation coefficients [n + 1]
// pLP - linear prediction coefficients [n] (predicted_sn = sum_1^P{a[i-1] * s[n-i]})
// F(z) = 1 / (1 - A(z)), 1 is not stored in the denominator
// Returns log energy of residual (I think)
BaseFloat Durbin(int n, const BaseFloat *pAC, BaseFloat *pLP, BaseFloat *pTmp);
// Compute LP coefficients from autocorrelation coefficients.
// Returns log energy of residual (I think)
BaseFloat ComputeLpc(const VectorBase<BaseFloat> &autocorr_in,
Vector<BaseFloat> *lpc_out);
void Lpc2Cepstrum(int n, const BaseFloat *pLPC, BaseFloat *pCepst);
void GetEqualLoudnessVector(const MelBanks &mel_banks,
Vector<BaseFloat> *ans);
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_MEL_COMPUTATIONS_H_
// feat/online-feature-itf.h
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_ONLINE_FEATURE_ITF_H_
#define KALDI_FEAT_ONLINE_FEATURE_ITF_H_ 1
#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"
namespace kaldi {
/// @ingroup Interfaces
/// @{
/**
OnlineFeatureInterface is an interface for online feature processing (it is
also usable in the offline setting, but currently we're not using it for
that). This is for use in the online2/ directory, and it supersedes the
interface in ../online/online-feat-input.h. We have a slightly different
model that puts more control in the hands of the calling thread, and won't
involve waiting on semaphores in the decoding thread.
This interface only specifies how the object *outputs* the features.
How it obtains the features, e.g. from a previous object or objects of type
OnlineFeatureInterface, is not specified in the interface and you will
likely define new constructors or methods in the derived type to do that.
You should appreciate that this interface is designed to allow random
access to features, as long as they are ready. That is, the user
can call GetFrame for any frame less than NumFramesReady(), and when
implementing a child class you must not make assumptions about the
order in which the user makes these calls.
*/
class OnlineFeatureInterface {
public:
virtual int32 Dim() const = 0; /// returns the feature dimension.
/// Returns the total number of frames, since the start of the utterance, that
/// are now available. In an online-decoding context, this will likely
/// increase with time as more data becomes available.
virtual int32 NumFramesReady() const = 0;
/// Returns true if this is the last frame. Frame indices are zero-based, so the
/// first frame is zero. IsLastFrame(-1) will return false, unless the file
/// is empty (which is a case that I'm not sure all the code will handle, so
/// be careful). This function may return false for some frame if
/// we haven't yet decided to terminate decoding, but later true if we decide
/// to terminate decoding. This function exists mainly to correctly handle
/// end effects in feature extraction, and is not a mechanism to determine how
/// many frames are in the decodable object (as it used to be, and for backward
/// compatibility, still is, in the Decodable interface).
virtual bool IsLastFrame(int32 frame) const = 0;
/// Gets the feature vector for this frame. Before calling this for a given
/// frame, it is assumed that you called NumFramesReady() and it returned a
/// number greater than "frame". Otherwise this call will likely crash with
/// an assert failure. This function is not declared const, in case there is
/// some kind of caching going on, but most of the time it shouldn't modify
/// the class.
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) = 0;
/// This is like GetFrame() but for a collection of frames. There is a
/// default implementation that just gets the frames one by one, but it
/// may be overridden for efficiency by child classes (since sometimes
/// it's more efficient to do things in a batch).
virtual void GetFrames(const std::vector<int32> &frames,
MatrixBase<BaseFloat> *feats) {
KALDI_ASSERT(static_cast<int32>(frames.size()) == feats->NumRows());
for (size_t i = 0; i < frames.size(); i++) {
SubVector<BaseFloat> feat(*feats, i);
GetFrame(frames[i], &feat);
}
}
// Returns frame shift in seconds. Helps to estimate duration from frame
// counts.
virtual BaseFloat FrameShiftInSeconds() const = 0;
/// Virtual destructor. Note: constructors that take another member of
/// type OnlineFeatureInterface are not expected to take ownership of
/// that pointer; the caller needs to keep track of that manually.
virtual ~OnlineFeatureInterface() { }
};
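/// A minimal consumption-loop sketch (illustrative; "feats" stands for any
/// OnlineFeatureInterface implementation):
/// \code
///   Vector<BaseFloat> frame(feats->Dim());
///   for (int32 t = 0; t < feats->NumFramesReady(); t++) {
///     feats->GetFrame(t, &frame);
///     // ... consume "frame" ...
///   }
/// \endcode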
/// Add a virtual class for "source" features such as MFCC or PLP or pitch
/// features.
class OnlineBaseFeature: public OnlineFeatureInterface {
public:
/// This would be called from the application, when you get more wave data.
/// Note: the sampling_rate is typically only provided so the code can assert
/// that it matches the sampling rate expected in the options.
virtual void AcceptWaveform(BaseFloat sampling_rate,
const VectorBase<BaseFloat> &waveform) = 0;
/// InputFinished() tells the class you won't be providing any
/// more waveform. This will help flush out the last few frames
/// of delta or LDA features (it will typically affect the return value
/// of IsLastFrame()).
virtual void InputFinished() = 0;
};
/// @}
} // namespace kaldi
#endif // KALDI_FEAT_ONLINE_FEATURE_ITF_H_
// feat/online-feature.cc
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 Yanqing Sun, Junjie Wang,
// Daniel Povey, Korbinian Riedhammer
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/online-feature.h"
#include "transform/cmvn.h"
namespace kaldi {
RecyclingVector::RecyclingVector(int items_to_hold):
items_to_hold_(items_to_hold == 0 ? -1 : items_to_hold),
first_available_index_(0) {
}
RecyclingVector::~RecyclingVector() {
for (auto *item : items_) {
delete item;
}
}
Vector<BaseFloat> *RecyclingVector::At(int index) const {
if (index < first_available_index_) {
KALDI_ERR << "Attempted to retrieve feature vector that was "
"already removed by the RecyclingVector (index = "
<< index << "; "
<< "first_available_index = " << first_available_index_ << "; "
<< "size = " << Size() << ")";
}
// 'at' does size checking.
return items_.at(index - first_available_index_);
}
void RecyclingVector::PushBack(Vector<BaseFloat> *item) {
if (items_.size() == items_to_hold_) {
delete items_.front();
items_.pop_front();
++first_available_index_;
}
items_.push_back(item);
}
int RecyclingVector::Size() const {
return first_available_index_ + items_.size();
}
template <class C>
void OnlineGenericBaseFeature<C>::GetFrame(int32 frame,
VectorBase<BaseFloat> *feat) {
feat->CopyFromVec(*(features_.At(frame)));
}
template <class C>
OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
const typename C::Options &opts):
computer_(opts), window_function_(computer_.GetFrameOptions()),
features_(opts.frame_opts.max_feature_vectors),
input_finished_(false), waveform_offset_(0) {
// RE the following assert: search for ONLINE_IVECTOR_LIMIT in
// online-ivector-feature.cc.
// Casting to uint32, an unsigned type, means that -1 would be treated
// as `very large`.
KALDI_ASSERT(static_cast<uint32>(opts.frame_opts.max_feature_vectors) > 200);
}
template <class C>
void OnlineGenericBaseFeature<C>::MaybeCreateResampler(
BaseFloat sampling_rate) {
BaseFloat expected_sampling_rate = computer_.GetFrameOptions().samp_freq;
if (resampler_ != nullptr) {
KALDI_ASSERT(resampler_->GetInputSamplingRate() == sampling_rate);
KALDI_ASSERT(resampler_->GetOutputSamplingRate() == expected_sampling_rate);
} else if (((sampling_rate < expected_sampling_rate) &&
computer_.GetFrameOptions().allow_downsample) ||
((sampling_rate > expected_sampling_rate) &&
computer_.GetFrameOptions().allow_upsample)) {
resampler_.reset(new LinearResample(
sampling_rate, expected_sampling_rate,
std::min(sampling_rate / 2, expected_sampling_rate / 2), 6));
} else if (sampling_rate != expected_sampling_rate) {
KALDI_ERR << "Sampling frequency mismatch, expected "
<< expected_sampling_rate << ", got " << sampling_rate
<< "\nPerhaps you want to use the options "
"--allow_{upsample,downsample}";
}
}
template <class C>
void OnlineGenericBaseFeature<C>::InputFinished() {
if (resampler_ != nullptr) {
// There may be a few samples left once we flush the resampler_ object, telling it
// that the file has finished. This should rarely make any difference.
Vector<BaseFloat> appended_wave;
Vector<BaseFloat> resampled_wave;
resampler_->Resample(appended_wave, true, &resampled_wave);
if (resampled_wave.Dim() != 0) {
appended_wave.Resize(waveform_remainder_.Dim() +
resampled_wave.Dim());
if (waveform_remainder_.Dim() != 0)
appended_wave.Range(0, waveform_remainder_.Dim())
.CopyFromVec(waveform_remainder_);
appended_wave.Range(waveform_remainder_.Dim(), resampled_wave.Dim())
.CopyFromVec(resampled_wave);
waveform_remainder_.Swap(&appended_wave);
}
}
input_finished_ = true;
ComputeFeatures();
}
template <class C>
void OnlineGenericBaseFeature<C>::AcceptWaveform(
BaseFloat sampling_rate, const VectorBase<BaseFloat> &original_waveform) {
if (original_waveform.Dim() == 0)
return; // Nothing to do.
if (input_finished_)
KALDI_ERR << "AcceptWaveform called after InputFinished() was called.";
Vector<BaseFloat> appended_wave;
Vector<BaseFloat> resampled_wave;
const VectorBase<BaseFloat> *waveform;
MaybeCreateResampler(sampling_rate);
if (resampler_ == nullptr) {
waveform = &original_waveform;
} else {
resampler_->Resample(original_waveform, false, &resampled_wave);
waveform = &resampled_wave;
}
appended_wave.Resize(waveform_remainder_.Dim() + waveform->Dim());
if (waveform_remainder_.Dim() != 0)
appended_wave.Range(0, waveform_remainder_.Dim())
.CopyFromVec(waveform_remainder_);
appended_wave.Range(waveform_remainder_.Dim(), waveform->Dim())
.CopyFromVec(*waveform);
waveform_remainder_.Swap(&appended_wave);
ComputeFeatures();
}
template <class C>
void OnlineGenericBaseFeature<C>::ComputeFeatures() {
const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
int64 num_samples_total = waveform_offset_ + waveform_remainder_.Dim();
int32 num_frames_old = features_.Size(),
num_frames_new = NumFrames(num_samples_total, frame_opts,
input_finished_);
KALDI_ASSERT(num_frames_new >= num_frames_old);
Vector<BaseFloat> window;
bool need_raw_log_energy = computer_.NeedRawLogEnergy();
for (int32 frame = num_frames_old; frame < num_frames_new; frame++) {
BaseFloat raw_log_energy = 0.0;
ExtractWindow(waveform_offset_, waveform_remainder_, frame,
frame_opts, window_function_, &window,
need_raw_log_energy ? &raw_log_energy : NULL);
Vector<BaseFloat> *this_feature = new Vector<BaseFloat>(computer_.Dim(),
kUndefined);
// note: this online feature-extraction code does not support VTLN.
BaseFloat vtln_warp = 1.0;
computer_.Compute(raw_log_energy, vtln_warp, &window, this_feature);
features_.PushBack(this_feature);
}
// OK, we will now discard any portion of the signal that will not be
// necessary to compute frames in the future.
int64 first_sample_of_next_frame = FirstSampleOfFrame(num_frames_new,
frame_opts);
int32 samples_to_discard = first_sample_of_next_frame - waveform_offset_;
if (samples_to_discard > 0) {
// discard the leftmost part of the waveform that we no longer need.
int32 new_num_samples = waveform_remainder_.Dim() - samples_to_discard;
if (new_num_samples <= 0) {
// odd, but we'll try to handle it.
waveform_offset_ += waveform_remainder_.Dim();
waveform_remainder_.Resize(0);
} else {
Vector<BaseFloat> new_remainder(new_num_samples);
new_remainder.CopyFromVec(waveform_remainder_.Range(samples_to_discard,
new_num_samples));
waveform_offset_ += samples_to_discard;
waveform_remainder_.Swap(&new_remainder);
}
}
}
// instantiate the templates defined here for MFCC, PLP and filterbank classes.
template class OnlineGenericBaseFeature<MfccComputer>;
template class OnlineGenericBaseFeature<PlpComputer>;
template class OnlineGenericBaseFeature<FbankComputer>;
OnlineCmvnState::OnlineCmvnState(const OnlineCmvnState &other):
speaker_cmvn_stats(other.speaker_cmvn_stats),
global_cmvn_stats(other.global_cmvn_stats),
frozen_state(other.frozen_state) { }
void OnlineCmvnState::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<OnlineCmvnState>"); // magic string.
WriteToken(os, binary, "<SpeakerCmvnStats>");
speaker_cmvn_stats.Write(os, binary);
WriteToken(os, binary, "<GlobalCmvnStats>");
global_cmvn_stats.Write(os, binary);
WriteToken(os, binary, "<FrozenState>");
frozen_state.Write(os, binary);
WriteToken(os, binary, "</OnlineCmvnState>");
}
void OnlineCmvnState::Read(std::istream &is, bool binary) {
ExpectToken(is, binary, "<OnlineCmvnState>"); // magic string.
ExpectToken(is, binary, "<SpeakerCmvnStats>");
speaker_cmvn_stats.Read(is, binary);
ExpectToken(is, binary, "<GlobalCmvnStats>");
global_cmvn_stats.Read(is, binary);
ExpectToken(is, binary, "<FrozenState>");
frozen_state.Read(is, binary);
ExpectToken(is, binary, "</OnlineCmvnState>");
}
OnlineCmvn::OnlineCmvn(const OnlineCmvnOptions &opts,
const OnlineCmvnState &cmvn_state,
OnlineFeatureInterface *src):
opts_(opts), temp_stats_(2, src->Dim() + 1),
temp_feats_(src->Dim()), temp_feats_dbl_(src->Dim()),
src_(src) {
SetState(cmvn_state);
if (!SplitStringToIntegers(opts.skip_dims, ":", false, &skip_dims_))
KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
<< "integers)";
}
OnlineCmvn::OnlineCmvn(const OnlineCmvnOptions &opts,
OnlineFeatureInterface *src):
opts_(opts), temp_stats_(2, src->Dim() + 1),
temp_feats_(src->Dim()), temp_feats_dbl_(src->Dim()),
src_(src) {
if (!SplitStringToIntegers(opts.skip_dims, ":", false, &skip_dims_))
KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
<< "integers)";
}
void OnlineCmvn::GetMostRecentCachedFrame(int32 frame,
int32 *cached_frame,
MatrixBase<double> *stats) {
KALDI_ASSERT(frame >= 0);
InitRingBufferIfNeeded();
// look for a cached frame on a previous frame as close as possible in time
// to "frame". Return if we get one.
for (int32 t = frame; t >= 0 && t >= frame - opts_.ring_buffer_size; t--) {
if (t % opts_.modulus == 0) {
// if this frame should be cached in cached_stats_modulo_, then
// we'll look there, and we won't go back any further in time.
break;
}
int32 index = t % opts_.ring_buffer_size;
if (cached_stats_ring_[index].first == t) {
*cached_frame = t;
stats->CopyFromMat(cached_stats_ring_[index].second);
return;
}
}
int32 n = frame / opts_.modulus;
if (n >= cached_stats_modulo_.size()) {
if (cached_stats_modulo_.size() == 0) {
*cached_frame = -1;
stats->SetZero();
return;
} else {
n = static_cast<int32>(cached_stats_modulo_.size() - 1);
}
}
*cached_frame = n * opts_.modulus;
KALDI_ASSERT(cached_stats_modulo_[n] != NULL);
stats->CopyFromMat(*(cached_stats_modulo_[n]));
}
// Initialize ring buffer for caching stats.
void OnlineCmvn::InitRingBufferIfNeeded() {
if (cached_stats_ring_.empty() && opts_.ring_buffer_size > 0) {
Matrix<double> temp(2, this->Dim() + 1);
cached_stats_ring_.resize(opts_.ring_buffer_size,
std::pair<int32, Matrix<double> >(-1, temp));
}
}
void OnlineCmvn::CacheFrame(int32 frame, const MatrixBase<double> &stats) {
KALDI_ASSERT(frame >= 0);
if (frame % opts_.modulus == 0) { // store in cached_stats_modulo_.
int32 n = frame / opts_.modulus;
if (n >= cached_stats_modulo_.size()) {
// The following assert restricts the order in which you can call
// CacheFrame. Fortunately the calling code always calls it in sequence,
// which it has to, because computing the current frame's stats requires
// the previous frame's stats.
KALDI_ASSERT(n == cached_stats_modulo_.size());
cached_stats_modulo_.push_back(new Matrix<double>(stats));
} else {
KALDI_WARN << "Did not expect to reach this part of code.";
// do what seems right, but we shouldn't get here.
cached_stats_modulo_[n]->CopyFromMat(stats);
}
} else { // store in the ring buffer.
InitRingBufferIfNeeded();
if (!cached_stats_ring_.empty()) {
int32 index = frame % cached_stats_ring_.size();
cached_stats_ring_[index].first = frame;
cached_stats_ring_[index].second.CopyFromMat(stats);
}
}
}
OnlineCmvn::~OnlineCmvn() {
for (size_t i = 0; i < cached_stats_modulo_.size(); i++)
delete cached_stats_modulo_[i];
cached_stats_modulo_.clear();
}
void OnlineCmvn::ComputeStatsForFrame(int32 frame,
MatrixBase<double> *stats_out) {
KALDI_ASSERT(frame >= 0 && frame < src_->NumFramesReady());
int32 dim = this->Dim(), cur_frame;
GetMostRecentCachedFrame(frame, &cur_frame, stats_out);
Vector<BaseFloat> &feats(temp_feats_);
Vector<double> &feats_dbl(temp_feats_dbl_);
while (cur_frame < frame) {
cur_frame++;
src_->GetFrame(cur_frame, &feats);
feats_dbl.CopyFromVec(feats);
stats_out->Row(0).Range(0, dim).AddVec(1.0, feats_dbl);
if (opts_.normalize_variance)
stats_out->Row(1).Range(0, dim).AddVec2(1.0, feats_dbl);
(*stats_out)(0, dim) += 1.0;
// it's a sliding buffer; a frame at the back may be
// leaving the buffer so we have to subtract that.
int32 prev_frame = cur_frame - opts_.cmn_window;
if (prev_frame >= 0) {
// we need to subtract frame prev_frame from the stats.
src_->GetFrame(prev_frame, &feats);
feats_dbl.CopyFromVec(feats);
stats_out->Row(0).Range(0, dim).AddVec(-1.0, feats_dbl);
if (opts_.normalize_variance)
stats_out->Row(1).Range(0, dim).AddVec2(-1.0, feats_dbl);
(*stats_out)(0, dim) -= 1.0;
}
CacheFrame(cur_frame, (*stats_out));
}
}
// static
void OnlineCmvn::SmoothOnlineCmvnStats(const MatrixBase<double> &speaker_stats,
const MatrixBase<double> &global_stats,
const OnlineCmvnOptions &opts,
MatrixBase<double> *stats) {
if (speaker_stats.NumRows() == 2 && !opts.normalize_variance) {
// this is just for efficiency: don't operate on the variance if it's not
// needed.
int32 cols = speaker_stats.NumCols(); // dim + 1
SubMatrix<double> stats_temp(*stats, 0, 1, 0, cols);
SmoothOnlineCmvnStats(speaker_stats.RowRange(0, 1),
global_stats.RowRange(0, 1),
opts, &stats_temp);
return;
}
int32 dim = stats->NumCols() - 1;
double cur_count = (*stats)(0, dim);
// If count exceeded cmn_window it would be an error in how "window_stats"
// was accumulated.
KALDI_ASSERT(cur_count <= 1.001 * opts.cmn_window);
if (cur_count >= opts.cmn_window)
return;
if (speaker_stats.NumRows() != 0) { // if we have speaker stats..
double count_from_speaker = opts.cmn_window - cur_count,
speaker_count = speaker_stats(0, dim);
if (count_from_speaker > opts.speaker_frames)
count_from_speaker = opts.speaker_frames;
if (count_from_speaker > speaker_count)
count_from_speaker = speaker_count;
if (count_from_speaker > 0.0)
stats->AddMat(count_from_speaker / speaker_count,
speaker_stats);
cur_count = (*stats)(0, dim);
}
if (cur_count >= opts.cmn_window)
return;
if (global_stats.NumRows() != 0) {
double count_from_global = opts.cmn_window - cur_count,
global_count = global_stats(0, dim);
KALDI_ASSERT(global_count > 0.0);
if (count_from_global > opts.global_frames)
count_from_global = opts.global_frames;
if (count_from_global > 0.0)
stats->AddMat(count_from_global / global_count,
global_stats);
} else {
KALDI_ERR << "Global CMN stats are required";
}
}
void OnlineCmvn::GetFrame(int32 frame,
VectorBase<BaseFloat> *feat) {
src_->GetFrame(frame, feat);
KALDI_ASSERT(feat->Dim() == this->Dim());
int32 dim = feat->Dim();
Matrix<double> &stats(temp_stats_);
stats.Resize(2, dim + 1, kUndefined); // Will do nothing if size was correct.
if (frozen_state_.NumRows() != 0) { // the CMVN state has been frozen.
stats.CopyFromMat(frozen_state_);
} else {
// first get the raw CMVN stats (this involves caching..)
this->ComputeStatsForFrame(frame, &stats);
// now smooth them.
SmoothOnlineCmvnStats(orig_state_.speaker_cmvn_stats,
orig_state_.global_cmvn_stats,
opts_,
&stats);
}
if (!skip_dims_.empty())
FakeStatsForSomeDims(skip_dims_, &stats);
// The function ApplyCmvn, declared in ../transform/cmvn.h, takes a
// matrix, so wrap the feature vector as a one-row matrix to pass to it:
// 1 row; num-cols == dim; stride == dim.
SubMatrix<BaseFloat> feat_mat(feat->Data(), 1, dim, dim);
if (opts_.normalize_mean)
ApplyCmvn(stats, opts_.normalize_variance, &feat_mat);
else
KALDI_ASSERT(!opts_.normalize_variance);
}
void OnlineCmvn::Freeze(int32 cur_frame) {
int32 dim = this->Dim();
Matrix<double> stats(2, dim + 1);
// get the raw CMVN stats
this->ComputeStatsForFrame(cur_frame, &stats);
// now smooth them.
SmoothOnlineCmvnStats(orig_state_.speaker_cmvn_stats,
orig_state_.global_cmvn_stats,
opts_,
&stats);
this->frozen_state_ = stats;
}
void OnlineCmvn::GetState(int32 cur_frame,
OnlineCmvnState *state_out) {
*state_out = this->orig_state_;
{ // This block updates state_out->speaker_cmvn_stats
int32 dim = this->Dim();
if (state_out->speaker_cmvn_stats.NumRows() == 0)
state_out->speaker_cmvn_stats.Resize(2, dim + 1);
Vector<BaseFloat> feat(dim);
Vector<double> feat_dbl(dim);
for (int32 t = 0; t <= cur_frame; t++) {
src_->GetFrame(t, &feat);
feat_dbl.CopyFromVec(feat);
state_out->speaker_cmvn_stats(0, dim) += 1.0;
state_out->speaker_cmvn_stats.Row(0).Range(0, dim).AddVec(1.0, feat_dbl);
state_out->speaker_cmvn_stats.Row(1).Range(0, dim).AddVec2(1.0, feat_dbl);
}
}
// Store any frozen state (the effect of the user possibly
// having called Freeze()).
state_out->frozen_state = frozen_state_;
}
void OnlineCmvn::SetState(const OnlineCmvnState &cmvn_state) {
KALDI_ASSERT(cached_stats_modulo_.empty() &&
"You cannot call SetState() after processing data.");
orig_state_ = cmvn_state;
frozen_state_ = cmvn_state.frozen_state;
}
int32 OnlineSpliceFrames::NumFramesReady() const {
int32 num_frames = src_->NumFramesReady();
if (num_frames > 0 && src_->IsLastFrame(num_frames - 1))
return num_frames;
else
return std::max<int32>(0, num_frames - right_context_);
}
void OnlineSpliceFrames::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
KALDI_ASSERT(left_context_ >= 0 && right_context_ >= 0);
KALDI_ASSERT(frame >= 0 && frame < NumFramesReady());
int32 dim_in = src_->Dim();
KALDI_ASSERT(feat->Dim() == dim_in * (1 + left_context_ + right_context_));
int32 T = src_->NumFramesReady();
for (int32 t2 = frame - left_context_; t2 <= frame + right_context_; t2++) {
int32 t2_limited = t2;
if (t2_limited < 0) t2_limited = 0;
if (t2_limited >= T) t2_limited = T - 1;
int32 n = t2 - (frame - left_context_); // 0 for left-most frame,
// increases to the right.
SubVector<BaseFloat> part(*feat, n * dim_in, dim_in);
src_->GetFrame(t2_limited, &part);
}
}
OnlineTransform::OnlineTransform(const MatrixBase<BaseFloat> &transform,
OnlineFeatureInterface *src):
src_(src) {
int32 src_dim = src_->Dim();
if (transform.NumCols() == src_dim) { // Linear transform
linear_term_ = transform;
offset_.Resize(transform.NumRows()); // Resize() will zero it.
} else if (transform.NumCols() == src_dim + 1) { // Affine transform
linear_term_ = transform.Range(0, transform.NumRows(), 0, src_dim);
offset_.Resize(transform.NumRows());
offset_.CopyColFromMat(transform, src_dim);
} else {
KALDI_ERR << "Dimension mismatch: source features have dimension "
<< src_dim << " and LDA #cols is " << transform.NumCols();
}
}
void OnlineTransform::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
Vector<BaseFloat> input_feat(linear_term_.NumCols());
src_->GetFrame(frame, &input_feat);
feat->CopyFromVec(offset_);
feat->AddMatVec(1.0, linear_term_, kNoTrans, input_feat, 1.0);
}
void OnlineTransform::GetFrames(
const std::vector<int32> &frames, MatrixBase<BaseFloat> *feats) {
KALDI_ASSERT(static_cast<int32>(frames.size()) == feats->NumRows());
int32 num_frames = feats->NumRows(),
input_dim = linear_term_.NumCols();
Matrix<BaseFloat> input_feats(num_frames, input_dim, kUndefined);
src_->GetFrames(frames, &input_feats);
feats->CopyRowsFromVec(offset_);
feats->AddMatMat(1.0, input_feats, kNoTrans, linear_term_, kTrans, 1.0);
}
int32 OnlineDeltaFeature::Dim() const {
int32 src_dim = src_->Dim();
return src_dim * (1 + opts_.order);
}
int32 OnlineDeltaFeature::NumFramesReady() const {
int32 num_frames = src_->NumFramesReady(),
context = opts_.order * opts_.window;
// "context" is the number of frames on the left or (more relevant
// here) right which we need in order to produce the output.
if (num_frames > 0 && src_->IsLastFrame(num_frames-1))
return num_frames;
else
return std::max<int32>(0, num_frames - context);
}
void OnlineDeltaFeature::GetFrame(int32 frame,
VectorBase<BaseFloat> *feat) {
KALDI_ASSERT(frame >= 0 && frame < NumFramesReady());
KALDI_ASSERT(feat->Dim() == Dim());
// We'll produce a temporary matrix containing the features we want to
// compute deltas on, but truncated to the necessary context.
int32 context = opts_.order * opts_.window;
int32 left_frame = frame - context,
right_frame = frame + context,
src_frames_ready = src_->NumFramesReady();
if (left_frame < 0) left_frame = 0;
if (right_frame >= src_frames_ready)
right_frame = src_frames_ready - 1;
KALDI_ASSERT(right_frame >= left_frame);
int32 temp_num_frames = right_frame + 1 - left_frame,
src_dim = src_->Dim();
Matrix<BaseFloat> temp_src(temp_num_frames, src_dim);
for (int32 t = left_frame; t <= right_frame; t++) {
SubVector<BaseFloat> temp_row(temp_src, t - left_frame);
src_->GetFrame(t, &temp_row);
}
int32 temp_t = frame - left_frame; // temp_t is the offset of frame "frame"
// within temp_src
delta_features_.Process(temp_src, temp_t, feat);
}
OnlineDeltaFeature::OnlineDeltaFeature(const DeltaFeaturesOptions &opts,
OnlineFeatureInterface *src):
src_(src), opts_(opts), delta_features_(opts) { }
void OnlineCacheFeature::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
KALDI_ASSERT(frame >= 0);
if (static_cast<size_t>(frame) < cache_.size() && cache_[frame] != NULL) {
feat->CopyFromVec(*(cache_[frame]));
} else {
if (static_cast<size_t>(frame) >= cache_.size())
cache_.resize(frame + 1, NULL);
int32 dim = this->Dim();
cache_[frame] = new Vector<BaseFloat>(dim);
// The following call will crash if frame "frame" is not ready.
src_->GetFrame(frame, cache_[frame]);
feat->CopyFromVec(*(cache_[frame]));
}
}
void OnlineCacheFeature::GetFrames(
const std::vector<int32> &frames, MatrixBase<BaseFloat> *feats) {
int32 num_frames = frames.size();
// non_cached_frames will be the subset of 't' values in 'frames' which were
// not previously cached, which we therefore need to get from src_.
std::vector<int32> non_cached_frames;
// 'non_cached_indexes' stores, for each entry of 'non_cached_frames',
// the index 'i' into 'frames' that it came from.
std::vector<int32> non_cached_indexes;
non_cached_frames.reserve(frames.size());
non_cached_indexes.reserve(frames.size());
for (int32 i = 0; i < num_frames; i++) {
int32 t = frames[i];
if (static_cast<size_t>(t) < cache_.size() && cache_[t] != NULL) {
feats->Row(i).CopyFromVec(*(cache_[t]));
} else {
non_cached_frames.push_back(t);
non_cached_indexes.push_back(i);
}
}
if (non_cached_frames.empty())
return;
int32 num_non_cached_frames = non_cached_frames.size(),
dim = this->Dim();
Matrix<BaseFloat> non_cached_feats(num_non_cached_frames, dim,
kUndefined);
src_->GetFrames(non_cached_frames, &non_cached_feats);
for (int32 i = 0; i < num_non_cached_frames; i++) {
int32 t = non_cached_frames[i];
if (static_cast<size_t>(t) < cache_.size() && cache_[t] != NULL) {
// We can reach this point due to repeat indexes in 'non_cached_frames'.
feats->Row(non_cached_indexes[i]).CopyFromVec(*(cache_[t]));
} else {
SubVector<BaseFloat> this_feat(non_cached_feats, i);
feats->Row(non_cached_indexes[i]).CopyFromVec(this_feat);
if (static_cast<size_t>(t) >= cache_.size())
cache_.resize(t + 1, NULL);
cache_[t] = new Vector<BaseFloat>(this_feat);
}
}
}
void OnlineCacheFeature::ClearCache() {
for (size_t i = 0; i < cache_.size(); i++)
delete cache_[i];
cache_.resize(0);
}
void OnlineAppendFeature::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
KALDI_ASSERT(feat->Dim() == Dim());
SubVector<BaseFloat> feat1(*feat, 0, src1_->Dim());
SubVector<BaseFloat> feat2(*feat, src1_->Dim(), src2_->Dim());
src1_->GetFrame(frame, &feat1);
src2_->GetFrame(frame, &feat2);
}
} // namespace kaldi
// feat/online-feature.h
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 Yanqing Sun, Junjie Wang,
// Daniel Povey, Korbinian Riedhammer
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_ONLINE_FEATURE_H_
#define KALDI_FEAT_ONLINE_FEATURE_H_
#include <string>
#include <vector>
#include <deque>
#include "matrix/matrix-lib.h"
#include "util/common-utils.h"
#include "base/kaldi-error.h"
#include "feat/feature-functions.h"
#include "feat/feature-mfcc.h"
#include "feat/feature-plp.h"
#include "feat/feature-fbank.h"
#include "feat/online-feature-itf.h"
namespace kaldi {
/// @addtogroup onlinefeat OnlineFeatureExtraction
/// @{
/// This class serves as storage for feature vectors, with an option to limit
/// memory usage by removing old elements. The indices of deleted frames are
/// "remembered", so that regardless of the items_to_hold setting, the user
/// always addresses frames as if no deletion had been performed.
/// This is useful when processing very long recordings, whose features would
/// otherwise eventually exhaust memory if they were never removed.
class RecyclingVector {
public:
/// By default it does not remove any elements.
RecyclingVector(int items_to_hold = -1);
/// The ownership is being retained by this collection - do not delete the item.
Vector<BaseFloat> *At(int index) const;
/// The ownership of the item is passed to this collection - do not delete the item.
void PushBack(Vector<BaseFloat> *item);
/// This method returns the size as if no "recycling" had happened,
/// i.e. equivalent to the number of times the PushBack method has been called.
int Size() const;
~RecyclingVector();
private:
std::deque<Vector<BaseFloat>*> items_;
int items_to_hold_;
int first_available_index_;
};
/// This is a templated class for online feature extraction;
/// it's templated on a class like MfccComputer or PlpComputer
/// that does the basic feature extraction.
template<class C>
class OnlineGenericBaseFeature: public OnlineBaseFeature {
public:
//
// First, functions that are present in the interface:
//
virtual int32 Dim() const { return computer_.Dim(); }
// Note: IsLastFrame() will only ever return true if you have called
// InputFinished() (and this frame is the last frame).
virtual bool IsLastFrame(int32 frame) const {
return input_finished_ && frame == NumFramesReady() - 1;
}
virtual BaseFloat FrameShiftInSeconds() const {
return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
}
virtual int32 NumFramesReady() const { return features_.Size(); }
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
// Next, functions that are not in the interface.
// Constructor from options class
explicit OnlineGenericBaseFeature(const typename C::Options &opts);
// This would be called from the application, when you get
// more wave data. Note: the sampling_rate is only provided so
// the code can assert that it matches the sampling rate
// expected in the options.
virtual void AcceptWaveform(BaseFloat sampling_rate,
const VectorBase<BaseFloat> &waveform);
// InputFinished() tells the class you won't be providing any
// more waveform. This will help flush out the last frame or two
// of features, in the case where snip-edges == false; it also
// affects the return value of IsLastFrame().
virtual void InputFinished();
private:
// This function computes any additional feature frames that it is possible to
// compute from 'waveform_remainder_', which at this point may contain more
// than just a remainder-sized quantity (because AcceptWaveform() appends to
// waveform_remainder_ before calling this function). It adds these feature
// frames to features_, and shifts off any now-unneeded samples of input from
// waveform_remainder_ while incrementing waveform_offset_ by the same amount.
void ComputeFeatures();
void MaybeCreateResampler(BaseFloat sampling_rate);
C computer_; // class that does the MFCC or PLP or filterbank computation
// resampler in cases when the input sampling frequency is not equal to
// the expected sampling rate
std::unique_ptr<LinearResample> resampler_;
FeatureWindowFunction window_function_;
// features_ is the Mfcc or Plp or Fbank features that we have already computed.
RecyclingVector features_;
// True if the user has called "InputFinished()"
bool input_finished_;
// The sampling frequency, extracted from the config. Should
// match that of the supplied waveform.
BaseFloat sampling_frequency_;
// waveform_offset_ is the number of samples of waveform that we have
// already discarded, i.e. that were prior to 'waveform_remainder_'.
int64 waveform_offset_;
// waveform_remainder_ is a short piece of waveform that we may need to keep
// after extracting all the whole frames we can (whatever length of feature
// will be required for the next phase of computation).
Vector<BaseFloat> waveform_remainder_;
};
typedef OnlineGenericBaseFeature<MfccComputer> OnlineMfcc;
typedef OnlineGenericBaseFeature<PlpComputer> OnlinePlp;
typedef OnlineGenericBaseFeature<FbankComputer> OnlineFbank;
/// This class takes a Matrix<BaseFloat> and wraps it as an
/// OnlineFeatureInterface: this can be useful where some earlier stage of
/// feature processing has been done offline but you want to use part of the
/// online pipeline.
class OnlineMatrixFeature: public OnlineFeatureInterface {
public:
/// Caution: this class maintains the const reference from the constructor, so
/// don't let it go out of scope while this object exists.
explicit OnlineMatrixFeature(const MatrixBase<BaseFloat> &mat): mat_(mat) { }
virtual int32 Dim() const { return mat_.NumCols(); }
virtual BaseFloat FrameShiftInSeconds() const {
return 0.01f;
}
virtual int32 NumFramesReady() const { return mat_.NumRows(); }
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
feat->CopyFromVec(mat_.Row(frame));
}
virtual bool IsLastFrame(int32 frame) const {
return (frame + 1 == mat_.NumRows());
}
private:
const MatrixBase<BaseFloat> &mat_;
};
// Note the similarity with SlidingWindowCmnOptions, but there
// are also differences. One which doesn't appear in the config
// itself, because it's a difference between the setups, is that
// in OnlineCmvn we carry over data from the previous utterance,
// or, if no previous utterance is available, from global stats,
// or, if previous utterances are available but the total amount
// of data is less than "speaker_frames", we pad with up to
// "global_frames" frames from the global stats.
struct OnlineCmvnOptions {
int32 cmn_window;
int32 speaker_frames; // must be <= cmn_window
int32 global_frames; // must be <= speaker_frames.
bool normalize_mean; // Must be true if normalize_variance==true.
bool normalize_variance;
int32 modulus; // not configurable from command line, relates to how the
// class computes the cmvn internally. smaller->more
// time-efficient but less memory-efficient. Must be >= 1.
int32 ring_buffer_size; // not configurable from command line; size of ring
// buffer used for caching CMVN stats. Must be >=
// modulus.
std::string skip_dims; // Colon-separated list of dimensions to skip normalization
// of, e.g. 13:14:15.
OnlineCmvnOptions():
cmn_window(600),
speaker_frames(600),
global_frames(200),
normalize_mean(true),
normalize_variance(false),
modulus(20),
ring_buffer_size(20),
skip_dims("") { }
void Check() const {
KALDI_ASSERT(speaker_frames <= cmn_window && global_frames <= speaker_frames
&& modulus > 0);
}
void Register(ParseOptions *po) {
po->Register("cmn-window", &cmn_window, "Number of frames of sliding "
"context for cepstral mean normalization.");
po->Register("global-frames", &global_frames, "Number of frames of "
"global-average cepstral mean normalization stats to use for "
"first utterance of a speaker");
po->Register("speaker-frames", &speaker_frames, "Number of frames of "
"previous utterance(s) from this speaker to use in cepstral "
"mean normalization");
// we name the config string "norm-vars" for compatibility with
// ../featbin/apply-cmvn.cc
po->Register("norm-vars", &normalize_variance, "If true, do "
"cepstral variance normalization in addition to cepstral mean "
"normalization ");
po->Register("norm-means", &normalize_mean, "If true, do mean normalization "
"(note: you cannot normalize the variance but not the mean)");
po->Register("skip-dims", &skip_dims, "Dimensions to skip normalization of "
"(colon-separated list of integers)");}
};
/** Struct OnlineCmvnState stores the state of CMVN adaptation between
utterances (but not the state of the computation within an utterance). It
stores the global CMVN stats and the stats of the current speaker (if we
have seen previous utterances for this speaker), and possibly will have a
member "frozen_state": if the user has called the function Freeze() of class
OnlineCmvn, to fix the CMVN so we can estimate fMLLR on top of the fixed
value of cmvn. If nonempty, "frozen_state" will reflect how we were
normalizing the mean and (if applicable) variance at the time when that
function was called.
*/
struct OnlineCmvnState {
// The following is the total CMVN stats for this speaker (up till now),
// in the same format as global_cmvn_stats below.
Matrix<double> speaker_cmvn_stats;
// The following is the global CMVN stats, in the usual
// format, of dimension 2 x (dim+1), as [ sum-stats count
// sum-squared-stats 0 ]
Matrix<double> global_cmvn_stats;
// If nonempty, contains CMVN stats representing the "frozen" state
// of CMVN that reflects how we were normalizing the data when the
// user called the Freeze() function in class OnlineCmvn.
Matrix<double> frozen_state;
OnlineCmvnState() { }
explicit OnlineCmvnState(const Matrix<double> &global_stats):
global_cmvn_stats(global_stats) { }
// Copy constructor
OnlineCmvnState(const OnlineCmvnState &other);
void Write(std::ostream &os, bool binary) const;
void Read(std::istream &is, bool binary);
// Use the default assignment operator.
};
/**
This class does an online version of cepstral mean and [optionally]
variance normalization, but note that this is not equivalent to the offline
version. This
is necessarily so, as the offline computation involves looking into the
future. If you plan to use features normalized with this type of CMVN then
you need to train in a `matched' way, i.e. with the same type of features.
We normally only do so in the "online" GMM-based decoding, e.g. in
online2bin/online2-wav-gmm-latgen-faster.cc; see also the script
steps/online/prepare_online_decoding.sh and steps/online/decode.sh.
In the steady state (in the middle of a long utterance), this class
accumulates CMVN statistics from the previous "cmn_window" frames (default 600
frames, or 6 seconds), and uses these to normalize the mean and possibly
variance of the current frame.
The config variables "speaker_frames" and "global_frames" relate to what
happens at the beginning of the utterance when we have seen fewer than
"cmn_window" frames of context, and so might not have very good stats to
normalize with. Basically, we first augment any existing stats with up
to "speaker_frames" frames of stats from previous utterances of the current
speaker, and if this doesn't take us up to the required "cmn_window" frame
count, we further augment with up to "global_frames" frames of global
stats. The global stats are CMVN stats accumulated from training or testing
data, that give us a reasonable source of mean and variance for "typical"
data.
*/
class OnlineCmvn: public OnlineFeatureInterface {
public:
//
// First, functions that are present in the interface:
//
virtual int32 Dim() const { return src_->Dim(); }
virtual bool IsLastFrame(int32 frame) const {
return src_->IsLastFrame(frame);
}
virtual BaseFloat FrameShiftInSeconds() const {
return src_->FrameShiftInSeconds();
}
// The online cmvn does not introduce any additional latency.
virtual int32 NumFramesReady() const { return src_->NumFramesReady(); }
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
//
// Next, functions that are not in the interface.
//
/// Initializer that sets the cmvn state. If you don't have previous
/// utterances from the same speaker you are supposed to initialize the CMVN
/// state from some global CMVN stats, which you can get from summing all cmvn
/// stats you have in your training data using "sum-matrix". This just gives
/// it a reasonable starting point at the start of the file.
/// If you do have previous utterances from the same speaker or at least a
/// similar environment, you are supposed to initialize it by calling GetState
/// from the previous utterance.
OnlineCmvn(const OnlineCmvnOptions &opts,
const OnlineCmvnState &cmvn_state,
OnlineFeatureInterface *src);
/// Initializer that does not set the cmvn state:
/// after calling this, you should call SetState().
OnlineCmvn(const OnlineCmvnOptions &opts,
OnlineFeatureInterface *src);
// Outputs any state information from this utterance to "cmvn_state".
// The value of "cmvn_state" before the call does not matter: the output
// depends on the value of OnlineCmvnState the class was initialized
// with, the input feature values up to cur_frame, and the effects
// of the user possibly having called Freeze().
// If cur_frame is -1, it will just output the unmodified original
// state that was supplied to this object.
void GetState(int32 cur_frame,
OnlineCmvnState *cmvn_state);
// This function can be used to modify the state of the CMVN computation
// from outside, but must only be called before you have processed any data
// (otherwise it will crash). This "state" is really just the information
// that is propagated between utterances, not the state of the computation
// inside an utterance.
void SetState(const OnlineCmvnState &cmvn_state);
// This will freeze the CMVN to what it would have been if measured at
// frame "cur_frame", and will stop it from changing further. This also
// applies retroactively for this utterance, so if you call GetFrame() on
// previous frames, it will use the CMVN stats from cur_frame; and it
// applies in the future too, if you then call GetState() and use this
// state to initialize the next utterance's CMVN object.
void Freeze(int32 cur_frame);
virtual ~OnlineCmvn();
private:
/// Smooth the CMVN stats "stats" (which are stored in the normal format as a
/// 2 x (dim+1) matrix), by possibly adding some stats from "global_stats"
/// and/or "speaker_stats", controlled by the config. The best way to
/// understand the smoothing rule we use is just to look at the code.
static void SmoothOnlineCmvnStats(const MatrixBase<double> &speaker_stats,
const MatrixBase<double> &global_stats,
const OnlineCmvnOptions &opts,
MatrixBase<double> *stats);
/// Get the most recent cached frame of CMVN stats. [If no frames
/// were cached, sets up empty stats for frame zero and returns that].
void GetMostRecentCachedFrame(int32 frame,
int32 *cached_frame,
MatrixBase<double> *stats);
/// Cache this frame of stats.
void CacheFrame(int32 frame, const MatrixBase<double> &stats);
/// Initialize ring buffer for caching stats.
inline void InitRingBufferIfNeeded();
/// Computes the raw CMVN stats for this frame, making use of (and updating if
/// necessary) the cached statistics in cached_stats_modulo_ and
/// cached_stats_ring_. This means the (x, x^2, count) stats for the last
/// up to opts_.cmn_window frames.
void ComputeStatsForFrame(int32 frame,
MatrixBase<double> *stats);
OnlineCmvnOptions opts_;
std::vector<int32> skip_dims_; // Skip CMVN for these dimensions. Derived from opts_.
OnlineCmvnState orig_state_; // reflects the state before we saw this
// utterance.
Matrix<double> frozen_state_; // If the user called Freeze(), this variable
// will reflect the CMVN state that we froze
// at.
// The variable below reflects the raw (count, x, x^2) statistics of the
// input, computed every opts_.modulus frames. cached_stats_modulo_[n / opts_.modulus]
// contains the (count, x, x^2) statistics for the frames from
// std::max(0, n - opts_.cmn_window) through n.
std::vector<Matrix<double>*> cached_stats_modulo_;
// the variable below is a ring-buffer of cached stats. the int32 is the
// frame index.
std::vector<std::pair<int32, Matrix<double> > > cached_stats_ring_;
// Some temporary variables used inside functions of this class, which
// are kept here to avoid repeated reallocation.
Matrix<double> temp_stats_;
Vector<BaseFloat> temp_feats_;
Vector<double> temp_feats_dbl_;
OnlineFeatureInterface *src_; // Not owned here
};
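/// A minimal chaining sketch (illustrative): apply online CMVN on top of a
/// base feature stream ("mfcc" here) and carry the state to the speaker's
/// next utterance.
/// \code
///   OnlineCmvnOptions cmvn_opts;
///   OnlineCmvnState cmvn_state(global_cmvn_stats);  // stats from training data.
///   OnlineCmvn cmvn(cmvn_opts, cmvn_state, &mfcc);
///   // ... use "cmvn" as the feature source for decoding ...
///   cmvn.GetState(cur_frame, &cmvn_state);  // save for the next utterance.
/// \endcode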
struct OnlineSpliceOptions {
int32 left_context;
int32 right_context;
OnlineSpliceOptions(): left_context(4), right_context(4) { }
void Register(ParseOptions *po) {
po->Register("left-context", &left_context, "Left-context for frame "
"splicing prior to LDA");
po->Register("right-context", &right_context, "Right-context for frame "
"splicing prior to LDA");
}
};
class OnlineSpliceFrames: public OnlineFeatureInterface {
public:
//
// First, functions that are present in the interface:
//
virtual int32 Dim() const {
return src_->Dim() * (1 + left_context_ + right_context_);
}
virtual bool IsLastFrame(int32 frame) const {
return src_->IsLastFrame(frame);
}
virtual BaseFloat FrameShiftInSeconds() const {
return src_->FrameShiftInSeconds();
}
virtual int32 NumFramesReady() const;
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
//
// Next, functions that are not in the interface.
//
OnlineSpliceFrames(const OnlineSpliceOptions &opts,
OnlineFeatureInterface *src):
left_context_(opts.left_context), right_context_(opts.right_context),
src_(src) { }
private:
int32 left_context_;
int32 right_context_;
OnlineFeatureInterface *src_; // Not owned here
};
/// This online-feature class implements any affine or linear transform.
class OnlineTransform: public OnlineFeatureInterface {
public:
//
// First, functions that are present in the interface:
//
virtual int32 Dim() const { return offset_.Dim(); }
virtual bool IsLastFrame(int32 frame) const {
return src_->IsLastFrame(frame);
}
virtual BaseFloat FrameShiftInSeconds() const {
return src_->FrameShiftInSeconds();
}
virtual int32 NumFramesReady() const { return src_->NumFramesReady(); }
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
virtual void GetFrames(const std::vector<int32> &frames,
MatrixBase<BaseFloat> *feats);
//
// Next, functions that are not in the interface.
//
/// The transform can be a linear transform, or an affine transform
/// where the last column is the offset.
OnlineTransform(const MatrixBase<BaseFloat> &transform,
OnlineFeatureInterface *src);
private:
OnlineFeatureInterface *src_; // Not owned here
Matrix<BaseFloat> linear_term_;
Vector<BaseFloat> offset_;
};
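/// A minimal usage sketch (illustrative; "lda_rxfilename" is assumed to name
/// an LDA+MLLT or similar matrix on disk, with num-cols equal to the input
/// dim, or input dim + 1 for the affine case):
/// \code
///   Matrix<BaseFloat> lda_mat;
///   ReadKaldiObject(lda_rxfilename, &lda_mat);
///   OnlineTransform transform(lda_mat, &spliced_feats);
/// \endcode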
class OnlineDeltaFeature: public OnlineFeatureInterface {
public:
//
// First, functions that are present in the interface:
//
virtual int32 Dim() const;
virtual bool IsLastFrame(int32 frame) const {
return src_->IsLastFrame(frame);
}
virtual BaseFloat FrameShiftInSeconds() const {
return src_->FrameShiftInSeconds();
}
virtual int32 NumFramesReady() const;
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
//
// Next, functions that are not in the interface.
//
OnlineDeltaFeature(const DeltaFeaturesOptions &opts,
OnlineFeatureInterface *src);
private:
OnlineFeatureInterface *src_; // Not owned here
DeltaFeaturesOptions opts_;
DeltaFeatures delta_features_; // This class contains just a few
// coefficients.
};
/// This feature type can be used to cache its input, to avoid
/// repetition of computation in a multi-pass decoding context.
class OnlineCacheFeature: public OnlineFeatureInterface {
public:
virtual int32 Dim() const { return src_->Dim(); }
virtual bool IsLastFrame(int32 frame) const {
return src_->IsLastFrame(frame);
}
virtual BaseFloat FrameShiftInSeconds() const {
return src_->FrameShiftInSeconds();
}
virtual int32 NumFramesReady() const { return src_->NumFramesReady(); }
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
virtual void GetFrames(const std::vector<int32> &frames,
MatrixBase<BaseFloat> *feats);
virtual ~OnlineCacheFeature() { ClearCache(); }
// Things that are not in the shared interface:
void ClearCache(); // this should be called if you change the underlying
// features in some way.
explicit OnlineCacheFeature(OnlineFeatureInterface *src): src_(src) { }
private:
OnlineFeatureInterface *src_; // Not owned here
std::vector<Vector<BaseFloat>* > cache_;
};
/// This online-feature class implements the combination of two feature
/// streams (such as pitch and PLP) into one stream.
class OnlineAppendFeature: public OnlineFeatureInterface {
public:
virtual int32 Dim() const { return src1_->Dim() + src2_->Dim(); }
virtual bool IsLastFrame(int32 frame) const {
return (src1_->IsLastFrame(frame) || src2_->IsLastFrame(frame));
}
// We assume both sources have the same frame shift.
virtual BaseFloat FrameShiftInSeconds() const {
return src1_->FrameShiftInSeconds();
}
virtual int32 NumFramesReady() const {
return std::min(src1_->NumFramesReady(), src2_->NumFramesReady());
}
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
virtual ~OnlineAppendFeature() { }
OnlineAppendFeature(OnlineFeatureInterface *src1,
OnlineFeatureInterface *src2): src1_(src1), src2_(src2) { }
private:
OnlineFeatureInterface *src1_;
OnlineFeatureInterface *src2_;
};
/// @} End of "addtogroup onlinefeat"
} // namespace kaldi
#endif // KALDI_FEAT_ONLINE_FEATURE_H_
// feat/pitch-functions.cc
// Copyright 2013 Pegah Ghahremani
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// 2014 Yanqing Sun, Junjie Wang,
// Daniel Povey, Korbinian Riedhammer
// Xin Lei
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <limits>
#include "feat/feature-functions.h"
#include "feat/mel-computations.h"
#include "feat/online-feature.h"
#include "feat/pitch-functions.h"
#include "feat/resample.h"
#include "matrix/matrix-functions.h"
namespace kaldi {
/**
This function processes the NCCF n to a POV feature f by applying the formula
f = (1.0001 - n)^0.15 - 1.0
This is a nonlinear function designed to make the output reasonably Gaussian
distributed. Before doing this, the NCCF distribution is in the range [-1,
1] but has a strong peak just before 1.0, which this function smooths out.
*/
BaseFloat NccfToPovFeature(BaseFloat n) {
if (n > 1.0) {
n = 1.0;
} else if (n < -1.0) {
n = -1.0;
}
BaseFloat f = pow((1.0001 - n), 0.15) - 1.0;
KALDI_ASSERT(f - f == 0); // check for NaN,inf.
return f;
}
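// A worked sense of the formula above (illustrative; approximate values, not
// part of the original code): f(-1.0) = 2.0001^0.15 - 1 ~= +0.11,
// f(0.0) ~= 1.5e-5, and f(1.0) = 0.0001^0.15 - 1 ~= -0.75; so the feature
// lies roughly in [-0.75, +0.11], with strongly voiced frames (NCCF near 1)
// mapped to the most negative values and the peak near 1.0 spread out.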
/**
This function processes the NCCF n to a reasonably accurate probability
of voicing p by applying the formula:
n' = fabs(n)
r = -5.2 + 5.4 * exp(7.5 * (n' - 1.0)) +
4.8 * n' - 2.0 * exp(-10.0 * n') + 4.2 * exp(20.0 * (n' - 1.0));
p = 1.0 / (1 + exp(-1.0 * r));
How did we get this formula? We plotted the empirical log-prob-ratio of voicing
r = log( p[voiced] / p[not-voiced] )
[on the Keele database where voicing is marked], as a function of the NCCF at
the delay picked by our algorithm. This was done on intervals of the NCCF, so
we had enough statistics to get that ratio. The NCCF covers [-1, 1]; almost
all of the probability mass is on [0, 1] but the empirical POV seems fairly
symmetric with a minimum near zero, so we chose to make it a function of n' = fabs(n).
Then we manually tuned a function (the one you see above) that approximated
the log-prob-ratio of voicing fairly well as a function of the absolute-value
NCCF n'; however, it wasn't a very exact match since we were also trying to make
the transformed NCCF fairly Gaussian distributed, with a view to using it as
a feature-- an idea we later abandoned after a simpler formula worked better.
*/
BaseFloat NccfToPov(BaseFloat n) {
BaseFloat ndash = fabs(n);
if (ndash > 1.0) ndash = 1.0; // just in case it was slightly outside [-1, 1]
BaseFloat r = -5.2 + 5.4 * Exp(7.5 * (ndash - 1.0)) + 4.8 * ndash -
2.0 * Exp(-10.0 * ndash) + 4.2 * Exp(20.0 * (ndash - 1.0));
// r is the approximate log-prob-ratio of voicing, log(p/(1-p)).
BaseFloat p = 1.0 / (1 + Exp(-1.0 * r));
KALDI_ASSERT(p - p == 0); // Check for NaN/inf
return p;
}
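// Illustrative worked values for the formula above (approximate; not part of
// the original code): for n = 0.0, r ~= -7.2 and p ~= 7.5e-4 (confidently
// unvoiced); for n = 0.9, r ~= 2.2 and p ~= 0.90; for n = 1.0, r ~= 9.2 and
// p ~= 0.9999 (confidently voiced). The sigmoid thus maps the NCCF onto an
// approximately calibrated [0, 1] probability-of-voicing scale.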
/**
This function computes some dot products that are required
while computing the NCCF.
For each integer lag from first_lag to last_lag (inclusive), this function
outputs to (*inner_prod)(lag - first_lag) the dot-product
of a window starting at 0 with a window starting at
lag. All windows are of length nccf_window_size. It
outputs to (*norm_prod)(lag - first_lag), e1 * e2, where
e1 is the dot-product of the un-shifted window with itself,
and e2 is the dot-product of the window shifted by "lag"
with itself.
*/
void ComputeCorrelation(const VectorBase<BaseFloat> &wave,
int32 first_lag, int32 last_lag,
int32 nccf_window_size,
VectorBase<BaseFloat> *inner_prod,
VectorBase<BaseFloat> *norm_prod) {
Vector<BaseFloat> zero_mean_wave(wave);
// TODO: possibly fix this, the mean normalization is done in a strange way.
SubVector<BaseFloat> wave_part(wave, 0, nccf_window_size);
// subtract mean-frame from wave
zero_mean_wave.Add(-wave_part.Sum() / nccf_window_size);
BaseFloat e1, e2, sum;
SubVector<BaseFloat> sub_vec1(zero_mean_wave, 0, nccf_window_size);
e1 = VecVec(sub_vec1, sub_vec1);
for (int32 lag = first_lag; lag <= last_lag; lag++) {
SubVector<BaseFloat> sub_vec2(zero_mean_wave, lag, nccf_window_size);
e2 = VecVec(sub_vec2, sub_vec2);
sum = VecVec(sub_vec1, sub_vec2);
(*inner_prod)(lag - first_lag) = sum;
(*norm_prod)(lag - first_lag) = e1 * e2;
}
}
/**
Computes the NCCF as a fraction of the numerator term (a dot product between
two vectors) and a denominator term which equals sqrt(e1*e2 + nccf_ballast)
where e1 and e2 are both dot-products of bits of the wave with themselves,
and e1*e2 is supplied as "norm_prod". These quantities are computed by
"ComputeCorrelation".
*/
void ComputeNccf(const VectorBase<BaseFloat> &inner_prod,
const VectorBase<BaseFloat> &norm_prod,
BaseFloat nccf_ballast,
VectorBase<BaseFloat> *nccf_vec) {
KALDI_ASSERT(inner_prod.Dim() == norm_prod.Dim() &&
inner_prod.Dim() == nccf_vec->Dim());
for (int32 lag = 0; lag < inner_prod.Dim(); lag++) {
BaseFloat numerator = inner_prod(lag),
denominator = pow(norm_prod(lag) + nccf_ballast, 0.5),
nccf;
if (denominator != 0.0) {
nccf = numerator / denominator;
} else {
KALDI_ASSERT(numerator == 0.0);
nccf = 0.0;
}
KALDI_ASSERT(nccf < 1.01 && nccf > -1.01);
(*nccf_vec)(lag) = nccf;
}
}
/**
This function selects the lags at which we measure the NCCF: we need
to select lags from 1/max_f0 to 1/min_f0, in a geometric progression
with ratio 1 + d.
*/
void SelectLags(const PitchExtractionOptions &opts,
Vector<BaseFloat> *lags) {
// choose lags relative to acceptable pitch tolerance
BaseFloat min_lag = 1.0 / opts.max_f0, max_lag = 1.0 / opts.min_f0;
std::vector<BaseFloat> tmp_lags;
for (BaseFloat lag = min_lag; lag <= max_lag; lag *= 1.0 + opts.delta_pitch)
tmp_lags.push_back(lag);
lags->Resize(tmp_lags.size());
std::copy(tmp_lags.begin(), tmp_lags.end(), lags->Data());
}
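// Worked example (illustrative; assumes the default options, which to our
// knowledge are min_f0 = 50, max_f0 = 400, delta_pitch = 0.005): the lags run
// from 1/400 = 2.5 ms up to 1/50 = 20 ms in a geometric progression with
// ratio 1.005, giving floor(log(8) / log(1.005)) + 1 = 417 lags, i.e. one
// Viterbi state per lag.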
/**
This function computes the local-cost for the Viterbi computation,
see eq. (5) in the paper.
@param opts The options as provided by the user
@param nccf_pitch The nccf as computed for the pitch computation (with ballast).
@param lags The log-spaced lags at which nccf_pitch is sampled.
@param local_cost We output the local-cost to here.
*/
void ComputeLocalCost(const VectorBase<BaseFloat> &nccf_pitch,
const VectorBase<BaseFloat> &lags,
const PitchExtractionOptions &opts,
VectorBase<BaseFloat> *local_cost) {
// from the paper, eq. 5, local_cost = 1 - Phi(t,i)(1 - soft_min_f0 L_i)
// nccf is the nccf on this frame measured at the lags in "lags".
KALDI_ASSERT(nccf_pitch.Dim() == local_cost->Dim() &&
nccf_pitch.Dim() == lags.Dim());
local_cost->Set(1.0);
// add the term -Phi(t,i):
local_cost->AddVec(-1.0, nccf_pitch);
// add the term soft_min_f0 Phi(t,i) L_i
local_cost->AddVecVec(opts.soft_min_f0, lags, nccf_pitch, 1.0);
}
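// Worked instance of eq. (5) (illustrative; not part of the original code):
// expanding local_cost = 1 - Phi(t,i) * (1 - soft_min_f0 * L_i) gives
// 1 - Phi(t,i) + soft_min_f0 * L_i * Phi(t,i), which is exactly what the
// three vector operations above (Set, AddVec, AddVecVec) compute. E.g. with
// Phi = 0.8, L_i = 0.005 s and soft_min_f0 = 10:
// local_cost = 1 - 0.8 * (1 - 0.05) = 0.24, versus 1.0 for a completely
// unvoiced state (Phi = 0).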
// class PitchFrameInfo is used inside class OnlinePitchFeatureImpl.
// It stores the information we need to keep around for a single frame
// of the pitch computation.
class PitchFrameInfo {
public:
/// This function resizes the arrays for this object and updates the reference
/// counts for the previous object (by decrementing those reference counts
/// when we destroy a StateInfo object). A StateInfo object is considered to
/// be destroyed when we delete it, not when its reference count goes to
/// zero.
void Cleanup(PitchFrameInfo *prev_frame);
/// This function may be called for the last (most recent) PitchFrameInfo
/// object with the best state (obtained from the externally held
/// forward-costs). It traces back as far as needed to set the
/// cur_best_state_, and as it's going it sets the lag-index and pov_nccf in
/// pitch_pov_iter, which when it's called is an iterator to where to put the
/// info for the final state; the iterator will be decremented inside this
/// function.
void SetBestState(int32 best_state,
std::vector<std::pair<int32, BaseFloat> > &lag_nccf);
/// This function may be called on the last (most recent) PitchFrameInfo
/// object; it computes how many frames of latency there is because the
/// traceback has not yet settled on a single value for frames in the past.
/// It actually returns the minimum of max_latency and the actual latency,
/// which is an optimization because we won't care about latency past
/// a user-specified maximum latency.
int32 ComputeLatency(int32 max_latency);
/// This function updates the best-state info of the previous frame.
bool UpdatePreviousBestState(PitchFrameInfo *prev_frame);
/// This constructor is used for frame -1; it sets the costs to be all zeros,
/// the pov_nccf's to zero, and the backpointers to -1.
explicit PitchFrameInfo(int32 num_states);
/// This constructor is used for subsequent frames (not -1).
PitchFrameInfo(PitchFrameInfo *prev);
/// Record the nccf_pov value.
/// @param nccf_pov The nccf as computed for the POV computation (without ballast).
void SetNccfPov(const VectorBase<BaseFloat> &nccf_pov);
/// This constructor is used for frames apart from frame -1; the bulk of
/// the Viterbi computation takes place inside this constructor.
/// @param opts The options as provided by the user
/// @param nccf_pitch The nccf as computed for the pitch computation
/// (with ballast).
/// @param nccf_pov The nccf as computed for the POV computation
/// (without ballast).
/// @param lags The log-spaced lags at which nccf_pitch and
/// nccf_pov are sampled.
/// @param prev_frame_forward_cost The forward-cost vector for the
/// previous frame.
/// @param index_info A pointer to a temporary vector used by this function
/// @param this_forward_cost The forward-cost vector for this frame
/// (to be computed).
void ComputeBacktraces(const PitchExtractionOptions &opts,
const VectorBase<BaseFloat> &nccf_pitch,
const VectorBase<BaseFloat> &lags,
const VectorBase<BaseFloat> &prev_forward_cost,
std::vector<std::pair<int32, int32> > *index_info,
VectorBase<BaseFloat> *this_forward_cost);
private:
// struct StateInfo is the information we keep for a single one of the
// log-spaced lags, for a single frame. This is a state in the Viterbi
// computation.
struct StateInfo {
/// The state index on the previous frame that is the best preceding state
/// for this state.
int32 backpointer;
/// the version of the NCCF we keep for the POV computation (without the
/// ballast term).
BaseFloat pov_nccf;
StateInfo(): backpointer(0), pov_nccf(0.0) { }
};
std::vector<StateInfo> state_info_;
/// the state index of the first entry in "state_info"; this will initially be
/// zero, but after cleanup might be nonzero.
int32 state_offset_;
/// The current best state in the backtrace from the end.
int32 cur_best_state_;
/// The structure for the previous frame.
PitchFrameInfo *prev_info_;
};
// This constructor is used for frame -1; it sets the costs to be all zeros,
// the pov_nccf's to zero, and the backpointers to -1.
PitchFrameInfo::PitchFrameInfo(int32 num_states)
:state_info_(num_states), state_offset_(0),
cur_best_state_(-1), prev_info_(NULL) { }
bool pitch_use_naive_search = false; // This is used in unit-tests.
PitchFrameInfo::PitchFrameInfo(PitchFrameInfo *prev_info):
state_info_(prev_info->state_info_.size()), state_offset_(0),
cur_best_state_(-1), prev_info_(prev_info) { }
void PitchFrameInfo::SetNccfPov(const VectorBase<BaseFloat> &nccf_pov) {
int32 num_states = nccf_pov.Dim();
KALDI_ASSERT(num_states == state_info_.size());
for (int32 i = 0; i < num_states; i++)
state_info_[i].pov_nccf = nccf_pov(i);
}
void PitchFrameInfo::ComputeBacktraces(
const PitchExtractionOptions &opts,
const VectorBase<BaseFloat> &nccf_pitch,
const VectorBase<BaseFloat> &lags,
const VectorBase<BaseFloat> &prev_forward_cost_vec,
std::vector<std::pair<int32, int32> > *index_info,
VectorBase<BaseFloat> *this_forward_cost_vec) {
int32 num_states = nccf_pitch.Dim();
Vector<BaseFloat> local_cost(num_states, kUndefined);
ComputeLocalCost(nccf_pitch, lags, opts, &local_cost);
const BaseFloat delta_pitch_sq = pow(Log(1.0 + opts.delta_pitch), 2.0),
inter_frame_factor = delta_pitch_sq * opts.penalty_factor;
// index local_cost, prev_forward_cost and this_forward_cost using raw pointer
// indexing not operator (), since this is the very inner loop and a lot of
// time is taken here.
const BaseFloat *prev_forward_cost = prev_forward_cost_vec.Data();
BaseFloat *this_forward_cost = this_forward_cost_vec->Data();
if (index_info->empty())
index_info->resize(num_states);
// make it a reference for more concise indexing.
std::vector<std::pair<int32, int32> > &bounds = *index_info;
/* bounds[i].first will be a lower bound on the backpointer for state i,
bounds[i].second will be an upper bound on it. We progressively tighten
these bounds till we know the backpointers exactly.
*/
if (pitch_use_naive_search) {
// This branch is only taken in unit-testing code.
for (int32 i = 0; i < num_states; i++) {
BaseFloat best_cost = std::numeric_limits<BaseFloat>::infinity();
int32 best_j = -1;
for (int32 j = 0; j < num_states; j++) {
BaseFloat this_cost = (j - i) * (j - i) * inter_frame_factor
+ prev_forward_cost[j];
if (this_cost < best_cost) {
best_cost = this_cost;
best_j = j;
}
}
this_forward_cost[i] = best_cost;
state_info_[i].backpointer = best_j;
}
} else {
int32 last_backpointer = 0;
for (int32 i = 0; i < num_states; i++) {
int32 start_j = last_backpointer;
BaseFloat best_cost = (start_j - i) * (start_j - i) * inter_frame_factor
+ prev_forward_cost[start_j];
int32 best_j = start_j;
for (int32 j = start_j + 1; j < num_states; j++) {
BaseFloat this_cost = (j - i) * (j - i) * inter_frame_factor
+ prev_forward_cost[j];
if (this_cost < best_cost) {
best_cost = this_cost;
best_j = j;
} else { // as soon as the costs stop improving, we stop searching.
break; // this is a loose lower bound we're getting.
}
}
state_info_[i].backpointer = best_j;
this_forward_cost[i] = best_cost;
bounds[i].first = best_j; // this is now a lower bound on the
// backpointer.
bounds[i].second = num_states - 1; // we have no meaningful upper bound
// yet.
last_backpointer = best_j;
}
// We iterate, progressively refining the upper and lower bounds until they
// meet and we know that the resulting backtraces are optimal. Each
// iteration takes time linear in num_states. We won't normally iterate as
// far as num_states; normally we only do two iterations; when printing out
// the number of iterations, it's rarely more than that (once I saw seven
// iterations). Anyway, this part of the computation does not dominate.
for (int32 iter = 0; iter < num_states; iter++) {
bool changed = false;
if (iter % 2 == 0) { // go backwards through the states
last_backpointer = num_states - 1;
for (int32 i = num_states - 1; i >= 0; i--) {
int32 lower_bound = bounds[i].first,
upper_bound = std::min(last_backpointer, bounds[i].second);
if (upper_bound == lower_bound) {
last_backpointer = lower_bound;
continue;
}
BaseFloat best_cost = this_forward_cost[i];
int32 best_j = state_info_[i].backpointer, initial_best_j = best_j;
if (best_j == upper_bound) {
// if best_j already equals upper bound, don't bother tightening the
// upper bound, we'll tighten the lower bound when the time comes.
last_backpointer = best_j;
continue;
}
// Below, we have j > lower_bound + 1 because we know we've already
// evaluated lower_bound and lower_bound + 1 [via knowledge of
// this algorithm.]
for (int32 j = upper_bound; j > lower_bound + 1; j--) {
BaseFloat this_cost = (j - i) * (j - i) * inter_frame_factor
+ prev_forward_cost[j];
if (this_cost < best_cost) {
best_cost = this_cost;
best_j = j;
} else { // as soon as the costs stop improving, we stop searching,
// unless the best j is still lower than j, in which case
// we obviously need to keep moving.
if (best_j > j)
break; // this is a loose lower bound we're getting.
}
}
// our "best_j" is now an upper bound on the backpointer.
bounds[i].second = best_j;
if (best_j != initial_best_j) {
this_forward_cost[i] = best_cost;
state_info_[i].backpointer = best_j;
changed = true;
}
last_backpointer = best_j;
}
} else { // go forwards through the states.
last_backpointer = 0;
for (int32 i = 0; i < num_states; i++) {
int32 lower_bound = std::max(last_backpointer, bounds[i].first),
upper_bound = bounds[i].second;
if (upper_bound == lower_bound) {
last_backpointer = lower_bound;
continue;
}
BaseFloat best_cost = this_forward_cost[i];
int32 best_j = state_info_[i].backpointer, initial_best_j = best_j;
if (best_j == lower_bound) {
// if best_j already equals lower bound, we don't bother tightening
// the lower bound, we'll tighten the upper bound when the time
// comes.
last_backpointer = best_j;
continue;
}
// Below, we have j < upper_bound because we know we've already
// evaluated that point.
for (int32 j = lower_bound; j < upper_bound - 1; j++) {
BaseFloat this_cost = (j - i) * (j - i) * inter_frame_factor
+ prev_forward_cost[j];
if (this_cost < best_cost) {
best_cost = this_cost;
best_j = j;
} else { // as soon as the costs stop improving, we stop searching,
// unless the best j is still higher than j, in which case
// we obviously need to keep moving.
if (best_j < j)
break; // this is a loose lower bound we're getting.
}
}
// our "best_j" is now a lower bound on the backpointer.
bounds[i].first = best_j;
if (best_j != initial_best_j) {
this_forward_cost[i] = best_cost;
state_info_[i].backpointer = best_j;
changed = true;
}
last_backpointer = best_j;
}
}
if (!changed)
break;
}
}
// The next statement is needed due to RecomputeBacktraces: we have to
// invalidate the previously computed best-state info.
cur_best_state_ = -1;
this_forward_cost_vec->AddVec(1.0, local_cost);
}
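// A note on why the pruned search above is valid (our reading; not part of
// the original comments): the transition cost (j - i)^2 * inter_frame_factor
// satisfies the quadrangle (Monge) inequality, so the optimal backpointer
// best_j(i) is nondecreasing in i. The first pass can therefore start each
// search at the previous state's backpointer and stop as soon as costs stop
// improving, and the alternating backward/forward passes tighten each
// [lower_bound, upper_bound] interval until every backpointer is pinned
// down, giving roughly linear work per iteration instead of the naive
// O(num_states^2) search.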
void PitchFrameInfo::SetBestState(
int32 best_state,
std::vector<std::pair<int32, BaseFloat> > &lag_nccf) {
// This function would naturally be recursive, but we have coded this to avoid
// recursion, which would otherwise eat up the stack. Think of it as a static
// member function, except we do use "this" right at the beginning.
std::vector<std::pair<int32, BaseFloat> >::reverse_iterator iter = lag_nccf.rbegin();
PitchFrameInfo *this_info = this; // it will change in the loop.
while (this_info != NULL) {
PitchFrameInfo *prev_info = this_info->prev_info_;
if (best_state == this_info->cur_best_state_)
return; // no change
if (prev_info != NULL) // don't write anything for frame -1.
iter->first = best_state;
size_t state_info_index = best_state - this_info->state_offset_;
KALDI_ASSERT(state_info_index < this_info->state_info_.size());
this_info->cur_best_state_ = best_state;
best_state = this_info->state_info_[state_info_index].backpointer;
if (prev_info != NULL) // don't write anything for frame -1.
iter->second = this_info->state_info_[state_info_index].pov_nccf;
this_info = prev_info;
if (this_info != NULL) ++iter;
}
}
int32 PitchFrameInfo::ComputeLatency(int32 max_latency) {
if (max_latency <= 0) return 0;
int32 latency = 0;
// This function would naturally be recursive, but we have coded this to avoid
// recursion, which would otherwise eat up the stack. Think of it as a static
// member function, except we do use "this" right at the beginning.
// This function is called only on the most recent PitchFrameInfo object.
int32 num_states = state_info_.size();
int32 min_living_state = 0, max_living_state = num_states - 1;
PitchFrameInfo *this_info = this; // it will change in the loop.
for (; this_info != NULL && latency < max_latency;) {
int32 offset = this_info->state_offset_;
KALDI_ASSERT(min_living_state >= offset &&
max_living_state - offset < this_info->state_info_.size());
min_living_state =
this_info->state_info_[min_living_state - offset].backpointer;
max_living_state =
this_info->state_info_[max_living_state - offset].backpointer;
if (min_living_state == max_living_state) {
return latency;
}
this_info = this_info->prev_info_;
if (this_info != NULL) // avoid incrementing latency for frame -1,
latency++; // as it's not a real frame.
}
return latency;
}
void PitchFrameInfo::Cleanup(PitchFrameInfo *prev_frame) {
KALDI_ERR << "Cleanup not implemented.";
}
// struct NccfInfo is used to cache certain quantities that we need for online
// operation, for the first "recompute_frame" frames of the file (e.g. 300);
// after that many frames, or after the user calls InputFinished(), we redo the
// initial backtraces, as we'll then have a better estimate of the average signal
// energy.
struct NccfInfo {
Vector<BaseFloat> nccf_pitch_resampled; // resampled nccf_pitch
BaseFloat avg_norm_prod; // average value of e1 * e2.
BaseFloat mean_square_energy; // mean_square energy we used when computing the
// original ballast term for
// "nccf_pitch_resampled".
NccfInfo(BaseFloat avg_norm_prod,
BaseFloat mean_square_energy):
avg_norm_prod(avg_norm_prod),
mean_square_energy(mean_square_energy) { }
};
// We could inherit from OnlineBaseFeature as we have the same interface,
// but this would unnecessarily force a lot of our functions to be virtual.
class OnlinePitchFeatureImpl {
public:
explicit OnlinePitchFeatureImpl(const PitchExtractionOptions &opts);
int32 Dim() const { return 2; }
BaseFloat FrameShiftInSeconds() const;
int32 NumFramesReady() const;
bool IsLastFrame(int32 frame) const;
void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
void AcceptWaveform(BaseFloat sampling_rate,
const VectorBase<BaseFloat> &waveform);
void InputFinished();
~OnlinePitchFeatureImpl();
// Copy constructor; it can be used to obtain a new copy of this object,
// including any state from this utterance.
OnlinePitchFeatureImpl(const OnlinePitchFeatureImpl &other);
private:
/// This function works out from the signal how many frames are currently
/// available to process (this is called from inside AcceptWaveform()).
/// Note: the number of frames differs slightly from the number the
/// old pitch code gave.
/// Note: the number this returns depends on whether input_finished_ == true;
/// if it is, it will "force out" a final frame or two.
int32 NumFramesAvailable(int64 num_downsampled_samples, bool snip_edges) const;
/// This function extracts from the signal the samples numbered from
/// "sample_index" (numbered in the full downsampled signal, not just this
/// part), and of length equal to window->Dim(). It uses the data members
/// downsampled_samples_discarded_ and downsampled_signal_remainder_, as well
/// as the more recent part of the downsampled wave "downsampled_wave_part"
/// which is provided.
///
/// @param downsampled_wave_part One chunk of the downsampled wave,
/// starting from sample-index downsampled_samples_discarded_.
/// @param sample_index The desired starting sample index (measured from
/// the start of the whole signal, not just this part).
/// @param window The part of the signal is output to here.
void ExtractFrame(const VectorBase<BaseFloat> &downsampled_wave_part,
int64 sample_index,
VectorBase<BaseFloat> *window);
/// This function is called after we reach frame "recompute_frame", or when
/// InputFinished() is called, whichever comes sooner. It recomputes the
/// backtraces for frames zero through recompute_frame, if needed because the
/// average energy of the signal has changed, affecting the nccf ballast term.
/// It works out the average signal energy from
/// downsampled_samples_processed_, signal_sum_ and signal_sumsq_ (which, if
/// you see the calling code, might include more frames than just
/// "recompute_frame", it might include up to the end of the current chunk).
void RecomputeBacktraces();
/// This function updates downsampled_signal_remainder_,
/// downsampled_samples_processed_, signal_sum_ and signal_sumsq_; it's called
/// from AcceptWaveform().
void UpdateRemainder(const VectorBase<BaseFloat> &downsampled_wave_part);
// The following variables don't change throughout the lifetime
// of this object.
PitchExtractionOptions opts_;
// the first lag of the downsampled signal at which we measure NCCF
int32 nccf_first_lag_;
// the last lag of the downsampled signal at which we measure NCCF
int32 nccf_last_lag_;
// The log-spaced lags at which we will resample the NCCF
Vector<BaseFloat> lags_;
// This object is used to resample from evenly spaced to log-evenly-spaced
// nccf values. It's a pointer for convenience of initialization, so we don't
// have to use the initializer from the constructor.
ArbitraryResample *nccf_resampler_;
// The following objects may change during the lifetime of this object.
// This object is used to resample the signal.
LinearResample *signal_resampler_;
// frame_info_ is indexed by [frame-index + 1]. frame_info_[0] is an object
// that corresponds to frame -1, which is not a real frame.
std::vector<PitchFrameInfo*> frame_info_;
// nccf_info_ is indexed by frame-index, from frame 0 to at most
// opts_.recompute_frame - 1. It contains some information we'll
// need to recompute the tracebacks after getting a better estimate
// of the average energy of the signal.
std::vector<NccfInfo*> nccf_info_;
// Current number of frames which we can't output because Viterbi has not
// converged for them, or opts_.max_frames_latency if we have reached that
// limit.
int32 frames_latency_;
// The forward-cost at the current frame (the last frame in frame_info_);
// this has the same dimension as lags_. We normalize each time so
// the lowest cost is zero, for numerical accuracy and so we can use float.
Vector<BaseFloat> forward_cost_;
// stores the constant part of forward_cost_.
double forward_cost_remainder_;
// The resampled-lag index and the NCCF (as computed for POV, without ballast
// term) for each frame, as determined by Viterbi traceback from the best
// final state.
std::vector<std::pair<int32, BaseFloat> > lag_nccf_;
bool input_finished_;
/// sum-squared of previously processed parts of signal; used to get NCCF
/// ballast term. Denominator is downsampled_samples_processed_.
double signal_sumsq_;
/// sum of previously processed parts of signal; used to do mean-subtraction
/// when getting sum-squared, along with signal_sumsq_.
double signal_sum_;
/// downsampled_samples_processed is the number of samples (after
/// downsampling) that we got in previous calls to AcceptWaveform().
int64 downsampled_samples_processed_;
/// This is a small remainder of the previous downsampled signal;
/// it's used by ExtractFrame for frames near the boundary of two
/// waveforms supplied to AcceptWaveform().
Vector<BaseFloat> downsampled_signal_remainder_;
};
OnlinePitchFeatureImpl::OnlinePitchFeatureImpl(
const PitchExtractionOptions &opts):
opts_(opts), forward_cost_remainder_(0.0), input_finished_(false),
signal_sumsq_(0.0), signal_sum_(0.0), downsampled_samples_processed_(0) {
signal_resampler_ = new LinearResample(opts.samp_freq, opts.resample_freq,
opts.lowpass_cutoff,
opts.lowpass_filter_width);
double outer_min_lag = 1.0 / opts.max_f0 -
(opts.upsample_filter_width/(2.0 * opts.resample_freq));
double outer_max_lag = 1.0 / opts.min_f0 +
(opts.upsample_filter_width/(2.0 * opts.resample_freq));
nccf_first_lag_ = ceil(opts.resample_freq * outer_min_lag);
nccf_last_lag_ = floor(opts.resample_freq * outer_max_lag);
frames_latency_ = 0; // will be set in AcceptWaveform()
// Choose the lags at which we resample the NCCF.
SelectLags(opts, &lags_);
// upsample_cutoff is the filter cutoff for upsampling the NCCF, which is the
// Nyquist of the resampling frequency. The NCCF is (almost completely)
// bandlimited to around "lowpass_cutoff" (1000 by default), and when the
// spectrum of this bandlimited signal is convolved with the spectrum of an
// impulse train with frequency "resample_freq" (so the spectral images are
// separated by 4kHz with the defaults), we get energy at
// -5000,-3000, -1000...1000, 3000..5000, etc. Filtering with a cutoff at
// the Nyquist frequency (2000 by default) is sufficient to keep only the
// first repetition.
BaseFloat upsample_cutoff = opts.resample_freq * 0.5;
Vector<BaseFloat> lags_offset(lags_);
// lags_offset equals lags_ (which are the log-spaced lag values we want to
// measure the NCCF at) with nccf_first_lag_ / opts.resample_freq subtracted
// from each element, so we can treat the measured NCCF values as starting
// from sample zero in a signal that starts at the time
// nccf_first_lag_ / opts.resample_freq. This is necessary because the
// ArbitraryResample code assumes that the input signal starts from sample
// zero.
lags_offset.Add(-nccf_first_lag_ / opts.resample_freq);
int32 num_measured_lags = nccf_last_lag_ + 1 - nccf_first_lag_;
nccf_resampler_ = new ArbitraryResample(num_measured_lags, opts.resample_freq,
upsample_cutoff, lags_offset,
opts.upsample_filter_width);
// add a PitchInfo object for frame -1 (not a real frame).
frame_info_.push_back(new PitchFrameInfo(lags_.Dim()));
// zeroes forward_cost_; this is what we want for the fake frame -1.
forward_cost_.Resize(lags_.Dim());
}
int32 OnlinePitchFeatureImpl::NumFramesAvailable(
int64 num_downsampled_samples, bool snip_edges) const {
int32 frame_shift = opts_.NccfWindowShift(),
frame_length = opts_.NccfWindowSize();
// Use the "full frame length" to compute the number
// of frames only if the input is not finished.
if (!input_finished_)
frame_length += nccf_last_lag_;
if (num_downsampled_samples < frame_length) {
return 0;
} else {
if (!snip_edges) {
if (input_finished_) {
return static_cast<int32>(num_downsampled_samples * 1.0f /
frame_shift + 0.5f);
} else {
return static_cast<int32>((num_downsampled_samples - frame_length / 2) *
1.0f / frame_shift + 0.5f);
}
} else {
return static_cast<int32>((num_downsampled_samples - frame_length) /
frame_shift + 1);
}
}
}
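// Worked example (illustrative; assumes the default options, which to our
// knowledge are resample_freq = 4000, frame_length_ms = 25, frame_shift_ms =
// 10, min_f0 = 50, upsample_filter_width = 5): frame_length = 100 samples,
// frame_shift = 40 samples and nccf_last_lag_ = floor(4000 * (1/50 +
// 5/8000)) = 82, so before InputFinished() the first frame only becomes
// available once 100 + 82 = 182 downsampled samples (about 45 ms of signal)
// have been accumulated.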
void OnlinePitchFeatureImpl::UpdateRemainder(
const VectorBase<BaseFloat> &downsampled_wave_part) {
// frame_info_ has an extra element at frame-1, so subtract
// one from the length.
int64 num_frames = static_cast<int64>(frame_info_.size()) - 1,
next_frame = num_frames,
frame_shift = opts_.NccfWindowShift(),
next_frame_sample = frame_shift * next_frame;
signal_sumsq_ += VecVec(downsampled_wave_part, downsampled_wave_part);
signal_sum_ += downsampled_wave_part.Sum();
// next_frame_sample is the first sample index we'll need for the
// next frame.
int64 next_downsampled_samples_processed =
downsampled_samples_processed_ + downsampled_wave_part.Dim();
if (next_frame_sample > next_downsampled_samples_processed) {
// this could only happen in the weird situation that the full frame length
// is less than the frame shift.
int32 full_frame_length = opts_.NccfWindowSize() + nccf_last_lag_;
KALDI_ASSERT(full_frame_length < frame_shift && "Code error");
downsampled_signal_remainder_.Resize(0);
} else {
Vector<BaseFloat> new_remainder(next_downsampled_samples_processed -
next_frame_sample);
// note: next_frame_sample is the index into the entire signal, of
// new_remainder(0).
// i is the absolute index of the signal.
for (int64 i = next_frame_sample;
i < next_downsampled_samples_processed; i++) {
if (i >= downsampled_samples_processed_) { // in current signal.
new_remainder(i - next_frame_sample) =
downsampled_wave_part(i - downsampled_samples_processed_);
} else { // in old remainder; only reach here if waveform supplied is
new_remainder(i - next_frame_sample) = // tiny.
downsampled_signal_remainder_(i - downsampled_samples_processed_ +
downsampled_signal_remainder_.Dim());
}
}
downsampled_signal_remainder_.Swap(&new_remainder);
}
downsampled_samples_processed_ = next_downsampled_samples_processed;
}
void OnlinePitchFeatureImpl::ExtractFrame(
const VectorBase<BaseFloat> &downsampled_wave_part,
int64 sample_index,
VectorBase<BaseFloat> *window) {
int32 full_frame_length = window->Dim();
int32 offset = static_cast<int32>(sample_index -
downsampled_samples_processed_);
// Treat edge cases first
if (sample_index < 0) {
// Part of the frame is before the beginning of the signal. This
// should only happen if opts_.snip_edges == false, when we are
// processing the first few frames of signal. In this case
// we pad with zeros.
KALDI_ASSERT(opts_.snip_edges == false);
int32 sub_frame_length = sample_index + full_frame_length;
int32 sub_frame_index = full_frame_length - sub_frame_length;
KALDI_ASSERT(sub_frame_length > 0 && sub_frame_index > 0);
window->SetZero();
SubVector<BaseFloat> sub_window(*window, sub_frame_index, sub_frame_length);
ExtractFrame(downsampled_wave_part, 0, &sub_window);
return;
}
if (offset + full_frame_length > downsampled_wave_part.Dim()) {
// Requested frame is past end of the signal. This should only happen if
// input_finished_ == true, when we're flushing out the last couple of
// frames of signal. In this case we pad with zeros.
KALDI_ASSERT(input_finished_);
int32 sub_frame_length = downsampled_wave_part.Dim() - offset;
KALDI_ASSERT(sub_frame_length > 0);
window->SetZero();
SubVector<BaseFloat> sub_window(*window, 0, sub_frame_length);
ExtractFrame(downsampled_wave_part, sample_index, &sub_window);
return;
}
// "offset" is the offset of the start of the frame, into this
// signal.
if (offset >= 0) {
// frame is full inside the new part of the signal.
window->CopyFromVec(downsampled_wave_part.Range(offset, full_frame_length));
} else {
// frame is partly in the remainder and partly in the new part.
int32 remainder_offset = downsampled_signal_remainder_.Dim() + offset;
KALDI_ASSERT(remainder_offset >= 0); // or we didn't keep enough remainder.
KALDI_ASSERT(offset + full_frame_length > 0); // or we should have
// processed this frame last
// time.
int32 old_length = -offset, new_length = offset + full_frame_length;
window->Range(0, old_length).CopyFromVec(
downsampled_signal_remainder_.Range(remainder_offset, old_length));
window->Range(old_length, new_length).CopyFromVec(
downsampled_wave_part.Range(0, new_length));
}
if (opts_.preemph_coeff != 0.0) {
BaseFloat preemph_coeff = opts_.preemph_coeff;
for (int32 i = window->Dim() - 1; i > 0; i--)
(*window)(i) -= preemph_coeff * (*window)(i-1);
(*window)(0) *= (1.0 - preemph_coeff);
}
}
bool OnlinePitchFeatureImpl::IsLastFrame(int32 frame) const {
int32 T = NumFramesReady();
KALDI_ASSERT(frame < T);
return (input_finished_ && frame + 1 == T);
}
BaseFloat OnlinePitchFeatureImpl::FrameShiftInSeconds() const {
return opts_.frame_shift_ms / 1000.0f;
}
int32 OnlinePitchFeatureImpl::NumFramesReady() const {
int32 num_frames = lag_nccf_.size(),
latency = frames_latency_;
KALDI_ASSERT(latency <= num_frames);
return num_frames - latency;
}
void OnlinePitchFeatureImpl::GetFrame(int32 frame,
VectorBase<BaseFloat> *feat) {
KALDI_ASSERT(frame < NumFramesReady() && feat->Dim() == 2);
(*feat)(0) = lag_nccf_[frame].second;
(*feat)(1) = 1.0 / lags_(lag_nccf_[frame].first);
}
void OnlinePitchFeatureImpl::InputFinished() {
input_finished_ = true;
// Process an empty waveform; this has an effect because
// after setting input_finished_ to true, NumFramesAvailable()
// will return a slightly larger number.
AcceptWaveform(opts_.samp_freq, Vector<BaseFloat>());
int32 num_frames = static_cast<int32>(frame_info_.size()) - 1;
if (num_frames < opts_.recompute_frame && !opts_.nccf_ballast_online)
RecomputeBacktraces();
frames_latency_ = 0;
KALDI_VLOG(3) << "Pitch-tracking Viterbi cost is "
<< (forward_cost_remainder_ / num_frames)
<< " per frame, over " << num_frames << " frames.";
}
// see comment with declaration. This is only relevant for online
// operation (it also gets called in non-online mode, but is then a no-op).
void OnlinePitchFeatureImpl::RecomputeBacktraces() {
KALDI_ASSERT(!opts_.nccf_ballast_online);
int32 num_frames = static_cast<int32>(frame_info_.size()) - 1;
// The assertion reflects how we believe this function will be called.
KALDI_ASSERT(num_frames <= opts_.recompute_frame);
KALDI_ASSERT(nccf_info_.size() == static_cast<size_t>(num_frames));
if (num_frames == 0)
return;
double num_samp = downsampled_samples_processed_, sum = signal_sum_,
sumsq = signal_sumsq_, mean = sum / num_samp;
BaseFloat mean_square = sumsq / num_samp - mean * mean;
bool must_recompute = false;
BaseFloat threshold = 0.01;
for (int32 frame = 0; frame < num_frames; frame++)
if (!ApproxEqual(nccf_info_[frame]->mean_square_energy,
mean_square, threshold))
must_recompute = true;
if (!must_recompute) {
// Nothing to do. We'll reach here, for instance, if everything was in one
// chunk and opts_.nccf_ballast_online == false. This is the case for
// offline processing.
for (size_t i = 0; i < nccf_info_.size(); i++)
delete nccf_info_[i];
nccf_info_.clear();
return;
}
int32 num_states = forward_cost_.Dim(),
basic_frame_length = opts_.NccfWindowSize();
BaseFloat new_nccf_ballast = pow(mean_square * basic_frame_length, 2) *
opts_.nccf_ballast;
double forward_cost_remainder = 0.0;
Vector<BaseFloat> forward_cost(num_states), // start off at zero.
next_forward_cost(forward_cost);
std::vector<std::pair<int32, int32 > > index_info;
for (int32 frame = 0; frame < num_frames; frame++) {
NccfInfo &nccf_info = *nccf_info_[frame];
BaseFloat old_mean_square = nccf_info_[frame]->mean_square_energy,
avg_norm_prod = nccf_info_[frame]->avg_norm_prod,
old_nccf_ballast = pow(old_mean_square * basic_frame_length, 2) *
opts_.nccf_ballast,
nccf_scale = pow((old_nccf_ballast + avg_norm_prod) /
(new_nccf_ballast + avg_norm_prod),
static_cast<BaseFloat>(0.5));
// The "nccf_scale" is an estimate of the scaling factor by which the NCCF
// would change on this frame, on average, by changing the ballast term from
// "old_nccf_ballast" to "new_nccf_ballast". It's not exact because the
// "avg_norm_prod" is just an average of the product e1 * e2 of frame
// energies of the (frame, shifted-frame), but these won't change that much
// within a frame, and even if they do, the inaccuracy of the scaled NCCF
// will still be very small if the ballast term didn't change much, or if
// it's much larger or smaller than e1*e2. By doing it as a simple scaling,
// we save the overhead of the NCCF resampling, which is a considerable part
// of the whole computation.
nccf_info.nccf_pitch_resampled.Scale(nccf_scale);
frame_info_[frame + 1]->ComputeBacktraces(
opts_, nccf_info.nccf_pitch_resampled, lags_,
forward_cost, &index_info, &next_forward_cost);
forward_cost.Swap(&next_forward_cost);
BaseFloat remainder = forward_cost.Min();
forward_cost_remainder += remainder;
forward_cost.Add(-remainder);
}
KALDI_VLOG(3) << "Forward-cost per frame changed from "
<< (forward_cost_remainder_ / num_frames) << " to "
<< (forward_cost_remainder / num_frames);
forward_cost_remainder_ = forward_cost_remainder;
forward_cost_.Swap(&forward_cost);
int32 best_final_state;
forward_cost_.Min(&best_final_state);
if (lag_nccf_.size() != static_cast<size_t>(num_frames))
lag_nccf_.resize(num_frames);
frame_info_.back()->SetBestState(best_final_state, lag_nccf_);
frames_latency_ =
frame_info_.back()->ComputeLatency(opts_.max_frames_latency);
for (size_t i = 0; i < nccf_info_.size(); i++)
delete nccf_info_[i];
nccf_info_.clear();
}
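// Where nccf_scale above comes from (our derivation; not part of the original
// comments): since nccf = numerator / sqrt(e1 * e2 + ballast), changing only
// the ballast term rescales each value by
//   sqrt((e1 * e2 + old_ballast) / (e1 * e2 + new_ballast)),
// and the code substitutes the per-frame average avg_norm_prod for e1 * e2,
// which is what makes this a cheap approximation rather than a full NCCF
// recomputation.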
OnlinePitchFeatureImpl::~OnlinePitchFeatureImpl() {
delete nccf_resampler_;
delete signal_resampler_;
for (size_t i = 0; i < frame_info_.size(); i++)
delete frame_info_[i];
for (size_t i = 0; i < nccf_info_.size(); i++)
delete nccf_info_[i];
}
void OnlinePitchFeatureImpl::AcceptWaveform(
BaseFloat sampling_rate,
const VectorBase<BaseFloat> &wave) {
// flush out the last few samples of input waveform only if input_finished_ ==
// true.
const bool flush = input_finished_;
Vector<BaseFloat> downsampled_wave;
signal_resampler_->Resample(wave, flush, &downsampled_wave);
// these variables will be used to compute the root-mean-square value of the
// signal for the ballast term.
double cur_sumsq = signal_sumsq_, cur_sum = signal_sum_;
int64 cur_num_samp = downsampled_samples_processed_,
prev_frame_end_sample = 0;
if (!opts_.nccf_ballast_online) {
cur_sumsq += VecVec(downsampled_wave, downsampled_wave);
cur_sum += downsampled_wave.Sum();
cur_num_samp += downsampled_wave.Dim();
}
// end_frame is the total number of frames we can now process, including
// previously processed ones.
int32 end_frame = NumFramesAvailable(
downsampled_samples_processed_ + downsampled_wave.Dim(), opts_.snip_edges);
// "start_frame" is the first frame-index we process
int32 start_frame = frame_info_.size() - 1,
num_new_frames = end_frame - start_frame;
if (num_new_frames == 0) {
UpdateRemainder(downsampled_wave);
return;
// continuing to the rest of the code would generate
// an error when sizing matrices with zero rows, and
// anyway is a waste of time.
}
int32 num_measured_lags = nccf_last_lag_ + 1 - nccf_first_lag_,
num_resampled_lags = lags_.Dim(),
frame_shift = opts_.NccfWindowShift(),
basic_frame_length = opts_.NccfWindowSize(),
full_frame_length = basic_frame_length + nccf_last_lag_;
Vector<BaseFloat> window(full_frame_length),
inner_prod(num_measured_lags),
norm_prod(num_measured_lags);
Matrix<BaseFloat> nccf_pitch(num_new_frames, num_measured_lags),
nccf_pov(num_new_frames, num_measured_lags);
Vector<BaseFloat> cur_forward_cost(num_resampled_lags);
// Because the resampling of the NCCF is more efficient when grouped together,
// we first compute the NCCF for all frames, then resample as a matrix, then
// do the Viterbi [that happens inside the constructor of PitchFrameInfo].
for (int32 frame = start_frame; frame < end_frame; frame++) {
// start_sample is index into the whole wave, not just this part.
int64 start_sample;
if (opts_.snip_edges) {
// Usual case: offset starts at 0
start_sample = static_cast<int64>(frame) * frame_shift;
} else {
// When we are not snipping the edges, the first offsets may be
// negative. In this case we will pad with zeros, it should not impact
// the pitch tracker.
start_sample =
static_cast<int64>((frame + 0.5) * frame_shift) - full_frame_length / 2;
}
ExtractFrame(downsampled_wave, start_sample, &window);
if (opts_.nccf_ballast_online) {
// use only up to end of current frame to compute root-mean-square value.
// end_sample will be the sample-index into "downsampled_wave", so
// not really comparable to start_sample.
int64 end_sample = start_sample + full_frame_length -
downsampled_samples_processed_;
KALDI_ASSERT(end_sample > 0); // or should have processed this frame last
// time. Note: end_sample is one past last
// sample.
if (end_sample > downsampled_wave.Dim()) {
KALDI_ASSERT(input_finished_);
end_sample = downsampled_wave.Dim();
}
SubVector<BaseFloat> new_part(downsampled_wave, prev_frame_end_sample,
end_sample - prev_frame_end_sample);
cur_num_samp += new_part.Dim();
cur_sumsq += VecVec(new_part, new_part);
cur_sum += new_part.Sum();
prev_frame_end_sample = end_sample;
}
double mean_square = cur_sumsq / cur_num_samp -
pow(cur_sum / cur_num_samp, 2.0);
ComputeCorrelation(window, nccf_first_lag_, nccf_last_lag_,
basic_frame_length, &inner_prod, &norm_prod);
double nccf_ballast_pov = 0.0,
nccf_ballast_pitch = pow(mean_square * basic_frame_length, 2) *
opts_.nccf_ballast,
avg_norm_prod = norm_prod.Sum() / norm_prod.Dim();
SubVector<BaseFloat> nccf_pitch_row(nccf_pitch, frame - start_frame);
ComputeNccf(inner_prod, norm_prod, nccf_ballast_pitch,
&nccf_pitch_row);
SubVector<BaseFloat> nccf_pov_row(nccf_pov, frame - start_frame);
ComputeNccf(inner_prod, norm_prod, nccf_ballast_pov,
&nccf_pov_row);
if (frame < opts_.recompute_frame)
nccf_info_.push_back(new NccfInfo(avg_norm_prod, mean_square));
}
Matrix<BaseFloat> nccf_pitch_resampled(num_new_frames, num_resampled_lags);
nccf_resampler_->Resample(nccf_pitch, &nccf_pitch_resampled);
nccf_pitch.Resize(0, 0); // no longer needed.
Matrix<BaseFloat> nccf_pov_resampled(num_new_frames, num_resampled_lags);
nccf_resampler_->Resample(nccf_pov, &nccf_pov_resampled);
nccf_pov.Resize(0, 0); // no longer needed.
// We've finished dealing with the waveform so we can call UpdateRemainder
// now; we need to call it before we possibly call RecomputeBacktraces()
// below, which is why we don't do it at the very end.
UpdateRemainder(downsampled_wave);
std::vector<std::pair<int32, int32 > > index_info;
for (int32 frame = start_frame; frame < end_frame; frame++) {
int32 frame_idx = frame - start_frame;
PitchFrameInfo *prev_info = frame_info_.back(),
*cur_info = new PitchFrameInfo(prev_info);
cur_info->SetNccfPov(nccf_pov_resampled.Row(frame_idx));
cur_info->ComputeBacktraces(opts_, nccf_pitch_resampled.Row(frame_idx),
lags_, forward_cost_, &index_info,
&cur_forward_cost);
forward_cost_.Swap(&cur_forward_cost);
// Renormalize forward_cost so smallest element is zero.
BaseFloat remainder = forward_cost_.Min();
forward_cost_remainder_ += remainder;
forward_cost_.Add(-remainder);
frame_info_.push_back(cur_info);
if (frame < opts_.recompute_frame)
nccf_info_[frame]->nccf_pitch_resampled =
nccf_pitch_resampled.Row(frame_idx);
if (frame == opts_.recompute_frame - 1 && !opts_.nccf_ballast_online)
RecomputeBacktraces();
}
// Trace back the best-path.
int32 best_final_state;
forward_cost_.Min(&best_final_state);
lag_nccf_.resize(frame_info_.size() - 1); // will keep any existing data.
frame_info_.back()->SetBestState(best_final_state, lag_nccf_);
frames_latency_ =
frame_info_.back()->ComputeLatency(opts_.max_frames_latency);
KALDI_VLOG(4) << "Latency is " << frames_latency_;
}
// Some functions that forward from OnlinePitchFeature to
// OnlinePitchFeatureImpl.
int32 OnlinePitchFeature::NumFramesReady() const {
return impl_->NumFramesReady();
}
OnlinePitchFeature::OnlinePitchFeature(const PitchExtractionOptions &opts)
:impl_(new OnlinePitchFeatureImpl(opts)) { }
bool OnlinePitchFeature::IsLastFrame(int32 frame) const {
return impl_->IsLastFrame(frame);
}
BaseFloat OnlinePitchFeature::FrameShiftInSeconds() const {
return impl_->FrameShiftInSeconds();
}
void OnlinePitchFeature::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
impl_->GetFrame(frame, feat);
}
void OnlinePitchFeature::AcceptWaveform(
BaseFloat sampling_rate,
const VectorBase<BaseFloat> &waveform) {
impl_->AcceptWaveform(sampling_rate, waveform);
}
void OnlinePitchFeature::InputFinished() {
impl_->InputFinished();
}
OnlinePitchFeature::~OnlinePitchFeature() {
delete impl_;
}
/**
This function is called from ComputeKaldiPitch when the user
specifies opts.simulate_first_pass_online == true. It gives
the "first-pass" version of the features, which you would get
on the first decoding pass in an online setting. These may
differ slightly from the final features due to both the
way the Viterbi traceback works (this is affected by
opts.max_frames_latency), and the online way we compute
the average signal energy.
*/
void ComputeKaldiPitchFirstPass(
const PitchExtractionOptions &opts,
const VectorBase<BaseFloat> &wave,
Matrix<BaseFloat> *output) {
int32 cur_rows = 100;
Matrix<BaseFloat> feats(cur_rows, 2);
OnlinePitchFeature pitch_extractor(opts);
KALDI_ASSERT(opts.frames_per_chunk > 0 &&
"--simulate-first-pass-online option does not make sense "
"unless you specify --frames-per-chunk");
int32 cur_offset = 0, cur_frame = 0, samp_per_chunk =
opts.frames_per_chunk * opts.samp_freq * opts.frame_shift_ms / 1000.0f;
while (cur_offset < wave.Dim()) {
int32 num_samp = std::min(samp_per_chunk, wave.Dim() - cur_offset);
SubVector<BaseFloat> wave_chunk(wave, cur_offset, num_samp);
pitch_extractor.AcceptWaveform(opts.samp_freq, wave_chunk);
cur_offset += num_samp;
if (cur_offset == wave.Dim())
pitch_extractor.InputFinished();
// Get each frame as soon as it is ready.
for (; cur_frame < pitch_extractor.NumFramesReady(); cur_frame++) {
if (cur_frame >= cur_rows) {
cur_rows *= 2;
feats.Resize(cur_rows, 2, kCopyData);
}
SubVector<BaseFloat> row(feats, cur_frame);
pitch_extractor.GetFrame(cur_frame, &row);
}
}
if (cur_frame == 0) {
KALDI_WARN << "No features output since wave file too short";
output->Resize(0, 0);
} else {
*output = feats.RowRange(0, cur_frame);
}
}
void ComputeKaldiPitch(const PitchExtractionOptions &opts,
const VectorBase<BaseFloat> &wave,
Matrix<BaseFloat> *output) {
if (opts.simulate_first_pass_online) {
ComputeKaldiPitchFirstPass(opts, wave, output);
return;
}
OnlinePitchFeature pitch_extractor(opts);
if (opts.frames_per_chunk == 0) {
pitch_extractor.AcceptWaveform(opts.samp_freq, wave);
} else {
// the user may set opts.frames_per_chunk for better compatibility with
// online operation.
KALDI_ASSERT(opts.frames_per_chunk > 0);
int32 cur_offset = 0, samp_per_chunk =
opts.frames_per_chunk * opts.samp_freq * opts.frame_shift_ms / 1000.0f;
while (cur_offset < wave.Dim()) {
int32 num_samp = std::min(samp_per_chunk, wave.Dim() - cur_offset);
SubVector<BaseFloat> wave_chunk(wave, cur_offset, num_samp);
pitch_extractor.AcceptWaveform(opts.samp_freq, wave_chunk);
cur_offset += num_samp;
}
}
pitch_extractor.InputFinished();
int32 num_frames = pitch_extractor.NumFramesReady();
if (num_frames == 0) {
KALDI_WARN << "No frames output in pitch extraction";
output->Resize(0, 0);
return;
}
output->Resize(num_frames, 2);
for (int32 frame = 0; frame < num_frames; frame++) {
SubVector<BaseFloat> row(*output, frame);
pitch_extractor.GetFrame(frame, &row);
}
}
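// Illustrative usage sketch (not part of the original code; assumes "wave"
// already holds a mono waveform sampled at 16 kHz):
//
//   PitchExtractionOptions opts;
//   opts.samp_freq = 16000;
//   Matrix<BaseFloat> features;
//   ComputeKaldiPitch(opts, wave, &features);
//   // features has one row per frame and 2 columns:
//   // column 0 = NCCF (voicing evidence), column 1 = pitch in Hz.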
/*
This comment describes our investigation of how much latency the
online-processing algorithm introduces, i.e. how many frames you would
typically have to wait until the traceback converges, if you were to set the
--max-frames-latency to a very large value.
This was done on a couple of files of language-id data.
/home/dpovey/kaldi-online/src/featbin/compute-kaldi-pitch-feats --frames-per-chunk=10 --max-frames-latency=100 --verbose=4 --sample-frequency=8000 --resample-frequency=2600 "scp:head -n 2 data/train/wav.scp |" ark:/dev/null 2>&1 | grep Latency | wc
4871 24355 443991
/home/dpovey/kaldi-online/src/featbin/compute-kaldi-pitch-feats --frames-per-chunk=10 --max-frames-latency=100 --verbose=4 --sample-frequency=8000 --resample-frequency=2600 "scp:head -n 2 data/train/wav.scp |" ark:/dev/null 2>&1 | grep Latency | grep 100 | wc
1534 7670 141128
# as above, but with 50 instead of 100 in the --max-frames-latency and grep statements.
2070 10350 188370
# as above, but with 10 instead of 50.
4067 20335 370097
This says that out of 4871 selected frames [we measured the latency every 10
frames, since --frames-per-chunk=10], in 1534 frames (31%), the latency was
>= 100 frames, i.e. >= 1 second. Including the other numbers, we can see
that
31% of frames had latency >= 1 second
42% of frames had latency >= 0.5 second
83% of frames had latency >= 0.1 second.
This doesn't necessarily mean that we actually have a latency of >= 1 second 31% of
the time when using these features, since by using the --max-frames-latency option
(default: 30 frames), it will limit the latency to, say, 0.3 seconds, and trace back
from the best current pitch. Most of the time this will probably cause no change in
the pitch traceback since the best current pitch is probably the "right" point to
trace back from. And anyway, in the online-decoding, we will most likely rescore
the features at the end anyway, and the traceback gets recomputed, so there will
be no inaccuracy (assuming the first-pass lattice had everything we needed).
Probably the greater source of inaccuracy due to the online algorithm is the
online energy-normalization, which affects the NCCF-ballast term, and which,
for reasons of efficiency, we don't attempt to "correct" in a later rescoring
pass. This will make the most difference in the first few frames of the file,
before the first voicing, where it will tend to produce more pitch movement
than the offline version of the algorithm.
*/
// Function to do data accumulation for on-line usage
template<typename Real>
inline void AppendVector(const VectorBase<Real> &src, Vector<Real> *dst) {
if (src.Dim() == 0) return;
dst->Resize(dst->Dim() + src.Dim(), kCopyData);
dst->Range(dst->Dim() - src.Dim(), src.Dim()).CopyFromVec(src);
}
/**
Note on the implementation of OnlineProcessPitch: the
OnlineFeatureInterface allows random access to features (i.e. not necessarily
sequential order), so we need to support that. But we don't need to support
it very efficiently, and our implementation is most efficient if frames are
accessed in sequential order.
Also note: we have to be a bit careful in this implementation because
the input features may change. That is: if we call
src_->GetFrame(t, &vec) from GetFrame(), we can't guarantee that a later
call to src_->GetFrame(t, &vec) from another GetFrame() will return the
same value. In fact, while designing this class we used some knowledge
of how the OnlinePitchFeature class works to minimize the amount of
re-querying we had to do.
*/
OnlineProcessPitch::OnlineProcessPitch(
const ProcessPitchOptions &opts,
OnlineFeatureInterface *src):
opts_(opts), src_(src),
dim_ ((opts.add_pov_feature ? 1 : 0)
+ (opts.add_normalized_log_pitch ? 1 : 0)
+ (opts.add_delta_pitch ? 1 : 0)
+ (opts.add_raw_log_pitch ? 1 : 0)) {
KALDI_ASSERT(dim_ > 0 &&
" At least one of the pitch features should be chosen. "
"Check your post-process-pitch options.");
KALDI_ASSERT(src->Dim() == kRawFeatureDim &&
"Input feature must be pitch feature (should have dimension 2)");
}
void OnlineProcessPitch::GetFrame(int32 frame,
VectorBase<BaseFloat> *feat) {
int32 frame_delayed = frame < opts_.delay ? 0 : frame - opts_.delay;
KALDI_ASSERT(feat->Dim() == dim_ &&
frame_delayed < NumFramesReady());
int32 index = 0;
if (opts_.add_pov_feature)
(*feat)(index++) = GetPovFeature(frame_delayed);
if (opts_.add_normalized_log_pitch)
(*feat)(index++) = GetNormalizedLogPitchFeature(frame_delayed);
if (opts_.add_delta_pitch)
(*feat)(index++) = GetDeltaPitchFeature(frame_delayed);
if (opts_.add_raw_log_pitch)
(*feat)(index++) = GetRawLogPitchFeature(frame_delayed);
KALDI_ASSERT(index == dim_);
}
BaseFloat OnlineProcessPitch::GetPovFeature(int32 frame) const {
Vector<BaseFloat> tmp(kRawFeatureDim);
src_->GetFrame(frame, &tmp); // (NCCF, pitch) from pitch extractor
BaseFloat nccf = tmp(0);
return opts_.pov_scale * NccfToPovFeature(nccf)
+ opts_.pov_offset;
}
BaseFloat OnlineProcessPitch::GetDeltaPitchFeature(int32 frame) {
// Rather than computing the delta pitch directly in code here,
// which might seem easier, we accumulate a small window of features
// and call ComputeDeltas. This might seem like overkill; the reason
// we do it this way is to ensure that the end effects (at file
// beginning and end) are handled in a consistent way.
int32 context = opts_.delta_window;
int32 start_frame = std::max(0, frame - context),
end_frame = std::min(frame + context + 1, src_->NumFramesReady()),
frames_in_window = end_frame - start_frame;
Matrix<BaseFloat> feats(frames_in_window, 1),
delta_feats;
for (int32 f = start_frame; f < end_frame; f++)
feats(f - start_frame, 0) = GetRawLogPitchFeature(f);
DeltaFeaturesOptions delta_opts;
delta_opts.order = 1;
delta_opts.window = opts_.delta_window;
ComputeDeltas(delta_opts, feats, &delta_feats);
while (delta_feature_noise_.size() <= static_cast<size_t>(frame)) {
delta_feature_noise_.push_back(RandGauss() *
opts_.delta_pitch_noise_stddev);
}
// note: delta_feats will have two columns; the second contains the deltas.
return (delta_feats(frame - start_frame, 1) + delta_feature_noise_[frame]) *
opts_.delta_pitch_scale;
}
BaseFloat OnlineProcessPitch::GetRawLogPitchFeature(int32 frame) const {
Vector<BaseFloat> tmp(kRawFeatureDim);
src_->GetFrame(frame, &tmp);
BaseFloat pitch = tmp(1);
KALDI_ASSERT(pitch > 0);
return Log(pitch);
}
BaseFloat OnlineProcessPitch::GetNormalizedLogPitchFeature(int32 frame) {
UpdateNormalizationStats(frame);
BaseFloat log_pitch = GetRawLogPitchFeature(frame),
avg_log_pitch = normalization_stats_[frame].sum_log_pitch_pov /
normalization_stats_[frame].sum_pov,
normalized_log_pitch = log_pitch - avg_log_pitch;
return normalized_log_pitch * opts_.pitch_scale;
}
// inline
void OnlineProcessPitch::GetNormalizationWindow(int32 t,
int32 src_frames_ready,
int32 *window_begin,
int32 *window_end) const {
int32 left_context = opts_.normalization_left_context;
int32 right_context = opts_.normalization_right_context;
*window_begin = std::max(0, t - left_context);
*window_end = std::min(t + right_context + 1, src_frames_ready);
}
// Makes sure the entry in normalization_stats_ for this frame is up to date;
// called from GetNormalizedLogPitchFeature.
// the cur_num_frames and input_finished variables are needed because the
// pitch features for a given frame may change as we see more data.
void OnlineProcessPitch::UpdateNormalizationStats(int32 frame) {
KALDI_ASSERT(frame >= 0);
if (normalization_stats_.size() <= frame)
normalization_stats_.resize(frame + 1);
int32 cur_num_frames = src_->NumFramesReady();
bool input_finished = src_->IsLastFrame(cur_num_frames - 1);
NormalizationStats &this_stats = normalization_stats_[frame];
if (this_stats.cur_num_frames == cur_num_frames &&
this_stats.input_finished == input_finished) {
// Stats are fully up-to-date.
return;
}
int32 this_window_begin, this_window_end;
GetNormalizationWindow(frame, cur_num_frames,
&this_window_begin, &this_window_end);
if (frame > 0) {
const NormalizationStats &prev_stats = normalization_stats_[frame - 1];
if (prev_stats.cur_num_frames == cur_num_frames &&
prev_stats.input_finished == input_finished) {
// we'll derive this_stats efficiently from prev_stats.
// Checking that cur_num_frames and input_finished have not changed
// ensures that the underlying features will not have changed.
this_stats = prev_stats;
int32 prev_window_begin, prev_window_end;
GetNormalizationWindow(frame - 1, cur_num_frames,
&prev_window_begin, &prev_window_end);
if (this_window_begin != prev_window_begin) {
KALDI_ASSERT(this_window_begin == prev_window_begin + 1);
Vector<BaseFloat> tmp(kRawFeatureDim);
src_->GetFrame(prev_window_begin, &tmp);
BaseFloat accurate_pov = NccfToPov(tmp(0)),
log_pitch = Log(tmp(1));
this_stats.sum_pov -= accurate_pov;
this_stats.sum_log_pitch_pov -= accurate_pov * log_pitch;
}
if (this_window_end != prev_window_end) {
KALDI_ASSERT(this_window_end == prev_window_end + 1);
Vector<BaseFloat> tmp(kRawFeatureDim);
src_->GetFrame(prev_window_end, &tmp);
BaseFloat accurate_pov = NccfToPov(tmp(0)),
log_pitch = Log(tmp(1));
this_stats.sum_pov += accurate_pov;
this_stats.sum_log_pitch_pov += accurate_pov * log_pitch;
}
return;
}
}
// The way we do it here is not the most efficient way to do it;
// we'll see if it becomes a problem. The issue is we have to redo
// this computation from scratch each time we process a new chunk, which
// may be a little inefficient if the chunk-size is very small.
this_stats.cur_num_frames = cur_num_frames;
this_stats.input_finished = input_finished;
this_stats.sum_pov = 0.0;
this_stats.sum_log_pitch_pov = 0.0;
Vector<BaseFloat> tmp(kRawFeatureDim);
for (int32 f = this_window_begin; f < this_window_end; f++) {
src_->GetFrame(f, &tmp);
BaseFloat accurate_pov = NccfToPov(tmp(0)),
log_pitch = Log(tmp(1));
this_stats.sum_pov += accurate_pov;
this_stats.sum_log_pitch_pov += accurate_pov * log_pitch;
}
}
int32 OnlineProcessPitch::NumFramesReady() const {
int32 src_frames_ready = src_->NumFramesReady();
if (src_frames_ready == 0) {
return 0;
} else if (src_->IsLastFrame(src_frames_ready - 1)) {
return src_frames_ready + opts_.delay;
} else {
return std::max(0, src_frames_ready -
opts_.normalization_right_context + opts_.delay);
}
}
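// Illustrative worked example of the logic above (not in the original
// source): with the default ProcessPitchOptions (normalization_right_context
// = 75, delay = 0), if the source has 100 frames ready and is not yet
// finished, we report std::max(0, 100 - 75 + 0) = 25 frames ready; once the
// source signals its last frame, all 100 frames (plus the delay) are ready.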
void ProcessPitch(const ProcessPitchOptions &opts,
const MatrixBase<BaseFloat> &input,
Matrix<BaseFloat> *output) {
OnlineMatrixFeature pitch_feat(input);
OnlineProcessPitch online_process_pitch(opts, &pitch_feat);
output->Resize(online_process_pitch.NumFramesReady(),
online_process_pitch.Dim());
for (int32 t = 0; t < online_process_pitch.NumFramesReady(); t++) {
SubVector<BaseFloat> row(*output, t);
online_process_pitch.GetFrame(t, &row);
}
}
void ComputeAndProcessKaldiPitch(
const PitchExtractionOptions &pitch_opts,
const ProcessPitchOptions &process_opts,
const VectorBase<BaseFloat> &wave,
Matrix<BaseFloat> *output) {
OnlinePitchFeature pitch_extractor(pitch_opts);
if (pitch_opts.simulate_first_pass_online) {
KALDI_ASSERT(pitch_opts.frames_per_chunk > 0 &&
"--simulate-first-pass-online option does not make sense "
"unless you specify --frames-per-chunk");
}
OnlineProcessPitch post_process(process_opts, &pitch_extractor);
int32 cur_rows = 100;
Matrix<BaseFloat> feats(cur_rows, post_process.Dim());
int32 cur_offset = 0, cur_frame = 0,
samp_per_chunk = pitch_opts.frames_per_chunk *
pitch_opts.samp_freq * pitch_opts.frame_shift_ms / 1000.0f;
// We request the first-pass features as soon as they are available,
// regardless of whether opts.simulate_first_pass_online == true. If
// opts.simulate_first_pass_online == true this should
// not affect the features generated, but it helps us to test the code
// in a way that's closer to what online decoding would see.
while (cur_offset < wave.Dim()) {
int32 num_samp;
if (samp_per_chunk > 0)
num_samp = std::min(samp_per_chunk, wave.Dim() - cur_offset);
else // user left opts.frames_per_chunk at zero.
num_samp = wave.Dim();
SubVector<BaseFloat> wave_chunk(wave, cur_offset, num_samp);
pitch_extractor.AcceptWaveform(pitch_opts.samp_freq, wave_chunk);
cur_offset += num_samp;
if (cur_offset == wave.Dim())
pitch_extractor.InputFinished();
// Get each frame as soon as it is ready.
for (; cur_frame < post_process.NumFramesReady(); cur_frame++) {
if (cur_frame >= cur_rows) {
cur_rows *= 2;
feats.Resize(cur_rows, post_process.Dim(), kCopyData);
}
SubVector<BaseFloat> row(feats, cur_frame);
post_process.GetFrame(cur_frame, &row);
}
}
if (pitch_opts.simulate_first_pass_online) {
if (cur_frame == 0) {
KALDI_WARN << "No features output since wave file too short";
output->Resize(0, 0);
} else {
*output = feats.RowRange(0, cur_frame);
}
} else {
// want the "final" features for second pass, so get them again.
output->Resize(post_process.NumFramesReady(), post_process.Dim());
for (int32 frame = 0; frame < post_process.NumFramesReady(); frame++) {
SubVector<BaseFloat> row(*output, frame);
post_process.GetFrame(frame, &row);
}
}
}
} // namespace kaldi
// feat/pitch-functions.h
// Copyright 2013 Pegah Ghahremani
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// 2014 Yanqing Sun, Junjie Wang,
// Daniel Povey, Korbinian Riedhammer
// Xin Lei
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_PITCH_FUNCTIONS_H_
#define KALDI_FEAT_PITCH_FUNCTIONS_H_
#include <cassert>
#include <cstdlib>
#include <string>
#include <vector>
#include "base/kaldi-error.h"
#include "feat/mel-computations.h"
#include "feat/online-feature-itf.h"
#include "matrix/matrix-lib.h"
#include "util/common-utils.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
struct PitchExtractionOptions {
// FrameExtractionOptions frame_opts;
BaseFloat samp_freq; // sample frequency in hertz
BaseFloat frame_shift_ms; // in milliseconds.
BaseFloat frame_length_ms; // in milliseconds.
BaseFloat preemph_coeff; // Preemphasis coefficient. [use is deprecated.]
BaseFloat min_f0; // min f0 to search (Hz)
BaseFloat max_f0; // max f0 to search (Hz)
BaseFloat soft_min_f0; // Minimum f0, applied in soft way, must not
// exceed min-f0
BaseFloat penalty_factor; // cost factor for F0 change
BaseFloat lowpass_cutoff; // cutoff frequency for Low pass filter
BaseFloat resample_freq; // Frequency that we down-sample the signal to;
// must be more than twice lowpass_cutoff
BaseFloat delta_pitch; // the pitch tolerance in pruning lags
BaseFloat nccf_ballast; // Increasing this factor reduces NCCF for
// quiet frames, helping ensure pitch
// continuity in unvoiced regions
int32 lowpass_filter_width; // Integer that determines filter width of
// lowpass filter
int32 upsample_filter_width; // Integer that determines filter width when
// upsampling NCCF
// Below are newer config variables, not present in the original paper,
// that relate to the online pitch extraction algorithm.
// The maximum number of frames of latency that we allow the pitch-processing
// to introduce, for online operation. If you set this to a large value,
// there would be no inaccuracy from the Viterbi traceback (but it might make
// you wait to see the pitch). This is not very relevant for the online
// operation: normalization-right-context is more relevant, you
// can just leave this value at zero.
int32 max_frames_latency;
// Only relevant for the function ComputeKaldiPitch which is called by
// compute-kaldi-pitch-feats. If nonzero, we provide the input as chunks of
// this size. This affects the energy normalization which has a small effect
// on the resulting features, especially at the beginning of a file. For best
// compatibility with online operation (e.g. if you plan to train models for
// the online-decoding setup), you might want to set this to a small value,
// like one frame.
int32 frames_per_chunk;
// Only relevant for the function ComputeKaldiPitch which is called by
// compute-kaldi-pitch-feats, and only relevant if frames_per_chunk is
// nonzero. If true, it will query the features as soon as they are
// available, which simulates the first-pass features you would get in online
// decoding. If false, the features you will get will be the same as those
// available at the end of the utterance, after InputFinished() has been
// called: e.g. during lattice rescoring.
bool simulate_first_pass_online;
// Only relevant for online operation or when emulating online operation
// (e.g. when setting frames_per_chunk). This is the frame-index on which we
// recompute the NCCF (e.g. frame-index 500 = after 5 seconds); if the
// segment ends before this we do it when the segment ends. We do this by
// re-computing the signal average energy, which affects the NCCF via the
// "ballast term", scaling the resampled NCCF by a factor derived from the
// average change in the "ballast term", and re-doing the backtrace
// computation. Making this infinity would be the most exact, but would
// introduce unwanted latency at the end of long utterances, for little
// benefit.
int32 recompute_frame;
// This is a "hidden config" used only for testing the online pitch
// extraction. If true, we compute the signal root-mean-squared for the
// ballast term, only up to the current frame, rather than the end of the
// current chunk of signal. This makes the output insensitive to the
// chunking, which is useful for testing purposes.
bool nccf_ballast_online;
bool snip_edges;
PitchExtractionOptions():
samp_freq(16000),
frame_shift_ms(10.0),
frame_length_ms(25.0),
preemph_coeff(0.0),
min_f0(50),
max_f0(400),
soft_min_f0(10.0),
penalty_factor(0.1),
lowpass_cutoff(1000),
resample_freq(4000),
delta_pitch(0.005),
nccf_ballast(7000),
lowpass_filter_width(1),
upsample_filter_width(5),
max_frames_latency(0),
frames_per_chunk(0),
simulate_first_pass_online(false),
recompute_frame(500),
nccf_ballast_online(false),
snip_edges(true) { }
void Register(OptionsItf *opts) {
opts->Register("sample-frequency", &samp_freq,
"Waveform data sample frequency (must match the waveform "
"file, if specified there)");
opts->Register("frame-length", &frame_length_ms, "Frame length in "
"milliseconds");
opts->Register("frame-shift", &frame_shift_ms, "Frame shift in "
"milliseconds");
opts->Register("preemphasis-coefficient", &preemph_coeff,
"Coefficient for use in signal preemphasis (deprecated)");
opts->Register("min-f0", &min_f0,
"min. F0 to search for (Hz)");
opts->Register("max-f0", &max_f0,
"max. F0 to search for (Hz)");
opts->Register("soft-min-f0", &soft_min_f0,
"Minimum f0, applied in soft way, must not exceed min-f0");
opts->Register("penalty-factor", &penalty_factor,
"cost factor for FO change.");
opts->Register("lowpass-cutoff", &lowpass_cutoff,
"cutoff frequency for LowPass filter (Hz) ");
opts->Register("resample-frequency", &resample_freq,
"Frequency that we down-sample the signal to. Must be "
"more than twice lowpass-cutoff");
opts->Register("delta-pitch", &delta_pitch,
"Smallest relative change in pitch that our algorithm "
"measures");
opts->Register("nccf-ballast", &nccf_ballast,
"Increasing this factor reduces NCCF for quiet frames");
opts->Register("nccf-ballast-online", &nccf_ballast_online,
"This is useful mainly for debug; it affects how the NCCF "
"ballast is computed.");
opts->Register("lowpass-filter-width", &lowpass_filter_width,
"Integer that determines filter width of "
"lowpass filter, more gives sharper filter");
opts->Register("upsample-filter-width", &upsample_filter_width,
"Integer that determines filter width when upsampling NCCF");
opts->Register("frames-per-chunk", &frames_per_chunk, "Only relevant for "
"offline pitch extraction (e.g. compute-kaldi-pitch-feats), "
"you can set it to a small nonzero value, such as 10, for "
"better feature compatibility with online decoding (affects "
"energy normalization in the algorithm)");
opts->Register("simulate-first-pass-online", &simulate_first_pass_online,
"If true, compute-kaldi-pitch-feats will output features "
"that correspond to what an online decoder would see in the "
"first pass of decoding-- not the final version of the "
"features, which is the default. Relevant if "
"--frames-per-chunk > 0");
opts->Register("recompute-frame", &recompute_frame, "Only relevant for "
"online pitch extraction, or for compatibility with online "
"pitch extraction. A non-critical parameter; the frame at "
"which we recompute some of the forward pointers, after "
"revising our estimate of the signal energy. Relevant if"
"--frames-per-chunk > 0");
opts->Register("max-frames-latency", &max_frames_latency, "Maximum number "
"of frames of latency that we allow pitch tracking to "
"introduce into the feature processing (affects output only "
"if --frames-per-chunk > 0 and "
"--simulate-first-pass-online=true");
opts->Register("snip-edges", &snip_edges, "If this is set to false, the "
"incomplete frames near the ending edge won't be snipped, "
"so that the number of frames is the file size divided by "
"the frame-shift. This makes different types of features "
"give the same number of frames.");
}
/// Returns the window-size in samples, after resampling. This is the
/// "basic window size", not the full window size after extending by max-lag.
// Because of floating point representation, it is more reliable to divide
// by 1000 instead of multiplying by 0.001, but it is a bit slower.
int32 NccfWindowSize() const {
return static_cast<int32>(resample_freq * frame_length_ms / 1000.0);
}
/// Returns the window-shift in samples, after resampling.
int32 NccfWindowShift() const {
return static_cast<int32>(resample_freq * frame_shift_ms / 1000.0);
}
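// Worked example with the defaults above (resample_freq = 4000,
// frame_length_ms = 25.0, frame_shift_ms = 10.0):
// NccfWindowSize() = 4000 * 25 / 1000 = 100 samples, and
// NccfWindowShift() = 4000 * 10 / 1000 = 40 samples.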
};
struct ProcessPitchOptions {
BaseFloat pitch_scale; // the final normalized-log-pitch feature is scaled
// with this value
BaseFloat pov_scale; // the final POV feature is scaled with this value
BaseFloat pov_offset; // An offset that can be added to the final POV
// feature (useful for online-decoding, where we don't
// do CMN to the pitch-derived features).
BaseFloat delta_pitch_scale;
BaseFloat delta_pitch_noise_stddev; // stddev of noise we add to delta-pitch
int32 normalization_left_context; // left-context used for sliding-window
// normalization
int32 normalization_right_context; // this should be reduced in online
// decoding to reduce latency
int32 delta_window;
int32 delay;
bool add_pov_feature;
bool add_normalized_log_pitch;
bool add_delta_pitch;
bool add_raw_log_pitch;
ProcessPitchOptions() :
pitch_scale(2.0),
pov_scale(2.0),
pov_offset(0.0),
delta_pitch_scale(10.0),
delta_pitch_noise_stddev(0.005),
normalization_left_context(75),
normalization_right_context(75),
delta_window(2),
delay(0),
add_pov_feature(true),
add_normalized_log_pitch(true),
add_delta_pitch(true),
add_raw_log_pitch(false) { }
void Register(ParseOptions *opts) {
opts->Register("pitch-scale", &pitch_scale,
"Scaling factor for the final normalized log-pitch value");
opts->Register("pov-scale", &pov_scale,
"Scaling factor for final POV (probability of voicing) "
"feature");
opts->Register("pov-offset", &pov_offset,
"This can be used to add an offset to the POV feature. "
"Intended for use in online decoding as a substitute for "
" CMN.");
opts->Register("delta-pitch-scale", &delta_pitch_scale,
"Term to scale the final delta log-pitch feature");
opts->Register("delta-pitch-noise-stddev", &delta_pitch_noise_stddev,
"Standard deviation for noise we add to the delta log-pitch "
"(before scaling); should be about the same as delta-pitch "
"option to pitch creation. The purpose is to get rid of "
"peaks in the delta-pitch caused by discretization of pitch "
"values.");
opts->Register("normalization-left-context", &normalization_left_context,
"Left-context (in frames) for moving window normalization");
opts->Register("normalization-right-context", &normalization_right_context,
"Right-context (in frames) for moving window normalization");
opts->Register("delta-window", &delta_window,
"Number of frames on each side of central frame, to use for "
"delta window.");
opts->Register("delay", &delay,
"Number of frames by which the pitch information is "
"delayed.");
opts->Register("add-pov-feature", &add_pov_feature,
"If true, the warped NCCF is added to output features");
opts->Register("add-normalized-log-pitch", &add_normalized_log_pitch,
"If true, the log-pitch with POV-weighted mean subtraction "
"over 1.5 second window is added to output features");
opts->Register("add-delta-pitch", &add_delta_pitch,
"If true, time derivative of log-pitch is added to output "
"features");
opts->Register("add-raw-log-pitch", &add_raw_log_pitch,
"If true, log(pitch) is added to output features");
}
};
// We don't want to expose the pitch-extraction internals here as it's
// quite complex, so we use a private implementation.
class OnlinePitchFeatureImpl;
// Note: to start on a new waveform, just construct a new version
// of this object.
class OnlinePitchFeature: public OnlineBaseFeature {
public:
explicit OnlinePitchFeature(const PitchExtractionOptions &opts);
virtual int32 Dim() const { return 2; /* (NCCF, pitch) */ }
virtual int32 NumFramesReady() const;
virtual BaseFloat FrameShiftInSeconds() const;
virtual bool IsLastFrame(int32 frame) const;
/// Outputs the two-dimensional feature consisting of (pitch, NCCF). You
/// should probably post-process this using class OnlineProcessPitch.
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
virtual void AcceptWaveform(BaseFloat sampling_rate,
const VectorBase<BaseFloat> &waveform);
virtual void InputFinished();
virtual ~OnlinePitchFeature();
private:
OnlinePitchFeatureImpl *impl_;
};
/// This online-feature class implements post processing of pitch features.
/// Inputs are original 2 dims (nccf, pitch). It can produce various
/// kinds of outputs, using the default options it will be (pov-feature,
/// normalized-log-pitch, delta-log-pitch).
class OnlineProcessPitch: public OnlineFeatureInterface {
public:
virtual int32 Dim() const { return dim_; }
virtual bool IsLastFrame(int32 frame) const {
if (frame <= -1)
return src_->IsLastFrame(-1);
else if (frame < opts_.delay)
return src_->IsLastFrame(-1) == true ? false : src_->IsLastFrame(0);
else
return src_->IsLastFrame(frame - opts_.delay);
}
virtual BaseFloat FrameShiftInSeconds() const {
return src_->FrameShiftInSeconds();
}
virtual int32 NumFramesReady() const;
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
virtual ~OnlineProcessPitch() { }
// Does not take ownership of "src".
OnlineProcessPitch(const ProcessPitchOptions &opts,
OnlineFeatureInterface *src);
private:
enum { kRawFeatureDim = 2}; // anonymous enum to define a constant.
// kRawFeatureDim defines the dimension
// of the input: (nccf, pitch)
ProcessPitchOptions opts_;
OnlineFeatureInterface *src_;
int32 dim_; // Output feature dimension, set in initializer.
struct NormalizationStats {
int32 cur_num_frames; // value of src_->NumFramesReady() when
// "mean_pitch" was set.
bool input_finished; // true if input data was finished when
// "mean_pitch" was computed.
double sum_pov; // sum of pov over relevant range
double sum_log_pitch_pov; // sum of log(pitch) * pov over relevant range
NormalizationStats(): cur_num_frames(-1), input_finished(false),
sum_pov(0.0), sum_log_pitch_pov(0.0) { }
};
std::vector<BaseFloat> delta_feature_noise_;
std::vector<NormalizationStats> normalization_stats_;
/// Computes and returns the POV feature for this frame.
/// Called from GetFrame().
inline BaseFloat GetPovFeature(int32 frame) const;
/// Computes and returns the delta-log-pitch feature for this frame.
/// Called from GetFrame().
inline BaseFloat GetDeltaPitchFeature(int32 frame);
/// Computes and returns the raw log-pitch feature for this frame.
/// Called from GetFrame().
inline BaseFloat GetRawLogPitchFeature(int32 frame) const;
/// Computes and returns the mean-subtracted log-pitch feature for this frame.
/// Called from GetFrame().
inline BaseFloat GetNormalizedLogPitchFeature(int32 frame);
/// Computes the normalization window sizes.
inline void GetNormalizationWindow(int32 frame,
int32 src_frames_ready,
int32 *window_begin,
int32 *window_end) const;
/// Makes sure the entry in normalization_stats_ for this frame is up to date;
/// called from GetNormalizedLogPitchFeature.
inline void UpdateNormalizationStats(int32 frame);
};
/// This function extracts (pitch, NCCF) per frame, using the pitch extraction
/// method described in "A Pitch Extraction Algorithm Tuned for Automatic Speech
/// Recognition", Pegah Ghahremani, Bagher BabaAli, Daniel Povey, Korbinian
/// Riedhammer, Jan Trmal and Sanjeev Khudanpur, ICASSP 2014. The output will
/// have as many rows as there are frames, and two columns corresponding to
/// (NCCF, pitch)
void ComputeKaldiPitch(const PitchExtractionOptions &opts,
const VectorBase<BaseFloat> &wave,
Matrix<BaseFloat> *output);
/// This function processes the raw (NCCF, pitch) quantities computed by
/// ComputeKaldiPitch, and processes them into features. By default it will
/// output three-dimensional features, (POV-feature, mean-subtracted-log-pitch,
/// delta-of-raw-pitch), but this is configurable in the options. The number of
/// rows of "output" will be the number of frames (rows) in "input", and the
/// number of columns will be the number of different types of features
/// requested (by default, 3; 4 is the max). The four config variables
/// --add-pov-feature, --add-normalized-log-pitch, --add-delta-pitch,
/// --add-raw-log-pitch determine which features we create; by default we create
/// the first three.
void ProcessPitch(const ProcessPitchOptions &opts,
const MatrixBase<BaseFloat> &input,
Matrix<BaseFloat> *output);
/// This function combines ComputeKaldiPitch and ProcessPitch. The reason
/// why we need a separate function to do this is in order to be able to
/// accurately simulate the online pitch-processing, for testing and for
/// training models matched to the "first-pass" features. It is sensitive to
/// the variables in pitch_opts that relate to online processing,
/// i.e. max_frames_latency, frames_per_chunk, simulate_first_pass_online,
/// recompute_frame.
void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts,
const ProcessPitchOptions &process_opts,
const VectorBase<BaseFloat> &wave,
Matrix<BaseFloat> *output);
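// A minimal usage sketch of the functions declared above (illustrative only;
// it assumes the waveform samples are already in a Vector<BaseFloat>, e.g.
// read via the wave-reading utilities):
//
// PitchExtractionOptions pitch_opts; // defaults assume 16 kHz input
// ProcessPitchOptions process_opts; // defaults give 3-dimensional output
// Vector<BaseFloat> wave; // ... filled with waveform samples ...
// Matrix<BaseFloat> features;
// ComputeAndProcessKaldiPitch(pitch_opts, process_opts, wave, &features);
// // "features" has one row per frame and, with default options, columns
// // (pov-feature, normalized-log-pitch, delta-log-pitch).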
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_PITCH_FUNCTIONS_H_
// feat/resample.cc
// Copyright 2013 Pegah Ghahremani
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// 2014 Yanqing Sun, Junjie Wang
// 2014 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <limits>
#include "feat/feature-functions.h"
#include "matrix/matrix-functions.h"
#include "feat/resample.h"
namespace kaldi {
LinearResample::LinearResample(int32 samp_rate_in_hz,
int32 samp_rate_out_hz,
BaseFloat filter_cutoff_hz,
int32 num_zeros):
samp_rate_in_(samp_rate_in_hz),
samp_rate_out_(samp_rate_out_hz),
filter_cutoff_(filter_cutoff_hz),
num_zeros_(num_zeros) {
KALDI_ASSERT(samp_rate_in_hz > 0.0 &&
samp_rate_out_hz > 0.0 &&
filter_cutoff_hz > 0.0 &&
filter_cutoff_hz*2 <= samp_rate_in_hz &&
filter_cutoff_hz*2 <= samp_rate_out_hz &&
num_zeros > 0);
// base_freq is the frequency of the repeating unit, which is the gcd
// of the input frequencies.
int32 base_freq = Gcd(samp_rate_in_, samp_rate_out_);
input_samples_in_unit_ = samp_rate_in_ / base_freq;
output_samples_in_unit_ = samp_rate_out_ / base_freq;
SetIndexesAndWeights();
Reset();
}
int64 LinearResample::GetNumOutputSamples(int64 input_num_samp,
bool flush) const {
// For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
// where tick_freq is the least common multiple of samp_rate_in_ and
// samp_rate_out_.
int32 tick_freq = Lcm(samp_rate_in_, samp_rate_out_);
int32 ticks_per_input_period = tick_freq / samp_rate_in_;
// work out the number of ticks in the time interval
// [ 0, input_num_samp/samp_rate_in_ ).
int64 interval_length_in_ticks = input_num_samp * ticks_per_input_period;
if (!flush) {
BaseFloat window_width = num_zeros_ / (2.0 * filter_cutoff_);
// To count the window-width in ticks we take the floor. This
// is because since we're looking for the largest integer num-out-samp
// that fits in the interval, which is open on the right, a reduction
// in interval length of less than a tick will never make a difference.
// For example, the largest integer in the interval [ 0, 2 ) and the
// largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one).
// So when we're subtracting the window-width we can ignore the fractional
// part.
int32 window_width_ticks = floor(window_width * tick_freq);
// The time-period of the output that we can sample gets reduced
// by the window-width (which is actually the distance from the
// center to the edge of the windowing function) if we're not
// "flushing the output".
interval_length_in_ticks -= window_width_ticks;
}
if (interval_length_in_ticks <= 0)
return 0;
int32 ticks_per_output_period = tick_freq / samp_rate_out_;
// Get the last output-sample in the closed interval, i.e. replacing [ ) with
// [ ]. Note: integer division rounds down. See
// http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of
// the notation.
int64 last_output_samp = interval_length_in_ticks / ticks_per_output_period;
// We need the last output-sample in the open interval, so if it takes us to
// the end of the interval exactly, subtract one.
if (last_output_samp * ticks_per_output_period == interval_length_in_ticks)
last_output_samp--;
// First output-sample index is zero, so the number of output samples
// is the last output-sample plus one.
int64 num_output_samp = last_output_samp + 1;
return num_output_samp;
}
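// Worked example of the tick arithmetic above: for samp_rate_in_ = 16000 and
// samp_rate_out_ = 8000, tick_freq = Lcm(16000, 8000) = 16000, so
// ticks_per_input_period = 1 and ticks_per_output_period = 2. With
// input_num_samp = 100 and flush == true, interval_length_in_ticks = 100 and
// last_output_samp = 100 / 2 = 50; this lands exactly on the (open) interval
// end, so it is decremented to 49 and we return 49 + 1 = 50 output samples.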
void LinearResample::SetIndexesAndWeights() {
first_index_.resize(output_samples_in_unit_);
weights_.resize(output_samples_in_unit_);
double window_width = num_zeros_ / (2.0 * filter_cutoff_);
for (int32 i = 0; i < output_samples_in_unit_; i++) {
double output_t = i / static_cast<double>(samp_rate_out_);
double min_t = output_t - window_width, max_t = output_t + window_width;
// we do ceil on the min and floor on the max, because if we did it
// the other way around we would unnecessarily include indexes just
// outside the window, with zero coefficients. It's possible
// if the arguments to the ceil and floor expressions are integers
// (e.g. if filter_cutoff_ has an exact ratio with the sample rates),
// that we unnecessarily include something with a zero coefficient,
// but this is only a slight efficiency issue.
int32 min_input_index = ceil(min_t * samp_rate_in_),
max_input_index = floor(max_t * samp_rate_in_),
num_indices = max_input_index - min_input_index + 1;
first_index_[i] = min_input_index;
weights_[i].Resize(num_indices);
for (int32 j = 0; j < num_indices; j++) {
int32 input_index = min_input_index + j;
double input_t = input_index / static_cast<double>(samp_rate_in_),
delta_t = input_t - output_t;
// sign of delta_t doesn't matter.
weights_[i](j) = FilterFunc(delta_t) / samp_rate_in_;
}
}
}
// inline
void LinearResample::GetIndexes(int64 samp_out,
int64 *first_samp_in,
int32 *samp_out_wrapped) const {
// A unit is the smallest nonzero amount of time that is an exact
// multiple of the input and output sample periods. The unit index
// is the answer to "which numbered unit we are in".
int64 unit_index = samp_out / output_samples_in_unit_;
// samp_out_wrapped is equal to samp_out % output_samples_in_unit_
*samp_out_wrapped = static_cast<int32>(samp_out -
unit_index * output_samples_in_unit_);
*first_samp_in = first_index_[*samp_out_wrapped] +
unit_index * input_samples_in_unit_;
}
void LinearResample::Resample(const VectorBase<BaseFloat> &input,
bool flush,
Vector<BaseFloat> *output) {
int32 input_dim = input.Dim();
int64 tot_input_samp = input_sample_offset_ + input_dim,
tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
KALDI_ASSERT(tot_output_samp >= output_sample_offset_);
output->Resize(tot_output_samp - output_sample_offset_);
// samp_out is the index into the total output signal, not just the part
// of it we are producing here.
for (int64 samp_out = output_sample_offset_;
samp_out < tot_output_samp;
samp_out++) {
int64 first_samp_in;
int32 samp_out_wrapped;
GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
const Vector<BaseFloat> &weights = weights_[samp_out_wrapped];
// first_input_index is the first index into "input" that we have a weight
// for.
int32 first_input_index = static_cast<int32>(first_samp_in -
input_sample_offset_);
BaseFloat this_output;
if (first_input_index >= 0 &&
first_input_index + weights.Dim() <= input_dim) {
SubVector<BaseFloat> input_part(input, first_input_index, weights.Dim());
this_output = VecVec(input_part, weights);
} else { // Handle edge cases.
this_output = 0.0;
for (int32 i = 0; i < weights.Dim(); i++) {
BaseFloat weight = weights(i);
int32 input_index = first_input_index + i;
if (input_index < 0 && input_remainder_.Dim() + input_index >= 0) {
this_output += weight *
input_remainder_(input_remainder_.Dim() + input_index);
} else if (input_index >= 0 && input_index < input_dim) {
this_output += weight * input(input_index);
} else if (input_index >= input_dim) {
// We're past the end of the input and are adding zero; should only
// happen if the user specified flush == true, or else we would not
// be trying to output this sample.
KALDI_ASSERT(flush);
}
}
}
int32 output_index = static_cast<int32>(samp_out - output_sample_offset_);
(*output)(output_index) = this_output;
}
if (flush) {
Reset(); // Reset the internal state.
} else {
SetRemainder(input);
input_sample_offset_ = tot_input_samp;
output_sample_offset_ = tot_output_samp;
}
}
void LinearResample::SetRemainder(const VectorBase<BaseFloat> &input) {
Vector<BaseFloat> old_remainder(input_remainder_);
// max_remainder_needed is the width of the filter from side to side,
// measured in input samples. you might think it should be half that,
// but you have to consider that you might be wanting to output samples
// that are "in the past" relative to the beginning of the latest
// input... anyway, storing more remainder than needed is not harmful.
int32 max_remainder_needed = ceil(samp_rate_in_ * num_zeros_ /
filter_cutoff_);
input_remainder_.Resize(max_remainder_needed);
for (int32 index = - input_remainder_.Dim(); index < 0; index++) {
// we interpret "index" as an offset from the end of "input" and
// from the end of input_remainder_.
int32 input_index = index + input.Dim();
if (input_index >= 0)
input_remainder_(index + input_remainder_.Dim()) = input(input_index);
else if (input_index + old_remainder.Dim() >= 0)
input_remainder_(index + input_remainder_.Dim()) =
old_remainder(input_index + old_remainder.Dim());
// else leave it at zero.
}
}
void LinearResample::Reset() {
input_sample_offset_ = 0;
output_sample_offset_ = 0;
input_remainder_.Resize(0);
}
/** Here, t is a time in seconds representing an offset from
the center of the windowed filter function, and FilterFunction(t)
returns the windowed filter function, described
in the header as h(t) = f(t)g(t), evaluated at t.
*/
BaseFloat LinearResample::FilterFunc(BaseFloat t) const {
BaseFloat window, // raised-cosine (Hanning) window of width
// num_zeros_ / (2 * filter_cutoff_)
filter; // sinc filter function
if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
else
window = 0.0; // outside support of window function
if (t != 0)
filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
else
filter = 2 * filter_cutoff_; // limit of the function at t = 0
return filter * window;
}
ArbitraryResample::ArbitraryResample(
int32 num_samples_in, BaseFloat samp_rate_in,
BaseFloat filter_cutoff, const Vector<BaseFloat> &sample_points,
int32 num_zeros):
num_samples_in_(num_samples_in),
samp_rate_in_(samp_rate_in),
filter_cutoff_(filter_cutoff),
num_zeros_(num_zeros) {
KALDI_ASSERT(num_samples_in > 0 && samp_rate_in > 0.0 &&
filter_cutoff > 0.0 &&
filter_cutoff * 2.0 <= samp_rate_in
&& num_zeros > 0);
// Set up weights_ and indices_.
SetIndexes(sample_points);
SetWeights(sample_points);
}
void ArbitraryResample::Resample(const MatrixBase<BaseFloat> &input,
MatrixBase<BaseFloat> *output) const {
// each row of "input" corresponds to the data to resample;
// the corresponding row of "output" is the resampled data.
KALDI_ASSERT(input.NumRows() == output->NumRows() &&
input.NumCols() == num_samples_in_ &&
output->NumCols() == weights_.size());
Vector<BaseFloat> output_col(output->NumRows());
for (int32 i = 0; i < NumSamplesOut(); i++) {
SubMatrix<BaseFloat> input_part(input, 0, input.NumRows(),
first_index_[i],
weights_[i].Dim());
const Vector<BaseFloat> &weight_vec(weights_[i]);
output_col.AddMatVec(1.0, input_part,
kNoTrans, weight_vec, 0.0);
output->CopyColFromVec(output_col, i);
}
}
void ArbitraryResample::Resample(const VectorBase<BaseFloat> &input,
VectorBase<BaseFloat> *output) const {
KALDI_ASSERT(input.Dim() == num_samples_in_ &&
output->Dim() == weights_.size());
int32 output_dim = output->Dim();
for (int32 i = 0; i < output_dim; i++) {
SubVector<BaseFloat> input_part(input, first_index_[i], weights_[i].Dim());
(*output)(i) = VecVec(input_part, weights_[i]);
}
}
void ArbitraryResample::SetIndexes(const Vector<BaseFloat> &sample_points) {
int32 num_samples = sample_points.Dim();
first_index_.resize(num_samples);
weights_.resize(num_samples);
BaseFloat filter_width = num_zeros_ / (2.0 * filter_cutoff_);
for (int32 i = 0; i < num_samples; i++) {
// the t values are in seconds.
BaseFloat t = sample_points(i),
t_min = t - filter_width, t_max = t + filter_width;
int32 index_min = ceil(samp_rate_in_ * t_min),
index_max = floor(samp_rate_in_ * t_max);
// the ceil on index min and the floor on index_max are because there
// is no point using indices just outside the window (coeffs would be zero).
if (index_min < 0)
index_min = 0;
if (index_max >= num_samples_in_)
index_max = num_samples_in_ - 1;
first_index_[i] = index_min;
weights_[i].Resize(index_max - index_min + 1);
}
}
void ArbitraryResample::SetWeights(const Vector<BaseFloat> &sample_points) {
int32 num_samples_out = NumSamplesOut();
for (int32 i = 0; i < num_samples_out; i++) {
for (int32 j = 0 ; j < weights_[i].Dim(); j++) {
BaseFloat delta_t = sample_points(i) -
(first_index_[i] + j) / samp_rate_in_;
// Include at this point the factor of 1.0 / samp_rate_in_ which
// appears in the math.
weights_[i](j) = FilterFunc(delta_t) / samp_rate_in_;
}
}
}
/** Here, t is a time in seconds representing an offset from
the center of the windowed filter function, and FilterFunction(t)
returns the windowed filter function, described
in the header as h(t) = f(t)g(t), evaluated at t.
*/
BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const {
BaseFloat window, // raised-cosine (Hanning) window of width
// num_zeros_ / (2 * filter_cutoff_)
filter; // sinc filter function
if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
else
window = 0.0; // outside support of window function
if (t != 0.0)
filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
else
filter = 2.0 * filter_cutoff_; // limit of the function at zero.
return filter * window;
}
void ResampleWaveform(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
BaseFloat min_freq = std::min(orig_freq, new_freq);
BaseFloat lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32 lowpass_filter_width = 6;
LinearResample resampler(orig_freq, new_freq,
lowpass_cutoff, lowpass_filter_width);
resampler.Resample(wave, true, new_wave);
}
} // namespace kaldi
// feat/resample.h
// Copyright 2013 Pegah Ghahremani
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// 2014 Yanqing Sun, Junjie Wang
// 2014 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_RESAMPLE_H_
#define KALDI_FEAT_RESAMPLE_H_
#include <cassert>
#include <cstdlib>
#include <string>
#include <vector>
#include "matrix/matrix-lib.h"
#include "util/common-utils.h"
#include "base/kaldi-error.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/**
\file resample.h
This header contains declarations of classes for resampling signals. The
normal cases of resampling a signal are upsampling and downsampling
(increasing and decreasing the sample rate of a signal, respectively),
although the ArbitraryResample class allows a more generic case where
we want to get samples of a signal at uneven intervals (for instance,
log-spaced).
The input signal is always evenly spaced, say sampled with frequency S, and
we assume the original signal was band-limited to S/2 or lower. The n'th
input sample x_n (with n = 0, 1, ...) is interpreted as the original
signal's value at time n/S.
For resampling, it is convenient to view the input signal as a
continuous function x(t) of t, where each sample x_n becomes a delta function
with magnitude x_n/S, at time n/S. If we band limit this to the Nyquist
frequency S/2, we can show that this is the same as the original signal
that was sampled. [assuming the original signal was periodic and band
limited.] In general we want to bandlimit to lower than S/2, because
we don't have a perfect filter and also because if we want to resample
at a lower frequency than S, we need to bandlimit to below half of that.
Anyway, suppose we want to bandlimit to C, with 0 < C < S/2. The perfect
rectangular filter with cutoff C is the sinc function,
\f[ f(t) = 2C sinc(2Ct), \f]
where sinc is the normalized sinc function \f$ sinc(t) = sin(pi t) / (pi t) \f$, with
\f$ sinc(0) = 1 \f$. This is not a practical filter, though, because it has
infinite support. At the cost of less-than-perfect rolloff, we can choose
a suitable windowing function g(t), and use f(t) g(t) as the filter. For
a windowing function we choose raised-cosine (Hanning) window with support
on [-w/2C, w/2C], where w >= 2 is an integer chosen by the user. w = 1
means we window the sinc function out to its first zero on the left and right,
w = 2 means the second zero, and so on; we normally choose w to be at least two.
We call this num_zeros, not w, in the code.
Convolving the signal x(t) with this windowed filter h(t) = f(t)g(t) and evaluating the resulting
signal s(t) at an arbitrary time t is easy: we have
\f[ s(t) = 1/S \sum_n x_n h(t - n/S) \f].
(note: the sign of t - n/S might be wrong, but it doesn't matter as the filter
and window are symmetric).
This is true for arbitrary values of t. What the class ArbitraryResample does
is to allow you to evaluate the signal for specified values of t.
*/
/**
Class ArbitraryResample allows you to resample a signal (assumed zero outside
the sample region, not periodic) at arbitrary specified time values, which
don't have to be linearly spaced. The low-pass filter cutoff
"filter_cutoff_hz" should be less than half the sample rate;
"num_zeros" should probably be at least two preferably more; higher numbers give
sharper filters but will be less efficient.
*/
class ArbitraryResample {
public:
ArbitraryResample(int32 num_samples_in,
BaseFloat samp_rate_hz,
BaseFloat filter_cutoff_hz,
const Vector<BaseFloat> &sample_points_secs,
int32 num_zeros);
int32 NumSamplesIn() const { return num_samples_in_; }
int32 NumSamplesOut() const { return weights_.size(); }
/// This function does the resampling.
/// input.NumRows() and output.NumRows() should be equal
/// and nonzero.
/// input.NumCols() should equal NumSamplesIn()
/// and output.NumCols() should equal NumSamplesOut().
void Resample(const MatrixBase<BaseFloat> &input,
MatrixBase<BaseFloat> *output) const;
/// This version of the Resample function processes just
/// one vector.
void Resample(const VectorBase<BaseFloat> &input,
VectorBase<BaseFloat> *output) const;
private:
void SetIndexes(const Vector<BaseFloat> &sample_points);
void SetWeights(const Vector<BaseFloat> &sample_points);
BaseFloat FilterFunc(BaseFloat t) const;
int32 num_samples_in_;
BaseFloat samp_rate_in_;
BaseFloat filter_cutoff_;
int32 num_zeros_;
std::vector<int32> first_index_; // The first input-sample index that we sum
// over, for this output-sample index.
std::vector<Vector<BaseFloat> > weights_;
};
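// A minimal usage sketch (illustrative, not part of the original header):
// evaluating a 1-second, 16 kHz signal at three arbitrary time points.
//
// Vector<BaseFloat> times(3);
// times(0) = 0.10; times(1) = 0.25; times(2) = 0.90; // seconds
// ArbitraryResample resampler(16000, 16000.0, 7000.0, times, 5);
// Vector<BaseFloat> input(16000); // ... filled with samples ...
// Vector<BaseFloat> output(resampler.NumSamplesOut());
// resampler.Resample(input, &output);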
/**
LinearResample is a special case of ArbitraryResample, where we want to
resample a signal at linearly spaced intervals (this means we want to
upsample or downsample the signal). It is more efficient than
ArbitraryResample because we can construct it just once.
We require that the input and output sampling rate be specified as
integers, as this is an easy way to specify that their ratio be rational.
*/
class LinearResample {
public:
/// Constructor. We make the input and output sample rates integers, because
/// we are going to need to find a common divisor. This should just remind
/// you that they need to be integers. The filter cutoff needs to be less
/// than samp_rate_in_hz/2 and less than samp_rate_out_hz/2. num_zeros
/// controls the sharpness of the filter, more == sharper but less efficient.
/// We suggest around 4 to 10 for normal use.
LinearResample(int32 samp_rate_in_hz,
int32 samp_rate_out_hz,
BaseFloat filter_cutoff_hz,
int32 num_zeros);
/// This function does the resampling. If you call it with flush == true and
/// you have never called it with flush == false, it just resamples the input
/// signal (it resizes the output to a suitable number of samples).
///
/// You can also use this function to process a signal a piece at a time.
/// Suppose you break it into piece1, piece2, ... pieceN. You can call
/// \code{.cc}
/// Resample(piece1, false, &output1);
/// Resample(piece2, false, &output2);
/// Resample(piece3, true, &output3);
/// \endcode
/// If you call it with flush == false, it won't output the last few samples
/// but will remember them, so that if you later give it a second piece of
/// the input signal it can process it correctly.
/// If your most recent call to the object was with flush == false, it will
/// have internal state; you can remove this by calling Reset().
/// Empty input is acceptable.
void Resample(const VectorBase<BaseFloat> &input,
bool flush,
Vector<BaseFloat> *output);
/// Calling the function Reset() resets the state of the object prior to
/// processing a new signal; it is only necessary if you have called
/// Resample(x, y, false) for some signal, leading to a remainder of the
/// signal being stored, but then abandon processing the signal before calling
/// Resample(x, y, true) for the last piece. Calling it unnecessarily between
/// signals will not do any harm.
void Reset();
/// Return the input and output sampling rates (for checks, for example)
inline int32 GetInputSamplingRate() { return samp_rate_in_; }
inline int32 GetOutputSamplingRate() { return samp_rate_out_; }
private:
/// This function outputs the number of output samples we will output
/// for a signal with "input_num_samp" input samples. If flush == true,
/// we return the largest n such that
/// (n/samp_rate_out_) is in the interval [ 0, input_num_samp/samp_rate_in_ ),
/// and note that the interval is half-open. If flush == false,
/// define window_width as num_zeros / (2.0 * filter_cutoff_);
/// we return the largest n such that (n/samp_rate_out_) is in the interval
/// [ 0, input_num_samp/samp_rate_in_ - window_width ).
int64 GetNumOutputSamples(int64 input_num_samp, bool flush) const;
/// Given an output-sample index, this function outputs to *first_samp_in the
/// first input-sample index that we have a weight on (may be negative),
/// and to *samp_out_wrapped the index into weights_ where we can get the
/// corresponding weights on the input.
inline void GetIndexes(int64 samp_out,
int64 *first_samp_in,
int32 *samp_out_wrapped) const;
void SetRemainder(const VectorBase<BaseFloat> &input);
void SetIndexesAndWeights();
BaseFloat FilterFunc(BaseFloat) const;
// The following variables are provided by the user.
int32 samp_rate_in_;
int32 samp_rate_out_;
BaseFloat filter_cutoff_;
int32 num_zeros_;
int32 input_samples_in_unit_; ///< The number of input samples in the
///< smallest repeating unit: num_samp_in_ =
///< samp_rate_in_hz / Gcd(samp_rate_in_hz,
///< samp_rate_out_hz)
int32 output_samples_in_unit_; ///< The number of output samples in the
///< smallest repeating unit: num_samp_out_ =
///< samp_rate_out_hz / Gcd(samp_rate_in_hz,
///< samp_rate_out_hz)
/// The first input-sample index that we sum over, for this output-sample
/// index. May be negative; any truncation at the beginning is handled
/// separately. This is just for the first few output samples, but we can
/// extrapolate the correct input-sample index for arbitrary output samples.
std::vector<int32> first_index_;
/// Weights on the input samples, for this output-sample index.
std::vector<Vector<BaseFloat> > weights_;
// the following variables keep track of where we are in a particular signal,
// if it is being provided over multiple calls to Resample().
int64 input_sample_offset_; ///< The number of input samples we have
///< already received for this signal
///< (including anything in remainder_)
int64 output_sample_offset_; ///< The number of samples we have already
///< output for this signal.
Vector<BaseFloat> input_remainder_; ///< A small trailing part of the
///< previously seen input signal.
};
/**
Downsample or upsample a waveform. This is a convenience wrapper for the
class 'LinearResample'.
The low-pass filter cutoff used in 'LinearResample' is 0.99 of the Nyquist,
where the Nyquist is half of the minimum of (orig_freq, new_freq). The
resampling is done with a symmetric FIR filter with N_z (number of zeros)
as 6.
We compared the downsampling results with those from the sox resampling
toolkit.
Sox's design is inspired by Laurent De Soras' paper,
https://ccrma.stanford.edu/~jos/resample/Implementation.html
Note: we expect that while orig_freq and new_freq are of type BaseFloat, they
are actually required to have exact integer values (like 16000 or 8000) with
a ratio between them that can be expressed as a rational number with
reasonably small integer factors.
*/
void ResampleWaveform(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
BaseFloat new_freq, Vector<BaseFloat> *new_wave);
/// This function is deprecated. It is provided for backward compatibility, to avoid
/// breaking older code.
inline void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
ResampleWaveform(orig_freq, wave, new_freq, new_wave);
}
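// A minimal usage sketch (illustrative): downsampling a waveform from
// 16 kHz to 8 kHz with the convenience wrapper above.
//
// Vector<BaseFloat> wave16k; // ... samples at 16000 Hz ...
// Vector<BaseFloat> wave8k;
// ResampleWaveform(16000.0, wave16k, 8000.0, &wave8k);
// // wave8k is resized internally and now holds roughly half as many
// // samples, low-pass filtered at 0.99 * 4000 = 3960 Hz.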
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_RESAMPLE_H_
// feat/signal.cc
// Copyright 2015 Tom Ko
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "feat/signal.h"
namespace kaldi {
void ElementwiseProductOfFft(const Vector<BaseFloat> &a, Vector<BaseFloat> *b) {
int32 num_fft_bins = a.Dim() / 2;
for (int32 i = 0; i < num_fft_bins; i++) {
// do complex multiplication
ComplexMul(a(2*i), a(2*i + 1), &((*b)(2*i)), &((*b)(2*i + 1)));
}
}
void ConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
int32 signal_length = signal->Dim();
int32 filter_length = filter.Dim();
int32 output_length = signal_length + filter_length - 1;
Vector<BaseFloat> signal_padded(output_length);
signal_padded.SetZero();
for (int32 i = 0; i < signal_length; i++) {
for (int32 j = 0; j < filter_length; j++) {
signal_padded(i + j) += (*signal)(i) * filter(j);
}
}
signal->Resize(output_length);
signal->CopyFromVec(signal_padded);
}
void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
int32 signal_length = signal->Dim();
int32 filter_length = filter.Dim();
int32 output_length = signal_length + filter_length - 1;
int32 fft_length = RoundUpToNearestPowerOfTwo(output_length);
KALDI_VLOG(1) << "fft_length for full signal convolution is " << fft_length;
SplitRadixRealFft<BaseFloat> srfft(fft_length);
Vector<BaseFloat> filter_padded(fft_length);
filter_padded.Range(0, filter_length).CopyFromVec(filter);
srfft.Compute(filter_padded.Data(), true);
Vector<BaseFloat> signal_padded(fft_length);
signal_padded.Range(0, signal_length).CopyFromVec(*signal);
srfft.Compute(signal_padded.Data(), true);
ElementwiseProductOfFft(filter_padded, &signal_padded);
srfft.Compute(signal_padded.Data(), false);
signal_padded.Scale(1.0 / fft_length);
signal->Resize(output_length);
signal->CopyFromVec(signal_padded.Range(0, output_length));
}
void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
int32 signal_length = signal->Dim();
int32 filter_length = filter.Dim();
int32 output_length = signal_length + filter_length - 1;
signal->Resize(output_length, kCopyData);
KALDI_VLOG(1) << "Length of the filter is " << filter_length;
int32 fft_length = RoundUpToNearestPowerOfTwo(4 * filter_length);
KALDI_VLOG(1) << "Best FFT length is " << fft_length;
int32 block_length = fft_length - filter_length + 1;
KALDI_VLOG(1) << "Block size is " << block_length;
SplitRadixRealFft<BaseFloat> srfft(fft_length);
Vector<BaseFloat> filter_padded(fft_length);
filter_padded.Range(0, filter_length).CopyFromVec(filter);
srfft.Compute(filter_padded.Data(), true);
Vector<BaseFloat> temp_pad(filter_length - 1);
temp_pad.SetZero();
Vector<BaseFloat> signal_block_padded(fft_length);
for (int32 po = 0; po < output_length; po += block_length) {
// get a block of the signal
int32 process_length = std::min(block_length, output_length - po);
signal_block_padded.SetZero();
signal_block_padded.Range(0, process_length).CopyFromVec(signal->Range(po, process_length));
srfft.Compute(signal_block_padded.Data(), true);
ElementwiseProductOfFft(filter_padded, &signal_block_padded);
srfft.Compute(signal_block_padded.Data(), false);
signal_block_padded.Scale(1.0 / fft_length);
// combine the block
if (po + block_length < output_length) { // current block is not the last block
signal->Range(po, block_length).CopyFromVec(signal_block_padded.Range(0, block_length));
signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad);
temp_pad.CopyFromVec(signal_block_padded.Range(block_length, filter_length - 1));
} else {
signal->Range(po, output_length - po).CopyFromVec(
signal_block_padded.Range(0, output_length - po));
if (filter_length - 1 < output_length - po)
signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad);
else
signal->Range(po, output_length - po).AddVec(1.0, temp_pad.Range(0, output_length - po));
}
}
}
} // namespace kaldi
// feat/signal.h
// Copyright 2015 Tom Ko
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_SIGNAL_H_
#define KALDI_FEAT_SIGNAL_H_
#include "base/kaldi-common.h"
#include "util/common-utils.h"
namespace kaldi {
/*
The following three functions provide the same functionality, but with
implementations that differ in efficiency. After the convolution,
the length of the signal will be extended to (original signal length +
filter length - 1).
*/
/*
This function implements a simple non-FFT-based convolution of two signals.
It is suggested to use one of the FFT-based convolution functions below,
which are more efficient.
*/
void ConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal);
/*
This function implements FFT-based convolution of two signals.
However, it is generally less efficient than FFTbasedBlockConvolveSignals(),
as it processes the entire signal with a single FFT.
*/
void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal);
/*
This function implements FFT-based block convolution of two signals using
overlap-add method. This is an efficient way to evaluate the discrete
convolution of a long signal with a finite impulse response filter.
*/
void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal);
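// A minimal usage sketch (illustrative): applying a FIR filter to a signal
// in place with the overlap-add implementation above.
//
// Vector<BaseFloat> filter(64); // impulse response, e.g. a room response
// Vector<BaseFloat> signal(16000);
// // ... fill both vectors ...
// FFTbasedBlockConvolveSignals(filter, &signal);
// // "signal" is extended to 16000 + 64 - 1 samples holding the convolution.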
} // namespace kaldi
#endif // KALDI_FEAT_SIGNAL_H_
add_library(kaldi-matrix
compressed-matrix.cc
kaldi-matrix.cc
kaldi-vector.cc
matrix-functions.cc
optimization.cc
packed-matrix.cc
qr.cc
sparse-matrix.cc
sp-matrix.cc
srfft.cc
tp-matrix.cc
)
target_link_libraries(kaldi-matrix gfortran kaldi-base libopenblas.a)
// matrix/cblas-wrappers.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey);
// Haihua Xu; Wei Shi
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_CBLAS_WRAPPERS_H_
#define KALDI_MATRIX_CBLAS_WRAPPERS_H_ 1
#include <limits>
#include "matrix/sp-matrix.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/matrix-functions.h"
#include "matrix/kaldi-blas.h"
// Do not include this file directly. It is to be included
// by .cc files in this directory.
namespace kaldi {
inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y,
const int incY) {
cblas_scopy(N, X, incX, Y, incY);
}
inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y,
const int incY) {
cblas_dcopy(N, X, incX, Y, incY);
}
inline float cblas_Xasum(const int N, const float *X, const int incX) {
return cblas_sasum(N, X, incX);
}
inline double cblas_Xasum(const int N, const double *X, const int incX) {
return cblas_dasum(N, X, incX);
}
inline void cblas_Xrot(const int N, float *X, const int incX, float *Y,
const int incY, const float c, const float s) {
cblas_srot(N, X, incX, Y, incY, c, s);
}
inline void cblas_Xrot(const int N, double *X, const int incX, double *Y,
const int incY, const double c, const double s) {
cblas_drot(N, X, incX, Y, incY, c, s);
}
inline float cblas_Xdot(const int N, const float *const X,
const int incX, const float *const Y,
const int incY) {
return cblas_sdot(N, X, incX, Y, incY);
}
inline double cblas_Xdot(const int N, const double *const X,
const int incX, const double *const Y,
const int incY) {
return cblas_ddot(N, X, incX, Y, incY);
}
inline void cblas_Xaxpy(const int N, const float alpha, const float *X,
const int incX, float *Y, const int incY) {
cblas_saxpy(N, alpha, X, incX, Y, incY);
}
inline void cblas_Xaxpy(const int N, const double alpha, const double *X,
const int incX, double *Y, const int incY) {
cblas_daxpy(N, alpha, X, incX, Y, incY);
}
inline void cblas_Xscal(const int N, const float alpha, float *data,
const int inc) {
cblas_sscal(N, alpha, data, inc);
}
inline void cblas_Xscal(const int N, const double alpha, double *data,
const int inc) {
cblas_dscal(N, alpha, data, inc);
}
inline void cblas_Xspmv(const float alpha, const int num_rows, const float *Mdata,
const float *v, const int v_inc,
const float beta, float *y, const int y_inc) {
cblas_sspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
}
inline void cblas_Xspmv(const double alpha, const int num_rows, const double *Mdata,
const double *v, const int v_inc,
const double beta, double *y, const int y_inc) {
cblas_dspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
}
inline void cblas_Xtpmv(MatrixTransposeType trans, const float *Mdata,
const int num_rows, float *y, const int y_inc) {
cblas_stpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
CblasNonUnit, num_rows, Mdata, y, y_inc);
}
inline void cblas_Xtpmv(MatrixTransposeType trans, const double *Mdata,
const int num_rows, double *y, const int y_inc) {
cblas_dtpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
CblasNonUnit, num_rows, Mdata, y, y_inc);
}
inline void cblas_Xtpsv(MatrixTransposeType trans, const float *Mdata,
const int num_rows, float *y, const int y_inc) {
cblas_stpsv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
CblasNonUnit, num_rows, Mdata, y, y_inc);
}
inline void cblas_Xtpsv(MatrixTransposeType trans, const double *Mdata,
const int num_rows, double *y, const int y_inc) {
cblas_dtpsv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
CblasNonUnit, num_rows, Mdata, y, y_inc);
}
// x = alpha * M * y + beta * x
inline void cblas_Xspmv(MatrixIndexT dim, float alpha, const float *Mdata,
const float *ydata, MatrixIndexT ystride,
float beta, float *xdata, MatrixIndexT xstride) {
cblas_sspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
ydata, ystride, beta, xdata, xstride);
}
inline void cblas_Xspmv(MatrixIndexT dim, double alpha, const double *Mdata,
const double *ydata, MatrixIndexT ystride,
double beta, double *xdata, MatrixIndexT xstride) {
cblas_dspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
ydata, ystride, beta, xdata, xstride);
}
// Implements A += alpha * (x y' + y x'); A is symmetric matrix.
inline void cblas_Xspr2(MatrixIndexT dim, float alpha, const float *Xdata,
MatrixIndexT incX, const float *Ydata, MatrixIndexT incY,
float *Adata) {
cblas_sspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
incX, Ydata, incY, Adata);
}
inline void cblas_Xspr2(MatrixIndexT dim, double alpha, const double *Xdata,
MatrixIndexT incX, const double *Ydata, MatrixIndexT incY,
double *Adata) {
cblas_dspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
incX, Ydata, incY, Adata);
}
// Implements A += alpha * (x x'); A is symmetric matrix.
inline void cblas_Xspr(MatrixIndexT dim, float alpha, const float *Xdata,
MatrixIndexT incX, float *Adata) {
cblas_sspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
}
inline void cblas_Xspr(MatrixIndexT dim, double alpha, const double *Xdata,
MatrixIndexT incX, double *Adata) {
cblas_dspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
}
// sgemv,dgemv: y = alpha M x + beta y.
inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, float alpha, const float *Mdata,
MatrixIndexT stride, const float *xdata,
MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
cblas_sgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
}
inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, double alpha, const double *Mdata,
MatrixIndexT stride, const double *xdata,
MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
cblas_dgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
}
// sgbmv, dgbmv: y = alpha M x + beta y.
inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, MatrixIndexT num_below,
MatrixIndexT num_above, float alpha, const float *Mdata,
MatrixIndexT stride, const float *xdata,
MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
cblas_sgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
incX, beta, ydata, incY);
}
inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, MatrixIndexT num_below,
MatrixIndexT num_above, double alpha, const double *Mdata,
MatrixIndexT stride, const double *xdata,
MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
cblas_dgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
incX, beta, ydata, incY);
}
template<typename Real>
inline void Xgemv_sparsevec(MatrixTransposeType trans, MatrixIndexT num_rows,
MatrixIndexT num_cols, Real alpha, const Real *Mdata,
MatrixIndexT stride, const Real *xdata,
MatrixIndexT incX, Real beta, Real *ydata,
MatrixIndexT incY) {
if (trans == kNoTrans) {
if (beta != 1.0) cblas_Xscal(num_rows, beta, ydata, incY);
for (MatrixIndexT i = 0; i < num_cols; i++) {
Real x_i = xdata[i * incX];
if (x_i == 0.0) continue;
// Add to ydata, the i'th column of M, times alpha * x_i
cblas_Xaxpy(num_rows, x_i * alpha, Mdata + i, stride, ydata, incY);
}
} else {
if (beta != 1.0) cblas_Xscal(num_cols, beta, ydata, incY);
for (MatrixIndexT i = 0; i < num_rows; i++) {
Real x_i = xdata[i * incX];
if (x_i == 0.0) continue;
// Add to ydata, the i'th row of M, times alpha * x_i
cblas_Xaxpy(num_cols, x_i * alpha,
Mdata + (i * stride), 1, ydata, incY);
}
}
}
inline void cblas_Xgemm(const float alpha,
MatrixTransposeType transA,
const float *Adata,
MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
MatrixTransposeType transB,
const float *Bdata, MatrixIndexT b_stride,
const float beta,
float *Mdata,
MatrixIndexT num_rows, MatrixIndexT num_cols, MatrixIndexT stride) {
cblas_sgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA),
static_cast<CBLAS_TRANSPOSE>(transB),
num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
alpha, Adata, a_stride, Bdata, b_stride,
beta, Mdata, stride);
}
inline void cblas_Xgemm(const double alpha,
MatrixTransposeType transA,
const double *Adata,
MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
MatrixTransposeType transB,
const double *Bdata, MatrixIndexT b_stride,
const double beta,
double *Mdata,
MatrixIndexT num_rows, MatrixIndexT num_cols, MatrixIndexT stride) {
cblas_dgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA),
static_cast<CBLAS_TRANSPOSE>(transB),
num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
alpha, Adata, a_stride, Bdata, b_stride,
beta, Mdata, stride);
}
inline void cblas_Xsymm(const float alpha,
MatrixIndexT sz,
const float *Adata, MatrixIndexT a_stride,
const float *Bdata, MatrixIndexT b_stride,
const float beta,
float *Mdata, MatrixIndexT stride) {
cblas_ssymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
a_stride, Bdata, b_stride, beta, Mdata, stride);
}
inline void cblas_Xsymm(const double alpha,
MatrixIndexT sz,
const double *Adata, MatrixIndexT a_stride,
const double *Bdata, MatrixIndexT b_stride,
const double beta,
double *Mdata, MatrixIndexT stride) {
cblas_dsymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
a_stride, Bdata, b_stride, beta, Mdata, stride);
}
// ger: M += alpha x y^T.
inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, float alpha,
const float *xdata, MatrixIndexT incX, const float *ydata,
MatrixIndexT incY, float *Mdata, MatrixIndexT stride) {
// Pass incX/incY through so the wrapper honors its stride arguments.
cblas_sger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata,
incY, Mdata, stride);
}
inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, double alpha,
const double *xdata, MatrixIndexT incX, const double *ydata,
MatrixIndexT incY, double *Mdata, MatrixIndexT stride) {
cblas_dger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata,
incY, Mdata, stride);
}
// syrk: symmetric rank-k update.
// if trans==kNoTrans, then C = alpha A A^T + beta C
// else C = alpha A^T A + beta C.
// note: dim_c is dim(C), other_dim_a is the "other" dimension of A, i.e.
// num-cols(A) if kNoTrans, or num-rows(A) if kTrans.
// We only need the row-major and lower-triangular option of this, and this
// is hard-coded.
inline void cblas_Xsyrk (
const MatrixTransposeType trans, const MatrixIndexT dim_c,
const MatrixIndexT other_dim_a, const float alpha, const float *A,
const MatrixIndexT a_stride, const float beta, float *C,
const MatrixIndexT c_stride) {
cblas_ssyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
}
inline void cblas_Xsyrk(
const MatrixTransposeType trans, const MatrixIndexT dim_c,
const MatrixIndexT other_dim_a, const double alpha, const double *A,
const MatrixIndexT a_stride, const double beta, double *C,
const MatrixIndexT c_stride) {
cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
}
/// Matrix-vector multiply using a banded matrix; we always call this with
/// zero super-diagonals (a bandwidth of one, i.e. a diagonal matrix), so it
/// implements elementwise multiplication. Some of the cblas arguments are
/// omitted from this wrapper.
inline void cblas_Xsbmv1(
const MatrixIndexT dim,
const double *A,
const double alpha,
const double *x,
const double beta,
double *y) {
cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
1, x, 1, beta, y, 1);
}
inline void cblas_Xsbmv1(
const MatrixIndexT dim,
const float *A,
const float alpha,
const float *x,
const float beta,
float *y) {
cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
1, x, 1, beta, y, 1);
}
/// This is not really a wrapper for CBLAS as CBLAS does not have this; in future we could
/// extend this somehow.
inline void mul_elements(
const MatrixIndexT dim,
const double *a,
double *b) { // does b *= a, elementwise.
double c1, c2, c3, c4;
MatrixIndexT i;
for (i = 0; i + 4 <= dim; i += 4) {
c1 = a[i] * b[i];
c2 = a[i+1] * b[i+1];
c3 = a[i+2] * b[i+2];
c4 = a[i+3] * b[i+3];
b[i] = c1;
b[i+1] = c2;
b[i+2] = c3;
b[i+3] = c4;
}
for (; i < dim; i++)
b[i] *= a[i];
}
inline void mul_elements(
const MatrixIndexT dim,
const float *a,
float *b) { // does b *= a, elementwise.
float c1, c2, c3, c4;
MatrixIndexT i;
for (i = 0; i + 4 <= dim; i += 4) {
c1 = a[i] * b[i];
c2 = a[i+1] * b[i+1];
c3 = a[i+2] * b[i+2];
c4 = a[i+3] * b[i+3];
b[i] = c1;
b[i+1] = c2;
b[i+2] = c3;
b[i+3] = c4;
}
for (; i < dim; i++)
b[i] *= a[i];
}
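// Note on the manual 4-way unrolling above: the four products are computed
// into temporaries c1..c4 before any store back to b, which decouples the
// loads from the stores within each block and can help the compiler keep the
// four multiplies in flight; the trailing scalar loop handles the dim % 4
// leftover elements.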
// add clapack here
#if !defined(HAVE_ATLAS)
inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) {
stptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result);
}
inline void clapack_Xtptri(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *result) {
dtptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result);
}
//
inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols,
float *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot,
KaldiBlasInt *result) {
sgetrf_(num_rows, num_cols, Mdata, stride, pivot, result);
}
inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols,
double *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot,
KaldiBlasInt *result) {
dgetrf_(num_rows, num_cols, Mdata, stride, pivot, result);
}
//
inline void clapack_Xgetri2(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride,
KaldiBlasInt *pivot, float *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
sgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result);
}
inline void clapack_Xgetri2(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride,
KaldiBlasInt *pivot, double *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
dgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result);
}
//
inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols,
KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride,
float *sv, float *Vdata, KaldiBlasInt *vstride,
float *Udata, KaldiBlasInt *ustride, float *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
sgesvd_(v, u,
num_cols, num_rows, Mdata, stride,
sv, Vdata, vstride, Udata, ustride,
p_work, l_work, result);
}
inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols,
KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride,
double *sv, double *Vdata, KaldiBlasInt *vstride,
double *Udata, KaldiBlasInt *ustride, double *p_work,
KaldiBlasInt *l_work, KaldiBlasInt *result) {
dgesvd_(v, u,
num_cols, num_rows, Mdata, stride,
sv, Vdata, vstride, Udata, ustride,
p_work, l_work, result);
}
//
inline void clapack_Xsptri(KaldiBlasInt *num_rows, float *Mdata,
KaldiBlasInt *ipiv, float *work, KaldiBlasInt *result) {
ssptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result);
}
inline void clapack_Xsptri(KaldiBlasInt *num_rows, double *Mdata,
KaldiBlasInt *ipiv, double *work, KaldiBlasInt *result) {
dsptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result);
}
//
inline void clapack_Xsptrf(KaldiBlasInt *num_rows, float *Mdata,
KaldiBlasInt *ipiv, KaldiBlasInt *result) {
ssptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result);
}
inline void clapack_Xsptrf(KaldiBlasInt *num_rows, double *Mdata,
KaldiBlasInt *ipiv, KaldiBlasInt *result) {
dsptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result);
}
#else
inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
float *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_sgetrf(CblasColMajor, num_rows, num_cols,
Mdata, stride, pivot);
}
inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
double *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_dgetrf(CblasColMajor, num_rows, num_cols,
Mdata, stride, pivot);
}
//
inline int clapack_Xtrtri(int num_rows, float *Mdata, MatrixIndexT stride) {
return clapack_strtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows,
Mdata, stride);
}
inline int clapack_Xtrtri(int num_rows, double *Mdata, MatrixIndexT stride) {
return clapack_dtrtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows,
Mdata, stride);
}
//
inline void clapack_Xgetri(MatrixIndexT num_rows, float *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_sgetri(CblasColMajor, num_rows, Mdata, stride, pivot);
}
inline void clapack_Xgetri(MatrixIndexT num_rows, double *Mdata, MatrixIndexT stride,
int *pivot, int *result) {
*result = clapack_dgetri(CblasColMajor, num_rows, Mdata, stride, pivot);
}
#endif
}  // namespace kaldi
#endif
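// Sketch of how these overloads are used from templated matrix code (an
// assumed caller, not part of this file): overload resolution on the element
// type selects the single- or double-precision BLAS routine, so the template
// itself stays precision-agnostic.
//
//   namespace kaldi {
//   template <typename Real>
//   void AxpyThenScale(MatrixIndexT dim, Real alpha, const Real *x, Real *y) {
//     cblas_Xaxpy(dim, alpha, x, 1, y, 1);             // y += alpha * x
//     cblas_Xscal(dim, static_cast<Real>(2.0), y, 1);  // y *= 2
//   }
//   }  // namespace kaldi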
// matrix/compressed-matrix.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Frantisek Skala, Wei Shi
// 2015 Tom Ko
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "matrix/compressed-matrix.h"
#include <algorithm>
namespace kaldi {
// static
MatrixIndexT CompressedMatrix::DataSize(const GlobalHeader &header) {
// Returns size in bytes of the data.
DataFormat format = static_cast<DataFormat>(header.format);
if (format == kOneByteWithColHeaders) {
return sizeof(GlobalHeader) +
header.num_cols * (sizeof(PerColHeader) + header.num_rows);
} else if (format == kTwoByte) {
return sizeof(GlobalHeader) +
2 * header.num_rows * header.num_cols;
} else {
KALDI_ASSERT(format == kOneByte);
return sizeof(GlobalHeader) +
header.num_rows * header.num_cols;
}
}
// Scales all elements of the matrix by alpha, by scaling the floats
// (min_value and range) in the GlobalHeader.
void CompressedMatrix::Scale(float alpha) {
if (data_ != NULL) {
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
// Scale the floating-point values in the GlobalHeader and leave all the
// integer data the same; each decompressed value has the form
// min_value + range * fraction, so scaling both floats scales every element.
h->min_value *= alpha;
h->range *= alpha;
}
}
template<typename Real> // static inline
void CompressedMatrix::ComputeGlobalHeader(
const MatrixBase<Real> &mat, CompressionMethod method,
GlobalHeader *header) {
if (method == kAutomaticMethod) {
if (mat.NumRows() > 8) method = kSpeechFeature;
else method = kTwoByteAuto;
}
switch (method) {
case kSpeechFeature:
header->format = static_cast<int32>(kOneByteWithColHeaders); // 1.
break;
case kTwoByteAuto: case kTwoByteSignedInteger:
header->format = static_cast<int32>(kTwoByte); // 2.
break;
case kOneByteAuto: case kOneByteUnsignedInteger: case kOneByteZeroOne:
header->format = static_cast<int32>(kOneByte); // 3.
break;
default:
KALDI_ERR << "Invalid compression type: "
<< static_cast<int32>(method);
}
header->num_rows = mat.NumRows();
header->num_cols = mat.NumCols();
// Now compute 'min_value' and 'range'.
switch (method) {
case kSpeechFeature: case kTwoByteAuto: case kOneByteAuto: {
float min_value = mat.Min(), max_value = mat.Max();
// ensure that max_value is strictly greater than min_value, even if the
// matrix is constant; this avoids crashes in ComputeColHeader when
// compressing speech features.
if (max_value == min_value)
max_value = min_value + (1.0 + fabs(min_value));
KALDI_ASSERT(min_value - min_value == 0 &&
max_value - max_value == 0 &&  // x - x is NaN (!= 0) for NaN or +-Inf x.
"Cannot compress a matrix with NaNs or Infs");
header->min_value = min_value;
header->range = max_value - min_value;
// we previously checked that max_value != min_value, so their
// difference should be nonzero.
KALDI_ASSERT(header->range > 0.0);
break;
}
case kTwoByteSignedInteger: {
header->min_value = -32768.0;
header->range = 65535.0;
break;
}
case kOneByteUnsignedInteger: {
header->min_value = 0.0;
header->range = 255.0;
break;
}
case kOneByteZeroOne: {
header->min_value = 0.0;
header->range = 1.0;
break;
}
default:
KALDI_ERR << "Unknown compression method = "
<< static_cast<int32>(method);
}
KALDI_COMPILE_TIME_ASSERT(sizeof(*header) == 20); // otherwise
// something weird is happening and our code probably won't work or
// won't be robust across platforms.
}
template<typename Real>
void CompressedMatrix::CopyFromMat(
const MatrixBase<Real> &mat, CompressionMethod method) {
if (data_ != NULL) {
delete [] static_cast<float*>(data_);  // delete [] because it was allocated with new float[].
data_ = NULL;
}
if (mat.NumRows() == 0) { return; } // Zero-size matrix stored as zero pointer.
GlobalHeader global_header;
ComputeGlobalHeader(mat, method, &global_header);
int32 data_size = DataSize(global_header);
data_ = AllocateData(data_size);
*(reinterpret_cast<GlobalHeader*>(data_)) = global_header;
DataFormat format = static_cast<DataFormat>(global_header.format);
if (format == kOneByteWithColHeaders) {
PerColHeader *header_data =
reinterpret_cast<PerColHeader*>(static_cast<char*>(data_) +
sizeof(GlobalHeader));
uint8 *byte_data =
reinterpret_cast<uint8*>(header_data + global_header.num_cols);
const Real *matrix_data = mat.Data();
for (int32 col = 0; col < global_header.num_cols; col++) {
CompressColumn(global_header,
matrix_data + col, mat.Stride(),
global_header.num_rows,
header_data, byte_data);
header_data++;
byte_data += global_header.num_rows;
}
} else if (format == kTwoByte) {
uint16 *data = reinterpret_cast<uint16*>(static_cast<char*>(data_) +
sizeof(GlobalHeader));
int32 num_rows = mat.NumRows(), num_cols = mat.NumCols();
for (int32 r = 0; r < num_rows; r++) {
const Real *row_data = mat.RowData(r);
for (int32 c = 0; c < num_cols; c++)
data[c] = FloatToUint16(global_header, row_data[c]);
data += num_cols;
}
} else {
KALDI_ASSERT(format == kOneByte);
uint8 *data = reinterpret_cast<uint8*>(static_cast<char*>(data_) +
sizeof(GlobalHeader));
int32 num_rows = mat.NumRows(), num_cols = mat.NumCols();
for (int32 r = 0; r < num_rows; r++) {
const Real *row_data = mat.RowData(r);
for (int32 c = 0; c < num_cols; c++)
data[c] = FloatToUint8(global_header, row_data[c]);
data += num_cols;
}
}
}
// Instantiate the template for float and double.
template
void CompressedMatrix::CopyFromMat(const MatrixBase<float> &mat,
CompressionMethod method);
template
void CompressedMatrix::CopyFromMat(const MatrixBase<double> &mat,
CompressionMethod method);
CompressedMatrix::CompressedMatrix(
const CompressedMatrix &cmat,
const MatrixIndexT row_offset,
const MatrixIndexT num_rows,
const MatrixIndexT col_offset,
const MatrixIndexT num_cols,
bool allow_padding): data_(NULL) {
int32 old_num_rows = cmat.NumRows(), old_num_cols = cmat.NumCols();
if (old_num_rows == 0) {
KALDI_ASSERT(num_rows == 0 && num_cols == 0);
// The empty matrix is stored as a zero pointer.
return;
}
KALDI_ASSERT(row_offset < old_num_rows);
KALDI_ASSERT(col_offset < old_num_cols);
KALDI_ASSERT(row_offset >= 0 || allow_padding);
KALDI_ASSERT(col_offset >= 0);
KALDI_ASSERT(row_offset + num_rows <= old_num_rows || allow_padding);
KALDI_ASSERT(col_offset + num_cols <= old_num_cols);
if (num_rows == 0 || num_cols == 0) { return; }
bool padding_is_used = (row_offset < 0 ||
row_offset + num_rows > old_num_rows);
GlobalHeader new_global_header;
KALDI_COMPILE_TIME_ASSERT(sizeof(new_global_header) == 20);
GlobalHeader *old_global_header = reinterpret_cast<GlobalHeader*>(cmat.Data());
new_global_header = *old_global_header;
new_global_header.num_cols = num_cols;
new_global_header.num_rows = num_rows;
// We don't switch format from 1 -> 2 (in case of size reduction) yet; if this
// is needed, we will do this below by creating a temporary Matrix.
new_global_header.format = old_global_header->format;
data_ = AllocateData(DataSize(new_global_header)); // allocate memory
*(reinterpret_cast<GlobalHeader*>(data_)) = new_global_header;
DataFormat format = static_cast<DataFormat>(old_global_header->format);
if (format == kOneByteWithColHeaders) {
PerColHeader *old_per_col_header =
reinterpret_cast<PerColHeader*>(old_global_header + 1);
uint8 *old_byte_data =
reinterpret_cast<uint8*>(old_per_col_header +
old_global_header->num_cols);
PerColHeader *new_per_col_header =
reinterpret_cast<PerColHeader*>(
reinterpret_cast<GlobalHeader*>(data_) + 1);
memcpy(new_per_col_header, old_per_col_header + col_offset,
sizeof(PerColHeader) * num_cols);
uint8 *new_byte_data =
reinterpret_cast<uint8*>(new_per_col_header + num_cols);
if (!padding_is_used) {
uint8 *old_start_of_subcol =
old_byte_data + row_offset + (col_offset * old_num_rows),
*new_start_of_col = new_byte_data;
for (int32 i = 0; i < num_cols; i++) {
memcpy(new_start_of_col, old_start_of_subcol, num_rows);
new_start_of_col += num_rows;
old_start_of_subcol += old_num_rows;
}
} else {
uint8 *old_start_of_col =
old_byte_data + (col_offset * old_num_rows),
*new_start_of_col = new_byte_data;
for (int32 i = 0; i < num_cols; i++) {
for (int32 j = 0; j < num_rows; j++) {
int32 old_j = j + row_offset;
if (old_j < 0) old_j = 0;
else if (old_j >= old_num_rows) old_j = old_num_rows - 1;
new_start_of_col[j] = old_start_of_col[old_j];
}
new_start_of_col += num_rows;
old_start_of_col += old_num_rows;
}
}
} else if (format == kTwoByte) {
const uint16 *old_data =
reinterpret_cast<const uint16*>(old_global_header + 1);
uint16 *new_row_data =
reinterpret_cast<uint16*>(reinterpret_cast<GlobalHeader*>(data_) + 1);
for (int32 row = 0; row < num_rows; row++) {
int32 old_row = row + row_offset;
// The next two lines are only relevant if padding_is_used.
if (old_row < 0) old_row = 0;
else if (old_row >= old_num_rows) old_row = old_num_rows - 1;
const uint16 *old_row_data =
old_data + col_offset + (old_num_cols * old_row);
memcpy(new_row_data, old_row_data, sizeof(uint16) * num_cols);
new_row_data += num_cols;
}
} else {
KALDI_ASSERT(format == kOneByte);
const uint8 *old_data =
reinterpret_cast<const uint8*>(old_global_header + 1);
uint8 *new_row_data =
reinterpret_cast<uint8*>(reinterpret_cast<GlobalHeader*>(data_) + 1);
for (int32 row = 0; row < num_rows; row++) {
int32 old_row = row + row_offset;
// The next two lines are only relevant if padding_is_used.
if (old_row < 0) old_row = 0;
else if (old_row >= old_num_rows) old_row = old_num_rows - 1;
const uint8 *old_row_data =
old_data + col_offset + (old_num_cols * old_row);
memcpy(new_row_data, old_row_data, sizeof(uint8) * num_cols);
new_row_data += num_cols;
}
}
if (num_rows < 8 && format == kOneByteWithColHeaders) {
// format was 1 but we want it to be 2 -> create a temporary
// Matrix (uncompress), re-compress, and swap.
// This gives us almost exact reconstruction while saving
// memory (the elements take more space but there will be
// no per-column headers).
Matrix<float> temp(this->NumRows(), this->NumCols(),
kUndefined);
this->CopyToMat(&temp);
CompressedMatrix temp_cmat(temp, kTwoByteAuto);
this->Swap(&temp_cmat);
}
}
template<typename Real>
CompressedMatrix &CompressedMatrix::operator =(const MatrixBase<Real> &mat) {
this->CopyFromMat(mat);
return *this;
}
// Instantiate the template for float and double.
template
CompressedMatrix& CompressedMatrix::operator =(const MatrixBase<float> &mat);
template
CompressedMatrix& CompressedMatrix::operator =(const MatrixBase<double> &mat);
inline uint16 CompressedMatrix::FloatToUint16(
const GlobalHeader &global_header,
float value) {
float f = (value - global_header.min_value) /
global_header.range;
if (f > 1.0) f = 1.0; // Note: this should not happen.
if (f < 0.0) f = 0.0; // Note: this should not happen.
return static_cast<int>(f * 65535 + 0.499); // + 0.499 is to
// round to closest int; avoids bias.
}
inline uint8 CompressedMatrix::FloatToUint8(
const GlobalHeader &global_header,
float value) {
float f = (value - global_header.min_value) /
global_header.range;
if (f > 1.0) f = 1.0; // Note: this should not happen.
if (f < 0.0) f = 0.0; // Note: this should not happen.
return static_cast<int>(f * 255 + 0.499); // + 0.499 is to
// round to closest int; avoids bias.
}
inline float CompressedMatrix::Uint16ToFloat(
const GlobalHeader &global_header,
uint16 value) {
// the constant 1.52590218966964e-05 is 1/65535.
return global_header.min_value
+ global_header.range * 1.52590218966964e-05F * value;
}
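// Worked round-trip example for the two quantizers above: with
// min_value = -2.0 and range = 4.0, the value 1.0 gives
// f = (1.0 - (-2.0)) / 4.0 = 0.75, which FloatToUint16 quantizes to
// static_cast<int>(0.75 * 65535 + 0.499) = 49151; Uint16ToFloat then
// reconstructs -2.0 + 4.0 * (49151 / 65535) = 0.999985, an error of about
// 1.5e-5, i.e. under half a quantization step (range / 65535 / 2 ~= 3.1e-5).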
template<typename Real> // static
void CompressedMatrix::ComputeColHeader(
const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, CompressedMatrix::PerColHeader *header) {
KALDI_ASSERT(num_rows > 0);
std::vector<Real> sdata(num_rows); // the sorted data.
for (size_t i = 0, size = sdata.size(); i < size; i++)
sdata[i] = data[i*stride];
if (num_rows >= 5) {
int quarter_nr = num_rows/4;
// std::sort(sdata.begin(), sdata.end());
// The elements at positions 0, quarter_nr,
// 3*quarter_nr, and num_rows-1 need to be in sorted order.
std::nth_element(sdata.begin(), sdata.begin() + quarter_nr, sdata.end());
// Now, sdata.begin() + quarter_nr contains the element that would appear
// in sorted order, in that position.
std::nth_element(sdata.begin(), sdata.begin(), sdata.begin() + quarter_nr);
// Now, sdata.begin() and sdata.begin() + quarter_nr contain the elements
// that would appear at those positions in sorted order.
std::nth_element(sdata.begin() + quarter_nr + 1,
sdata.begin() + (3*quarter_nr), sdata.end());
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
// 3*quarter_nr, contain the elements that would appear at those positions
// in sorted order.
std::nth_element(sdata.begin() + (3*quarter_nr) + 1, sdata.end() - 1,
sdata.end());
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
// 3*quarter_nr, and sdata.end() - 1, contain the elements that would appear
// at those positions in sorted order.
header->percentile_0 =
std::min<uint16>(FloatToUint16(global_header, sdata[0]), 65532);
header->percentile_25 =
std::min<uint16>(
std::max<uint16>(
FloatToUint16(global_header, sdata[quarter_nr]),
header->percentile_0 + static_cast<uint16>(1)), 65533);
header->percentile_75 =
std::min<uint16>(
std::max<uint16>(
FloatToUint16(global_header, sdata[3*quarter_nr]),
header->percentile_25 + static_cast<uint16>(1)), 65534);
header->percentile_100 = std::max<uint16>(
FloatToUint16(global_header, sdata[num_rows-1]),
header->percentile_75 + static_cast<uint16>(1));
} else { // handle this pathological case.
std::sort(sdata.begin(), sdata.end());
// Note: we know num_rows is at least 1.
header->percentile_0 =
std::min<uint16>(FloatToUint16(global_header, sdata[0]),
65532);
if (num_rows > 1)
header->percentile_25 =
std::min<uint16>(
std::max<uint16>(FloatToUint16(global_header, sdata[1]),
header->percentile_0 + 1), 65533);
else
header->percentile_25 = header->percentile_0 + 1;
if (num_rows > 2)
header->percentile_75 =
std::min<uint16>(
std::max<uint16>(FloatToUint16(global_header, sdata[2]),
header->percentile_25 + 1), 65534);
else
header->percentile_75 = header->percentile_25 + 1;
if (num_rows > 3)
header->percentile_100 =
std::max<uint16>(FloatToUint16(global_header, sdata[3]),
header->percentile_75 + 1);
else
header->percentile_100 = header->percentile_75 + 1;
}
}
// static
inline uint8 CompressedMatrix::FloatToChar(
float p0, float p25, float p75, float p100,
float value) {
int ans;
if (value < p25) { // range [ p0, p25 ) covered by
// characters 0 .. 64. We round to the closest int.
float f = (value - p0) / (p25 - p0);
ans = static_cast<int>(f * 64 + 0.5);
// Note: the checks on the next two lines
// are necessary in pathological cases when all the elements in a row
// are the same and the percentile_* values are separated by one.
if (ans < 0) ans = 0;
if (ans > 64) ans = 64;
} else if (value < p75) {  // range [ p25, p75 ) covered
// by characters 64 .. 192. We round to the closest int.
float f = (value - p25) / (p75 - p25);
ans = 64 + static_cast<int>(f * 128 + 0.5);
if (ans < 64) ans = 64;
if (ans > 192) ans = 192;
} else { // range [ p75, p100 ] covered by
// characters 192 .. 255. Note: this last range
// has fewer characters than the left range, because
// we go up to 255, not 256.
float f = (value - p75) / (p100 - p75);
ans = 192 + static_cast<int>(f * 63 + 0.5);
if (ans < 192) ans = 192;
if (ans > 255) ans = 255;
}
return static_cast<uint8>(ans);
}
// static
inline float CompressedMatrix::CharToFloat(
float p0, float p25, float p75, float p100,
uint8 value) {
if (value <= 64) {
return p0 + (p25 - p0) * value * (1/64.0);
} else if (value <= 192) {
return p25 + (p75 - p25) * (value - 64) * (1/128.0);
} else {
return p75 + (p100 - p75) * (value - 192) * (1/63.0);
}
}
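// Worked example of the piecewise 3-range mapping above: with p0 = 0.0,
// p25 = 1.0, p75 = 3.0, p100 = 4.0, the value 2.0 lies in [p25, p75), so
// FloatToChar computes f = (2.0 - 1.0) / (3.0 - 1.0) = 0.5 and returns
// 64 + static_cast<int>(0.5 * 128 + 0.5) = 128; CharToFloat(128) gives back
// 1.0 + 2.0 * 64 * (1/128.0) = 2.0 exactly. The middle half of the data gets
// 128 of the 256 codes, i.e. double resolution where (for roughly Gaussian
// speech features) most of the probability mass lies.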
template<typename Real> // static
void CompressedMatrix::CompressColumn(
const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, CompressedMatrix::PerColHeader *header,
uint8 *byte_data) {
ComputeColHeader(global_header, data, stride,
num_rows, header);
float p0 = Uint16ToFloat(global_header, header->percentile_0),
p25 = Uint16ToFloat(global_header, header->percentile_25),
p75 = Uint16ToFloat(global_header, header->percentile_75),
p100 = Uint16ToFloat(global_header, header->percentile_100);
for (int32 i = 0; i < num_rows; i++) {
Real this_data = data[i * stride];
byte_data[i] = FloatToChar(p0, p25, p75, p100, this_data);
}
}
// static
void* CompressedMatrix::AllocateData(int32 num_bytes) {
KALDI_ASSERT(num_bytes > 0);
KALDI_COMPILE_TIME_ASSERT(sizeof(float) == 4);
// Allocate a whole number of floats; (num_bytes/3) + 4 floats is always
// more than num_bytes bytes, so this over-allocates slightly to be safe.
return reinterpret_cast<void*>(new float[(num_bytes/3) + 4]);
}
void CompressedMatrix::Write(std::ostream &os, bool binary) const {
if (binary) { // Binary-mode write:
if (data_ != NULL) {
GlobalHeader &h = *reinterpret_cast<GlobalHeader*>(data_);
DataFormat format = static_cast<DataFormat>(h.format);
if (format == kOneByteWithColHeaders) {
WriteToken(os, binary, "CM");
} else if (format == kTwoByte) {
WriteToken(os, binary, "CM2");
} else if (format == kOneByte) {
WriteToken(os, binary, "CM3");
}
MatrixIndexT size = DataSize(h); // total size of data in data_
// We don't write out the "int32 format", hence the + 4, - 4.
os.write(reinterpret_cast<const char*>(data_) + 4, size - 4);
} else { // special case: where data_ == NULL, we treat it as an empty
// matrix.
WriteToken(os, binary, "CM");
GlobalHeader h;
h.range = h.min_value = 0.0;
h.num_rows = h.num_cols = 0;
os.write(reinterpret_cast<const char*>(&h), sizeof(h));
}
} else {
// In text mode, just use the same format as a regular matrix.
// This is not compressed.
Matrix<BaseFloat> temp_mat(this->NumRows(), this->NumCols(),
kUndefined);
this->CopyToMat(&temp_mat);
temp_mat.Write(os, binary);
}
if (os.fail())
KALDI_ERR << "Error writing compressed matrix to stream.";
}
void CompressedMatrix::Read(std::istream &is, bool binary) {
if (data_ != NULL) {
delete [] (static_cast<float*>(data_));
data_ = NULL;
}
if (binary) {
int peekval = Peek(is, binary);
if (peekval == 'C') {
std::string tok;  // Should be CM (format 1), CM2 (format 2) or CM3 (format 3).
ReadToken(is, binary, &tok);
GlobalHeader h;
if (tok == "CM") { h.format = 1; } // kOneByteWithColHeaders
else if (tok == "CM2") { h.format = 2; } // kTwoByte
else if (tok == "CM3") { h.format = 3; } // kOneByte
else {
KALDI_ERR << "Unexpected token " << tok << ", expecting CM, CM2 or CM3";
}
// don't read the "format" -> hence + 4, - 4.
is.read(reinterpret_cast<char*>(&h) + 4, sizeof(h) - 4);
if (is.fail())
KALDI_ERR << "Failed to read header";
if (h.num_cols == 0) // empty matrix.
return;
int32 size = DataSize(h), remaining_size = size - sizeof(GlobalHeader);
data_ = AllocateData(size);
*(reinterpret_cast<GlobalHeader*>(data_)) = h;
is.read(reinterpret_cast<char*>(data_) + sizeof(GlobalHeader),
remaining_size);
} else {
// Assume that what we're reading is a regular Matrix. This might be the
// case if you changed your code, making a Matrix into a CompressedMatrix,
// and you want back-compatibility for reading.
Matrix<BaseFloat> M;
M.Read(is, binary); // This will crash if it was not a Matrix.
this->CopyFromMat(M);
}
} else { // Text-mode read. In this case you don't get to
// choose the compression type. Anyway this branch would only
// be taken when debugging.
Matrix<BaseFloat> temp;
temp.Read(is, binary);
this->CopyFromMat(temp);
}
if (is.fail())
KALDI_ERR << "Failed to read data.";
}
template<typename Real>
void CompressedMatrix::CopyToMat(MatrixBase<Real> *mat,
MatrixTransposeType trans) const {
if (trans == kTrans) {
Matrix<Real> temp(this->NumCols(), this->NumRows());
CopyToMat(&temp, kNoTrans);
mat->CopyFromMat(temp, kTrans);
return;
}
if (data_ == NULL) {
KALDI_ASSERT(mat->NumRows() == 0);
KALDI_ASSERT(mat->NumCols() == 0);
return;
}
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
int32 num_cols = h->num_cols, num_rows = h->num_rows;
KALDI_ASSERT(mat->NumRows() == num_rows);
KALDI_ASSERT(mat->NumCols() == num_cols);
DataFormat format = static_cast<DataFormat>(h->format);
if (format == kOneByteWithColHeaders) {
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header +
h->num_cols);
for (int32 i = 0; i < num_cols; i++, per_col_header++) {
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 j = 0; j < num_rows; j++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*mat)(j, i) = f;
}
}
} else if (format == kTwoByte) {
const uint16 *data = reinterpret_cast<const uint16*>(h + 1);
float min_value = h->min_value,
increment = h->range * (1.0 / 65535.0);
for (int32 i = 0; i < num_rows; i++) {
Real *row_data = mat->RowData(i);
for (int32 j = 0; j < num_cols; j++)
row_data[j] = min_value + data[j] * increment;
data += num_cols;
}
} else {
KALDI_ASSERT(format == kOneByte);
float min_value = h->min_value, increment = h->range * (1.0 / 255.0);
const uint8 *data = reinterpret_cast<const uint8*>(h + 1);
for (int32 i = 0; i < num_rows; i++) {
Real *row_data = mat->RowData(i);
for (int32 j = 0; j < num_cols; j++)
row_data[j] = min_value + data[j] * increment;
data += num_cols;
}
}
}
// Instantiate the template for float and double.
template
void CompressedMatrix::CopyToMat(MatrixBase<float> *mat,
MatrixTransposeType trans) const;
template
void CompressedMatrix::CopyToMat(MatrixBase<double> *mat,
MatrixTransposeType trans) const;
template<typename Real>
void CompressedMatrix::CopyRowToVec(MatrixIndexT row,
VectorBase<Real> *v) const {
KALDI_ASSERT(row < this->NumRows());
KALDI_ASSERT(row >= 0);
KALDI_ASSERT(v->Dim() == this->NumCols());
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
DataFormat format = static_cast<DataFormat>(h->format);
if (format == kOneByteWithColHeaders) {
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header +
h->num_cols);
byte_data += row; // point to first value we are interested in
for (int32 i = 0; i < h->num_cols;
i++, per_col_header++, byte_data += h->num_rows) {
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*v)(i) = f;
}
} else if (format == kTwoByte) {
int32 num_cols = h->num_cols;
float min_value = h->min_value,
increment = h->range * (1.0 / 65535.0);
const uint16 *row_data = reinterpret_cast<uint16*>(h + 1) + (num_cols * row);
Real *v_data = v->Data();
for (int32 c = 0; c < num_cols; c++)
v_data[c] = min_value + row_data[c] * increment;
} else {
KALDI_ASSERT(format == kOneByte);
int32 num_cols = h->num_cols;
float min_value = h->min_value,
increment = h->range * (1.0 / 255.0);
const uint8 *row_data = reinterpret_cast<uint8*>(h + 1) + (num_cols * row);
Real *v_data = v->Data();
for (int32 c = 0; c < num_cols; c++)
v_data[c] = min_value + row_data[c] * increment;
}
}
template<typename Real>
void CompressedMatrix::CopyColToVec(MatrixIndexT col,
VectorBase<Real> *v) const {
KALDI_ASSERT(col < this->NumCols());
KALDI_ASSERT(col >= 0);
KALDI_ASSERT(v->Dim() == this->NumRows());
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
DataFormat format = static_cast<DataFormat>(h->format);
if (format == kOneByteWithColHeaders) {
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header +
h->num_cols);
byte_data += col*h->num_rows; // point to first value in the column we want
per_col_header += col;
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 i = 0; i < h->num_rows; i++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*v)(i) = f;
}
} else if (format == kTwoByte) {
int32 num_rows = h->num_rows, num_cols = h->num_cols;
float min_value = h->min_value,
increment = h->range * (1.0 / 65535.0);
const uint16 *col_data = reinterpret_cast<uint16*>(h + 1) + col;
Real *v_data = v->Data();
for (int32 r = 0; r < num_rows; r++)
v_data[r] = min_value + increment * col_data[r * num_cols];
} else {
KALDI_ASSERT(format == kOneByte);
int32 num_rows = h->num_rows, num_cols = h->num_cols;
float min_value = h->min_value,
increment = h->range * (1.0 / 255.0);
const uint8 *col_data = reinterpret_cast<uint8*>(h + 1) + col;
Real *v_data = v->Data();
for (int32 r = 0; r < num_rows; r++)
v_data[r] = min_value + increment * col_data[r * num_cols];
}
}
// instantiate the templates.
template void
CompressedMatrix::CopyColToVec(MatrixIndexT, VectorBase<double> *) const;
template void
CompressedMatrix::CopyColToVec(MatrixIndexT, VectorBase<float> *) const;
template void
CompressedMatrix::CopyRowToVec(MatrixIndexT, VectorBase<double> *) const;
template void
CompressedMatrix::CopyRowToVec(MatrixIndexT, VectorBase<float> *) const;
template<typename Real>
void CompressedMatrix::CopyToMat(int32 row_offset,
int32 col_offset,
MatrixBase<Real> *dest) const {
KALDI_PARANOID_ASSERT(row_offset < this->NumRows());
KALDI_PARANOID_ASSERT(col_offset < this->NumCols());
KALDI_PARANOID_ASSERT(row_offset >= 0);
KALDI_PARANOID_ASSERT(col_offset >= 0);
KALDI_ASSERT(row_offset+dest->NumRows() <= this->NumRows());
KALDI_ASSERT(col_offset+dest->NumCols() <= this->NumCols());
// everything is OK
GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
int32 num_rows = h->num_rows, num_cols = h->num_cols,
tgt_cols = dest->NumCols(), tgt_rows = dest->NumRows();
DataFormat format = static_cast<DataFormat>(h->format);
if (format == kOneByteWithColHeaders) {
PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header +
h->num_cols);
uint8 *start_of_subcol = byte_data+row_offset; // skip appropriate
// number of columns
start_of_subcol += col_offset*num_rows; // skip appropriate number of rows
per_col_header += col_offset; // skip the appropriate number of headers
for (int32 i = 0;
i < tgt_cols;
i++, per_col_header++, start_of_subcol+=num_rows) {
byte_data = start_of_subcol;
float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
for (int32 j = 0; j < tgt_rows; j++, byte_data++) {
float f = CharToFloat(p0, p25, p75, p100, *byte_data);
(*dest)(j, i) = f;
}
}
} else if (format == kTwoByte) {
const uint16 *data = reinterpret_cast<const uint16*>(h+1) + col_offset +
(num_cols * row_offset);
float min_value = h->min_value,
increment = h->range * (1.0 / 65535.0);
for (int32 row = 0; row < tgt_rows; row++) {
Real *dest_row = dest->RowData(row);
for (int32 col = 0; col < tgt_cols; col++)
dest_row[col] = min_value + increment * data[col];
data += num_cols;
}
} else {
KALDI_ASSERT(format == kOneByte);
const uint8 *data = reinterpret_cast<const uint8*>(h+1) + col_offset +
(num_cols * row_offset);
float min_value = h->min_value,
increment = h->range * (1.0 / 255.0);
for (int32 row = 0; row < tgt_rows; row++) {
Real *dest_row = dest->RowData(row);
for (int32 col = 0; col < tgt_cols; col++)
dest_row[col] = min_value + increment * data[col];
data += num_cols;
}
}
}
// instantiate the templates.
template void CompressedMatrix::CopyToMat(int32,
int32,
MatrixBase<float> *dest) const;
template void CompressedMatrix::CopyToMat(int32,
int32,
MatrixBase<double> *dest) const;
void CompressedMatrix::Clear() {
if (data_ != NULL) {
delete [] static_cast<float*>(data_);
data_ = NULL;
}
}
CompressedMatrix::CompressedMatrix(const CompressedMatrix &mat): data_(NULL) {
*this = mat; // use assignment operator.
}
CompressedMatrix &CompressedMatrix::operator = (const CompressedMatrix &mat) {
Clear(); // now this->data_ == NULL.
if (mat.data_ != NULL) {
MatrixIndexT data_size = DataSize(*static_cast<GlobalHeader*>(mat.data_));
data_ = AllocateData(data_size);
memcpy(static_cast<void*>(data_),
static_cast<void*>(mat.data_),
data_size);
}
return *this;
}
} // namespace kaldi
// matrix/compressed-matrix.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Frantisek Skala, Wei Shi
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_COMPRESSED_MATRIX_H_
#define KALDI_MATRIX_COMPRESSED_MATRIX_H_ 1
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// \addtogroup matrix_group
/// @{
/*
The enum CompressionMethod is used when creating a CompressedMatrix (a lossily
compressed matrix) from a regular Matrix. It dictates how we choose the
compressed format and how we choose the ranges of floats that are represented
by particular integers.
kAutomaticMethod = 1 This is the default when you don't specify the
compression method. It is a shorthand for using
kSpeechFeature if the num-rows is more than 8, and
kTwoByteAuto otherwise.
kSpeechFeature = 2 This is the most complicated of the compression methods,
and was designed for speech features which have a roughly
Gaussian distribution with different ranges for each
dimension. Each element is stored in one byte, but there
is an 8-byte header per column; the spacing of the
integer values is not uniform but is in 3 ranges.
kTwoByteAuto = 3 Each element is stored in two bytes as a uint16, with
the representable range of values chosen automatically
with the minimum and maximum elements of the matrix as
its edges.
kTwoByteSignedInteger = 4
Each element is stored in two bytes as a uint16, with
the representable range of values chosen to coincide with
what you'd get if you stored signed integers, i.e.
[-32768.0, 32767.0]. Suitable for waveform data that
was previously stored as 16-bit PCM.
kOneByteAuto = 5 Each element is stored in one byte as a uint8, with the
representable range of values chosen automatically with
the minimum and maximum elements of the matrix as its
edges.
kOneByteUnsignedInteger = 6 Each element is stored in
one byte as a uint8, with the representable range of
values equal to [0.0, 255.0].
kOneByteZeroOne = 7 Each element is stored in
one byte as a uint8, with the representable range of
values equal to [0.0, 1.0]. Suitable for image data
that has previously been compressed as int8.
// We can add new methods here as needed: if they just imply different ways
// of selecting the min_value and range, and a num-bytes = 1 or 2, they will
// be trivial to implement.
*/
enum CompressionMethod {
kAutomaticMethod = 1,
kSpeechFeature = 2,
kTwoByteAuto = 3,
kTwoByteSignedInteger = 4,
kOneByteAuto = 5,
kOneByteUnsignedInteger = 6,
kOneByteZeroOne = 7
};
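// For example, under kAutomaticMethod a 100 x 40 feature matrix
// (num-rows > 8) is compressed as kSpeechFeature, while a 4 x 40 matrix
// falls back to kTwoByteAuto; see ComputeGlobalHeader() in
// compressed-matrix.cc.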
/*
This class does lossy compression of a matrix. It supports various compression
methods, see enum CompressionMethod.
*/
class CompressedMatrix {
public:
CompressedMatrix(): data_(NULL) { }
~CompressedMatrix() { Clear(); }
template<typename Real>
explicit CompressedMatrix(const MatrixBase<Real> &mat,
CompressionMethod method = kAutomaticMethod):
data_(NULL) { CopyFromMat(mat, method); }
/// Initializer that can be used to select part of an existing
/// CompressedMatrix without un-compressing and re-compressing (note: unlike
/// similar initializers for class Matrix, it doesn't point to the same memory
/// location).
///
/// This creates a CompressedMatrix with the size (num_rows, num_cols)
/// starting at (row_offset, col_offset).
///
/// If you specify allow_padding = true,
/// it is permitted to have row_offset < 0 and
/// row_offset + num_rows > mat.NumRows(), and the result will contain
/// repeats of the first and last rows of 'mat' as necessary.
CompressedMatrix(const CompressedMatrix &mat,
const MatrixIndexT row_offset,
const MatrixIndexT num_rows,
const MatrixIndexT col_offset,
const MatrixIndexT num_cols,
bool allow_padding = false);
void *Data() const { return this->data_; }
/// This will resize *this and copy the contents of mat to *this.
template<typename Real>
void CopyFromMat(const MatrixBase<Real> &mat,
CompressionMethod method = kAutomaticMethod);
CompressedMatrix(const CompressedMatrix &mat);
CompressedMatrix &operator = (const CompressedMatrix &mat); // assignment operator.
template<typename Real>
CompressedMatrix &operator = (const MatrixBase<Real> &mat); // assignment operator.
/// Copies contents to matrix. Note: mat must have the correct size.
/// The kTrans case uses a temporary.
template<typename Real>
void CopyToMat(MatrixBase<Real> *mat,
MatrixTransposeType trans = kNoTrans) const;
void Write(std::ostream &os, bool binary) const;
void Read(std::istream &is, bool binary);
/// Returns number of rows (or zero for empty matrix).
inline MatrixIndexT NumRows() const { return (data_ == NULL) ? 0 :
(*reinterpret_cast<GlobalHeader*>(data_)).num_rows; }
/// Returns number of columns (or zero for empty matrix).
inline MatrixIndexT NumCols() const { return (data_ == NULL) ? 0 :
(*reinterpret_cast<GlobalHeader*>(data_)).num_cols; }
/// Copies row #row of the matrix into vector v.
/// Note: v must have same size as #cols.
template<typename Real>
void CopyRowToVec(MatrixIndexT row, VectorBase<Real> *v) const;
/// Copies column #col of the matrix into vector v.
/// Note: v must have same size as #rows.
template<typename Real>
void CopyColToVec(MatrixIndexT col, VectorBase<Real> *v) const;
/// Copies a submatrix of the compressed matrix into matrix dest.
/// The submatrix starts at row row_offset and column column_offset, and
/// its size is defined by the size of the provided matrix dest.
template<typename Real>
void CopyToMat(int32 row_offset,
int32 column_offset,
MatrixBase<Real> *dest) const;
void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); }
void Clear();
/// scales all elements of matrix by alpha.
/// It scales the floating point values in GlobalHeader by alpha.
void Scale(float alpha);
friend class Matrix<float>;
friend class Matrix<double>;
private:
// This enum describes the different compressed-data formats: these are
// distinct from the compression methods although all of the methods apart
// from kAutomaticMethod dictate a particular compressed-data format.
//
// kOneByteWithColHeaders means there is a GlobalHeader and each
// column has a PerColHeader; the actual data is stored in
// one byte per element, in column-major order (the mapping
// from integers to floats is a little complicated).
// kTwoByte means there is a global header but no PerColHeader;
// the actual data is stored in two bytes per element in
// row-major order; it's decompressed as:
// uint16 i; GlobalHeader g;
// float f = g.min_value + i * (g.range / 65535.0)
//  kOneByte means there is a global header but no PerColHeader;
// the data is stored in one byte per element in row-major
// order and is decompressed as:
// uint8 i; GlobalHeader g;
// float f = g.min_value + i * (g.range / 255.0)
enum DataFormat {
kOneByteWithColHeaders = 1,
kTwoByte = 2,
kOneByte = 3
};
// allocates data using new [], ensures byte alignment
// sufficient for float.
static void *AllocateData(int32 num_bytes);
struct GlobalHeader {
int32 format; // Represents the enum DataFormat.
float min_value; // min_value and range represent the ranges of the integer
// data in the kTwoByte and kOneByte formats, and the
// range of the PerColHeader uint16's in the
// kOneByteWithColHeaders format.
float range;
int32 num_rows;
int32 num_cols;
};
// This function computes the global header for compressing this data.
template<typename Real>
static inline void ComputeGlobalHeader(const MatrixBase<Real> &mat,
CompressionMethod method,
GlobalHeader *header);
// The number of bytes we need to request when allocating 'data_'.
static MatrixIndexT DataSize(const GlobalHeader &header);
// This struct is only used in format kOneByteWithColHeaders.
struct PerColHeader {
uint16 percentile_0;
uint16 percentile_25;
uint16 percentile_75;
uint16 percentile_100;
};
template<typename Real>
static void CompressColumn(const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, PerColHeader *header,
uint8 *byte_data);
template<typename Real>
static void ComputeColHeader(const GlobalHeader &global_header,
const Real *data, MatrixIndexT stride,
int32 num_rows, PerColHeader *header);
static inline uint16 FloatToUint16(const GlobalHeader &global_header,
float value);
// this is used only in the kOneByte compression format.
static inline uint8 FloatToUint8(const GlobalHeader &global_header,
float value);
static inline float Uint16ToFloat(const GlobalHeader &global_header,
uint16 value);
// this is used only in the kOneByteWithColHeaders compression format.
static inline uint8 FloatToChar(float p0, float p25,
float p75, float p100,
float value);
// this is used only in the kOneByteWithColHeaders compression format.
static inline float CharToFloat(float p0, float p25,
float p75, float p100,
uint8 value);
void *data_; // first GlobalHeader, then PerColHeader (repeated), then
// the byte data for each column (repeated). Note: don't intersperse
// the byte data with the PerColHeaders, because of alignment issues.
};
/// @} end of \addtogroup matrix_group
} // namespace kaldi
#endif // KALDI_MATRIX_COMPRESSED_MATRIX_H_
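// A minimal lossy round-trip sketch for this class (an assumed helper, not
// part of the original files; it assumes the Matrix API from
// matrix/kaldi-matrix.h). Note that CopyToMat() requires the destination to
// be pre-sized:
//
//   #include "matrix/compressed-matrix.h"
//
//   void CompressedRoundTrip(const kaldi::Matrix<kaldi::BaseFloat> &feats) {
//     using namespace kaldi;
//     // kAutomaticMethod picks kSpeechFeature for > 8 rows, else kTwoByteAuto.
//     CompressedMatrix cmat(feats, kAutomaticMethod);
//     Matrix<BaseFloat> decompressed(cmat.NumRows(), cmat.NumCols(), kUndefined);
//     cmat.CopyToMat(&decompressed);  // approximate reconstruction of feats
//   }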
// matrix/jama-eig.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This file consists of a port and modification of materials from
// JAMA: A Java Matrix Package
// under the following notice: This software is a cooperative product of
// The MathWorks and the National Institute of Standards and Technology (NIST)
// which has been released to the public. This notice and the original code are
// available at http://math.nist.gov/javanumerics/jama/domain.notice
#ifndef KALDI_MATRIX_JAMA_EIG_H_
#define KALDI_MATRIX_JAMA_EIG_H_ 1
#include "matrix/kaldi-matrix.h"
namespace kaldi {
// This class is not to be used externally. See the Eig function in the Matrix
// class in kaldi-matrix.h, which is the external interface.
template<typename Real> class EigenvalueDecomposition {
// This class is based on the EigenvalueDecomposition class from the JAMA
// library (version 1.0.2).
public:
EigenvalueDecomposition(const MatrixBase<Real> &A);
~EigenvalueDecomposition(); // free memory.
void GetV(MatrixBase<Real> *V_out) { // V is what we call P externally; it's the matrix of
// eigenvectors.
KALDI_ASSERT(V_out->NumRows() == static_cast<MatrixIndexT>(n_)
&& V_out->NumCols() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
(*V_out)(i, j) = V(i, j); // V(i, j) is member function.
}
void GetRealEigenvalues(VectorBase<Real> *r_out) {
// returns real part of eigenvalues.
KALDI_ASSERT(r_out->Dim() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
(*r_out)(i) = d_[i];
}
void GetImagEigenvalues(VectorBase<Real> *i_out) {
// returns imaginary part of eigenvalues.
KALDI_ASSERT(i_out->Dim() == static_cast<MatrixIndexT>(n_));
for (int i = 0; i < n_; i++)
(*i_out)(i) = e_[i];
}
private:
inline Real &H(int r, int c) { return H_[r*n_ + c]; }
inline Real &V(int r, int c) { return V_[r*n_ + c]; }
// Complex division (xr + i*xi) / (yr + i*yi); branching on |yr| vs |yi|
// avoids overflow (Smith's algorithm).
inline static void cdiv(Real xr, Real xi, Real yr, Real yi, Real *cdivr, Real *cdivi) {
Real r, d;
if (std::abs(yr) > std::abs(yi)) {
r = yi/yr;
d = yr + r*yi;
*cdivr = (xr + r*xi)/d;
*cdivi = (xi - r*xr)/d;
} else {
r = yr/yi;
d = yi + r*yr;
*cdivr = (r*xr + xi)/d;
*cdivi = (r*xi - xr)/d;
}
}
// Nonsymmetric reduction from Hessenberg to real Schur form.
void Hqr2 ();
int n_; // matrix dimension.
Real *d_, *e_; // real and imaginary parts of eigenvalues.
Real *V_; // the eigenvectors (P in our external notation)
Real *H_; // the nonsymmetric Hessenberg form.
Real *ort_; // working storage for nonsymmetric algorithm.
// Symmetric Householder reduction to tridiagonal form.
void Tred2 ();
// Symmetric tridiagonal QL algorithm.
void Tql2 ();
// Nonsymmetric reduction to Hessenberg form.
void Orthes ();
};
template class EigenvalueDecomposition<float>; // force instantiation.
template class EigenvalueDecomposition<double>; // force instantiation.
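// Example (illustrative sketch; external code would normally call
// MatrixBase::Eig() in kaldi-matrix.h rather than use this class directly;
// assumes the usual kaldi Matrix/Vector types):
//
//   Matrix<double> A(2, 2);
//   A(0, 0) = 2.0; A(0, 1) = 1.0;
//   A(1, 0) = 1.0; A(1, 1) = 2.0;            // symmetric input
//   EigenvalueDecomposition<double> eig(A);
//   Vector<double> re(2), im(2);
//   Matrix<double> P(2, 2);
//   eig.GetRealEigenvalues(&re);             // ~[1, 3] (Tql2 sorts ascending)
//   eig.GetImagEigenvalues(&im);             // ~[0, 0] for a symmetric input
//   eig.GetV(&P);                            // columns are the eigenvectors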
template<typename Real> void EigenvalueDecomposition<Real>::Tred2() {
// This is derived from the Algol procedures tred2 by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
for (int j = 0; j < n_; j++) {
d_[j] = V(n_-1, j);
}
// Householder reduction to tridiagonal form.
for (int i = n_-1; i > 0; i--) {
// Scale to avoid under/overflow.
Real scale = 0.0;
Real h = 0.0;
for (int k = 0; k < i; k++) {
scale = scale + std::abs(d_[k]);
}
if (scale == 0.0) {
e_[i] = d_[i-1];
for (int j = 0; j < i; j++) {
d_[j] = V(i-1, j);
V(i, j) = 0.0;
V(j, i) = 0.0;
}
} else {
// Generate Householder vector.
for (int k = 0; k < i; k++) {
d_[k] /= scale;
h += d_[k] * d_[k];
}
Real f = d_[i-1];
Real g = std::sqrt(h);
if (f > 0) {
g = -g;
}
e_[i] = scale * g;
h = h - f * g;
d_[i-1] = f - g;
for (int j = 0; j < i; j++) {
e_[j] = 0.0;
}
// Apply similarity transformation to remaining columns.
for (int j = 0; j < i; j++) {
f = d_[j];
V(j, i) = f;
g = e_[j] + V(j, j) * f;
for (int k = j+1; k <= i-1; k++) {
g += V(k, j) * d_[k];
e_[k] += V(k, j) * f;
}
e_[j] = g;
}
f = 0.0;
for (int j = 0; j < i; j++) {
e_[j] /= h;
f += e_[j] * d_[j];
}
Real hh = f / (h + h);
for (int j = 0; j < i; j++) {
e_[j] -= hh * d_[j];
}
for (int j = 0; j < i; j++) {
f = d_[j];
g = e_[j];
for (int k = j; k <= i-1; k++) {
V(k, j) -= (f * e_[k] + g * d_[k]);
}
d_[j] = V(i-1, j);
V(i, j) = 0.0;
}
}
d_[i] = h;
}
// Accumulate transformations.
for (int i = 0; i < n_-1; i++) {
V(n_-1, i) = V(i, i);
V(i, i) = 1.0;
Real h = d_[i+1];
if (h != 0.0) {
for (int k = 0; k <= i; k++) {
d_[k] = V(k, i+1) / h;
}
for (int j = 0; j <= i; j++) {
Real g = 0.0;
for (int k = 0; k <= i; k++) {
g += V(k, i+1) * V(k, j);
}
for (int k = 0; k <= i; k++) {
V(k, j) -= g * d_[k];
}
}
}
for (int k = 0; k <= i; k++) {
V(k, i+1) = 0.0;
}
}
for (int j = 0; j < n_; j++) {
d_[j] = V(n_-1, j);
V(n_-1, j) = 0.0;
}
V(n_-1, n_-1) = 1.0;
e_[0] = 0.0;
}
template<typename Real> void EigenvalueDecomposition<Real>::Tql2() {
// This is derived from the Algol procedures tql2, by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
for (int i = 1; i < n_; i++) {
e_[i-1] = e_[i];
}
e_[n_-1] = 0.0;
Real f = 0.0;
Real tst1 = 0.0;
Real eps = std::numeric_limits<Real>::epsilon();
for (int l = 0; l < n_; l++) {
// Find small subdiagonal element
tst1 = std::max(tst1, std::abs(d_[l]) + std::abs(e_[l]));
int m = l;
while (m < n_) {
if (std::abs(e_[m]) <= eps*tst1) {
break;
}
m++;
}
// If m == l, d_[l] is an eigenvalue,
// otherwise, iterate.
if (m > l) {
int iter = 0;
do {
iter = iter + 1; // (Could check iteration count here.)
// Compute implicit shift
Real g = d_[l];
Real p = (d_[l+1] - g) / (2.0 * e_[l]);
Real r = Hypot(p, static_cast<Real>(1.0)); // This is a Kaldi version of hypot that works with templates.
if (p < 0) {
r = -r;
}
d_[l] = e_[l] / (p + r);
d_[l+1] = e_[l] * (p + r);
Real dl1 = d_[l+1];
Real h = g - d_[l];
for (int i = l+2; i < n_; i++) {
d_[i] -= h;
}
f = f + h;
// Implicit QL transformation.
p = d_[m];
Real c = 1.0;
Real c2 = c;
Real c3 = c;
Real el1 = e_[l+1];
Real s = 0.0;
Real s2 = 0.0;
for (int i = m-1; i >= l; i--) {
c3 = c2;
c2 = c;
s2 = s;
g = c * e_[i];
h = c * p;
r = Hypot(p, e_[i]); // This is a Kaldi version of Hypot that works with templates.
e_[i+1] = s * r;
s = e_[i] / r;
c = p / r;
p = c * d_[i] - s * g;
d_[i+1] = h + s * (c * g + s * d_[i]);
// Accumulate transformation.
for (int k = 0; k < n_; k++) {
h = V(k, i+1);
V(k, i+1) = s * V(k, i) + c * h;
V(k, i) = c * V(k, i) - s * h;
}
}
p = -s * s2 * c3 * el1 * e_[l] / dl1;
e_[l] = s * p;
d_[l] = c * p;
// Check for convergence.
} while (std::abs(e_[l]) > eps*tst1);
}
d_[l] = d_[l] + f;
e_[l] = 0.0;
}
// Sort eigenvalues and corresponding vectors.
for (int i = 0; i < n_-1; i++) {
int k = i;
Real p = d_[i];
for (int j = i+1; j < n_; j++) {
if (d_[j] < p) {
k = j;
p = d_[j];
}
}
if (k != i) {
d_[k] = d_[i];
d_[i] = p;
for (int j = 0; j < n_; j++) {
p = V(j, i);
V(j, i) = V(j, k);
V(j, k) = p;
}
}
}
}
template<typename Real>
void EigenvalueDecomposition<Real>::Orthes() {
// This is derived from the Algol procedures orthes and ortran,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutines in EISPACK.
int low = 0;
int high = n_-1;
for (int m = low+1; m <= high-1; m++) {
// Scale column.
Real scale = 0.0;
for (int i = m; i <= high; i++) {
scale = scale + std::abs(H(i, m-1));
}
if (scale != 0.0) {
// Compute Householder transformation.
Real h = 0.0;
for (int i = high; i >= m; i--) {
ort_[i] = H(i, m-1)/scale;
h += ort_[i] * ort_[i];
}
Real g = std::sqrt(h);
if (ort_[m] > 0) {
g = -g;
}
h = h - ort_[m] * g;
ort_[m] = ort_[m] - g;
// Apply Householder similarity transformation
// H = (I-u*u'/h)*H*(I-u*u'/h)
for (int j = m; j < n_; j++) {
Real f = 0.0;
for (int i = high; i >= m; i--) {
f += ort_[i]*H(i, j);
}
f = f/h;
for (int i = m; i <= high; i++) {
H(i, j) -= f*ort_[i];
}
}
for (int i = 0; i <= high; i++) {
Real f = 0.0;
for (int j = high; j >= m; j--) {
f += ort_[j]*H(i, j);
}
f = f/h;
for (int j = m; j <= high; j++) {
H(i, j) -= f*ort_[j];
}
}
ort_[m] = scale*ort_[m];
H(m, m-1) = scale*g;
}
}
// Accumulate transformations (Algol's ortran).
for (int i = 0; i < n_; i++) {
for (int j = 0; j < n_; j++) {
V(i, j) = (i == j ? 1.0 : 0.0);
}
}
for (int m = high-1; m >= low+1; m--) {
if (H(m, m-1) != 0.0) {
for (int i = m+1; i <= high; i++) {
ort_[i] = H(i, m-1);
}
for (int j = m; j <= high; j++) {
Real g = 0.0;
for (int i = m; i <= high; i++) {
g += ort_[i] * V(i, j);
}
// Double division avoids possible underflow
g = (g / ort_[m]) / H(m, m-1);
for (int i = m; i <= high; i++) {
V(i, j) += g * ort_[i];
}
}
}
}
}
template<typename Real> void EigenvalueDecomposition<Real>::Hqr2() {
// This is derived from the Algol procedure hqr2,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
int nn = n_;
int n = nn-1;
int low = 0;
int high = nn-1;
Real eps = std::numeric_limits<Real>::epsilon();
Real exshift = 0.0;
Real p = 0, q = 0, r = 0, s = 0, z=0, t, w, x, y;
// Store roots isolated by balanc and compute matrix norm
Real norm = 0.0;
for (int i = 0; i < nn; i++) {
if (i < low || i > high) {
d_[i] = H(i, i);
e_[i] = 0.0;
}
for (int j = std::max(i-1, 0); j < nn; j++) {
norm = norm + std::abs(H(i, j));
}
}
// Outer loop over eigenvalue index
int iter = 0;
while (n >= low) {
// Look for single small sub-diagonal element
int l = n;
while (l > low) {
s = std::abs(H(l-1, l-1)) + std::abs(H(l, l));
if (s == 0.0) {
s = norm;
}
if (std::abs(H(l, l-1)) < eps * s) {
break;
}
l--;
}
// Check for convergence
// One root found
if (l == n) {
H(n, n) = H(n, n) + exshift;
d_[n] = H(n, n);
e_[n] = 0.0;
n--;
iter = 0;
// Two roots found
} else if (l == n-1) {
w = H(n, n-1) * H(n-1, n);
p = (H(n-1, n-1) - H(n, n)) / 2.0;
q = p * p + w;
z = std::sqrt(std::abs(q));
H(n, n) = H(n, n) + exshift;
H(n-1, n-1) = H(n-1, n-1) + exshift;
x = H(n, n);
// Real pair
if (q >= 0) {
if (p >= 0) {
z = p + z;
} else {
z = p - z;
}
d_[n-1] = x + z;
d_[n] = d_[n-1];
if (z != 0.0) {
d_[n] = x - w / z;
}
e_[n-1] = 0.0;
e_[n] = 0.0;
x = H(n, n-1);
s = std::abs(x) + std::abs(z);
p = x / s;
q = z / s;
r = std::sqrt(p * p+q * q);
p = p / r;
q = q / r;
// Row modification
for (int j = n-1; j < nn; j++) {
z = H(n-1, j);
H(n-1, j) = q * z + p * H(n, j);
H(n, j) = q * H(n, j) - p * z;
}
// Column modification
for (int i = 0; i <= n; i++) {
z = H(i, n-1);
H(i, n-1) = q * z + p * H(i, n);
H(i, n) = q * H(i, n) - p * z;
}
// Accumulate transformations
for (int i = low; i <= high; i++) {
z = V(i, n-1);
V(i, n-1) = q * z + p * V(i, n);
V(i, n) = q * V(i, n) - p * z;
}
// Complex pair
} else {
d_[n-1] = x + p;
d_[n] = x + p;
e_[n-1] = z;
e_[n] = -z;
}
n = n - 2;
iter = 0;
// No convergence yet
} else {
// Form shift
x = H(n, n);
y = 0.0;
w = 0.0;
if (l < n) {
y = H(n-1, n-1);
w = H(n, n-1) * H(n-1, n);
}
// Wilkinson's original ad hoc shift
if (iter == 10) {
exshift += x;
for (int i = low; i <= n; i++) {
H(i, i) -= x;
}
s = std::abs(H(n, n-1)) + std::abs(H(n-1, n-2));
x = y = 0.75 * s;
w = -0.4375 * s * s;
}
// MATLAB's new ad hoc shift
if (iter == 30) {
s = (y - x) / 2.0;
s = s * s + w;
if (s > 0) {
s = std::sqrt(s);
if (y < x) {
s = -s;
}
s = x - w / ((y - x) / 2.0 + s);
for (int i = low; i <= n; i++) {
H(i, i) -= s;
}
exshift += s;
x = y = w = 0.964;
}
}
iter = iter + 1; // (Could check iteration count here.)
// Look for two consecutive small sub-diagonal elements
int m = n-2;
while (m >= l) {
z = H(m, m);
r = x - z;
s = y - z;
p = (r * s - w) / H(m+1, m) + H(m, m+1);
q = H(m+1, m+1) - z - r - s;
r = H(m+2, m+1);
s = std::abs(p) + std::abs(q) + std::abs(r);
p = p / s;
q = q / s;
r = r / s;
if (m == l) {
break;
}
if (std::abs(H(m, m-1)) * (std::abs(q) + std::abs(r)) <
eps * (std::abs(p) * (std::abs(H(m-1, m-1)) + std::abs(z) +
std::abs(H(m+1, m+1))))) {
break;
}
m--;
}
for (int i = m+2; i <= n; i++) {
H(i, i-2) = 0.0;
if (i > m+2) {
H(i, i-3) = 0.0;
}
}
// Double QR step involving rows l:n and columns m:n
for (int k = m; k <= n-1; k++) {
bool notlast = (k != n-1);
if (k != m) {
p = H(k, k-1);
q = H(k+1, k-1);
r = (notlast ? H(k+2, k-1) : 0.0);
x = std::abs(p) + std::abs(q) + std::abs(r);
if (x != 0.0) {
p = p / x;
q = q / x;
r = r / x;
}
}
if (x == 0.0) {
break;
}
s = std::sqrt(p * p + q * q + r * r);
if (p < 0) {
s = -s;
}
if (s != 0) {
if (k != m) {
H(k, k-1) = -s * x;
} else if (l != m) {
H(k, k-1) = -H(k, k-1);
}
p = p + s;
x = p / s;
y = q / s;
z = r / s;
q = q / p;
r = r / p;
// Row modification
for (int j = k; j < nn; j++) {
p = H(k, j) + q * H(k+1, j);
if (notlast) {
p = p + r * H(k+2, j);
H(k+2, j) = H(k+2, j) - p * z;
}
H(k, j) = H(k, j) - p * x;
H(k+1, j) = H(k+1, j) - p * y;
}
// Column modification
for (int i = 0; i <= std::min(n, k+3); i++) {
p = x * H(i, k) + y * H(i, k+1);
if (notlast) {
p = p + z * H(i, k+2);
H(i, k+2) = H(i, k+2) - p * r;
}
H(i, k) = H(i, k) - p;
H(i, k+1) = H(i, k+1) - p * q;
}
// Accumulate transformations
for (int i = low; i <= high; i++) {
p = x * V(i, k) + y * V(i, k+1);
if (notlast) {
p = p + z * V(i, k+2);
V(i, k+2) = V(i, k+2) - p * r;
}
V(i, k) = V(i, k) - p;
V(i, k+1) = V(i, k+1) - p * q;
}
} // (s != 0)
} // k loop
} // check convergence
} // while (n >= low)
// Backsubstitute to find vectors of upper triangular form
if (norm == 0.0) {
return;
}
for (n = nn-1; n >= 0; n--) {
p = d_[n];
q = e_[n];
// Real vector
if (q == 0) {
int l = n;
H(n, n) = 1.0;
for (int i = n-1; i >= 0; i--) {
w = H(i, i) - p;
r = 0.0;
for (int j = l; j <= n; j++) {
r = r + H(i, j) * H(j, n);
}
if (e_[i] < 0.0) {
z = w;
s = r;
} else {
l = i;
if (e_[i] == 0.0) {
if (w != 0.0) {
H(i, n) = -r / w;
} else {
H(i, n) = -r / (eps * norm);
}
// Solve real equations
} else {
x = H(i, i+1);
y = H(i+1, i);
q = (d_[i] - p) * (d_[i] - p) + e_[i] * e_[i];
t = (x * s - z * r) / q;
H(i, n) = t;
if (std::abs(x) > std::abs(z)) {
H(i+1, n) = (-r - w * t) / x;
} else {
H(i+1, n) = (-s - y * t) / z;
}
}
// Overflow control
t = std::abs(H(i, n));
if ((eps * t) * t > 1) {
for (int j = i; j <= n; j++) {
H(j, n) = H(j, n) / t;
}
}
}
}
// Complex vector
} else if (q < 0) {
int l = n-1;
// Last vector component imaginary so matrix is triangular
if (std::abs(H(n, n-1)) > std::abs(H(n-1, n))) {
H(n-1, n-1) = q / H(n, n-1);
H(n-1, n) = -(H(n, n) - p) / H(n, n-1);
} else {
Real cdivr, cdivi;
cdiv(0.0, -H(n-1, n), H(n-1, n-1)-p, q, &cdivr, &cdivi);
H(n-1, n-1) = cdivr;
H(n-1, n) = cdivi;
}
H(n, n-1) = 0.0;
H(n, n) = 1.0;
for (int i = n-2; i >= 0; i--) {
Real ra, sa, vr, vi;
ra = 0.0;
sa = 0.0;
for (int j = l; j <= n; j++) {
ra = ra + H(i, j) * H(j, n-1);
sa = sa + H(i, j) * H(j, n);
}
w = H(i, i) - p;
if (e_[i] < 0.0) {
z = w;
r = ra;
s = sa;
} else {
l = i;
if (e_[i] == 0) {
Real cdivr, cdivi;
cdiv(-ra, -sa, w, q, &cdivr, &cdivi);
H(i, n-1) = cdivr;
H(i, n) = cdivi;
} else {
Real cdivr, cdivi;
// Solve complex equations
x = H(i, i+1);
y = H(i+1, i);
vr = (d_[i] - p) * (d_[i] - p) + e_[i] * e_[i] - q * q;
vi = (d_[i] - p) * 2.0 * q;
if (vr == 0.0 && vi == 0.0) {
vr = eps * norm * (std::abs(w) + std::abs(q) +
std::abs(x) + std::abs(y) + std::abs(z));
}
cdiv(x*r-z*ra+q*sa, x*s-z*sa-q*ra, vr, vi, &cdivr, &cdivi);
H(i, n-1) = cdivr;
H(i, n) = cdivi;
if (std::abs(x) > (std::abs(z) + std::abs(q))) {
H(i+1, n-1) = (-ra - w * H(i, n-1) + q * H(i, n)) / x;
H(i+1, n) = (-sa - w * H(i, n) - q * H(i, n-1)) / x;
} else {
cdiv(-r-y*H(i, n-1), -s-y*H(i, n), z, q, &cdivr, &cdivi);
H(i+1, n-1) = cdivr;
H(i+1, n) = cdivi;
}
}
// Overflow control
t = std::max(std::abs(H(i, n-1)), std::abs(H(i, n)));
if ((eps * t) * t > 1) {
for (int j = i; j <= n; j++) {
H(j, n-1) = H(j, n-1) / t;
H(j, n) = H(j, n) / t;
}
}
}
}
}
}
// Vectors of isolated roots
for (int i = 0; i < nn; i++) {
if (i < low || i > high) {
for (int j = i; j < nn; j++) {
V(i, j) = H(i, j);
}
}
}
// Back transformation to get eigenvectors of original matrix
for (int j = nn-1; j >= low; j--) {
for (int i = low; i <= high; i++) {
z = 0.0;
for (int k = low; k <= std::min(j, high); k++) {
z = z + V(i, k) * H(k, j);
}
V(i, j) = z;
}
}
}
template<typename Real>
EigenvalueDecomposition<Real>::EigenvalueDecomposition(const MatrixBase<Real> &A) {
KALDI_ASSERT(A.NumCols() == A.NumRows() && A.NumCols() >= 1);
n_ = A.NumRows();
V_ = new Real[n_*n_];
d_ = new Real[n_];
e_ = new Real[n_];
H_ = NULL;
ort_ = NULL;
if (A.IsSymmetric(0.0)) {
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
V(i, j) = A(i, j); // Note that V(i, j) is a member function; A(i, j) is an operator
// of the matrix A.
// Tridiagonalize.
Tred2();
// Diagonalize.
Tql2();
} else {
H_ = new Real[n_*n_];
ort_ = new Real[n_];
for (int i = 0; i < n_; i++)
for (int j = 0; j < n_; j++)
H(i, j) = A(i, j); // as before: H is member function, A(i, j) is operator of matrix.
// Reduce to Hessenberg form.
Orthes();
// Reduce Hessenberg to real Schur form.
Hqr2();
}
}
template<typename Real>
EigenvalueDecomposition<Real>::~EigenvalueDecomposition() {
delete [] d_;
delete [] e_;
delete [] V_;
delete [] H_;
delete [] ort_;
}
// see function MatrixBase<Real>::Eig in kaldi-matrix.cc
} // namespace kaldi
#endif // KALDI_MATRIX_JAMA_EIG_H_
// matrix/jama-svd.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// This file consists of a port and modification of materials from
// JAMA: A Java Matrix Package
// under the following notice: This software is a cooperative product of
// The MathWorks and the National Institute of Standards and Technology (NIST)
// which has been released to the public. This notice and the original code are
// available at http://math.nist.gov/javanumerics/jama/domain.notice
#ifndef KALDI_MATRIX_JAMA_SVD_H_
#define KALDI_MATRIX_JAMA_SVD_H_ 1
#include "matrix/kaldi-matrix.h"
#include "matrix/sp-matrix.h"
#include "matrix/cblas-wrappers.h"
namespace kaldi {
#if defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
// using ATLAS as our math library, which doesn't have SVD -> need
// to implement it.
// This routine is a modified form of jama_svd.h which is part of the TNT distribution.
// (originally comes from JAMA).
/** Singular Value Decomposition.
* <P>
* For an m-by-n matrix A with m >= n, the singular value decomposition is
* an m-by-n orthogonal matrix U, an n-by-n diagonal matrix S, and
* an n-by-n orthogonal matrix V so that A = U*S*V'.
* <P>
* The singular values, sigma[k] = S(k, k), are ordered so that
* sigma[0] >= sigma[1] >= ... >= sigma[n-1].
* <P>
* The singular value decomposition always exists, so the constructor will
* never fail. The matrix condition number and the effective numerical
* rank can be computed from this decomposition.
* <p>
* (Adapted from JAMA, a Java Matrix Library, developed jointly
* by The MathWorks and NIST; see http://math.nist.gov/javanumerics/jama).
*/
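// Example (illustrative sketch): callers normally reach this routine through
// MatrixBase::Svd() declared in kaldi-matrix.h, which falls back to JamaSvd
// when the build has no LAPACK Svd (the case guarded by the #if above):
//
//   Matrix<float> A(4, 3);
//   A.SetRandn();
//   Vector<float> s(3);
//   Matrix<float> U(4, 3), Vt(3, 3);
//   A.Svd(&s, &U, &Vt);          // A ~= U * diag(s) * Vt
//   SortSvd(&s, &U, &Vt);        // optionally order the singular values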
template<typename Real>
bool MatrixBase<Real>::JamaSvd(VectorBase<Real> *s_in,
MatrixBase<Real> *U_in,
MatrixBase<Real> *V_in) { // Destructive!
KALDI_ASSERT(s_in != NULL && U_in != this && V_in != this);
int wantu = (U_in != NULL), wantv = (V_in != NULL);
Matrix<Real> Utmp, Vtmp;
MatrixBase<Real> &U = (U_in ? *U_in : Utmp), &V = (V_in ? *V_in : Vtmp);
VectorBase<Real> &s = *s_in;
int m = num_rows_, n = num_cols_;
KALDI_ASSERT(m>=n && m != 0 && n != 0);
if (wantu) KALDI_ASSERT((int)U.num_rows_ == m && (int)U.num_cols_ == n);
if (wantv) KALDI_ASSERT((int)V.num_rows_ == n && (int)V.num_cols_ == n);
KALDI_ASSERT((int)s.Dim() == n); // n<=m so n is min.
int nu = n;
U.SetZero(); // make sure all zero.
Vector<Real> e(n);
Vector<Real> work(m);
MatrixBase<Real> &A(*this);
Real *adata = A.Data(), *workdata = work.Data(), *edata = e.Data(),
*udata = U.Data(), *vdata = V.Data();
int astride = static_cast<int>(A.Stride()),
ustride = static_cast<int>(U.Stride()),
vstride = static_cast<int>(V.Stride());
int i = 0, j = 0, k = 0;
// Reduce A to bidiagonal form, storing the diagonal elements
// in s and the super-diagonal elements in e.
int nct = std::min(m-1, n);
int nrt = std::max(0, std::min(n-2, m));
for (k = 0; k < std::max(nct, nrt); k++) {
if (k < nct) {
// Compute the transformation for the k-th column and
// place the k-th diagonal in s(k).
// Compute 2-norm of k-th column without under/overflow.
s(k) = 0;
for (i = k; i < m; i++) {
s(k) = hypot(s(k), A(i, k));
}
if (s(k) != 0.0) {
if (A(k, k) < 0.0) {
s(k) = -s(k);
}
for (i = k; i < m; i++) {
A(i, k) /= s(k);
}
A(k, k) += 1.0;
}
s(k) = -s(k);
}
for (j = k+1; j < n; j++) {
if ((k < nct) && (s(k) != 0.0)) {
// Apply the transformation.
Real t = cblas_Xdot(m - k, adata + astride*k + k, astride,
adata + astride*k + j, astride);
/*for (i = k; i < m; i++) {
t += adata[i*astride + k]*adata[i*astride + j]; // A(i, k)*A(i, j); // 3
}*/
t = -t/A(k, k);
cblas_Xaxpy(m - k, t, adata + k*astride + k, astride,
adata + k*astride + j, astride);
/*for (i = k; i < m; i++) {
adata[i*astride + j] += t*adata[i*astride + k]; // A(i, j) += t*A(i, k); // 5
}*/
}
// Place the k-th row of A into e for the
// subsequent calculation of the row transformation.
e(j) = A(k, j);
}
if (wantu && (k < nct)) {
// Place the transformation in U for subsequent back
// multiplication.
for (i = k; i < m; i++) {
U(i, k) = A(i, k);
}
}
if (k < nrt) {
// Compute the k-th row transformation and place the
// k-th super-diagonal in e(k).
// Compute 2-norm without under/overflow.
e(k) = 0;
for (i = k+1; i < n; i++) {
e(k) = hypot(e(k), e(i));
}
if (e(k) != 0.0) {
if (e(k+1) < 0.0) {
e(k) = -e(k);
}
for (i = k+1; i < n; i++) {
e(i) /= e(k);
}
e(k+1) += 1.0;
}
e(k) = -e(k);
if ((k+1 < m) && (e(k) != 0.0)) {
// Apply the transformation.
for (i = k+1; i < m; i++) {
work(i) = 0.0;
}
for (j = k+1; j < n; j++) {
for (i = k+1; i < m; i++) {
workdata[i] += edata[j] * adata[i*astride + j]; // work(i) += e(j)*A(i, j); // 5
}
}
for (j = k+1; j < n; j++) {
Real t(-e(j)/e(k+1));
cblas_Xaxpy(m - (k+1), t, workdata + (k+1), 1,
adata + (k+1)*astride + j, astride);
/*
for (i = k+1; i < m; i++) {
adata[i*astride + j] += t*workdata[i]; // A(i, j) += t*work(i); // 5
}*/
}
}
if (wantv) {
// Place the transformation in V for subsequent
// back multiplication.
for (i = k+1; i < n; i++) {
V(i, k) = e(i);
}
}
}
}
// Set up the final bidiagonal matrix of order p.
int p = std::min(n, m+1);
if (nct < n) {
s(nct) = A(nct, nct);
}
if (m < p) {
s(p-1) = 0.0;
}
if (nrt+1 < p) {
e(nrt) = A(nrt, p-1);
}
e(p-1) = 0.0;
// If required, generate U.
if (wantu) {
for (j = nct; j < nu; j++) {
for (i = 0; i < m; i++) {
U(i, j) = 0.0;
}
U(j, j) = 1.0;
}
for (k = nct-1; k >= 0; k--) {
if (s(k) != 0.0) {
for (j = k+1; j < nu; j++) {
Real t = cblas_Xdot(m - k, udata + k*ustride + k, ustride, udata + k*ustride + j, ustride);
//for (i = k; i < m; i++) {
// t += udata[i*ustride + k]*udata[i*ustride + j]; // t += U(i, k)*U(i, j); // 8
// }
t = -t/U(k, k);
cblas_Xaxpy(m - k, t, udata + ustride*k + k, ustride,
udata + k*ustride + j, ustride);
/*for (i = k; i < m; i++) {
udata[i*ustride + j] += t*udata[i*ustride + k]; // U(i, j) += t*U(i, k); // 4
}*/
}
for (i = k; i < m; i++ ) {
U(i, k) = -U(i, k);
}
U(k, k) = 1.0 + U(k, k);
for (i = 0; i < k-1; i++) {
U(i, k) = 0.0;
}
} else {
for (i = 0; i < m; i++) {
U(i, k) = 0.0;
}
U(k, k) = 1.0;
}
}
}
// If required, generate V.
if (wantv) {
for (k = n-1; k >= 0; k--) {
if ((k < nrt) && (e(k) != 0.0)) {
for (j = k+1; j < nu; j++) {
Real t = cblas_Xdot(n - (k+1), vdata + (k+1)*vstride + k, vstride,
vdata + (k+1)*vstride + j, vstride);
/*Real t (0.0);
for (i = k+1; i < n; i++) {
t += vdata[i*vstride + k]*vdata[i*vstride + j]; // t += V(i, k)*V(i, j); // 7
}*/
t = -t/V(k+1, k);
cblas_Xaxpy(n - (k+1), t, vdata + (k+1)*vstride + k, vstride,
vdata + (k+1)*vstride + j, vstride);
/*for (i = k+1; i < n; i++) {
vdata[i*vstride + j] += t*vdata[i*vstride + k]; // V(i, j) += t*V(i, k); // 7
}*/
}
}
for (i = 0; i < n; i++) {
V(i, k) = 0.0;
}
V(k, k) = 1.0;
}
}
// Main iteration loop for the singular values.
int pp = p-1;
int iter = 0;
// note: -52.0 is from Jama code; the -23 is the extension
// to float, because mantissa length in (double, float)
// is (52, 23) bits respectively.
Real eps(pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));
// Note: the -966 was taken from Jama code, but the -120 is a guess
// of how to extend this to float... the exponent in double goes
// from -1022 .. 1023, and in float from -126..127. I'm not sure
// what the significance of 966 is, so -120 just represents a number
// that's a bit less negative than -126. If we get convergence
// failure in float only, this may mean that we have to make the
// -120 value less negative.
Real tiny(pow(2.0, sizeof(Real) == 4 ? -120.0 : -966.0));
while (p > 0) {
int k = 0;
int kase = 0;
if (iter == 500 || iter == 750) {
KALDI_WARN << "Svd taking a long time: making convergence criterion less exact.";
eps = pow(static_cast<Real>(0.8), eps);
tiny = pow(static_cast<Real>(0.8), tiny);
}
if (iter > 1000) {
KALDI_WARN << "Svd not converging on matrix of size " << m << " by " <<n;
return false;
}
// This section of the program inspects for
// negligible elements in the s and e arrays. On
// completion the variables kase and k are set as follows.
// kase = 1 if s(p) and e(k-1) are negligible and k < p
// kase = 2 if s(k) is negligible and k < p
// kase = 3 if e(k-1) is negligible, k < p, and
// s(k), ..., s(p) are not negligible (qr step).
// kase = 4 if e(p-1) is negligible (convergence).
for (k = p-2; k >= -1; k--) {
if (k == -1) {
break;
}
if (std::abs(e(k)) <=
tiny + eps*(std::abs(s(k)) + std::abs(s(k+1)))) {
e(k) = 0.0;
break;
}
}
if (k == p-2) {
kase = 4;
} else {
int ks;
for (ks = p-1; ks >= k; ks--) {
if (ks == k) {
break;
}
Real t( (ks != p ? std::abs(e(ks)) : 0.) +
(ks != k+1 ? std::abs(e(ks-1)) : 0.));
if (std::abs(s(ks)) <= tiny + eps*t) {
s(ks) = 0.0;
break;
}
}
if (ks == k) {
kase = 3;
} else if (ks == p-1) {
kase = 1;
} else {
kase = 2;
k = ks;
}
}
k++;
// Perform the task indicated by kase.
switch (kase) {
// Deflate negligible s(p).
case 1: {
Real f(e(p-2));
e(p-2) = 0.0;
for (j = p-2; j >= k; j--) {
Real t( hypot(s(j), f));
Real cs(s(j)/t);
Real sn(f/t);
s(j) = t;
if (j != k) {
f = -sn*e(j-1);
e(j-1) = cs*e(j-1);
}
if (wantv) {
for (i = 0; i < n; i++) {
t = cs*V(i, j) + sn*V(i, p-1);
V(i, p-1) = -sn*V(i, j) + cs*V(i, p-1);
V(i, j) = t;
}
}
}
}
break;
// Split at negligible s(k).
case 2: {
Real f(e(k-1));
e(k-1) = 0.0;
for (j = k; j < p; j++) {
Real t(hypot(s(j), f));
Real cs( s(j)/t);
Real sn(f/t);
s(j) = t;
f = -sn*e(j);
e(j) = cs*e(j);
if (wantu) {
for (i = 0; i < m; i++) {
t = cs*U(i, j) + sn*U(i, k-1);
U(i, k-1) = -sn*U(i, j) + cs*U(i, k-1);
U(i, j) = t;
}
}
}
}
break;
// Perform one qr step.
case 3: {
// Calculate the shift.
Real scale = std::max(std::max(std::max(std::max(
std::abs(s(p-1)), std::abs(s(p-2))), std::abs(e(p-2))),
std::abs(s(k))), std::abs(e(k)));
Real sp = s(p-1)/scale;
Real spm1 = s(p-2)/scale;
Real epm1 = e(p-2)/scale;
Real sk = s(k)/scale;
Real ek = e(k)/scale;
Real b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/2.0;
Real c = (sp*epm1)*(sp*epm1);
Real shift = 0.0;
if ((b != 0.0) || (c != 0.0)) {
shift = std::sqrt(b*b + c);
if (b < 0.0) {
shift = -shift;
}
shift = c/(b + shift);
}
Real f = (sk + sp)*(sk - sp) + shift;
Real g = sk*ek;
// Chase zeros.
for (j = k; j < p-1; j++) {
Real t = hypot(f, g);
Real cs = f/t;
Real sn = g/t;
if (j != k) {
e(j-1) = t;
}
f = cs*s(j) + sn*e(j);
e(j) = cs*e(j) - sn*s(j);
g = sn*s(j+1);
s(j+1) = cs*s(j+1);
if (wantv) {
cblas_Xrot(n, vdata + j, vstride, vdata + j+1, vstride, cs, sn);
/*for (i = 0; i < n; i++) {
t = cs*vdata[i*vstride + j] + sn*vdata[i*vstride + j+1]; // t = cs*V(i, j) + sn*V(i, j+1); // 13
vdata[i*vstride + j+1] = -sn*vdata[i*vstride + j] + cs*vdata[i*vstride + j+1]; // V(i, j+1) = -sn*V(i, j) + cs*V(i, j+1); // 5
vdata[i*vstride + j] = t; // V(i, j) = t; // 4
}*/
}
t = hypot(f, g);
cs = f/t;
sn = g/t;
s(j) = t;
f = cs*e(j) + sn*s(j+1);
s(j+1) = -sn*e(j) + cs*s(j+1);
g = sn*e(j+1);
e(j+1) = cs*e(j+1);
if (wantu && (j < m-1)) {
cblas_Xrot(m, udata + j, ustride, udata + j+1, ustride, cs, sn);
/*for (i = 0; i < m; i++) {
t = cs*udata[i*ustride + j] + sn*udata[i*ustride + j+1]; // t = cs*U(i, j) + sn*U(i, j+1); // 7
udata[i*ustride + j+1] = -sn*udata[i*ustride + j] +cs*udata[i*ustride + j+1]; // U(i, j+1) = -sn*U(i, j) + cs*U(i, j+1); // 8
udata[i*ustride + j] = t; // U(i, j) = t; // 1
}*/
}
}
e(p-2) = f;
iter = iter + 1;
}
break;
// Convergence.
case 4: {
// Make the singular values positive.
if (s(k) <= 0.0) {
s(k) = (s(k) < 0.0 ? -s(k) : 0.0);
if (wantv) {
for (i = 0; i <= pp; i++) {
V(i, k) = -V(i, k);
}
}
}
// Order the singular values.
while (k < pp) {
if (s(k) >= s(k+1)) {
break;
}
Real t = s(k);
s(k) = s(k+1);
s(k+1) = t;
if (wantv && (k < n-1)) {
for (i = 0; i < n; i++) {
t = V(i, k+1); V(i, k+1) = V(i, k); V(i, k) = t;
}
}
if (wantu && (k < m-1)) {
for (i = 0; i < m; i++) {
t = U(i, k+1); U(i, k+1) = U(i, k); U(i, k) = t;
}
}
k++;
}
iter = 0;
p--;
}
break;
}
}
return true;
}
#endif // defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
} // namespace kaldi
#endif // KALDI_MATRIX_JAMA_SVD_H_
// matrix/kaldi-blas.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_KALDI_BLAS_H_
#define KALDI_MATRIX_KALDI_BLAS_H_
// This file handles the #includes for BLAS, LAPACK and so on.
// It manipulates the declarations into a common format that kaldi can handle.
// However, the kaldi code will check whether HAVE_ATLAS is defined as that
// code is called a bit differently from CLAPACK that comes from other sources.
// There are three alternatives:
// (i) you have ATLAS, which includes the ATLAS implementation of CBLAS
// plus a subset of CLAPACK (but with clapack_ in the function declarations).
// In this case, define HAVE_ATLAS and make sure the relevant directories are
// in the include path.
// (ii) you have CBLAS (some implementation thereof) plus CLAPACK.
// In this case, define HAVE_CLAPACK.
// [Since CLAPACK depends on BLAS, the presence of BLAS is implicit].
// (iii) you have the MKL library, which includes CLAPACK and CBLAS.
// Note that if we are using ATLAS, no Svd implementation is supplied,
// so we define HAVE_Svd to be zero and this directs our implementation to
// supply its own "by hand" implementation which is based on TNT code.
#define HAVE_OPENBLAS
#if (defined(HAVE_CLAPACK) && (defined(HAVE_ATLAS) || defined(HAVE_MKL))) \
|| (defined(HAVE_ATLAS) && defined(HAVE_MKL))
#error "Do not define more than one of HAVE_CLAPACK, HAVE_ATLAS and HAVE_MKL"
#endif
#ifdef HAVE_ATLAS
extern "C" {
#include "cblas.h"
#include "clapack.h"
}
#elif defined(HAVE_CLAPACK)
#ifdef __APPLE__
#ifndef __has_extension
#define __has_extension(x) 0
#endif
#define vImage_Utilities_h
#define vImage_CVUtilities_h
#include <Accelerate/Accelerate.h>
typedef __CLPK_integer integer;
typedef __CLPK_logical logical;
typedef __CLPK_real real;
typedef __CLPK_doublereal doublereal;
typedef __CLPK_complex complex;
typedef __CLPK_doublecomplex doublecomplex;
typedef __CLPK_ftnlen ftnlen;
#else
extern "C" {
// May be in /usr/[local]/include if installed; else this uses the one
// from the tools/CLAPACK_include directory.
#include <cblas.h>
#include <f2c.h>
#include <clapack.h>
// get rid of macros from f2c.h -- these are dangerous.
#undef abs
#undef dabs
#undef min
#undef max
#undef dmin
#undef dmax
#undef bit_test
#undef bit_clear
#undef bit_set
}
#endif
#elif defined(HAVE_MKL)
extern "C" {
#include <mkl.h>
}
#elif defined(HAVE_OPENBLAS)
// getting cblas.h and lapacke.h from <openblas-install-dir>/.
// putting in "" not <> to search -I before system libraries.
#if defined(_MSC_VER)
#include <complex.h>
#define LAPACK_COMPLEX_CUSTOM
#define lapack_complex_float _Fcomplex
#define lapack_complex_double _Dcomplex
#endif
#include "cblas.h"
#include "lapacke.h"
#undef I
#undef complex
// get rid of macros from f2c.h -- these are dangerous.
#undef abs
#undef dabs
#undef min
#undef max
#undef dmin
#undef dmax
#undef bit_test
#undef bit_clear
#undef bit_set
#else
#error "You need to define (using the preprocessor) either HAVE_CLAPACK or HAVE_ATLAS or HAVE_MKL (but not more than one)"
#endif
#ifdef HAVE_OPENBLAS
typedef int KaldiBlasInt; // try int.
#endif
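// Example (illustrative): with HAVE_OPENBLAS selected above, the cblas.h
// included here exposes the standard CBLAS interface, e.g. a dot product:
//
//   double x[3] = {1.0, 2.0, 3.0}, y[3] = {4.0, 5.0, 6.0};
//   double dp = cblas_ddot(3, x, 1, y, 1);   // 1*4 + 2*5 + 3*6 == 32.0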
#ifdef HAVE_CLAPACK
typedef integer KaldiBlasInt;
#endif
#ifdef HAVE_MKL
typedef MKL_INT KaldiBlasInt;
#endif
#ifdef HAVE_ATLAS
// in this case there is no need for KaldiBlasInt-- this typedef is only needed
// for Svd code which is not included in ATLAS (we re-implement it).
#endif
#endif // KALDI_MATRIX_KALDI_BLAS_H_
// matrix/matrix-functions-inl.h
// Copyright 2009-2011 Microsoft Corporation
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from Henrique (Rico) Malvar's
// book "Signal Processing with Lapped Transforms", Artech, 1992.
#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
#define KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
namespace kaldi {
//! ComplexMul implements, inline, the complex multiplication b *= a.
template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
Real *b_re, Real *b_im) {
Real tmp_re = (*b_re * a_re) - (*b_im * a_im);
*b_im = *b_re * a_im + *b_im * a_re;
*b_re = tmp_re;
}
template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
const Real &b_re, const Real &b_im,
Real *c_re, Real *c_im) {
*c_re += b_re*a_re - b_im*a_im;
*c_im += b_re*a_im + b_im*a_re;
}
template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im) {
*a_re = std::cos(x);
*a_im = std::sin(x);
}
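// Worked example (illustrative) of the helpers above: multiplying
// b = (1 + 2i) in place by a = (3 + 4i),
//
//   float b_re = 1.0f, b_im = 2.0f;
//   ComplexMul(3.0f, 4.0f, &b_re, &b_im);
//
// leaves b_re == -5.0f and b_im == 10.0f, since (1+2i)(3+4i) = -5 + 10i.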
} // end namespace kaldi
#endif // KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
// matrix/matrix-functions.cc
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Jan Silovsky
// Yanmin Qian; Saarland University; Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from Henrique (Rico) Malvar's
// book "Signal Processing with Lapped Transforms", Artech, 1992.
#include "matrix/matrix-functions.h"
#include "matrix/sp-matrix.h"
namespace kaldi {
template<typename Real> void ComplexFt (const VectorBase<Real> &in,
VectorBase<Real> *out, bool forward) {
int exp_sign = (forward ? -1 : 1);
KALDI_ASSERT(out != NULL);
KALDI_ASSERT(in.Dim() == out->Dim());
KALDI_ASSERT(in.Dim() % 2 == 0);
int twoN = in.Dim(), N = twoN / 2;
const Real *data_in = in.Data();
Real *data_out = out->Data();
Real exp1N_re, exp1N_im; // forward -> exp(-2pi / N), backward -> exp(2pi / N).
Real fraction = exp_sign * M_2PI / static_cast<Real>(N); // forward -> -2pi/N, backward -> 2pi/N
ComplexImExp(fraction, &exp1N_re, &exp1N_im);
Real expm_re = 1.0, expm_im = 0.0; // forward -> exp(-2pi m / N).
for (int two_m = 0; two_m < twoN; two_m+=2) { // For each output component.
Real expmn_re = 1.0, expmn_im = 0.0; // forward -> exp(-2pi m n / N).
Real sum_re = 0.0, sum_im = 0.0; // complex output for index m (the sum expression)
for (int two_n = 0; two_n < twoN; two_n+=2) {
ComplexAddProduct(data_in[two_n], data_in[two_n+1],
expmn_re, expmn_im,
&sum_re, &sum_im);
ComplexMul(expm_re, expm_im, &expmn_re, &expmn_im);
}
data_out[two_m] = sum_re;
data_out[two_m + 1] = sum_im;
if (two_m % 10 == 0) { // occasionally renew "expm" from scratch to avoid
// loss of precision.
int nextm = 1 + two_m/2;
Real fraction_mult = fraction * nextm;
ComplexImExp(fraction_mult, &expm_re, &expm_im);
} else {
ComplexMul(exp1N_re, exp1N_im, &expm_re, &expm_im);
}
}
}
template
void ComplexFt (const VectorBase<float> &in,
VectorBase<float> *out, bool forward);
template
void ComplexFt (const VectorBase<double> &in,
VectorBase<double> *out, bool forward);
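// Example (illustrative): ComplexFt is the O(N^2) reference transform, mainly
// useful for testing the fast version below. Both vectors hold N complex
// points as interleaved (re, im) pairs:
//
//   Vector<float> in(8), out(8);   // 4 complex points
//   in.SetRandn();
//   ComplexFt(in, &out, true);     // forward DFT; no 1/N factor is applied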
#define KALDI_COMPLEXFFT_BLOCKSIZE 8192
// This #define affects how we recurse in ComplexFftRecursive.
// We assume that memory-caching happens on a scale at
// least as small as this.
//! ComplexFftRecursive is a recursive function that computes the
//! complex FFT of size N. The "nffts" argument specifies how many
//! separate FFTs to compute in parallel (we assume the data for
//! each one is consecutive in memory). The "forward" argument
//! specifies whether to do the FFT (true) or IFFT (false), although
//! note that we do not include the factor of 1/N (the user should
//! do this if required). The iterators factor_begin and factor_end
//! point to the beginning and end (i.e. one past the last element)
//! of an array of small factors of N (typically prime factors).
//! See the comments below this code for the detailed equations
//! of the recursion.
template<typename Real>
void ComplexFftRecursive (Real *data, int nffts, int N,
const int *factor_begin,
const int *factor_end, bool forward,
Vector<Real> *tmp_vec) {
if (factor_begin == factor_end) {
KALDI_ASSERT(N == 1);
return;
}
{ // an optimization: compute in smaller blocks.
// this block of code could be removed and it would still work.
MatrixIndexT size_perblock = N * 2 * sizeof(Real);
if (nffts > 1 && size_perblock*nffts > KALDI_COMPLEXFFT_BLOCKSIZE) { // can break it up...
// Break up into multiple blocks. This is an optimization. We make
// no progress on the FFT when we do this.
int block_skip = KALDI_COMPLEXFFT_BLOCKSIZE / size_perblock; // n blocks per call
if (block_skip == 0) block_skip = 1;
if (block_skip < nffts) {
int blocks_left = nffts;
while (blocks_left > 0) {
int skip_now = std::min(blocks_left, block_skip);
ComplexFftRecursive(data, skip_now, N, factor_begin, factor_end, forward, tmp_vec);
blocks_left -= skip_now;
data += skip_now * N*2;
}
return;
} // else do the actual algorithm.
} // else do the actual algorithm.
}
int P = *factor_begin;
KALDI_ASSERT(P > 1);
int Q = N / P;
if (P > 1 && Q > 1) { // Do the rearrangement. C.f. eq. (8) below. Transform
// (a) to (b).
Real *data_thisblock = data;
if (tmp_vec->Dim() < (MatrixIndexT)N) tmp_vec->Resize(N);
Real *data_tmp = tmp_vec->Data();
for (int thisfft = 0; thisfft < nffts; thisfft++, data_thisblock+=N*2) {
for (int offset = 0; offset < 2; offset++) { // 0 == real, 1 == im.
for (int p = 0; p < P; p++) {
for (int q = 0; q < Q; q++) {
int aidx = q*P + p, bidx = p*Q + q;
data_tmp[bidx] = data_thisblock[2*aidx+offset];
}
}
for (int n = 0;n < P*Q;n++) data_thisblock[2*n+offset] = data_tmp[n];
}
}
}
{ // Recurse.
ComplexFftRecursive(data, nffts*P, Q, factor_begin+1, factor_end, forward, tmp_vec);
}
int exp_sign = (forward ? -1 : 1);
Real rootN_re, rootN_im; // Nth root of unity.
ComplexImExp(static_cast<Real>(exp_sign * M_2PI / N), &rootN_re, &rootN_im);
Real rootP_re, rootP_im; // Pth root of unity.
ComplexImExp(static_cast<Real>(exp_sign * M_2PI / P), &rootP_re, &rootP_im);
{ // Do the multiplication
// could avoid a bunch of complex multiplies by moving the loop over data_thisblock
// inside.
if (tmp_vec->Dim() < (MatrixIndexT)(P*2)) tmp_vec->Resize(P*2);
Real *temp_a = tmp_vec->Data();
Real *data_thisblock = data, *data_end = data+(N*2*nffts);
for (; data_thisblock != data_end; data_thisblock += N*2) { // for each separate fft.
Real qd_re = 1.0, qd_im = 0.0; // 1^(q'/N)
for (int qd = 0; qd < Q; qd++) {
Real pdQ_qd_re = qd_re, pdQ_qd_im = qd_im; // 1^((p'Q+q') / N) == 1^((p'/P) + (q'/N))
// Initialize to q'/N, corresponding to p' == 0.
for (int pd = 0; pd < P; pd++) { // pd == p'
{ // This is the p = 0 case of the loop below [an optimization].
temp_a[pd*2] = data_thisblock[qd*2];
temp_a[pd*2 + 1] = data_thisblock[qd*2 + 1];
}
{ // This is the p = 1 case of the loop below [an optimization]
// **** MOST OF THE TIME (>60% I think) gets spent here. ***
ComplexAddProduct(pdQ_qd_re, pdQ_qd_im,
data_thisblock[(qd+Q)*2], data_thisblock[(qd+Q)*2 + 1],
&(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
}
if (P > 2) {
Real p_pdQ_qd_re = pdQ_qd_re, p_pdQ_qd_im = pdQ_qd_im; // 1^(p(p'Q+q')/N)
for (int p = 2; p < P; p++) {
ComplexMul(pdQ_qd_re, pdQ_qd_im, &p_pdQ_qd_re, &p_pdQ_qd_im); // p_pdQ_qd *= pdQ_qd.
int data_idx = p*Q + qd;
ComplexAddProduct(p_pdQ_qd_re, p_pdQ_qd_im,
data_thisblock[data_idx*2], data_thisblock[data_idx*2 + 1],
&(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
}
}
if (pd != P-1)
ComplexMul(rootP_re, rootP_im, &pdQ_qd_re, &pdQ_qd_im); // pdQ_qd *= (rootP == 1^{1/P})
// (using 1/P == Q/N)
}
for (int pd = 0; pd < P; pd++) {
data_thisblock[(pd*Q + qd)*2] = temp_a[pd*2];
data_thisblock[(pd*Q + qd)*2 + 1] = temp_a[pd*2 + 1];
}
ComplexMul(rootN_re, rootN_im, &qd_re, &qd_im); // qd *= rootN.
}
}
}
}
/* Equations for ComplexFftRecursive.
We consider here one of the "nffts" separate ffts; it's just a question of
doing them all in parallel. We also write all equations in terms of
complex math (the conversion to real arithmetic is not hard, and anyway
takes place inside function calls).
Let the input (i.e. "data" at start) be a_n, n = 0..N-1, and
the output (Fourier transform) be d_k, k = 0..N-1. We use these letters because
there will be two intermediate variables b and c.
We want to compute:
d_k = \sum_n a_n 1^(kn/N) (1)
where we use 1^x as shorthand for exp(-2pi i x) for the forward algorithm
and exp(2pi i x) for the backward one.
We factorize N = P Q (P small, Q usually large).
With p = 0..P-1 and q = 0..Q-1, and also p'=0..P-1 and q'=0..Q-1, we let:
k == p'Q + q' (2)
n == qP + p (3)
That is, we let p, q, p', q' range over these indices and observe that this way we
can cover all n, k. Expanding (1) using (2) and (3), we can write:
d_k = \sum_{p, q} a_n 1^((p'Q+q')(qP+p)/N)
= \sum_{p, q} a_n 1^(p'pQ/N) 1^(q'qP/N) 1^(q'p/N) (4)
using 1^(PQ/N) = 1 to get rid of the terms with PQ in them. Rearranging (4),
d_k = \sum_p 1^(p'pQ/N) 1^(q'p/N) \sum_q 1^(q'qP/N) a_n (5)
The point here is to separate the index q. Now we can expand out the remaining
instances of k and n using (2) and (3):
d_(p'Q+q') = \sum_p 1^(p'pQ/N) 1^(q'p/N) \sum_q 1^(q'qP/N) a_(qP+p) (6)
The expression \sum_q varies with the indices p and q'. Let us define
C_{p, q'} = \sum_q 1^(q'qP/N) a_(qP+p) (7)
Here, C_{p, q'}, viewed as a sequence in q', is just the DFT of the points
a_(qP+p) for q = 1..Q-1. These points are not consecutive in memory though,
they jump by P each time. Let us define b as a rearranged version of a,
so that
b_(pQ+q) = a_(qP+p) (8)
How can we do this rearrangement? (Pseudocode for it is given further below.)
We can rearrange (7) to be written in terms of the b's, using (8), so that
C_{p, q'} = \sum_q 1^(q'q (P/N)) b_(pQ+q) (9)
Here, the sequence of C_{p, q'} over q'=0..Q-1, is just the DFT of the sequence
of b_(pQ) .. b_(p(Q+1)-1). Let's arrange the C_{p, q'} in a single array in
memory in the same way as the b's, i.e. we define
c_(pQ+q') == C_{p, q'}. (10)
Note that we could have written (10) with q in place of q', as there is only
one index of type q present, but q' is just a more natural variable name to use
since we use q' elsewhere to subscript c and C.
Rewriting (9), we have:
c_(pQ+q') = \sum_q 1^(q'q (P/N)) b_(pQ+q) (11)
which is the DFT computed by the recursive call to this function [after computing
the b's by rearranging the a's]. From the c's we want to compute the d's.
Taking (6), substituting in the sum (7), and using (10) to write it as an array,
we have:
d_(p'Q+q') = \sum_p 1^(p'pQ/N) 1^(q'p/N) c_(pQ+q') (12)
This sum is independent for different values of q'. Note that d overwrites c
in memory. We compute this in a direct way, using a little array of size P to
store the computed d values for one value of q' (we reuse the array for each value
of q').
So the overall picture is this:
We get a call to compute DFT on size N.
- If N == 1 we return (nothing to do).
- We factor N = P Q (typically, P is small).
- Using (8), we rearrange the data in memory so that we have b not a in memory
(this is the block "do the rearrangement").
The pseudocode for this is as follows. For simplicity we use a temporary array.
for p = 0..P-1
for q = 0..Q-1
bidx = pQ + q
aidx = qP + p
tmp[bidx] = data[aidx].
end
end
data <-- tmp
The reason this accomplishes (8) is that bidx = pQ+q and aidx = qP+p each
range over the entire set of indices 0..N-1, so every element of a gets copied
to its new position exactly once; an in-place version without the temporary
array would swap pairs instead, and would need a guard such as "if aidx > bidx"
to make sure each swap happens only once rather than twice.
- We do the DFT on the smaller block size to compute c from b (this eq eq. (11)).
Note that this is actually multiple DFTs, one for each value of p, but this
goes to the "nffts" argument of the function call, which we have ignored up to now.
- We compute eq. (12) via a loop, as follows:
allocate temporary array e of size P.
For q' = 0..Q-1:
for p' = 0..P-1:
set sum to zero [this will go in e[p']]
for p = 0..P-1:
sum += 1^(p'pQ/N) 1^(q'p/N) c_(pQ+q')
end
e[p'] = sum
end
for p' = 0..P-1:
d_(p'Q+q') = e[p']
end
end
delete temporary array e
*/
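/* A concrete instance of the rearrangement (8), for N = 6 with P = 2, Q = 3
   (so b_(pQ+q) = a_(qP+p)):
     b_0 = a_0, b_1 = a_2, b_2 = a_4   [p = 0: the a's with index = 0 mod P]
     b_3 = a_1, b_4 = a_3, b_5 = a_5   [p = 1: the a's with index = 1 mod P]
   i.e. each length-Q sub-DFT in (11) acts on the a's that share a residue
   p modulo P. */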
// This is the outer-layer calling code for ComplexFftRecursive.
// It factorizes the dimension and then calls the FFT routine.
template<typename Real> void ComplexFft(VectorBase<Real> *v, bool forward, Vector<Real> *tmp_in) {
KALDI_ASSERT(v != NULL);
if (v->Dim()<=1) return;
KALDI_ASSERT(v->Dim() % 2 == 0); // complex input.
int N = v->Dim() / 2;
std::vector<int> factors;
Factorize(N, &factors);
int *factor_beg = NULL;
if (factors.size() > 0)
factor_beg = &(factors[0]);
Vector<Real> tmp; // allocated in ComplexFftRecursive.
ComplexFftRecursive(v->Data(), 1, N, factor_beg, factor_beg+factors.size(), forward, (tmp_in?tmp_in:&tmp));
}
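// Round-trip example (illustrative sketch; assumes the usual kaldi Vector
// API). The vector stores N complex points as interleaved (re, im) pairs, and
// the inverse transform does not divide by N, so we rescale by hand:
//
//   Vector<float> v(8);            // 4 complex points
//   v.SetRandn();
//   Vector<float> orig(v);
//   ComplexFft(&v, true);          // forward
//   ComplexFft(&v, false);         // inverse, unnormalized
//   v.Scale(0.25);                 // divide by N == 4; now v ~= orig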
//! Inefficient version of Fourier transform, for testing purposes.
template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward) {
KALDI_ASSERT(v != NULL);
MatrixIndexT N = v->Dim();
KALDI_ASSERT(N%2 == 0);
if (N == 0) return;
Vector<Real> vtmp(N*2); // store as complex.
if (forward) {
for (MatrixIndexT i = 0; i < N; i++) vtmp(i*2) = (*v)(i);
ComplexFft(&vtmp, forward); // this is already tested so we can use this.
v->CopyFromVec( vtmp.Range(0, N) );
(*v)(1) = vtmp(N); // Copy the N/2'th fourier component, which is real,
// to the imaginary part of the 1st complex output.
} else {
// reverse the transformation above to get the complex spectrum.
vtmp(0) = (*v)(0); // copy F_0 which is real
vtmp(N) = (*v)(1); // copy F_{N/2} which is real
for (MatrixIndexT i = 1; i < N/2; i++) {
// Copy i'th to i'th fourier component
vtmp(2*i) = (*v)(2*i);
vtmp(2*i+1) = (*v)(2*i+1);
// Copy i'th to N-i'th, conjugated.
vtmp(2*(N-i)) = (*v)(2*i);
vtmp(2*(N-i)+1) = -(*v)(2*i+1);
}
ComplexFft(&vtmp, forward); // actually backward since forward == false
// Copy back real part. Complex part should be zero.
for (MatrixIndexT i = 0; i < N; i++)
(*v)(i) = vtmp(i*2);
}
}
template void RealFftInefficient (VectorBase<float> *v, bool forward);
template void RealFftInefficient (VectorBase<double> *v, bool forward);
template
void ComplexFft(VectorBase<float> *v, bool forward, Vector<float> *tmp_in);
template
void ComplexFft(VectorBase<double> *v, bool forward, Vector<double> *tmp_in);
// See the long comment below for the math behind this.
template<typename Real> void RealFft (VectorBase<Real> *v, bool forward) {
KALDI_ASSERT(v != NULL);
MatrixIndexT N = v->Dim(), N2 = N/2;
KALDI_ASSERT(N%2 == 0);
if (N == 0) return;
if (forward) ComplexFft(v, true);
Real *data = v->Data();
Real rootN_re, rootN_im; // exp(-2pi/N), forward; exp(2pi/N), backward
int forward_sign = forward ? -1 : 1;
ComplexImExp(static_cast<Real>(M_2PI/N *forward_sign), &rootN_re, &rootN_im);
Real kN_re = -forward_sign, kN_im = 0.0; // exp(-2pik/N), forward; -exp(2pik/N), backward
// kN starts out as 1.0 for forward algorithm but -1.0 for backward.
for (MatrixIndexT k = 1; 2*k <= N2; k++) {
ComplexMul(rootN_re, rootN_im, &kN_re, &kN_im);
Real Ck_re, Ck_im, Dk_re, Dk_im;
// C_k = 1/2 (B_k + B_{N/2 - k}^*) :
Ck_re = 0.5 * (data[2*k] + data[N - 2*k]);
Ck_im = 0.5 * (data[2*k + 1] - data[N - 2*k + 1]);
// re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})):
Dk_re = 0.5 * (data[2*k + 1] + data[N - 2*k + 1]);
// im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k}))
Dk_im =-0.5 * (data[2*k] - data[N - 2*k]);
// A_k = C_k + 1^(k/N) D_k:
data[2*k] = Ck_re; // A_k <-- C_k
data[2*k+1] = Ck_im;
// now A_k += D_k 1^(k/N)
ComplexAddProduct(Dk_re, Dk_im, kN_re, kN_im, &(data[2*k]), &(data[2*k+1]));
MatrixIndexT kdash = N2 - k;
if (kdash != k) {
// Next we handle the index k' = N/2 - k. This is necessary
// to do now, to avoid invalidating data that we will later need.
// The quantities C_{k'} and D_{k'} are just the conjugates of C_k
// and D_k, so the equations are simple modifications of the above,
// replacing Ck_im and Dk_im with their negatives.
data[2*kdash] = Ck_re; // A_k' <-- C_k'
data[2*kdash+1] = -Ck_im;
// now A_k' += D_k' 1^(k'/N)
// We use 1^(k'/N) = 1^((N/2 - k) / N) = 1^(1/2) 1^(-k/N) = -1 * (1^(k/N))^*
// so it's the same as 1^(k/N) but with the real part negated.
ComplexAddProduct(Dk_re, -Dk_im, -kN_re, kN_im, &(data[2*kdash]), &(data[2*kdash+1]));
}
}
{ // Now handle k = 0.
// In simple terms: after the complex fft, data[0] becomes the sum of real
// parts input[0], input[2]... and data[1] becomes the sum of imaginary
// parts input[1], input[3]...
// "zeroth" [A_0] is just the sum of input[0]+input[1]+input[2]..
// and "n2th" [A_{N/2}] is input[0]-input[1]+input[2]... .
Real zeroth = data[0] + data[1],
n2th = data[0] - data[1];
data[0] = zeroth;
data[1] = n2th;
if (!forward) {
data[0] /= 2;
data[1] /= 2;
}
}
if (!forward) {
ComplexFft(v, false);
v->Scale(2.0); // This is so we get a factor of N increase, rather than N/2 which we would
// otherwise get from [ComplexFft, forward] + [ComplexFft, backward] in dimension N/2.
// It's for consistency with our normal FFT conventions.
}
}
template void RealFft (VectorBase<float> *v, bool forward);
template void RealFft (VectorBase<double> *v, bool forward);
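// Packing and round-trip example (illustrative sketch). After a forward
// RealFft of N real points, data[0] holds A_0 and data[1] holds A_{N/2}
// (both real); A_1 .. A_{N/2-1} follow as (re, im) pairs. Inversion gains a
// factor of N, per the Scale(2.0) comment inside RealFft above:
//
//   Vector<double> v(8);
//   v.SetRandn();
//   Vector<double> orig(v);
//   RealFft(&v, true);
//   RealFft(&v, false);
//   v.Scale(1.0 / 8);              // divide by N == 8; now v ~= orig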
/* Notes for real FFTs.
We are using the same convention as above, 1^x to mean exp(-2\pi i x) for the forward transform.
Actually, in a slight abuse of notation, we use this meaning for 1^x in both the forward and
backward cases because it's more convenient in this section.
Suppose we have real data a[0...N-1], with N even, and want to compute its Fourier transform.
We can make do with the first N/2 points of the transform, since the remaining ones are complex
conjugates of the first. We want to compute:
for k = 0...N/2-1,
A_k = \sum_{n = 0}^{N-1} a_n 1^(kn/N) (1)
We treat a[0..N-1] as a complex sequence of length N/2, i.e. a sequence b[0..N/2 - 1].
Viewed as sequences of length N/2, we have:
b = c + i d,
where c = a_0, a_2 ... and d = a_1, a_3 ...
We can recover the length-N/2 Fourier transforms of c and d by doing FT on b and
then doing the equations below. Derivation is marked by (*) in a comment below (search
for it). Let B, C, D be the FTs.
We have
C_k = 1/2 (B_k + B_{N/2 - k}^*) (z0)
D_k =-1/2i (B_k - B_{N/2 - k}^*) (z1)
so: re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})) (z2)
im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k})) (z3)
To recover the FT A from C and D, we write, rearranging (1):
A_k = \sum_{n = 0, 2, ..., N-2} a_n 1^(kn/N)
+\sum_{n = 1, 3, ..., N-1} a_n 1^(kn/N)
= \sum_{n = 0, 1, ..., N/2-1} a_{2n} 1^(2kn/N) + a_{2n+1} 1^(2kn/N) 1^(k/N)
= \sum_{n = 0, 1, ..., N/2-1} c_n 1^(2kn/N) + d_n 1^(2kn/N) 1^(k/N)
A_k = C_k + 1^(k/N) D_k (a0)
This equation is valid for k = 0...N/2-1, which is the range of the sequences B_k and
C_k. We don't use it for k = 0, which is a special case considered below. For
1 < k < N/2, it's convenient to consider the pair k, k', where k' = N/2 - k.
Remember that C_k' = C_k^* and D_k' = D_k^* [where * is conjugation]. Also,
1^(N/2 / N) = -1. So we have:
A_k' = C_k^* - 1^(k/N) D_k^* (a0b)
We do (a0) and (a0b) together.
By symmetry this gives us the Fourier components for N/2+1, ... N, if we want
them. However, it doesn't give us the value for exactly k = N/2. For k = 0 and k = N/2, it
is easiest to argue directly about the meaning of the A_k, B_k and C_k in terms of
sums of points.
A_0 and A_{N/2} are both real, with A_0=\sum_n a_n, and A_{N/2} an alternating sum
A_{N/2} = a_0 - a_1 + a_2 ...
It's easy to show that
A_0 = C_0 + D_0 (a1)
A_{N/2} = C_0 - D_0. (a2)
Since C_0 and D_0 are both real, C_0 is the real part of B_0 and D_0 is the
imaginary part.
*REVERSING THE PROCESS*
Next we want to reverse this process. We just need to work out C_k and D_k from the
sequence A_k. Then we do the inverse complex fft and we get back where we started.
For 0 and N/2, working from (a1) and (a2) above, we can see that:
C_0 = 1/2 (A_0 + A_{N/2}) (y0)
D_0 = 1/2 (A_0 - A_{N/2}) (y1)
and we use
B_0 = C_0 + i D_0
to get the zeroth complex coefficient of B. This is exactly the same as the forward process
except with an extra factor of 1/2.
Consider equations (a0) and (a0b). We want to work out C_k and D_k from A_k and A_k'. Remember
k' = N/2 - k.
Write down
A_k = C_k + 1^(k/N) D_k (copying a0)
A_k'^* = C_k - 1^(k/N) D_k (conjugate of a0b)
So
C_k = 0.5 (A_k + A_k'^*) (p0)
D_k = 1^(-k/N) . 0.5 (A_k - A_k'^*) (p1)
Next, we want to compute B_k and B_k' from C_k and D_k. C.f. (z0)..(z3), and remember
that k' = N/2-k. We can see
that
B_k = C_k + i D_k (p2)
B_k' = C_k - i D_k (p3)
We would like to make the equations (p0) ... (p3) look like the forward equations (z0), (z1),
(a0) and (a0b) so we can reuse the code. Define E_k = -i 1^(k/N) D_k. Then write down (p0)..(p3).
We have
C_k = 0.5 (A_k + A_k'^*) (p0')
E_k = -0.5 i (A_k - A_k'^*) (p1')
B_k = C_k - 1^(-k/N) E_k (p2')
B_k' = C_k + 1^(-k/N) E_k (p3')
So these are exactly the same as (z0), (z1), (a0), (a0b) except replacing 1^(k/N) with
-1^(-k/N). Remember that we defined 1^x above to be exp(-2pi i x), so the signs here
might be opposite to what you see in the code.
MODIFICATION: we need to take care of a factor of two. The complex FFT we implemented
does not divide by N in the reverse case. So upon inversion we get larger by N/2.
However, this is not consistent with normal FFT conventions where you get a factor of N.
For this reason we multiply by two after the process described above.
*/
/*
(*) [this token is referred to in a comment above].
Notes for separating 2 real transforms from one complex one. Note that the
letters here (A, B, C and N) are all distinct from the same letters used in the
place where this comment is used.
Suppose we
have two sequences a_n and b_n, n = 0..N-1. We combine them into a complex
number,
c_n = a_n + i b_n.
Then we take the fourier transform to get
C_k = \sum_{n = 0}^{N-1} c_n 1^(n/N) .
Then we use symmetry. Define A_k and B_k as the DFTs of a and b.
We use A_k = A_{N-k}^*, and B_k = B_{N-k}^*, since a and b are real. Using
C_k = A_k + i B_k,
C_{N-k} = A_k^* + i B_k^*
= A_k^* - (i B_k)^*
So:
A_k = 1/2 (C_k + C_{N-k}^*)
i B_k = 1/2 (C_k - C_{N-k}^*)
-> B_k =-1/2i (C_k - C_{N-k}^*)
-> re(B_k) = 1/2 (im(C_k) + im(C_{N-k}))
im(B_k) =-1/2 (re(C_k) - re(C_{N-k}))
*/
template<typename Real> void ComputeDctMatrix(Matrix<Real> *M) {
//KALDI_ASSERT(M->NumRows() == M->NumCols());
MatrixIndexT K = M->NumRows();
MatrixIndexT N = M->NumCols();
KALDI_ASSERT(K > 0);
KALDI_ASSERT(N > 0);
Real normalizer = std::sqrt(1.0 / static_cast<Real>(N)); // normalizer for
// X_0.
for (MatrixIndexT j = 0; j < N; j++) (*M)(0, j) = normalizer;
normalizer = std::sqrt(2.0 / static_cast<Real>(N)); // normalizer for other
// elements.
for (MatrixIndexT k = 1; k < K; k++)
for (MatrixIndexT n = 0; n < N; n++)
(*M)(k, n) = normalizer
* std::cos( static_cast<double>(M_PI)/N * (n + 0.5) * k );
}
template void ComputeDctMatrix(Matrix<float> *M);
template void ComputeDctMatrix(Matrix<double> *M);
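// Usage example (illustrative sketch; the sizes and names here are made up,
// but follow the typical feature-extraction pattern of keeping the first few
// DCT coefficients of a mel-energy frame):
//
//   Matrix<float> dct(13, 40);     // keep 13 of 40 coefficients
//   ComputeDctMatrix(&dct);
//   Vector<float> mel(40), ceps(13);
//   mel.SetRandn();
//   ceps.AddMatVec(1.0, dct, kNoTrans, mel, 0.0);   // ceps = dct * mel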
template<typename Real>
void ComputePca(const MatrixBase<Real> &X,
MatrixBase<Real> *U,
MatrixBase<Real> *A,
bool print_eigs,
bool exact) {
// Note that some of these matrices may be transposed w.r.t. the
// way it's most natural to describe them in math... it's the rows
// of X and U that correspond to the (data-points, basis elements).
MatrixIndexT N = X.NumRows(), D = X.NumCols();
// N = #points, D = feature dim.
KALDI_ASSERT(U != NULL && U->NumCols() == D);
MatrixIndexT G = U->NumRows(); // # of retained basis elements.
KALDI_ASSERT(A == NULL || (A->NumRows() == N && A->NumCols() == G));
KALDI_ASSERT(G <= N && G <= D);
if (D < N) { // Do conventional PCA.
SpMatrix<Real> Msp(D); // Matrix of outer products.
Msp.AddMat2(1.0, X, kTrans, 0.0); // M <-- X^T X
Matrix<Real> Utmp;
Vector<Real> l;
if (exact) {
Utmp.Resize(D, D);
l.Resize(D);
//Matrix<Real> M(Msp);
//M.DestructiveSvd(&l, &Utmp, NULL);
Msp.Eig(&l, &Utmp);
} else {
Utmp.Resize(D, G);
l.Resize(G);
Msp.TopEigs(&l, &Utmp);
}
SortSvd(&l, &Utmp);
for (MatrixIndexT g = 0; g < G; g++)
U->Row(g).CopyColFromMat(Utmp, g);
if (print_eigs)
KALDI_LOG << (exact ? "" : "Retained ")
<< "PCA eigenvalues are " << l;
if (A != NULL)
A->AddMatMat(1.0, X, kNoTrans, *U, kTrans, 0.0);
} else { // Do inner-product PCA.
SpMatrix<Real> Nsp(N); // Matrix of inner products.
Nsp.AddMat2(1.0, X, kNoTrans, 0.0); // M <-- X X^T
Matrix<Real> Vtmp;
Vector<Real> l;
if (exact) {
Vtmp.Resize(N, N);
l.Resize(N);
Matrix<Real> Nmat(Nsp);
Nmat.DestructiveSvd(&l, &Vtmp, NULL);
} else {
Vtmp.Resize(N, G);
l.Resize(G);
Nsp.TopEigs(&l, &Vtmp);
}
MatrixIndexT num_zeroed = 0;
for (MatrixIndexT g = 0; g < G; g++) {
if (l(g) < 0.0) {
KALDI_WARN << "In PCA, setting element " << l(g) << " to zero.";
l(g) = 0.0;
num_zeroed++;
}
}
SortSvd(&l, &Vtmp); // Make sure zero elements are last, this
// is necessary for Orthogonalize() to work properly later.
Vtmp.Transpose(); // So eigenvalues are the rows.
for (MatrixIndexT g = 0; g < G; g++) {
Real sqrtlg = sqrt(l(g));
if (l(g) != 0.0) {
U->Row(g).AddMatVec(1.0 / sqrtlg, X, kTrans, Vtmp.Row(g), 0.0);
} else {
U->Row(g).SetZero();
(*U)(g, g) = 1.0; // arbitrary direction. Will later orthogonalize.
}
if (A != NULL)
for (MatrixIndexT n = 0; n < N; n++)
(*A)(n, g) = sqrtlg * Vtmp(g, n);
}
// Now orthogonalize. This is mainly useful in
// case there were zero eigenvalues, but we do it
// for all of them.
U->OrthogonalizeRows();
if (print_eigs)
KALDI_LOG << "(inner-product) PCA eigenvalues are " << l;
}
}
template
void ComputePca(const MatrixBase<float> &X,
MatrixBase<float> *U,
MatrixBase<float> *A,
bool print_eigs,
bool exact);
template
void ComputePca(const MatrixBase<double> &X,
MatrixBase<double> *U,
MatrixBase<double> *A,
bool print_eigs,
bool exact);
// Added by Dan, Feb. 13 2012.
// This function does, elementwise: *plus += max(0, alpha * a b^T),
// *minus += max(0, -(alpha * a b^T)).
template<typename Real>
void AddOuterProductPlusMinus(Real alpha,
const VectorBase<Real> &a,
const VectorBase<Real> &b,
MatrixBase<Real> *plus,
MatrixBase<Real> *minus) {
KALDI_ASSERT(a.Dim() == plus->NumRows() && b.Dim() == plus->NumCols()
&& a.Dim() == minus->NumRows() && b.Dim() == minus->NumCols());
int32 nrows = a.Dim(), ncols = b.Dim(), pskip = plus->Stride() - ncols,
mskip = minus->Stride() - ncols;
const Real *adata = a.Data(), *bdata = b.Data();
Real *plusdata = plus->Data(), *minusdata = minus->Data();
for (int32 i = 0; i < nrows; i++) {
const Real *btmp = bdata;
Real multiple = alpha * *adata;
if (multiple > 0.0) {
for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
if (*btmp > 0.0) *plusdata += multiple * *btmp;
else *minusdata -= multiple * *btmp;
}
} else {
for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
if (*btmp < 0.0) *plusdata += multiple * *btmp;
else *minusdata -= multiple * *btmp;
}
}
plusdata += pskip;
minusdata += mskip;
adata++;
}
}
// Instantiate template
template
void AddOuterProductPlusMinus<float>(float alpha,
const VectorBase<float> &a,
const VectorBase<float> &b,
MatrixBase<float> *plus,
MatrixBase<float> *minus);
template
void AddOuterProductPlusMinus<double>(double alpha,
const VectorBase<double> &a,
const VectorBase<double> &b,
MatrixBase<double> *plus,
MatrixBase<double> *minus);
} // end namespace kaldi
// matrix/matrix-functions.h
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Jan Silovsky;
// Yanmin Qian; 1991 Henrique (Rico) Malvar (*)
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from his book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_H_
#define KALDI_MATRIX_MATRIX_FUNCTIONS_H_
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// @addtogroup matrix_funcs_misc
/// @{
/** The function ComplexFft does an Fft on the vector argument v.
v is a vector of even dimension, interpreted for both input
and output as a vector of complex numbers i.e.
\f[ v = ( re_0, im_0, re_1, im_1, ... ) \f]
If "forward == true" this routine does the Discrete Fourier Transform
(DFT), i.e.:
\f[ vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[n] exp( -2pi i m n / N ) \f]
If "backward" it does the Inverse Discrete Fourier Transform (IDFT)
*WITHOUT THE FACTOR 1/N*,
i.e.:
\f[ vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[n] exp( 2pi i m n / N ) \f]
[note the sign difference on the 2 pi for the backward one.]
Note that this is the definition of the FT given in most texts, but
it differs from the Numerical Recipes version in which the forward
and backward algorithms are flipped.
Note that you would have to multiply by 1/N after the IDFT to get
back to where you started from. We don't do this because
in some contexts, the transform is made symmetric by scaling by
1/sqrt(N) in both passes. The user can do this themselves.
See also SplitRadixComplexFft, declared in srfft.h, which is more efficient
but only works if the length of the input is a power of 2.
*/
template<typename Real> void ComplexFft (VectorBase<Real> *v, bool forward, Vector<Real> *tmp_work = NULL);
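// A minimal usage sketch of ComplexFft (illustrative only): a forward/inverse
// round trip over 4 complex points stored in 8 reals.
//
//   Vector<BaseFloat> v(8);
//   v.SetRandn();
//   Vector<BaseFloat> orig(v);
//   ComplexFft(&v, true);    // forward DFT, in place
//   ComplexFft(&v, false);   // inverse DFT, *without* the 1/N factor
//   v.Scale(1.0 / 4);        // divide by N = 4; v now approximates orig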
/// ComplexFt is the same as ComplexFft but it implements the Fourier
/// transform in an inefficient way. It is mainly included for testing purposes.
/// See comment for ComplexFft to describe the input and outputs and what it does.
template<typename Real> void ComplexFt (const VectorBase<Real> &in,
VectorBase<Real> *out, bool forward);
/// RealFft is a fourier transform of real inputs. Internally it uses
/// ComplexFft. The input dimension N must be even. If forward == true,
/// it transforms from a sequence of N real points to its complex fourier
/// transform; otherwise it goes in the reverse direction. If you call it
/// in the forward and then reverse direction and multiply by 1.0/N, you
/// will get back the original data.
/// The interpretation of the complex-FFT data is as follows: the array
/// is a sequence of complex numbers C_n of length N/2 with (real, im) format,
/// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...]; since
/// im0 and im_{N/2} are zero for real input, real_{N/2} is packed into the
/// otherwise-unused second slot.
/// See also SplitRadixRealFft, declared in srfft.h, which is more efficient
/// but only works if the length of the input is a power of 2.
template<typename Real> void RealFft (VectorBase<Real> *v, bool forward);
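// Usage sketch (illustrative only): a round trip on N = 8 real points,
// scaled by 1/N to recover the input.
//
//   Vector<BaseFloat> v(8);
//   v.SetRandn();
//   Vector<BaseFloat> orig(v);
//   RealFft(&v, true);    // to the packed complex format described above
//   RealFft(&v, false);   // back to the time domain, scaled up by N
//   v.Scale(1.0 / 8);     // v now approximates orig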
/// RealFftInefficient has the same input and output format as RealFft above,
/// but it is an inefficient implementation included for testing purposes.
template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward);
/// ComputeDctMatrix computes a matrix corresponding to the DCT, such that
/// M * v equals the DCT of vector v. M does not have to be square: a K x N
/// matrix with K <= N gives the first K coefficients of the N-point DCT.
/// This is the normalized DCT, corresponding to the following equations
/// (matching the normalizers in the code), where x is the signal and X is
/// the DCT:
/// X_0 = sqrt(1/N) \sum_{n = 0}^{N-1} x_n
/// X_k = sqrt(2/N) \sum_{n = 0}^{N-1} x_n cos( \pi/N (n + 1/2) k )
/// When M is square it is orthogonal, so its transpose is its inverse and
/// gives the inverse DCT.
/// Caution: with the (n + 1/2) k convention above this is the type-II DCT;
/// its transpose implements the type-III DCT, which is generally known as
/// the "inverse DCT". This was probably set up this way for HTK
/// compatibility. We don't change it because it was this way from the start
/// and changing it would affect the feature generation.
template<typename Real> void ComputeDctMatrix(Matrix<Real> *M);
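// Usage sketch (illustrative only): keep the first 13 DCT coefficients of a
// 40-dimensional input, as in MFCC extraction.
//
//   Matrix<BaseFloat> dct(13, 40);   // K = 13 retained rows, N = 40
//   ComputeDctMatrix(&dct);
//   Vector<BaseFloat> x(40), X(13);
//   x.SetRandn();
//   X.AddMatVec(1.0, dct, kNoTrans, x, 0.0);   // X = dct * x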
/// ComplexMul implements, inline, the complex multiplication b *= a.
template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
Real *b_re, Real *b_im);
/// ComplexAddProduct implements, inline, the complex operation c += (a * b).
template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
const Real &b_re, const Real &b_im,
Real *c_re, Real *c_im);
/// ComplexImExp implements a <-- exp(i x), inline.
template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im);
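// Together these inline helpers cover the complex arithmetic used by the FFT
// routines. Sketch (illustrative only): rotate the complex number
// (b_re, b_im) by an angle x.
//
//   Real a_re, a_im;
//   ComplexImExp(x, &a_re, &a_im);          // a = exp(i x)
//   ComplexMul(a_re, a_im, &b_re, &b_im);   // b *= a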
/**
ComputePca does a PCA computation, using either outer products
or inner products, whichever is more efficient. Let D be
the dimension of the data points, N be the number of data
points, and G be the PCA dimension we want to retain. We assume
G <= N and G <= D.
@param X [in] An N x D matrix. Each row of X is a point x_i.
@param U [out] A G x D matrix. Each row of U is a basis element u_i.
@param A [out] An N x G matrix, or NULL. Each row of A is the set of
coefficients of a point x_i in the basis, so A(i, g) is the coefficient
of u_g in x_i.
@param print_eigs [in] If true, prints out diagnostic information about the
eigenvalues.
@param exact [in] If true, does the exact computation; if false, does
a much faster (but almost exact) computation based on the Lanczos
method.
*/
template<typename Real>
void ComputePca(const MatrixBase<Real> &X,
MatrixBase<Real> *U,
MatrixBase<Real> *A,
bool print_eigs = false,
bool exact = true);
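// Usage sketch (illustrative only): project 1000 points of dimension 40 onto
// their top 10 principal directions.
//
//   Matrix<BaseFloat> X(1000, 40);       // one data point per row
//   X.SetRandn();
//   Matrix<BaseFloat> U(10, 40);         // basis elements, one per row
//   Matrix<BaseFloat> A(1000, 10);       // per-point coefficients
//   ComputePca(X, &U, &A, true, false);  // print eigs; fast (Lanczos) path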
// This function does, elementwise: *plus += max(0, alpha * a b^T),
// *minus += max(0, -(alpha * a b^T)).
template<typename Real>
void AddOuterProductPlusMinus(Real alpha,
const VectorBase<Real> &a,
const VectorBase<Real> &b,
MatrixBase<Real> *plus,
MatrixBase<Real> *minus);
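// So (*plus - *minus) accumulates exactly alpha * a b^T, with the positive
// and negative parts kept separate. E.g. (illustrative) with alpha = 1,
// a = [1, -1] and b = [2], the call adds 2 to (*plus)(0, 0) and 2 to
// (*minus)(1, 0).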
template<typename Real1, typename Real2>
inline void AssertSameDim(const MatrixBase<Real1> &mat1, const MatrixBase<Real2> &mat2) {
KALDI_ASSERT(mat1.NumRows() == mat2.NumRows()
&& mat1.NumCols() == mat2.NumCols());
}
/// @} end of "addtogroup matrix_funcs_misc"
} // end namespace kaldi
#include "matrix/matrix-functions-inl.h"
#endif
// matrix/matrix-lib.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
// Include everything from this directory.
// These files include other stuff that we need.
#ifndef KALDI_MATRIX_MATRIX_LIB_H_
#define KALDI_MATRIX_MATRIX_LIB_H_
#include "base/kaldi-common.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/sp-matrix.h"
#include "matrix/tp-matrix.h"
#include "matrix/matrix-functions.h"
#include "matrix/srfft.h"
#include "matrix/compressed-matrix.h"
#include "matrix/sparse-matrix.h"
#include "matrix/optimization.h"
#endif
// matrix/optimization.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from his book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#include <algorithm>
#include "matrix/optimization.h"
#include "matrix/sp-matrix.h"
namespace kaldi {
// Below, N&W refers to Nocedal and Wright, "Numerical Optimization", 2nd Ed.
template<typename Real>
OptimizeLbfgs<Real>::OptimizeLbfgs(const VectorBase<Real> &x,
const LbfgsOptions &opts):
opts_(opts), k_(0), computation_state_(kBeforeStep), H_was_set_(false) {
KALDI_ASSERT(opts.m > 0); // number of stored (s, y) vector pairs.
MatrixIndexT dim = x.Dim();
KALDI_ASSERT(dim > 0);
x_ = x; // this is the value of x_k
new_x_ = x; // this is where we'll evaluate the function next.
deriv_.Resize(dim);
temp_.Resize(dim);
data_.Resize(2 * opts.m, dim);
rho_.Resize(opts.m);
// Just set f_ to some invalid value, as we haven't yet set it.
f_ = (opts.minimize ? 1 : -1 ) * std::numeric_limits<Real>::infinity();
best_f_ = f_;
best_x_ = x_;
}
template<typename Real>
Real OptimizeLbfgs<Real>::RecentStepLength() const {
size_t n = step_lengths_.size();
if (n == 0) return std::numeric_limits<Real>::infinity();
else {
if (n >= 2 && step_lengths_[n-1] == 0.0 && step_lengths_[n-2] == 0.0)
return 0.0; // two zeros in a row means repeated restarts, which is
// a loop. Short-circuit this by returning zero.
Real avg = 0.0;
for (size_t i = 0; i < n; i++)
avg += step_lengths_[i] / n;
return avg;
}
}
template<typename Real>
void OptimizeLbfgs<Real>::ComputeHifNeeded(const VectorBase<Real> &gradient) {
if (k_ == 0) {
if (H_.Dim() == 0) {
// H was never set up. Set it up for the first time.
Real learning_rate;
if (opts_.first_step_length > 0.0) { // this takes
// precedence over first_step_learning_rate, if set.
// We are setting up H for the first time.
Real gradient_length = gradient.Norm(2.0);
learning_rate = (gradient_length > 0.0 ?
opts_.first_step_length / gradient_length :
1.0);
} else if (opts_.first_step_impr > 0.0) {
Real gradient_length = gradient.Norm(2.0);
learning_rate = (gradient_length > 0.0 ?
opts_.first_step_impr / (gradient_length * gradient_length) :
1.0);
} else {
learning_rate = opts_.first_step_learning_rate;
}
H_.Resize(x_.Dim());
KALDI_ASSERT(learning_rate > 0.0);
H_.Set(opts_.minimize ? learning_rate : -learning_rate);
}
} else { // k_ > 0
if (!H_was_set_) { // The user never specified an approximate
// diagonal inverse Hessian.
// Set it using formula 7.20: H_k^{(0)} = \gamma_k I, where
// \gamma_k = s_{k-1}^T y_{k-1} / y_{k-1}^T y_{k-1}
SubVector<Real> y_km1 = Y(k_-1);
double gamma_k = VecVec(S(k_-1), y_km1) / VecVec(y_km1, y_km1);
if (KALDI_ISNAN(gamma_k) || KALDI_ISINF(gamma_k)) {
KALDI_WARN << "NaN encountered in L-BFGS (already converged?)";
gamma_k = (opts_.minimize ? 1.0 : -1.0);
}
H_.Set(gamma_k);
}
}
}
// This represents the first 2 lines of Algorithm 7.5 (N&W), which
// in fact is mostly a call to Algorithm 7.4.
// Note: this is valid whether we are minimizing or maximizing.
template<typename Real>
void OptimizeLbfgs<Real>::ComputeNewDirection(Real function_value,
const VectorBase<Real> &gradient) {
KALDI_ASSERT(computation_state_ == kBeforeStep);
SignedMatrixIndexT m = M(), k = k_;
ComputeHifNeeded(gradient);
// The rest of this is computing p_k <-- - H_k \nabla f_k using Algorithm
// 7.4 of N&W.
Vector<Real> &q(deriv_), &r(new_x_); // Use deriv_ as a temporary place to put
// q, and new_x_ as a temporary place to put r.
// The if-statement below is just to get rid of spurious warnings from
// valgrind about memcpy source and destination overlap, since sometimes q and
// gradient are the same variable.
if (&q != &gradient)
q.CopyFromVec(gradient); // q <-- \nabla f_k.
Vector<Real> alpha(m);
// for i = k - 1, k - 2, ... k - m
for (SignedMatrixIndexT i = k - 1;
i >= std::max(k - m, static_cast<SignedMatrixIndexT>(0));
i--) {
alpha(i % m) = rho_(i % m) * VecVec(S(i), q); // \alpha_i <-- \rho_i s_i^T q.
q.AddVec(-alpha(i % m), Y(i)); // q <-- q - \alpha_i y_i
}
r.SetZero();
r.AddVecVec(1.0, H_, q, 0.0); // r <-- H_k^{(0)} q.
// for k = k - m, k - m + 1, ... , k - 1
for (SignedMatrixIndexT i = std::max(k - m, static_cast<SignedMatrixIndexT>(0));
i < k;
i++) {
Real beta = rho_(i % m) * VecVec(Y(i), r); // \beta <-- \rho_i y_i^T r
r.AddVec(alpha(i % m) - beta, S(i)); // r <-- r + s_i (\alpha_i - \beta)
}
{ // TEST. Note, -r will be the direction.
Real dot = VecVec(gradient, r);
if ((opts_.minimize && dot < 0) || (!opts_.minimize && dot > 0))
KALDI_WARN << "Step direction has the wrong sign! Routine will fail.";
}
// Now we're out of Alg. 7.4 and back into Alg. 7.5.
// Alg. 7.4 returned r (using new_x_ as the location), and with \alpha_k = 1
// as the initial guess, we're setting x_{k+1} = x_k + \alpha_k p_k, with
// p_k = -r [hence the statement new_x_.Scale(-1.0)], and \alpha_k = 1.
// This is the first place we'll get the user to evaluate the function;
// any backtracking (or acceptance of that step) occurs inside StepSizeIteration.
// We're still within iteration k; we haven't yet finalized the step size.
new_x_.Scale(-1.0);
new_x_.AddVec(1.0, x_);
if (&deriv_ != &gradient)
deriv_.CopyFromVec(gradient);
f_ = function_value;
d_ = opts_.d;
num_wolfe_i_failures_ = 0;
num_wolfe_ii_failures_ = 0;
last_failure_type_ = kNone;
computation_state_ = kWithinStep;
}
template<typename Real>
bool OptimizeLbfgs<Real>::AcceptStep(Real function_value,
const VectorBase<Real> &gradient) {
// Save s_k = x_{k+1} - x_{k}, and y_k = \nabla f_{k+1} - \nabla f_k.
SubVector<Real> s = S(k_), y = Y(k_);
s.CopyFromVec(new_x_);
s.AddVec(-1.0, x_); // s = new_x_ - x_.
y.CopyFromVec(gradient);
y.AddVec(-1.0, deriv_); // y = gradient - deriv_.
// Warning: there is a division in the next line. This could
// generate inf or nan, but this wouldn't necessarily be an error
// at this point because for zero step size or derivative we should
// terminate the iterations. But this is up to the calling code.
Real prod = VecVec(y, s);
rho_(k_ % opts_.m) = 1.0 / prod;
Real len = s.Norm(2.0);
if ((opts_.minimize && prod <= 1.0e-20) || (!opts_.minimize && prod >= -1.0e-20)
|| len == 0.0)
return false; // This will force restart.
KALDI_VLOG(3) << "Accepted step; length was " << len
<< ", prod was " << prod;
RecordStepLength(len);
// store x_{k+1} and the function value f_{k+1}.
x_.CopyFromVec(new_x_);
f_ = function_value;
k_++;
return true; // We successfully accepted the step.
}
template<typename Real>
void OptimizeLbfgs<Real>::RecordStepLength(Real s) {
step_lengths_.push_back(s);
if (step_lengths_.size() > static_cast<size_t>(opts_.avg_step_length))
step_lengths_.erase(step_lengths_.begin(), step_lengths_.begin() + 1);
}
template<typename Real>
void OptimizeLbfgs<Real>::Restart(const VectorBase<Real> &x,
Real f,
const VectorBase<Real> &gradient) {
// Note: we will consider restarting (the transition of x_ -> x)
// as a step, even if it has zero step size. This is necessary in
// order for convergence to be detected.
{
Vector<Real> &diff(temp_);
diff.CopyFromVec(x);
diff.AddVec(-1.0, x_);
RecordStepLength(diff.Norm(2.0));
}
k_ = 0; // Restart the iterations! [But note that the Hessian,
// whatever it was, stays as before.]
if (&x_ != &x)
x_.CopyFromVec(x);
new_x_.CopyFromVec(x);
f_ = f;
computation_state_ = kBeforeStep;
ComputeNewDirection(f, gradient);
}
template<typename Real>
void OptimizeLbfgs<Real>::StepSizeIteration(Real function_value,
const VectorBase<Real> &gradient) {
KALDI_VLOG(3) << "In step size iteration, function value changed "
<< f_ << " to " << function_value;
// We're in some part of the backtracking, and the user is providing
// the objective function value and gradient.
// We're checking two conditions: Wolfe i) [the Armijo rule] and
// Wolfe ii).
// The Armijo rule (when minimizing) is:
// f(x_k + \alpha_k p_k) <= f(x_k) + c_1 \alpha_k p_k^T \nabla f(x_k), where
// \nabla means the derivative.
// Below, "temp" is the RHS of this equation, where (\alpha_k p_k) equals
// (new_x_ - x_); we don't store \alpha or p_k separately, they are implicit
// as the difference new_x_ - x_.
// Below, pf is \alpha_k p_k^T \nabla f(x_k).
Real pf = VecVec(new_x_, deriv_) - VecVec(x_, deriv_);
Real temp = f_ + opts_.c1 * pf;
bool wolfe_i_ok;
if (opts_.minimize) wolfe_i_ok = (function_value <= temp);
else wolfe_i_ok = (function_value >= temp);
// Wolfe condition ii) can be written as:
// p_k^T \nabla f(x_k + \alpha_k p_k) >= c_2 p_k^T \nabla f(x_k)
// p2f equals \alpha_k p_k^T \nabla f(x_k + \alpha_k p_k), where
// (\alpha_k p_k^T) is (new_x_ - x_).
// Note that in our version of Wolfe condition (ii) we have an extra
// factor alpha, which doesn't affect anything.
Real p2f = VecVec(new_x_, gradient) - VecVec(x_, gradient);
//eps = (sizeof(Real) == 4 ? 1.0e-05 : 1.0e-10) *
//(std::abs(p2f) + std::abs(pf));
bool wolfe_ii_ok;
if (opts_.minimize) wolfe_ii_ok = (p2f >= opts_.c2 * pf);
else wolfe_ii_ok = (p2f <= opts_.c2 * pf);
enum { kDecrease, kNoChange } d_action; // What to do with d_: leave it alone,
// or take the square root.
enum { kAccept, kDecreaseStep, kIncreaseStep, kRestart } iteration_action;
// What we'll do in the overall iteration: accept this value, DecreaseStep
// (reduce the step size), IncreaseStep (increase the step size), or kRestart
// (set k back to zero). Generally, when we can't get both conditions to be
// true within a reasonable period of time, it makes sense to restart, because
// we've probably almost converged and run into numerical issues; from here
// we'd just produce NaNs. Restarting is a safe thing to do and the outer
// code will quickly detect convergence.
d_action = kNoChange; // the default.
if (wolfe_i_ok && wolfe_ii_ok) {
iteration_action = kAccept;
d_action = kNoChange; // actually doesn't matter, it'll get reset.
} else if (!wolfe_i_ok) {
// If wolfe i) [the Armijo rule] failed then we went too far (or are
// meeting numerical problems).
if (last_failure_type_ == kWolfeII) { // Last time we failed it was Wolfe ii).
// When we switch between them we decrease d.
d_action = kDecrease;
}
iteration_action = kDecreaseStep;
last_failure_type_ = kWolfeI;
num_wolfe_i_failures_++;
} else if (!wolfe_ii_ok) {
// Curvature condition failed -> we did not go far enough.
if (last_failure_type_ == kWolfeI) // switching between wolfe i and ii failures->
d_action = kDecrease; // decrease value of d.
iteration_action = kIncreaseStep;
last_failure_type_ = kWolfeII;
num_wolfe_ii_failures_++;
}
// Test whether we've been switching too many times between wolfe i) and ii)
// failures, or overall have an excessive number of failures. We just give up
// and restart L-BFGS. Probably we've almost converged.
if (num_wolfe_i_failures_ + num_wolfe_ii_failures_ >
opts_.max_line_search_iters) {
KALDI_VLOG(2) << "Too many steps in line search -> restarting.";
iteration_action = kRestart;
}
if (d_action == kDecrease)
d_ = std::sqrt(d_);
KALDI_VLOG(3) << "d = " << d_ << ", iter = " << k_ << ", action = "
<< (iteration_action == kAccept ? "accept" :
(iteration_action == kDecreaseStep ? "decrease" :
(iteration_action == kIncreaseStep ? "increase" :
"reject")));
// Note: even if iteration_action != Restart at this point,
// some code below may set it to Restart.
if (iteration_action == kAccept) {
if (AcceptStep(function_value, gradient)) { // If we did
// not detect a problem while accepting the step..
computation_state_ = kBeforeStep;
ComputeNewDirection(function_value, gradient);
} else {
KALDI_VLOG(2) << "Restarting L-BFGS computation; problem found while "
<< "accepting step.";
iteration_action = kRestart; // We'll have to restart now.
}
}
if (iteration_action == kDecreaseStep || iteration_action == kIncreaseStep) {
Real scale = (iteration_action == kDecreaseStep ? 1.0 / d_ : d_);
temp_.CopyFromVec(new_x_);
new_x_.Scale(scale);
new_x_.AddVec(1.0 - scale, x_);
if (new_x_.ApproxEqual(temp_, 0.0)) {
// Value of new_x_ did not change at all --> we must restart.
KALDI_VLOG(3) << "Value of x did not change, when taking step; "
<< "will restart computation.";
iteration_action = kRestart;
}
if (new_x_.ApproxEqual(temp_, 1.0e-08) &&
std::abs(f_ - function_value) < 1.0e-08 *
std::abs(f_) && iteration_action == kDecreaseStep) {
// This is common and due to roundoff.
KALDI_VLOG(3) << "We appear to be backtracking while we are extremely "
<< "close to the old value; restarting.";
iteration_action = kRestart;
}
if (iteration_action == kDecreaseStep) {
num_wolfe_i_failures_++;
last_failure_type_ = kWolfeI;
} else {
num_wolfe_ii_failures_++;
last_failure_type_ = kWolfeII;
}
}
if (iteration_action == kRestart) {
// We want to restart the computation. If the objf at new_x_ is
// better than it was at x_, we'll start at new_x_, else at x_.
bool use_newx;
if (opts_.minimize) use_newx = (function_value < f_);
else use_newx = (function_value > f_);
KALDI_VLOG(3) << "Restarting computation.";
if (use_newx) Restart(new_x_, function_value, gradient);
else Restart(x_, f_, deriv_);
}
}
template<typename Real>
void OptimizeLbfgs<Real>::DoStep(Real function_value,
const VectorBase<Real> &gradient) {
if (opts_.minimize ? function_value < best_f_ : function_value > best_f_) {
best_f_ = function_value;
best_x_.CopyFromVec(new_x_);
}
if (computation_state_ == kBeforeStep)
ComputeNewDirection(function_value, gradient);
else // kWithinStep{1,2,3}
StepSizeIteration(function_value, gradient);
}
template<typename Real>
void OptimizeLbfgs<Real>::DoStep(Real function_value,
const VectorBase<Real> &gradient,
const VectorBase<Real> &diag_approx_2nd_deriv) {
if (opts_.minimize ? function_value < best_f_ : function_value > best_f_) {
best_f_ = function_value;
best_x_.CopyFromVec(new_x_);
}
if (opts_.minimize) {
KALDI_ASSERT(diag_approx_2nd_deriv.Min() > 0.0);
} else {
KALDI_ASSERT(diag_approx_2nd_deriv.Max() < 0.0);
}
H_was_set_ = true;
H_.CopyFromVec(diag_approx_2nd_deriv);
H_.InvertElements();
DoStep(function_value, gradient);
}
template<typename Real>
const VectorBase<Real>&
OptimizeLbfgs<Real>::GetValue(Real *objf_value) const {
if (objf_value != NULL) *objf_value = best_f_;
return best_x_;
}
// To compute the alpha, we are minimizing f(x) = 0.5 x^T A x - x^T b along
// the direction p_k... consider alpha.
// The gradient is d/dx f(x) = A x - b = r.
// Notation based on Sec. 5.1 of Nocedal and Wright
// Computation based on Alg. 5.2 of Nocedal and Wright (Pg. 112)
// Notation (replicated for convenience):
// To solve Ax=b for x
// k : current iteration
// x_k : estimate of x (at iteration k)
// r_k : residual ( r_k \eqdef A x_k - b )
// \alpha_k : step size
// p_k : A-conjugate direction
// \beta_k : coefficient used in A-conjugate direction computation for next
// iteration
//
// Algo. LinearCG(A,b,x_0)
// ========================
// r_0 = Ax_0 - b
// p_0 = -r_0
// k = 0
//
// while r_k != 0
// \alpha_k = (r_k^T r_k) / (p_k^T A p_k)
// x_{k+1} = x_k + \alpha_k p_k;
// r_{k+1} = r_k + \alpha_k A p_k
// \beta_{k+1} = \frac{r_{k+1}^T r_{k+1}}{r_k^T r_k}
// p_{k+1} = -r_{k+1} + \beta_{k+1} p_k
// k = k + 1
// end
template<class Real>
int32 LinearCgd(const LinearCgdOptions &opts,
const SpMatrix<Real> &A,
const VectorBase<Real> &b,
VectorBase<Real> *x) {
// Initialize the variables
//
int32 M = A.NumCols();
Matrix<Real> storage(4, M);
SubVector<Real> r(storage, 0), p(storage, 1), Ap(storage, 2), x_orig(storage, 3);
p.CopyFromVec(b);
p.AddSpVec(-1.0, A, *x, 1.0); // p_0 = b - A x_0
r.AddVec(-1.0, p); // r_0 = - p_0
x_orig.CopyFromVec(*x); // in case of failure.
Real r_cur_norm_sq = VecVec(r, r),
r_initial_norm_sq = r_cur_norm_sq,
r_recompute_norm_sq = r_cur_norm_sq;
KALDI_VLOG(5) << "In linear CG: initial norm-square of residual = "
<< r_initial_norm_sq;
KALDI_ASSERT(opts.recompute_residual_factor <= 1.0);
Real max_error_sq = std::max<Real>(opts.max_error * opts.max_error,
std::numeric_limits<Real>::min()),
residual_factor = opts.recompute_residual_factor *
opts.recompute_residual_factor,
inv_residual_factor = 1.0 / residual_factor;
// Note: although from a mathematical point of view the method should converge
// after M iterations, in practice (due to roundoff) it does not always
// converge to good precision after that many iterations so we let the maximum
// be M + 5 instead.
int32 k = 0;
for (; k < M + 5 && k != opts.max_iters; k++) {
// Note: we'll break from this loop if we converge sooner due to
// max_error.
Ap.AddSpVec(1.0, A, p, 0.0); // Ap = A p
// Below is how the code used to look.
// // next line: \alpha_k = (r_k^T r_k) / (p_k^T A p_k)
// Real alpha = r_cur_norm_sq / VecVec(p, Ap);
//
// We changed r_cur_norm_sq below to -VecVec(p, r). Although this is
// slightly less efficient, it seems to make the algorithm dramatically more
// robust. Note that -p^T r is the mathematically more natural quantity to
// use here, that corresponds to minimizing along that direction... r^T r is
// recommended in Nocedal and Wright only as a kind of optimization as it is
// supposed to be the same as -p^T r and we already have it computed.
Real alpha = -VecVec(p, r) / VecVec(p, Ap);
// next line: x_{k+1} = x_k + \alpha_k p_k;
x->AddVec(alpha, p);
// next line: r_{k+1} = r_k + \alpha_k A p_k
r.AddVec(alpha, Ap);
Real r_next_norm_sq = VecVec(r, r);
if (r_next_norm_sq < residual_factor * r_recompute_norm_sq ||
r_next_norm_sq > inv_residual_factor * r_recompute_norm_sq) {
// Recompute the residual from scratch if the residual norm has decreased
// a lot; this costs an extra matrix-vector multiply, but helps keep the
// residual accurate.
// Also do the same if the residual norm has increased a lot since
// the last time we recomputed... this shouldn't happen often, but
// it can indicate bad stuff is happening.
// r_{k+1} = A x_{k+1} - b
r.AddSpVec(1.0, A, *x, 0.0);
r.AddVec(-1.0, b);
r_next_norm_sq = VecVec(r, r);
r_recompute_norm_sq = r_next_norm_sq;
KALDI_VLOG(5) << "In linear CG: recomputing residual.";
}
KALDI_VLOG(5) << "In linear CG: k = " << k
<< ", r_next_norm_sq = " << r_next_norm_sq;
// Check if converged.
if (r_next_norm_sq <= max_error_sq)
break;
// next line: \beta_{k+1} = \frac{r_{k+1}^T r_{k+1}}{r_k^T r_k}
Real beta_next = r_next_norm_sq / r_cur_norm_sq;
// next lines: p_{k+1} = -r_{k+1} + \beta_{k+1} p_k
p.Scale(beta_next);
p.AddVec(-1.0, r);
r_cur_norm_sq = r_next_norm_sq;
}
// note: the first element of the && is only there to save compute.
// the residual r is A x - b, and r_cur_norm_sq and r_initial_norm_sq are
// of the form r * r, so it's clear that b * b has the right dimension to
// compare with the residual.
if (r_cur_norm_sq > r_initial_norm_sq &&
r_cur_norm_sq > r_initial_norm_sq + 1.0e-10 * VecVec(b, b)) {
KALDI_WARN << "Doing linear CGD in dimension " << A.NumRows() << ", after " << k
<< " iterations the squared residual has got worse, "
<< r_cur_norm_sq << " > " << r_initial_norm_sq
<< ". Will do an exact optimization.";
SolverOptions opts("called-from-linearCGD");
x->CopyFromVec(x_orig);
SolveQuadraticProblem(A, b, opts, x);
}
return k;
}
// Instantiate the class for float and double.
template
class OptimizeLbfgs<float>;
template
class OptimizeLbfgs<double>;
template
int32 LinearCgd<float>(const LinearCgdOptions &opts,
const SpMatrix<float> &A, const VectorBase<float> &b,
VectorBase<float> *x);
template
int32 LinearCgd<double>(const LinearCgdOptions &opts,
const SpMatrix<double> &A, const VectorBase<double> &b,
VectorBase<double> *x);
} // end namespace kaldi
// matrix/optimization.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// (*) incorporates, with permission, FFT code from his book
// "Signal Processing with Lapped Transforms", Artech, 1992.
#ifndef KALDI_MATRIX_OPTIMIZATION_H_
#define KALDI_MATRIX_OPTIMIZATION_H_
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// @addtogroup matrix_optimization
/// @{
struct LinearCgdOptions {
int32 max_iters; // Maximum number of iters (if >= 0).
BaseFloat max_error; // Maximum 2-norm of the residual A x - b (convergence
// test)
// Every time the residual 2-norm decreases by this recompute_residual_factor
// since the last time it was computed from scratch, recompute it from
// scratch. This helps to keep the computed residual accurate even in the
// presence of roundoff.
BaseFloat recompute_residual_factor;
LinearCgdOptions(): max_iters(-1),
max_error(0.0),
recompute_residual_factor(0.01) { }
};
/*
This function uses linear conjugate gradient descent to approximately solve
the system A x = b. The value of x at entry corresponds to the initial guess
of x. The algorithm continues until the number of iterations reaches
b.Dim() + 5 (the few extra iterations absorb roundoff), or until the 2-norm
of (A x - b) is <= max_error, or until the number of iterations equals
max_iters, whichever happens sooner. It is a requirement that A be positive
definite.
It returns the number of iterations that were actually executed (this is
useful for testing purposes).
*/
template<typename Real>
int32 LinearCgd(const LinearCgdOptions &opts,
const SpMatrix<Real> &A, const VectorBase<Real> &b,
VectorBase<Real> *x);
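// Usage sketch (illustrative only): solve A x = b for a small positive
// definite system, starting from x = 0.
//
//   Matrix<BaseFloat> B(20, 10);
//   B.SetRandn();
//   SpMatrix<BaseFloat> A(10);
//   A.AddMat2(1.0, B, kTrans, 0.0);  // A = B^T B, positive semi-definite
//   A.AddToDiag(0.1);                // make it strictly positive definite
//   Vector<BaseFloat> b(10), x(10);
//   b.SetRandn();
//   int32 iters = LinearCgd(LinearCgdOptions(), A, b, &x);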
/**
This is an implementation of L-BFGS. It pushes responsibility for
determining when to stop, onto the user. There is no call-back here:
everything is done via calls to the class itself (see the example in
matrix-lib-test.cc). This does not implement constrained L-BFGS, but it will
handle constrained problems correctly as long as the function approaches
+infinity (or -infinity for maximization problems) when it gets close to the
bound of the constraint. In these types of problems, you just let the
function value be +infinity for minimization problems, or -infinity for
maximization problems, outside these bounds.
*/
struct LbfgsOptions {
bool minimize; // if true, we're minimizing, else maximizing.
int m; // m is the number of stored vectors L-BFGS keeps.
float first_step_learning_rate; // The very first step of L-BFGS is
// like gradient descent. If you want to configure the size of that step,
// you can do it using this variable.
float first_step_length; // If this variable is >0.0, it overrides
// first_step_learning_rate; on the first step we choose an approximate
// Hessian that is the multiple of the identity that would generate this
// step-length, or 1.0 if the gradient is zero.
float first_step_impr; // If this variable is >0.0, it overrides
// first_step_learning_rate; on the first step we choose an approximate
// Hessian that is the multiple of the identity that would generate this
// amount of objective function improvement (assuming the "real" objf
// was linear).
float c1; // A constant in Armijo rule = Wolfe condition i)
float c2; // A constant in Wolfe condition ii)
float d; // An amount > 1.0 (default 2.0) that we initially multiply or
// divide the step length by, in the line search.
int max_line_search_iters; // after this many iters we restart L-BFGS.
int avg_step_length; // number of iters to avg step length over, in
// RecentStepLength().
LbfgsOptions (bool minimize = true):
minimize(minimize),
m(10),
first_step_learning_rate(1.0),
first_step_length(0.0),
first_step_impr(0.0),
c1(1.0e-04),
c2(0.9),
d(2.0),
max_line_search_iters(50),
avg_step_length(4) { }
};
template<typename Real>
class OptimizeLbfgs {
public:
/// Initializer takes the starting value of x.
OptimizeLbfgs(const VectorBase<Real> &x,
const LbfgsOptions &opts);
/// This returns the value of the variable x that has the best objective
/// function so far, and the corresponding objective function value if
/// requested. This would typically be called only at the end.
const VectorBase<Real>& GetValue(Real *objf_value = NULL) const;
/// This returns the value at which the function wants us
/// to compute the objective function and gradient.
const VectorBase<Real>& GetProposedValue() const { return new_x_; }
/// Returns the average magnitude of the last n steps (but not
/// more than the number we have stored). Before we have taken
/// any steps, returns +infinity. Note: if the most recent
/// step length was 0, it returns 0, regardless of the other
/// step lengths. This makes it suitable as a convergence test
/// (else we'd generate NaN's).
Real RecentStepLength() const;
/// The user calls this function to provide the class with the
/// function and gradient info at the point GetProposedValue().
/// If this point is outside the constraints you can set function_value
/// to {+infinity,-infinity} for {minimization,maximization} problems.
/// In this case the gradient, and also the second derivative (if you call
/// the second overloaded version of this function) will be ignored.
void DoStep(Real function_value,
const VectorBase<Real> &gradient);
/// The user can call this version of DoStep() if it is desired to set some
/// kind of approximate Hessian on this iteration. Note: it is a prerequisite
/// that diag_approx_2nd_deriv must be strictly positive (minimizing), or
/// negative (maximizing).
void DoStep(Real function_value,
const VectorBase<Real> &gradient,
const VectorBase<Real> &diag_approx_2nd_deriv);
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(OptimizeLbfgs);
// The following variable says what stage of the computation we're at.
// Refer to Algorithm 7.5 (L-BFGS) of Nocedal & Wright, "Numerical
// Optimization", 2nd edition.
// kBeforeStep means we're about to do
// "compute p_k <-- - H_k \nabla f_k" (i.e. Algorithm 7.4).
// kWithinStep means we're at some point within line search; note
// that line search is iterative so we can stay in this state more
// than one time on each iteration.
enum ComputationState {
kBeforeStep,
kWithinStep, // This means we're within the step-size computation, and
// have not yet done the 1st function evaluation.
};
inline MatrixIndexT Dim() { return x_.Dim(); }
inline MatrixIndexT M() { return opts_.m; }
SubVector<Real> Y(MatrixIndexT i) {
return SubVector<Real>(data_, (i % M()) * 2); // vector y_i
}
SubVector<Real> S(MatrixIndexT i) {
return SubVector<Real>(data_, (i % M()) * 2 + 1); // vector s_i
}
// The following are subroutines within DoStep():
bool AcceptStep(Real function_value,
const VectorBase<Real> &gradient);
void Restart(const VectorBase<Real> &x,
Real function_value,
const VectorBase<Real> &gradient);
void ComputeNewDirection(Real function_value,
const VectorBase<Real> &gradient);
void ComputeHifNeeded(const VectorBase<Real> &gradient);
void StepSizeIteration(Real function_value,
const VectorBase<Real> &gradient);
void RecordStepLength(Real s);
LbfgsOptions opts_;
SignedMatrixIndexT k_; // Iteration number, starts from zero. Gets set back to zero
// when we restart.
ComputationState computation_state_;
bool H_was_set_; // True if the user specified H_; if false,
// we'll use a heuristic to estimate it.
Vector<Real> x_; // current x.
Vector<Real> new_x_; // the x proposed in the line search.
Vector<Real> best_x_; // the x with the best objective function so far
// (either the same as x_ or something in the current line search.)
Vector<Real> deriv_; // The most recently evaluated derivative-- at x_k.
Vector<Real> temp_;
Real f_; // The function evaluated at x_k.
Real best_f_; // the best objective function so far.
Real d_; // a number d > 1.0, but during an iteration we may decrease this, when
// we switch between armijo and wolfe failures.
int num_wolfe_i_failures_; // the num times we decreased step size.
int num_wolfe_ii_failures_; // the num times we increased step size.
enum { kWolfeI, kWolfeII, kNone } last_failure_type_; // last type of step-search
// failure on this iter.
Vector<Real> H_; // Current inverse-Hessian estimate. May be computed by this class itself,
// or provided by the user via the second form of DoStep().
Matrix<Real> data_; // dimension (m*2) x dim. Even rows store
// gradients y_i, odd rows store steps s_i.
Vector<Real> rho_; // dimension m; rho_(i % m) = 1/(y_i^T s_i), Eq. 7.17.
std::vector<Real> step_lengths_; // The step sizes we took on the last
// (up to avg_step_length) iterations; these are not stored in a rotating
// buffer but are shifted by one each time (this is more convenient when we
// restart, as we keep this info past restarting).
};
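// Typical driving loop (a sketch, not part of the library; dim, max_iters,
// tol and ComputeObjfAndGradient are hypothetical placeholders for the
// user's problem):
//
//   Vector<BaseFloat> x0(dim);      // starting point
//   LbfgsOptions opts(true);        // minimize
//   OptimizeLbfgs<BaseFloat> lbfgs(x0, opts);
//   for (int32 iter = 0; iter < max_iters; iter++) {
//     Vector<BaseFloat> grad(dim);
//     BaseFloat f = ComputeObjfAndGradient(lbfgs.GetProposedValue(), &grad);
//     lbfgs.DoStep(f, grad);
//     if (lbfgs.RecentStepLength() < tol) break;  // converged
//   }
//   BaseFloat best_f;
//   const VectorBase<BaseFloat> &best_x = lbfgs.GetValue(&best_f);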
/// @}
} // end namespace kaldi
#endif
// matrix/packed-matrix.cc
// Copyright 2009-2012 Microsoft Corporation Saarland University
// Johns Hopkins University (Author: Daniel Povey);
// Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
/**
* @file packed-matrix.cc
*
* Implementation of specialized PackedMatrix template methods
*/
#include "matrix/cblas-wrappers.h"
#include "matrix/packed-matrix.h"
#include "matrix/kaldi-vector.h"
namespace kaldi {
template<typename Real>
void PackedMatrix<Real>::Scale(Real alpha) {
size_t nr = num_rows_,
sz = (nr * (nr + 1)) / 2;
cblas_Xscal(sz, alpha, data_, 1);
}
template<typename Real>
void PackedMatrix<Real>::AddPacked(const Real alpha, const PackedMatrix<Real> &rMa) {
KALDI_ASSERT(num_rows_ == rMa.NumRows());
size_t nr = num_rows_,
sz = (nr * (nr + 1)) / 2;
cblas_Xaxpy(sz, alpha, rMa.Data(), 1, data_, 1);
}
template<typename Real>
void PackedMatrix<Real>::SetRandn() {
Real *data = data_;
size_t dim = num_rows_, size = ((dim*(dim+1))/2);
for (size_t i = 0; i < size; i++)
data[i] = RandGauss();
}
template<typename Real>
inline void PackedMatrix<Real>::Init(MatrixIndexT r) {
if (r == 0) {
num_rows_ = 0;
data_ = 0;
return;
}
size_t size = ((static_cast<size_t>(r) * static_cast<size_t>(r + 1)) / 2);
if (static_cast<size_t>(static_cast<MatrixIndexT>(size)) != size) {
KALDI_WARN << "Allocating packed matrix whose full dimension does not fit "
<< "in MatrixIndexT: not all code is tested for this case.";
}
void *data; // aligned memory block
void *temp;
if ((data = KALDI_MEMALIGN(16, size * sizeof(Real), &temp)) != NULL) {
this->data_ = static_cast<Real *> (data);
this->num_rows_ = r;
} else {
throw std::bad_alloc();
}
}
template<typename Real>
void PackedMatrix<Real>::Swap(PackedMatrix<Real> *other) {
std::swap(data_, other->data_);
std::swap(num_rows_, other->num_rows_);
}
template<typename Real>
void PackedMatrix<Real>::Swap(Matrix<Real> *other) {
std::swap(data_, other->data_);
std::swap(num_rows_, other->num_rows_);
}
template<typename Real>
void PackedMatrix<Real>::Resize(MatrixIndexT r, MatrixResizeType resize_type) {
// the next block uses recursion to handle what we have to do if
// resize_type == kCopyData.
if (resize_type == kCopyData) {
if (this->data_ == NULL || r == 0) resize_type = kSetZero; // nothing to copy.
else if (this->num_rows_ == r) { return; } // nothing to do.
else {
// set tmp to a packed matrix of the desired size.
PackedMatrix<Real> tmp(r, kUndefined);
size_t r_min = std::min(r, num_rows_);
size_t mem_size_min = sizeof(Real) * (r_min*(r_min+1))/2,
mem_size_full = sizeof(Real) * (r*(r+1))/2;
// Copy the contents to tmp.
memcpy(tmp.data_, data_, mem_size_min);
char *ptr = static_cast<char*>(static_cast<void*>(tmp.data_));
// Set the rest of the contents of tmp to zero.
memset(static_cast<void*>(ptr + mem_size_min), 0, mem_size_full-mem_size_min);
tmp.Swap(this);
return;
}
}
if (data_ != NULL) Destroy();
Init(r);
if (resize_type == kSetZero) SetZero();
}
template<typename Real>
void PackedMatrix<Real>::AddToDiag(Real r) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr += r;
ptr += i;
}
}
template<typename Real>
void PackedMatrix<Real>::ScaleDiag(Real alpha) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr *= alpha;
ptr += i;
}
}
template<typename Real>
void PackedMatrix<Real>::SetDiag(Real alpha) {
Real *ptr = data_;
for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
*ptr = alpha;
ptr += i;
}
}
template<typename Real>
template<typename OtherReal>
void PackedMatrix<Real>::CopyFromPacked(const PackedMatrix<OtherReal> &orig) {
KALDI_ASSERT(NumRows() == orig.NumRows());
if (sizeof(Real) == sizeof(OtherReal)) {
memcpy(data_, orig.Data(), SizeInBytes());
} else {
Real *dst = data_;
const OtherReal *src = orig.Data();
size_t nr = NumRows(),
size = (nr * (nr + 1)) / 2;
for (size_t i = 0; i < size; i++, dst++, src++)
*dst = *src;
}
}
// template instantiations.
template
void PackedMatrix<float>::CopyFromPacked(const PackedMatrix<double> &orig);
template
void PackedMatrix<double>::CopyFromPacked(const PackedMatrix<float> &orig);
template
void PackedMatrix<double>::CopyFromPacked(const PackedMatrix<double> &orig);
template
void PackedMatrix<float>::CopyFromPacked(const PackedMatrix<float> &orig);
template<typename Real>
template<typename OtherReal>
void PackedMatrix<Real>::CopyFromVec(const SubVector<OtherReal> &vec) {
MatrixIndexT size = (NumRows()*(NumRows()+1)) / 2;
KALDI_ASSERT(vec.Dim() == size);
if (sizeof(Real) == sizeof(OtherReal)) {
memcpy(data_, vec.Data(), size * sizeof(Real));
} else {
Real *dst = data_;
const OtherReal *src = vec.Data();
for (MatrixIndexT i = 0; i < size; i++, dst++, src++)
*dst = *src;
}
}
// template instantiations.
template
void PackedMatrix<float>::CopyFromVec(const SubVector<double> &orig);
template
void PackedMatrix<double>::CopyFromVec(const SubVector<float> &orig);
template
void PackedMatrix<double>::CopyFromVec(const SubVector<double> &orig);
template
void PackedMatrix<float>::CopyFromVec(const SubVector<float> &orig);
template<typename Real>
void PackedMatrix<Real>::SetZero() {
memset(data_, 0, SizeInBytes());
}
template<typename Real>
void PackedMatrix<Real>::SetUnit() {
memset(data_, 0, SizeInBytes());
for (MatrixIndexT row = 0;row < num_rows_;row++)
(*this)(row, row) = 1.0;
}
template<typename Real>
Real PackedMatrix<Real>::Trace() const {
Real ans = 0.0;
for (MatrixIndexT row = 0;row < num_rows_;row++)
ans += (*this)(row, row);
return ans;
}
template<typename Real>
void PackedMatrix<Real>::Destroy() {
// we need to free the data block if it was defined
if (data_ != NULL) KALDI_MEMALIGN_FREE(data_);
data_ = NULL;
num_rows_ = 0;
}
template<typename Real>
void PackedMatrix<Real>::Write(std::ostream &os, bool binary) const {
if (!os.good()) {
KALDI_ERR << "Failed to write packed matrix to stream: stream not good";
}
int32 size = this->NumRows(); // make the size 32-bit on disk.
KALDI_ASSERT(this->NumRows() == (MatrixIndexT) size);
MatrixIndexT num_elems = ((size+1)*(MatrixIndexT)size)/2;
if(binary) {
std::string my_token = (sizeof(Real) == 4 ? "FP" : "DP");
WriteToken(os, binary, my_token);
WriteBasicType(os, binary, size);
// We don't use the built-in Kaldi write routines for the floats, as they are
// not efficient enough.
os.write((const char*) data_, sizeof(Real) * num_elems);
}
else {
if(size == 0)
os<<"[ ]\n";
else {
os<<"[\n";
MatrixIndexT i = 0;
for (int32 j = 0; j < size; j++) {
for (int32 k = 0; k < j + 1; k++) {
WriteBasicType(os, binary, data_[i++]);
}
os << ( (j==size-1)? "]\n" : "\n");
}
KALDI_ASSERT(i == num_elems);
}
}
if (os.fail()) {
KALDI_ERR << "Failed to write packed matrix to stream";
}
}
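// For example (illustrative), a 3 x 3 packed matrix holding 1..6 in its
// lower triangle is written in text mode roughly as:
//
//   [
//   1
//   2 3
//   4 5 6 ]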
// template<typename Real>
// void Save (std::ostream & os, const PackedMatrix<Real>& rM)
// {
// const Real* p_elem = rM.data();
// for (MatrixIndexT i = 0; i < rM.NumRows(); i++) {
// for (MatrixIndexT j = 0; j <= i ; j++) {
// os << *p_elem;
// p_elem++;
// if (j == i) {
// os << '\n';
// }
// else {
// os << ' ';
// }
// }
// }
// if (os.fail())
// KALDI_ERR("Failed to write packed matrix to stream");
// }
template<typename Real>
void PackedMatrix<Real>::Read(std::istream& is, bool binary, bool add) {
if (add) {
PackedMatrix<Real> tmp;
tmp.Read(is, binary, false); // read without adding.
if (this->NumRows() == 0) this->Resize(tmp.NumRows());
else {
if (this->NumRows() != tmp.NumRows()) {
if (tmp.NumRows() == 0) return; // do nothing in this case.
else KALDI_ERR << "PackedMatrix::Read, size mismatch " << this->NumRows()
<< " vs. " << tmp.NumRows();
}
}
this->AddPacked(1.0, tmp);
return;
} // now assume add == false.
std::ostringstream specific_error;
MatrixIndexT pos_at_start = is.tellg();
int peekval = Peek(is, binary);
const char *my_token = (sizeof(Real) == 4 ? "FP" : "DP");
const char *new_format_token = "[";
bool is_new_format = false;//added by hxu
char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F');
int32 size;
MatrixIndexT num_elems;
if (peekval == other_token_start) { // need to instantiate the other type to read it.
typedef typename OtherReal<Real>::Real OtherType; // if Real == float, OtherType == double, and vice versa.
PackedMatrix<OtherType> other(this->NumRows());
other.Read(is, binary, false); // add is false at this point.
this->Resize(other.NumRows());
this->CopyFromPacked(other);
return;
}
std::string token;
ReadToken(is, binary, &token);
if (token != my_token) {
if(token != new_format_token) {
specific_error << ": Expected token " << my_token << ", got " << token;
goto bad;
}
//new format it is
is_new_format = true;
}
if(!is_new_format) {
ReadBasicType(is, binary, &size); // throws on error.
if ((MatrixIndexT)size != this->NumRows()) {
KALDI_ASSERT(size>=0);
this->Resize(size);
}
num_elems = ((size+1)*(MatrixIndexT)size)/2;
if (!binary) {
for (MatrixIndexT i = 0; i < num_elems; i++) {
ReadBasicType(is, false, data_+i); // will throw on error.
}
} else {
if (num_elems)
is.read(reinterpret_cast<char*>(data_), sizeof(Real)*num_elems);
}
if (is.fail()) goto bad;
return;
}
else {
std::vector<Real> data;
while(1) {
int32 num_lines = 0;
int i = is.peek();
if (i == -1) { specific_error << "Got EOF while reading matrix data"; goto bad; }
else if (static_cast<char>(i) == ']') { // Finished reading matrix.
is.get(); // eat the "]".
i = is.peek();
if (static_cast<char>(i) == '\r') {
is.get(); // eat the '\r'
is.get(); // and the following '\n' (must eat what we wrote)
}
else if (static_cast<char>(i) == '\n') { is.get(); } // get \n (must eat what we wrote)
if (is.fail()) {
KALDI_WARN << "After end of matrix data, read error.";
// we got the data we needed, so just warn for this error.
}
//now process the data:
num_lines = int32(sqrt(data.size()*2));
KALDI_ASSERT(data.size() == num_lines*(num_lines+1)/2);
this->Resize(num_lines);
for(int32 i = 0; i < data.size(); i++) {
data_[i] = data[i];
}
return;
//std::cout<<"here!!!!!hxu!!!!!"<<std::endl;
}
else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number...
Real r;
is >> r;
if (is.fail()) {
specific_error << "Stream failure/EOF while reading matrix data.";
goto bad;
}
data.push_back(r);
}
else if (isspace(i)) {
is.get(); // eat the space and do nothing.
} else { // NaN or inf or error.
std::string str;
is >> str;
if (!KALDI_STRCASECMP(str.c_str(), "inf") ||
!KALDI_STRCASECMP(str.c_str(), "infinity")) {
data.push_back(std::numeric_limits<Real>::infinity());
KALDI_WARN << "Reading infinite value into matrix.";
} else if (!KALDI_STRCASECMP(str.c_str(), "nan")) {
data.push_back(std::numeric_limits<Real>::quiet_NaN());
KALDI_WARN << "Reading NaN value into matrix.";
} else {
specific_error << "Expecting numeric matrix data, got " << str;
goto bad;
}
}
}
}
bad:
KALDI_ERR << "Failed to read packed matrix from stream. " << specific_error.str()
<< " File position at start is "
<< pos_at_start << ", currently " << is.tellg();
}
// Instantiate PackedMatrix for float and double.
template
class PackedMatrix<float>;
template
class PackedMatrix<double>;
} // namespace kaldi
// matrix/packed-matrix.h
// Copyright 2009-2013 Ondrej Glembek; Lukas Burget; Microsoft Corporation;
// Saarland University; Yanmin Qian;
// Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_PACKED_MATRIX_H_
#define KALDI_MATRIX_PACKED_MATRIX_H_
#include "matrix/matrix-common.h"
#include <algorithm>
namespace kaldi {
/// \addtogroup matrix_funcs_io
// we need to declare the friend << operator here
template<typename Real>
std::ostream & operator <<(std::ostream & out, const PackedMatrix<Real>& M);
/// \addtogroup matrix_group
/// @{
/// @brief Packed matrix: base class for triangular and symmetric matrices.
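/// Storage is the lower triangle only, packed row-major with no gaps:
/// [ (0,0), (1,0), (1,1), (2,0), (2,1), (2,2), ... ], so element (r, c)
/// with c <= r lives at offset r * (r + 1) / 2 + c (see operator() below).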
template<typename Real> class PackedMatrix {
friend class CuPackedMatrix<Real>;
public:
//friend class CuPackedMatrix<Real>;
PackedMatrix() : data_(NULL), num_rows_(0) {}
explicit PackedMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero):
data_(NULL) { Resize(r, resize_type); }
explicit PackedMatrix(const PackedMatrix<Real> &orig) : data_(NULL) {
Resize(orig.num_rows_, kUndefined);
CopyFromPacked(orig);
}
template<typename OtherReal>
explicit PackedMatrix(const PackedMatrix<OtherReal> &orig) : data_(NULL) {
Resize(orig.NumRows(), kUndefined);
CopyFromPacked(orig);
}
void SetZero(); ///< Set to zero.
void SetUnit(); ///< Set to the unit matrix.
void SetRandn(); ///< Set to random values from a standard normal distribution.
Real Trace() const;
// Needed for inclusion in std::vector
PackedMatrix<Real> & operator =(const PackedMatrix<Real> &other) {
Resize(other.NumRows());
CopyFromPacked(other);
return *this;
}
~PackedMatrix() {
Destroy();
}
/// Set packed matrix to a specified size (can be zero).
/// The value of the new data depends on resize_type:
/// -if kSetZero, the new data will be zero
/// -if kUndefined, the new data will be undefined
/// -if kCopyData, the new data will be the same as the old data in any
/// shared positions, and zero elsewhere.
/// This function takes time proportional to the number of data elements.
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero);
void AddToDiag(const Real r); // Adds r to the diagonal.
void ScaleDiag(const Real alpha); // Scales the diagonal by alpha.
void SetDiag(const Real alpha); // Sets the diagonal to this value.
template<typename OtherReal>
void CopyFromPacked(const PackedMatrix<OtherReal> &orig);
/// CopyFromVec just interprets the vector as having the same layout
/// as the packed matrix. Must have the same dimension, i.e.
/// orig.Dim() == (NumRows()*(NumRows()+1)) / 2;
template<typename OtherReal>
void CopyFromVec(const SubVector<OtherReal> &orig);
Real* Data() { return data_; }
const Real* Data() const { return data_; }
inline MatrixIndexT NumRows() const { return num_rows_; }
inline MatrixIndexT NumCols() const { return num_rows_; }
size_t SizeInBytes() const {
size_t nr = static_cast<size_t>(num_rows_);
return ((nr * (nr+1)) / 2) * sizeof(Real);
}
//MatrixIndexT Stride() const { return stride_; }
// This code is duplicated in child classes to avoid extra levels of calls.
Real operator() (MatrixIndexT r, MatrixIndexT c) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_rows_)
&& c <= r);
return *(data_ + (r * (r + 1)) / 2 + c);
}
// This code is duplicated in child classes to avoid extra levels of calls.
Real &operator() (MatrixIndexT r, MatrixIndexT c) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_rows_)
&& c <= r);
return *(data_ + (r * (r + 1)) / 2 + c);
}
Real Max() const {
KALDI_ASSERT(num_rows_ > 0);
return * (std::max_element(data_, data_ + ((num_rows_*(num_rows_+1))/2) ));
}
Real Min() const {
KALDI_ASSERT(num_rows_ > 0);
return * (std::min_element(data_, data_ + ((num_rows_*(num_rows_+1))/2) ));
}
void Scale(Real c);
friend std::ostream & operator << <> (std::ostream & out,
const PackedMatrix<Real> &m);
  // Reads from C++ stream; if add == true, adds the data read to the
  // existing contents rather than replacing them.
  // Will throw exception on failure.
void Read(std::istream &in, bool binary, bool add = false);
void Write(std::ostream &out, bool binary) const;
void Destroy();
/// Swaps the contents of *this and *other. Shallow swap.
void Swap(PackedMatrix<Real> *other);
void Swap(Matrix<Real> *other);
protected:
// Will only be called from this class or derived classes.
void AddPacked(const Real alpha, const PackedMatrix<Real>& M);
Real *data_;
MatrixIndexT num_rows_;
//MatrixIndexT stride_;
private:
  /// Init assumes the current contents of the class are invalid (i.e. junk or
  /// have already been freed), and it sets the matrix to newly allocated memory
  /// with the specified dimension.  dim == 0 is acceptable.  The memory contents
/// pointed to by data_ will be undefined.
void Init(MatrixIndexT dim);
};
/// @} end "addtogroup matrix_group"
/// \addtogroup matrix_funcs_io
/// @{
template<typename Real>
std::ostream & operator << (std::ostream & os, const PackedMatrix<Real>& M) {
M.Write(os, false);
return os;
}
template<typename Real>
std::istream & operator >> (std::istream &is, PackedMatrix<Real> &M) {
M.Read(is, false);
return is;
}
/// @}
} // namespace kaldi
#endif
// matrix/qr.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <limits>
#include "matrix/sp-matrix.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/matrix-functions.h"
#include "matrix/cblas-wrappers.h"
// This file contains an implementation of the Symmetric QR Algorithm
// for the symmetric eigenvalue problem. See Golub and Van Loan,
// 3rd ed., Algorithm 8.3.3.
namespace kaldi {
/* This is from Golub and Van Loan 3rd ed., sec. 5.1.3,
p210.
   x is the input of dimension 'dim', v is the output of dimension
dim, and beta is a scalar. Note: we use zero-based
not one-based indexing. */
/*
// We are commenting out the function below ("House") because it's not
// needed, but we keep it just to show how we came up with HouseBackward.
template<typename Real>
void House(MatrixIndexT dim, const Real *x, Real *v, Real *beta) {
KALDI_ASSERT(dim > 0);
  // To avoid overflow, we first compute the max of |x_i| (or
  // one if that's zero), and we'll replace "x" by x/max|x_i|
// below. The householder vector is anyway invariant to
// the magnitude of x. We could actually avoid this extra loop
// over x if we wanted to be a bit smarter, but anyway this
// doesn't dominate the O(N) performance of the algorithm.
Real s; // s is a scale on x.
{
Real max_x = std::numeric_limits<Real>::min();
for (MatrixIndexT i = 0; i < dim; i++)
max_x = std::max(max_x, (x[i] < 0 ? -x[i] : x[i]));
if (max_x == 0.0) max_x = 1.0;
s = 1.0 / max_x;
}
Real sigma = 0.0;
v[0] = 1.0;
for (MatrixIndexT i = 1; i < dim; i++) {
sigma += (x[i]*s) * (x[i]*s);
v[i] = x[i]*s;
}
if (sigma == 0.0) *beta = 0.0;
else {
// When we say x1 = x[0], we reference the one-based indexing
// in Golub and Van Loan.
Real x1 = x[0] * s, mu = std::sqrt(x1*x1 + sigma);
if (x1 <= 0) {
v[0] = x1 - mu;
} else {
v[0] = -sigma / (x1 + mu);
KALDI_ASSERT(KALDI_ISFINITE(v[dim-1]));
}
Real v1 = v[0];
Real v1sq = v1 * v1;
*beta = 2 * v1sq / (sigma + v1sq);
Real inv_v1 = 1.0 / v1;
if (KALDI_ISINF(inv_v1)) {
// can happen if v1 is denormal.
KALDI_ASSERT(v1 == v1 && v1 != 0.0);
for (MatrixIndexT i = 0; i < dim; i++) v[i] /= v1;
} else {
cblas_Xscal(dim, inv_v1, v, 1);
}
if (KALDI_ISNAN(inv_v1)) {
KALDI_ERR << "NaN encountered in HouseBackward";
}
}
}
*/
// This is a backward version of the "House" routine above:
// backward because it's the last index, not the first index of
// the vector that is "special". This is convenient in
// the Tridiagonalize routine that uses reversed indexes for
// compatibility with the packed lower triangular format.
template<typename Real>
void HouseBackward(MatrixIndexT dim, const Real *x, Real *v, Real *beta) {
KALDI_ASSERT(dim > 0);
  // To avoid overflow, we first compute the max of |x_i| (or
  // one if that's zero), and we'll replace "x" by x/max|x_i|
// below. The householder vector is anyway invariant to
// the magnitude of x. We could actually avoid this extra loop
// over x if we wanted to be a bit smarter, but anyway this
// doesn't dominate the O(N) performance of the algorithm.
Real s; // s is a scale on x.
{
Real max_x = std::numeric_limits<Real>::min();
for (MatrixIndexT i = 0; i < dim; i++)
max_x = std::max(max_x, (x[i] < 0 ? -x[i] : x[i]));
s = 1.0 / max_x;
}
Real sigma = 0.0;
v[dim-1] = 1.0;
for (MatrixIndexT i = 0; i + 1 < dim; i++) {
sigma += (x[i] * s) * (x[i] * s);
v[i] = x[i] * s;
}
KALDI_ASSERT(KALDI_ISFINITE(sigma) &&
"Tridiagonalizing matrix that is too large or has NaNs.");
if (sigma == 0.0) *beta = 0.0;
else {
Real x1 = x[dim-1] * s, mu = std::sqrt(x1 * x1 + sigma);
if (x1 <= 0) {
v[dim-1] = x1 - mu;
} else {
v[dim-1] = -sigma / (x1 + mu);
KALDI_ASSERT(KALDI_ISFINITE(v[dim-1]));
}
Real v1 = v[dim-1];
Real v1sq = v1 * v1;
*beta = 2 * v1sq / (sigma + v1sq);
Real inv_v1 = 1.0 / v1;
if (KALDI_ISINF(inv_v1)) {
// can happen if v1 is denormal.
KALDI_ASSERT(v1 == v1 && v1 != 0.0);
for (MatrixIndexT i = 0; i < dim; i++) v[i] /= v1;
} else {
cblas_Xscal(dim, inv_v1, v, 1);
}
if (KALDI_ISNAN(inv_v1)) {
KALDI_ERR << "NaN encountered in HouseBackward";
}
}
}
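// A quick numeric check of HouseBackward (exposition only): with dim = 2 and
// x = (3, 4) the routine yields v = (-3, 1) and beta = 0.2, and the reflector
// P = I - beta v v^T annihilates everything but the last component of x:
/*
  double x[2] = {3.0, 4.0}, v[2], beta;
  HouseBackward(2, x, v, &beta);  // v == {-3, 1}, beta == 0.2.
  // P x = x - beta * v * (v^T x); here v^T x == -5, so
  // P x == (3, 4) - 0.2 * (-5) * (-3, 1) == (0, 5) == ||x|| e_{dim-1}.
*/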
/**
This routine tridiagonalizes *this. C.f. Golub and Van Loan 3rd ed., sec.
8.3.1 (p415). We reverse the order of the indices as it's more natural
with packed lower-triangular matrices to do it this way. There's also
a shift from one-based to zero-based indexing, so the index
k is transformed k -> n - k, and a corresponding transpose...
   Let the original *this be A. This algorithm replaces *this with
a tridiagonal matrix T such that T = Q A Q^T for an orthogonal Q.
Caution: Q is transposed vs. Golub and Van Loan.
If Q != NULL it outputs Q.
*/
template<typename Real>
void SpMatrix<Real>::Tridiagonalize(MatrixBase<Real> *Q) {
MatrixIndexT n = this->NumRows();
KALDI_ASSERT(Q == NULL || (Q->NumRows() == n &&
Q->NumCols() == n));
if (Q != NULL) Q->SetUnit();
Real *data = this->Data();
Real *qdata = (Q == NULL ? NULL : Q->Data());
MatrixIndexT qstride = (Q == NULL ? 0 : Q->Stride());
Vector<Real> tmp_v(n-1), tmp_p(n);
Real beta, *v = tmp_v.Data(), *p = tmp_p.Data(), *w = p, *x = p;
for (MatrixIndexT k = n-1; k >= 2; k--) {
MatrixIndexT ksize = ((k+1)*k)/2;
// ksize is the packed size of the lower-triangular matrix of size k,
// which is the size of "all rows previous to this one."
Real *Arow = data + ksize; // In Golub+Van Loan it was A(k+1:n, k), we
// have Arow = A(k, 0:k-1).
HouseBackward(k, Arow, v, &beta); // sets v and beta.
cblas_Xspmv(k, beta, data, v, 1, 0.0, p, 1); // p = beta * A(0:k-1,0:k-1) v
Real minus_half_beta_pv = -0.5 * beta * cblas_Xdot(k, p, 1, v, 1);
cblas_Xaxpy(k, minus_half_beta_pv, v, 1, w, 1); // w = p - (beta p^T v/2) v;
// this relies on the fact that w and p are the same pointer.
// We're doing A(k, k-1) = ||Arow||. It happens that this element
// is indexed at ksize + k - 1 in the packed lower-triangular format.
data[ksize + k - 1] = std::sqrt(cblas_Xdot(k, Arow, 1, Arow, 1));
for (MatrixIndexT i = 0; i + 1 < k; i++)
data[ksize + i] = 0; // This is not in Golub and Van Loan but is
// necessary if we're not using parts of A to store the Householder
// vectors.
// We're doing A(0:k-1,0:k-1) -= (v w' + w v')
cblas_Xspr2(k, -1.0, v, 1, w, 1, data);
if (Q != NULL) { // C.f. Golub, Q is H_1 .. H_n-2... in this
// case we apply them in the opposite order so it's H_n-1 .. H_1,
// but also Q is transposed so we really have Q = H_1 .. H_n-1.
// It's a double negative.
      // Anyway, we left-multiply Q by each one. The H_k would each be
      // diag(I - beta v v', I) but we don't ever touch the last dims.
      // We do (in Matlab notation):
      // Q(0:k-1,:) = (I - beta v v') * Q(0:k-1,:), i.e.:
      // Q(0:k-1,:) += -beta v (v' Q(0:k-1,:)) .. let x = -beta Q(0:k-1,:)' v.
cblas_Xgemv(kTrans, k, n, -beta, qdata, qstride, v, 1, 0.0, x, 1);
      // now x = -beta Q(0:k-1,:)^T v, a vector of dimension n.
      // The next line does: Q(0:k-1,:) += v x^T.
cblas_Xger(k, n, 1.0, v, 1, x, 1, qdata, qstride);
}
}
}
// Instantiate these functions, as it wasn't implemented in sp-matrix.cc
// where we instantiated the whole class.
template
void SpMatrix<float>::Tridiagonalize(MatrixBase<float> *Q);
template
void SpMatrix<double>::Tridiagonalize(MatrixBase<double> *Q);
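// Usage sketch (exposition only, assuming a random symmetric input):
/*
  MatrixIndexT n = 5;
  SpMatrix<double> A(n);
  A.SetRandn();
  SpMatrix<double> T(A);
  Matrix<double> Q(n, n);
  T.Tridiagonalize(&Q);  // T is now tridiagonal with T = Q A Q^T,
                         // so A can be recovered as Q^T T Q.
*/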
/// Create Givens rotations, as in Golub and Van Loan 3rd ed., page 216.
template<typename Real>
inline void Givens(Real a, Real b, Real *c, Real *s) {
if (b == 0) {
*c = 1;
*s = 0;
} else {
if (std::abs(b) > std::abs(a)) {
Real tau = -a / b;
*s = 1 / std::sqrt(1 + tau*tau);
*c = *s * tau;
} else {
Real tau = -b / a;
*c = 1 / std::sqrt(1 + tau*tau);
*s = *c * tau;
}
}
}
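// Worked example (exposition only): Givens(3, 4, &c, &s) takes the
// |b| > |a| branch, giving tau = -0.75, s = 0.8, c = -0.6. The defining
// property is that the rotation annihilates b:
//   s*a + c*b = 0.8*3 + (-0.6)*4 = 0, while c*a - s*b = -5 = -sqrt(a^2 + b^2).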
// Some internal code for the QR algorithm: one "QR step".
// This is Golub and Van Loan 3rd ed., Algorithm 8.3.2 "Implicit Symmetric QR step
// with Wilkinson shift." A couple of differences: this code is
// in zero based arithmetic, and we represent Q transposed from
// their Q for memory locality with row-major-indexed matrices.
template <typename Real>
void QrStep(MatrixIndexT n,
Real *diag,
Real *off_diag,
MatrixBase<Real> *Q) {
KALDI_ASSERT(n >= 2);
// below, "scale" could be any number; we introduce it to keep the
// floating point quantities within a good range.
Real d = (diag[n-2] - diag[n-1]) / 2.0,
t = off_diag[n-2],
inv_scale = std::max(std::max(std::abs(d), std::abs(t)),
std::numeric_limits<Real>::min()),
scale = 1.0 / inv_scale,
d_scaled = d * scale,
off_diag_n2_scaled = off_diag[n-2] * scale,
t2_n_n1_scaled = off_diag_n2_scaled * off_diag_n2_scaled,
sgn_d = (d > 0.0 ? 1.0 : -1.0),
mu = diag[n-1] - inv_scale * t2_n_n1_scaled /
(d_scaled + sgn_d * std::sqrt(d_scaled * d_scaled + t2_n_n1_scaled)),
x = diag[0] - mu,
z = off_diag[0];
KALDI_ASSERT(KALDI_ISFINITE(x));
Real *Qdata = (Q == NULL ? NULL : Q->Data());
MatrixIndexT Qstride = (Q == NULL ? 0 : Q->Stride()),
Qcols = (Q == NULL ? 0 : Q->NumCols());
for (MatrixIndexT k = 0; k < n-1; k++) {
Real c, s;
Givens(x, z, &c, &s);
// Rotate dimensions k and k+1 with the Givens matrix G, as
// T <== G^T T G.
// In 2d, a Givens matrix is [ c s; -s c ]. Forget about
// the dimension-indexing issues and assume we have a 2x2
// symmetric matrix [ p q ; q r ]
// We ask our friends at Wolfram Alpha about
// { { c, -s}, {s, c} } * { {p, q}, {q, r} } * { { c, s}, {-s, c} }
    // Interpreting the result as [ p', q' ; q', r' ]
// p' = c (c p - s q) - s (c q - s r)
// q' = s (c p - s q) + c (c q - s r)
// r' = s (s p + c q) + c (s q + c r)
Real p = diag[k], q = off_diag[k], r = diag[k+1];
// p is element k,k; r is element k+1,k+1; q is element k,k+1 or k+1,k.
// We'll let the compiler optimize this.
diag[k] = c * (c*p - s*q) - s * (c*q - s*r);
off_diag[k] = s * (c*p - s*q) + c * (c*q - s*r);
diag[k+1] = s * (s*p + c*q) + c * (s*q + c*r);
// We also have some other elements to think of that
// got rotated in a simpler way: if k>0,
// then element (k, k-1) and (k+1, k-1) get rotated. Here,
// element k+1, k-1 will be present as z; it's the out-of-band
// element that we remembered from last time. This is
// on the left as it's the row indexes that differ, so think of
// this as being premultiplied by G^T. In fact we're multiplying
// T by in some sense the opposite/transpose of the Givens rotation.
if (k > 0) { // Note, in rotations, going backward, (x,y) -> ((cx - sy), (sx + cy))
Real &elem_k_km1 = off_diag[k-1],
elem_kp1_km1 = z; // , tmp = elem_k_km1;
elem_k_km1 = c*elem_k_km1 - s*elem_kp1_km1;
// The next line will set elem_kp1_km1 to zero and we'll never access this
// value, so we comment it out.
// elem_kp1_km1 = s*tmp + c*elem_kp1_km1;
}
if (Q != NULL)
cblas_Xrot(Qcols, Qdata + k*Qstride, 1,
Qdata + (k+1)*Qstride, 1, c, -s);
if (k < n-2) {
// Next is the elements (k+2, k) and (k+2, k-1), to be rotated, again
// backwards.
Real &elem_kp2_k = z,
&elem_kp2_kp1 = off_diag[k+1];
// Note: elem_kp2_k == z would start off as zero because it's
// two off the diagonal, and not been touched yet. Therefore
// we eliminate it in expressions below, commenting it out.
// If we didn't do this we should set it to zero first.
elem_kp2_k = - s * elem_kp2_kp1; // + c*elem_kp2_k
elem_kp2_kp1 = c * elem_kp2_kp1; // + s*elem_kp2_k (original value).
// The next part is from the algorithm they describe: x = t_{k+1,k}
x = off_diag[k];
}
}
}
// Internal code for the QR algorithm, where the diagonal
// and off-diagonal of the symmetric matrix are represented as
// vectors of length n and n-1.
template <typename Real>
void QrInternal(MatrixIndexT n,
Real *diag,
Real *off_diag,
MatrixBase<Real> *Q) {
KALDI_ASSERT(Q == NULL || Q->NumCols() == n); // We may
// later relax the condition that Q->NumCols() == n.
MatrixIndexT counter = 0, max_iters = 500 + 4*n, // Should never take this many iters.
large_iters = 100 + 2*n;
Real epsilon = (pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));
for (; counter < max_iters; counter++) { // this takes the place of "until
// q=n"... we'll break out of the
// loop when we converge.
if (counter == large_iters ||
(counter > large_iters && (counter - large_iters) % 50 == 0)) {
KALDI_WARN << "Took " << counter
<< " iterations in QR (dim is " << n << "), doubling epsilon.";
SubVector<Real> d(diag, n), o(off_diag, n-1);
KALDI_WARN << "Diag, off-diag are " << d << " and " << o;
epsilon *= 2.0;
}
for (MatrixIndexT i = 0; i+1 < n; i++) {
if (std::abs(off_diag[i]) <= epsilon *
(std::abs(diag[i]) + std::abs(diag[i+1])))
off_diag[i] = 0.0;
}
// The next code works out p, q, and npq which is n - p - q.
// For the definitions of q and p, see Golub and Van Loan; we
// partition the n dims into pieces of size (p, n-p-q, q) where
    // the part of size q is diagonal and the part of size n-p-q is
// "unreduced", i.e. has no zero off-diagonal elements.
MatrixIndexT q = 0;
// Note: below, "n-q < 2" should more clearly be "n-2-q < 0", but that
// causes problems if MatrixIndexT is unsigned.
while (q < n && (n-q < 2 || off_diag[n-2-q] == 0.0))
q++;
if (q == n) break; // we're done. It's diagonal.
KALDI_ASSERT(n - q >= 2);
MatrixIndexT npq = 2; // Value of n - p - q, where n - p - q must be
// unreduced. This is the size of "middle" band of elements. If q != n,
// we must have hit a nonzero off-diag element, so the size of this
// band must be at least two.
while (npq + q < n && (n-q-npq-1 < 0 || off_diag[n-q-npq-1] != 0.0))
npq++;
MatrixIndexT p = n - q - npq;
{ // Checks.
for (MatrixIndexT i = 0; i+1 < npq; i++)
KALDI_ASSERT(off_diag[p + i] != 0.0);
for (MatrixIndexT i = 0; i+1 < q; i++)
KALDI_ASSERT(off_diag[p + npq - 1 + i] == 0.0);
if (p > 1) // Something must have stopped npq from growing further..
KALDI_ASSERT(off_diag[p-1] == 0.0); // so last off-diag elem in
// group of size p must be zero.
}
if (Q != NULL) {
// Do one QR step on the middle part of Q only.
// Qpart will be a subset of the rows of Q.
SubMatrix<Real> Qpart(*Q, p, npq, 0, Q->NumCols());
QrStep(npq, diag + p, off_diag + p, &Qpart);
} else {
QrStep(npq, diag + p, off_diag + p,
static_cast<MatrixBase<Real>*>(NULL));
}
}
if (counter == max_iters) {
KALDI_WARN << "Failure to converge in QR algorithm. "
<< "Exiting with partial output.";
}
}
/**
This is the symmetric QR algorithm, from Golub and Van Loan 3rd ed., Algorithm
8.3.3. Q is transposed w.r.t. there, though.
*/
template <typename Real>
void SpMatrix<Real>::Qr(MatrixBase<Real> *Q) {
KALDI_ASSERT(this->IsTridiagonal());
// We envisage that Q would be square but we don't check for this,
// as there are situations where you might not want this.
KALDI_ASSERT(Q == NULL || Q->NumRows() == this->NumRows());
// Note: the first couple of lines of the algorithm they give would be done
// outside of this function, by calling Tridiagonalize().
MatrixIndexT n = this->NumRows();
Vector<Real> diag(n), off_diag(n-1);
for (MatrixIndexT i = 0; i < n; i++) {
diag(i) = (*this)(i, i);
if (i > 0) off_diag(i-1) = (*this)(i, i-1);
}
QrInternal(n, diag.Data(), off_diag.Data(), Q);
// Now set *this to the value represented by diag and off_diag.
this->SetZero();
for (MatrixIndexT i = 0; i < n; i++) {
(*this)(i, i) = diag(i);
if (i > 0) (*this)(i, i-1) = off_diag(i-1);
}
}
template<typename Real>
void SpMatrix<Real>::Eig(VectorBase<Real> *s, MatrixBase<Real> *P) const {
MatrixIndexT dim = this->NumRows();
KALDI_ASSERT(s->Dim() == dim);
KALDI_ASSERT(P == NULL || (P->NumRows() == dim && P->NumCols() == dim));
SpMatrix<Real> A(*this); // Copy *this, since the tridiagonalization
// and QR decomposition are destructive.
// Note: for efficiency of memory access, the tridiagonalization
// algorithm makes the *rows* of P the eigenvectors, not the columns.
// We'll transpose P before we exit.
// Also note: P may be null if you don't want the eigenvectors. This
// will make this function more efficient.
A.Tridiagonalize(P); // Tridiagonalizes.
A.Qr(P); // Diagonalizes.
if(P) P->Transpose();
s->CopyDiagFromPacked(A);
}
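// Usage sketch (exposition only): full eigendecomposition of an SpMatrix.
/*
  MatrixIndexT n = 4;
  SpMatrix<double> S(n);
  S.SetRandn();
  Vector<double> s(n);
  Matrix<double> P(n, n);
  S.Eig(&s, &P);  // now S == P * diag(s) * P^T; columns of P are eigenvectors.
  SpMatrix<double> S2(n);
  S2.AddMat2Vec(1.0, P, kNoTrans, s, 0.0);  // reconstructs S up to roundoff.
*/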
template<typename Real>
void SpMatrix<Real>::TopEigs(VectorBase<Real> *s, MatrixBase<Real> *P,
MatrixIndexT lanczos_dim) const {
const SpMatrix<Real> &S(*this); // call this "S" for easy notation.
MatrixIndexT eig_dim = s->Dim(); // Space of dim we want to retain.
if (lanczos_dim <= 0)
lanczos_dim = std::max(eig_dim + 50, eig_dim + eig_dim/2);
MatrixIndexT dim = this->NumRows();
if (lanczos_dim >= dim) {
// There would be no speed advantage in using this method, so just
// use the regular approach.
Vector<Real> s_tmp(dim);
Matrix<Real> P_tmp(dim, dim);
this->Eig(&s_tmp, &P_tmp);
SortSvd(&s_tmp, &P_tmp);
s->CopyFromVec(s_tmp.Range(0, eig_dim));
P->CopyFromMat(P_tmp.Range(0, dim, 0, eig_dim));
return;
}
KALDI_ASSERT(eig_dim <= dim && eig_dim > 0);
KALDI_ASSERT(P->NumRows() == dim && P->NumCols() == eig_dim); // each column
// is one eigenvector.
Matrix<Real> Q(lanczos_dim, dim); // The rows of Q will be the
// orthogonal vectors of the Krylov subspace.
SpMatrix<Real> T(lanczos_dim); // This will be equal to Q S Q^T,
// i.e. *this projected into the Krylov subspace. Note: only the
  // diagonal and off-diagonal of T are nonzero, i.e. it's tridiagonal,
// but we don't have access to the low-level algorithms that work
// on that type of matrix (since we want to use ATLAS). So we just
// do normal SVD, on a full matrix; it won't typically dominate.
Q.Row(0).SetRandn();
Q.Row(0).Scale(1.0 / Q.Row(0).Norm(2));
for (MatrixIndexT d = 0; d < lanczos_dim; d++) {
Vector<Real> r(dim);
r.AddSpVec(1.0, S, Q.Row(d), 0.0);
// r = S * q_d
MatrixIndexT counter = 0;
Real end_prod;
while (1) { // Normally we'll do this loop only once:
// we repeat to handle cases where r gets very much smaller
// and we want to orthogonalize again.
// We do "full orthogonalization" to preserve stability,
// even though this is usually a waste of time.
Real start_prod = VecVec(r, r);
for (SignedMatrixIndexT e = d; e >= 0; e--) { // e must be signed!
SubVector<Real> q_e(Q, e);
Real prod = VecVec(r, q_e);
if (counter == 0 && static_cast<MatrixIndexT>(e) + 1 >= d) // Keep T tridiagonal, which
T(d, e) = prod; // mathematically speaking, it is.
r.AddVec(-prod, q_e); // Subtract component in q_e.
}
if (d+1 == lanczos_dim) break;
end_prod = VecVec(r, r);
if (end_prod <= 0.1 * start_prod) {
// also handles case where both are 0.
// We're not confident any more that it's completely
// orthogonal to the rest so we want to re-do.
if (end_prod == 0.0)
r.SetRandn(); // "Restarting".
counter++;
if (counter > 100)
KALDI_ERR << "Loop detected in Lanczos iteration.";
} else {
break;
}
}
if (d+1 != lanczos_dim) {
// OK, at this point we're satisfied that r is orthogonal
// to all previous rows.
KALDI_ASSERT(end_prod != 0.0); // should have looped.
r.Scale(1.0 / std::sqrt(end_prod)); // make it unit.
Q.Row(d+1).CopyFromVec(r);
}
}
Matrix<Real> R(lanczos_dim, lanczos_dim);
R.SetUnit();
T.Qr(&R); // Diagonalizes T.
Vector<Real> s_tmp(lanczos_dim);
s_tmp.CopyDiagFromSp(T);
// Now T = R * diag(s_tmp) * R^T.
// The next call sorts the elements of s from greatest to least absolute value,
// and moves around the rows of R in the corresponding way. This picks out
// the largest (absolute) eigenvalues.
SortSvd(&s_tmp, static_cast<Matrix<Real>*>(NULL), &R);
// Keep only the initial rows of R, those corresponding to greatest (absolute)
// eigenvalues.
SubMatrix<Real> Rsub(R, 0, eig_dim, 0, lanczos_dim);
SubVector<Real> s_sub(s_tmp, 0, eig_dim);
s->CopyFromVec(s_sub);
// For working out what to do now, just assume the other eigenvalues were
// zero. This is just for purposes of knowing how to get the result, and
// not getting things wrongly transposed.
// We have T = Rsub^T * diag(s_sub) * Rsub.
// Now, T = Q S Q^T, with Q orthogonal, so S = Q^T T Q = Q^T Rsub^T * diag(s) * Rsub * Q.
// The output is P and we want S = P * diag(s) * P^T, so we need P = Q^T Rsub^T.
P->AddMatMat(1.0, Q, kTrans, Rsub, kTrans, 0.0);
}
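// Usage sketch (exposition only): keep the eig_dim eigenvalues of largest
// magnitude of a large matrix without a full eigendecomposition.
/*
  MatrixIndexT dim = 1000, eig_dim = 10;
  SpMatrix<double> S(dim);
  S.SetRandn();
  Vector<double> s(eig_dim);
  Matrix<double> P(dim, eig_dim);  // one eigenvector per column.
  S.TopEigs(&s, &P, 0);  // lanczos_dim <= 0 lets the method pick a default.
*/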
// Instantiate the templates for Eig and TopEig.
template
void SpMatrix<float>::Eig(VectorBase<float>*, MatrixBase<float>*) const;
template
void SpMatrix<double>::Eig(VectorBase<double>*, MatrixBase<double>*) const;
template
void SpMatrix<float>::TopEigs(VectorBase<float>*, MatrixBase<float>*, MatrixIndexT) const;
template
void SpMatrix<double>::TopEigs(VectorBase<double>*, MatrixBase<double>*, MatrixIndexT) const;
// Someone had a problem with the Intel compiler with -O3, with Qr not being
// defined for some strange reason (should automatically happen when
// we instantiate Eig and TopEigs), so we explicitly instantiate it here.
template
void SpMatrix<float>::Qr(MatrixBase<float> *Q);
template
void SpMatrix<double>::Qr(MatrixBase<double> *Q);
}
// namespace kaldi
// matrix/sp-matrix-inl.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_SP_MATRIX_INL_H_
#define KALDI_MATRIX_SP_MATRIX_INL_H_
#include "matrix/tp-matrix.h"
namespace kaldi {
// The declarations in this file are template specializations: they tell the
// compiler that we implement the templated function separately for the
// different template arguments (float, double).
template<>
double SolveQuadraticProblem(const SpMatrix<double> &H, const VectorBase<double> &g,
const SolverOptions &opts, VectorBase<double> *x);
template<>
float SolveQuadraticProblem(const SpMatrix<float> &H, const VectorBase<float> &g,
const SolverOptions &opts, VectorBase<float> *x);
} // namespace kaldi
#endif // KALDI_MATRIX_SP_MATRIX_INL_H_
// matrix/sp-matrix.cc
// Copyright 2009-2011 Lukas Burget; Ondrej Glembek; Microsoft Corporation
// Saarland University; Petr Schwarz; Yanmin Qian;
// Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <limits>
#include "matrix/sp-matrix.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/matrix-functions.h"
#include "matrix/cblas-wrappers.h"
namespace kaldi {
// ****************************************************************************
// Returns the log-determinant if +ve definite, else KALDI_ERR.
// ****************************************************************************
template<typename Real>
Real SpMatrix<Real>::LogPosDefDet() const {
TpMatrix<Real> chol(this->NumRows());
double det = 0.0;
double diag;
chol.Cholesky(*this); // Will throw exception if not +ve definite!
for (MatrixIndexT i = 0; i < this->NumRows(); i++) {
diag = static_cast<double>(chol(i, i));
det += kaldi::Log(diag);
}
return static_cast<Real>(2*det);
}
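// Worked example (exposition only): for S = diag(4, 9) the Cholesky factor
// has diagonal (2, 3), the loop accumulates log 2 + log 3, and the function
// returns 2 * (log 2 + log 3) = log 36 = log det(S).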
template<typename Real>
void SpMatrix<Real>::Swap(SpMatrix<Real> *other) {
std::swap(this->data_, other->data_);
std::swap(this->num_rows_, other->num_rows_);
}
template<typename Real>
void SpMatrix<Real>::SymPosSemiDefEig(VectorBase<Real> *s,
MatrixBase<Real> *P,
Real tolerance) const {
Eig(s, P);
Real max = s->Max(), min = s->Min();
KALDI_ASSERT(-min <= tolerance * max);
s->ApplyFloor(0.0);
}
template<typename Real>
Real SpMatrix<Real>::MaxAbsEig() const {
Vector<Real> s(this->NumRows());
this->Eig(&s, static_cast<MatrixBase<Real>*>(NULL));
return std::max(s.Max(), -s.Min());
}
// returns true if positive definite--uses cholesky.
template<typename Real>
bool SpMatrix<Real>::IsPosDef() const {
MatrixIndexT D = (*this).NumRows();
KALDI_ASSERT(D > 0);
try {
TpMatrix<Real> C(D);
C.Cholesky(*this);
for (MatrixIndexT r = 0; r < D; r++)
if (C(r, r) == 0.0) return false;
return true;
}
catch(...) { // not positive semidefinite.
return false;
}
}
template<typename Real>
void SpMatrix<Real>::ApplyPow(Real power) {
if (power == 1) return; // can do nothing.
MatrixIndexT D = this->NumRows();
KALDI_ASSERT(D > 0);
Matrix<Real> U(D, D);
Vector<Real> l(D);
(*this).SymPosSemiDefEig(&l, &U);
Vector<Real> l_copy(l);
try {
l.ApplyPow(power * 0.5);
}
catch(...) {
KALDI_ERR << "Error taking power " << (power * 0.5) << " of vector "
<< l_copy;
}
U.MulColsVec(l);
(*this).AddMat2(1.0, U, kNoTrans, 0.0);
}
template<typename Real>
void SpMatrix<Real>::CopyFromMat(const MatrixBase<Real> &M,
SpCopyType copy_type) {
KALDI_ASSERT(this->NumRows() == M.NumRows() && M.NumRows() == M.NumCols());
MatrixIndexT D = this->NumRows();
switch (copy_type) {
case kTakeMeanAndCheck:
{
Real good_sum = 0.0, bad_sum = 0.0;
for (MatrixIndexT i = 0; i < D; i++) {
for (MatrixIndexT j = 0; j < i; j++) {
Real a = M(i, j), b = M(j, i), avg = 0.5*(a+b), diff = 0.5*(a-b);
(*this)(i, j) = avg;
good_sum += std::abs(avg);
bad_sum += std::abs(diff);
}
good_sum += std::abs(M(i, i));
(*this)(i, i) = M(i, i);
}
if (bad_sum > 0.01 * good_sum) {
KALDI_ERR << "SpMatrix::Copy(), source matrix is not symmetric: "
<< bad_sum << ">" << good_sum;
}
break;
}
case kTakeMean:
{
for (MatrixIndexT i = 0; i < D; i++) {
for (MatrixIndexT j = 0; j < i; j++) {
(*this)(i, j) = 0.5*(M(i, j) + M(j, i));
}
(*this)(i, i) = M(i, i);
}
break;
}
case kTakeLower:
{ // making this one a bit more efficient.
const Real *src = M.Data();
Real *dest = this->data_;
MatrixIndexT stride = M.Stride();
for (MatrixIndexT i = 0; i < D; i++) {
for (MatrixIndexT j = 0; j <= i; j++)
dest[j] = src[j];
dest += i + 1;
src += stride;
}
}
break;
case kTakeUpper:
for (MatrixIndexT i = 0; i < D; i++)
for (MatrixIndexT j = 0; j <= i; j++)
(*this)(i, j) = M(j, i);
break;
    default:
      KALDI_ERR << "Invalid argument to SpMatrix::CopyFromMat";
}
}
template<typename Real>
Real SpMatrix<Real>::Trace() const {
const Real *data = this->data_;
MatrixIndexT num_rows = this->num_rows_;
Real ans = 0.0;
for (int32 i = 1; i <= num_rows; i++, data += i)
ans += *data;
return ans;
}
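// For exposition: the diagonal entries of a packed lower-triangular matrix
// sit at indices 0, 2, 5, 9, ..., whose successive gaps are 2, 3, 4, ...;
// the comma expression "i++, data += i" in the loop above advances the
// pointer by exactly those gaps, so *data is always a diagonal element.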
// diagonal update, this <-- this + diag(v)
template<typename Real>
template<typename OtherReal>
void SpMatrix<Real>::AddDiagVec(const Real alpha, const VectorBase<OtherReal> &v) {
int32 num_rows = this->num_rows_;
KALDI_ASSERT(num_rows == v.Dim() && num_rows > 0);
const OtherReal *src = v.Data();
Real *dst = this->data_;
if (alpha == 1.0)
for (int32 i = 1; i <= num_rows; i++, src++, dst += i)
*dst += *src;
else
for (int32 i = 1; i <= num_rows; i++, src++, dst += i)
*dst += alpha * *src;
}
// instantiate the template above.
template
void SpMatrix<float>::AddDiagVec(const float alpha,
const VectorBase<double> &v);
template
void SpMatrix<double>::AddDiagVec(const double alpha,
const VectorBase<float> &v);
template
void SpMatrix<float>::AddDiagVec(const float alpha,
const VectorBase<float> &v);
template
void SpMatrix<double>::AddDiagVec(const double alpha,
const VectorBase<double> &v);
template<>
template<>
void SpMatrix<double>::AddVec2(const double alpha, const VectorBase<double> &v);
#ifndef HAVE_ATLAS
template<typename Real>
void SpMatrix<Real>::Invert(Real *logdet, Real *det_sign, bool need_inverse) {
// these are CLAPACK types
KaldiBlasInt result;
KaldiBlasInt rows = static_cast<int>(this->num_rows_);
KaldiBlasInt* p_ipiv = new KaldiBlasInt[rows];
Real *p_work; // workspace for the lapack function
void *temp;
if ((p_work = static_cast<Real*>(
KALDI_MEMALIGN(16, sizeof(Real) * rows, &temp))) == NULL) {
delete[] p_ipiv;
throw std::bad_alloc();
}
#ifdef HAVE_OPENBLAS
memset(p_work, 0, sizeof(Real) * rows); // gets rid of a probably
// spurious Valgrind warning about jumps depending upon uninitialized values.
#endif
// NOTE: Even though "U" is for upper, lapack assumes column-wise storage
// of the data. We have a row-wise storage, therefore, we need to "invert"
clapack_Xsptrf(&rows, this->data_, p_ipiv, &result);
KALDI_ASSERT(result >= 0 && "Call to CLAPACK ssptrf_ called with wrong arguments");
if (result > 0) { // Singular...
if (det_sign) *det_sign = 0;
if (logdet) *logdet = -std::numeric_limits<Real>::infinity();
    if (need_inverse) KALDI_ERR << "CLAPACK ssptrf_ : factorization failed";
} else { // Not singular.. compute log-determinant if needed.
if (logdet != NULL || det_sign != NULL) {
Real prod = 1.0, log_prod = 0.0;
int sign = 1;
for (int i = 0; i < (int)this->num_rows_; i++) {
if (p_ipiv[i] > 0) { // not a 2x2 block...
// if (p_ipiv[i] != i+1) sign *= -1; // row swap.
Real diag = (*this)(i, i);
prod *= diag;
} else { // negative: 2x2 block. [we are in first of the two].
i++; // skip over the first of the pair.
// each 2x2 block...
Real diag1 = (*this)(i, i), diag2 = (*this)(i-1, i-1),
offdiag = (*this)(i, i-1);
Real thisdet = diag1*diag2 - offdiag*offdiag;
// thisdet == determinant of 2x2 block.
// The following line is more complex than it looks: there are 2 offsets of
// 1 that cancel.
prod *= thisdet;
}
if (i == (int)(this->num_rows_-1) || fabs(prod) < 1.0e-10 || fabs(prod) > 1.0e+10) {
if (prod < 0) { prod = -prod; sign *= -1; }
log_prod += kaldi::Log(std::abs(prod));
prod = 1.0;
}
}
if (logdet != NULL) *logdet = log_prod;
if (det_sign != NULL) *det_sign = sign;
}
}
if (!need_inverse) {
delete [] p_ipiv;
KALDI_MEMALIGN_FREE(p_work);
return; // Don't need what is computed next.
}
// NOTE: Even though "U" is for upper, lapack assumes column-wise storage
// of the data. We have a row-wise storage, therefore, we need to "invert"
clapack_Xsptri(&rows, this->data_, p_ipiv, p_work, &result);
KALDI_ASSERT(result >=0 &&
"Call to CLAPACK ssptri_ called with wrong arguments");
if (result != 0) {
KALDI_ERR << "CLAPACK ssptrf_ : Matrix is singular";
}
delete [] p_ipiv;
KALDI_MEMALIGN_FREE(p_work);
}
#else
// in the ATLAS case, these are not implemented using a library and we back off to something else.
template<typename Real>
void SpMatrix<Real>::Invert(Real *logdet, Real *det_sign, bool need_inverse) {
Matrix<Real> M(this->NumRows(), this->NumCols());
M.CopyFromSp(*this);
M.Invert(logdet, det_sign, need_inverse);
if (need_inverse)
for (MatrixIndexT i = 0; i < this->NumRows(); i++)
for (MatrixIndexT j = 0; j <= i; j++)
(*this)(i, j) = M(i, j);
}
#endif
template<typename Real>
void SpMatrix<Real>::InvertDouble(Real *logdet, Real *det_sign,
bool inverse_needed) {
SpMatrix<double> dmat(*this);
double logdet_tmp, det_sign_tmp;
dmat.Invert(logdet ? &logdet_tmp : NULL,
det_sign ? &det_sign_tmp : NULL,
inverse_needed);
if (logdet) *logdet = logdet_tmp;
if (det_sign) *det_sign = det_sign_tmp;
(*this).CopyFromSp(dmat);
}
double TraceSpSp(const SpMatrix<double> &A, const SpMatrix<double> &B) {
KALDI_ASSERT(A.NumRows() == B.NumRows());
const double *Aptr = A.Data();
const double *Bptr = B.Data();
MatrixIndexT R = A.NumRows();
MatrixIndexT RR = (R * (R + 1)) / 2;
double all_twice = 2.0 * cblas_Xdot(RR, Aptr, 1, Bptr, 1);
// "all_twice" contains twice the vector-wise dot-product... this is
// what we want except the diagonal elements are represented
// twice.
double diag_once = 0.0;
for (MatrixIndexT row_plus_two = 2; row_plus_two <= R + 1; row_plus_two++) {
diag_once += *Aptr * *Bptr;
Aptr += row_plus_two;
Bptr += row_plus_two;
}
return all_twice - diag_once;
}
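// For exposition: for symmetric A and B, tr(A B) = sum_{i,j} A_ij * B_ij over
// the full matrices. The packed dot product visits each off-diagonal pair once
// and the diagonal once, so doubling it double-counts only the diagonal; the
// loop above therefore walks the diagonal entries (packed indices 0, 2, 5, ...)
// and subtracts them once.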
float TraceSpSp(const SpMatrix<float> &A, const SpMatrix<float> &B) {
KALDI_ASSERT(A.NumRows() == B.NumRows());
const float *Aptr = A.Data();
const float *Bptr = B.Data();
MatrixIndexT R = A.NumRows();
MatrixIndexT RR = (R * (R + 1)) / 2;
float all_twice = 2.0 * cblas_Xdot(RR, Aptr, 1, Bptr, 1);
// "all_twice" contains twice the vector-wise dot-product... this is
// what we want except the diagonal elements are represented
// twice.
float diag_once = 0.0;
for (MatrixIndexT row_plus_two = 2; row_plus_two <= R + 1; row_plus_two++) {
diag_once += *Aptr * *Bptr;
Aptr += row_plus_two;
Bptr += row_plus_two;
}
return all_twice - diag_once;
}
template<typename Real, typename OtherReal>
Real TraceSpSp(const SpMatrix<Real> &A, const SpMatrix<OtherReal> &B) {
KALDI_ASSERT(A.NumRows() == B.NumRows());
Real ans = 0.0;
const Real *Aptr = A.Data();
const OtherReal *Bptr = B.Data();
MatrixIndexT row, col, R = A.NumRows();
for (row = 0; row < R; row++) {
for (col = 0; col < row; col++)
ans += 2.0 * *(Aptr++) * *(Bptr++);
ans += *(Aptr++) * *(Bptr++); // Diagonal.
}
return ans;
}
template
float TraceSpSp<float, double>(const SpMatrix<float> &A, const SpMatrix<double> &B);
template
double TraceSpSp<double, float>(const SpMatrix<double> &A, const SpMatrix<float> &B);
template<typename Real>
Real TraceSpMat(const SpMatrix<Real> &A, const MatrixBase<Real> &B) {
KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols() &&
"KALDI_ERR: TraceSpMat: arguments have mismatched dimension");
MatrixIndexT R = A.NumRows();
Real ans = (Real)0.0;
const Real *Aptr = A.Data(), *Bptr = B.Data();
MatrixIndexT bStride = B.Stride();
for (MatrixIndexT r = 0;r < R;r++) {
for (MatrixIndexT c = 0;c < r;c++) {
// ans += A(r, c) * (B(r, c) + B(c, r));
ans += *(Aptr++) * (Bptr[r*bStride + c] + Bptr[c*bStride + r]);
}
// ans += A(r, r) * B(r, r);
ans += *(Aptr++) * Bptr[r*bStride + r];
}
return ans;
}
template
float TraceSpMat(const SpMatrix<float> &A, const MatrixBase<float> &B);
template
double TraceSpMat(const SpMatrix<double> &A, const MatrixBase<double> &B);
template<typename Real>
Real TraceMatSpMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
const SpMatrix<Real> &B, const MatrixBase<Real> &C,
MatrixTransposeType transC) {
KALDI_ASSERT((transA == kTrans?A.NumCols():A.NumRows()) ==
(transC == kTrans?C.NumRows():C.NumCols()) &&
(transA == kTrans?A.NumRows():A.NumCols()) == B.NumRows() &&
(transC == kTrans?C.NumCols():C.NumRows()) == B.NumRows() &&
"TraceMatSpMat: arguments have wrong dimension.");
Matrix<Real> tmp(B.NumRows(), B.NumRows());
tmp.AddMatMat(1.0, C, transC, A, transA, 0.0); // tmp = C * A.
return TraceSpMat(B, tmp);
}
template
float TraceMatSpMat(const MatrixBase<float> &A, MatrixTransposeType transA,
const SpMatrix<float> &B, const MatrixBase<float> &C,
MatrixTransposeType transC);
template
double TraceMatSpMat(const MatrixBase<double> &A, MatrixTransposeType transA,
const SpMatrix<double> &B, const MatrixBase<double> &C,
MatrixTransposeType transC);
template<typename Real>
Real TraceMatSpMatSp(const MatrixBase<Real> &A, MatrixTransposeType transA,
const SpMatrix<Real> &B, const MatrixBase<Real> &C,
MatrixTransposeType transC, const SpMatrix<Real> &D) {
  KALDI_ASSERT((transA == kTrans ? A.NumCols() : A.NumRows()) == D.NumCols() &&
               (transA == kTrans ? A.NumRows() : A.NumCols()) == B.NumRows() &&
               (transC == kTrans ? C.NumCols() : C.NumRows()) == B.NumCols() &&
               (transC == kTrans ? C.NumRows() : C.NumCols()) == D.NumRows() &&
               "KALDI_ERR: TraceMatSpMatSp: arguments have mismatched dimension.");
// Could perhaps optimize this more depending on dimensions of quantities.
Matrix<Real> tmpAB(transA == kTrans ? A.NumCols():A.NumRows(), B.NumCols());
tmpAB.AddMatSp(1.0, A, transA, B, 0.0);
Matrix<Real> tmpCD(transC == kTrans ? C.NumCols():C.NumRows(), D.NumCols());
tmpCD.AddMatSp(1.0, C, transC, D, 0.0);
return TraceMatMat(tmpAB, tmpCD, kNoTrans);
}
template
float TraceMatSpMatSp(const MatrixBase<float> &A, MatrixTransposeType transA,
const SpMatrix<float> &B, const MatrixBase<float> &C,
MatrixTransposeType transC, const SpMatrix<float> &D);
template
double TraceMatSpMatSp(const MatrixBase<double> &A, MatrixTransposeType transA,
const SpMatrix<double> &B, const MatrixBase<double> &C,
MatrixTransposeType transC, const SpMatrix<double> &D);
template<typename Real>
bool SpMatrix<Real>::IsDiagonal(Real cutoff) const {
MatrixIndexT R = this->NumRows();
Real bad_sum = 0.0, good_sum = 0.0;
for (MatrixIndexT i = 0; i < R; i++) {
for (MatrixIndexT j = 0; j <= i; j++) {
if (i == j)
good_sum += std::abs((*this)(i, j));
else
bad_sum += std::abs((*this)(i, j));
}
}
return (!(bad_sum > good_sum * cutoff));
}
template<typename Real>
bool SpMatrix<Real>::IsUnit(Real cutoff) const {
MatrixIndexT R = this->NumRows();
Real max = 0.0; // max error
for (MatrixIndexT i = 0; i < R; i++)
for (MatrixIndexT j = 0; j <= i; j++)
max = std::max(max, static_cast<Real>(std::abs((*this)(i, j) -
(i == j ? 1.0 : 0.0))));
return (max <= cutoff);
}
template<typename Real>
bool SpMatrix<Real>::IsTridiagonal(Real cutoff) const {
MatrixIndexT R = this->NumRows();
Real max_abs_2diag = 0.0, max_abs_offdiag = 0.0;
for (MatrixIndexT i = 0; i < R; i++)
for (MatrixIndexT j = 0; j <= i; j++) {
if (j+1 < i)
max_abs_offdiag = std::max(max_abs_offdiag,
std::abs((*this)(i, j)));
else
max_abs_2diag = std::max(max_abs_2diag,
std::abs((*this)(i, j)));
}
return (max_abs_offdiag <= cutoff * max_abs_2diag);
}
template<typename Real>
bool SpMatrix<Real>::IsZero(Real cutoff) const {
if (this->num_rows_ == 0) return true;
return (this->Max() <= cutoff && this->Min() >= -cutoff);
}
template<typename Real>
Real SpMatrix<Real>::FrobeniusNorm() const {
Real sum = 0.0;
MatrixIndexT R = this->NumRows();
for (MatrixIndexT i = 0; i < R; i++) {
for (MatrixIndexT j = 0; j < i; j++)
sum += (*this)(i, j) * (*this)(i, j) * 2;
sum += (*this)(i, i) * (*this)(i, i);
}
return std::sqrt(sum);
}
template<typename Real>
bool SpMatrix<Real>::ApproxEqual(const SpMatrix<Real> &other, float tol) const {
if (this->NumRows() != other.NumRows())
KALDI_ERR << "SpMatrix::AproxEqual, size mismatch, "
<< this->NumRows() << " vs. " << other.NumRows();
SpMatrix<Real> tmp(*this);
tmp.AddSp(-1.0, other);
return (tmp.FrobeniusNorm() <= tol * std::max(this->FrobeniusNorm(), other.FrobeniusNorm()));
}
// function Floor: A = Floor(B, alpha * C) ... see tutorial document.
template<typename Real>
int SpMatrix<Real>::ApplyFloor(const SpMatrix<Real> &C, Real alpha,
bool verbose) {
MatrixIndexT dim = this->NumRows();
int nfloored = 0;
KALDI_ASSERT(C.NumRows() == dim);
KALDI_ASSERT(alpha > 0);
TpMatrix<Real> L(dim);
L.Cholesky(C);
L.Scale(std::sqrt(alpha)); // equivalent to scaling C by alpha.
TpMatrix<Real> LInv(L);
LInv.Invert();
SpMatrix<Real> D(dim);
{ // D = L^{-1} * (*this) * L^{-T}
Matrix<Real> LInvFull(LInv);
D.AddMat2Sp(1.0, LInvFull, kNoTrans, (*this), 0.0);
}
Vector<Real> l(dim);
Matrix<Real> U(dim, dim);
D.Eig(&l, &U);
if (verbose) {
KALDI_LOG << "ApplyFloor: flooring following diagonal to 1: " << l;
}
for (MatrixIndexT i = 0; i < l.Dim(); i++) {
if (l(i) < 1.0) {
nfloored++;
l(i) = 1.0;
}
}
l.ApplyPow(0.5);
U.MulColsVec(l);
D.AddMat2(1.0, U, kNoTrans, 0.0);
{ // D' := U * diag(l') * U^T ... l'=floor(l, 1)
Matrix<Real> LFull(L);
(*this).AddMat2Sp(1.0, LFull, kNoTrans, D, 0.0); // A := L * D' * L^T
}
return nfloored;
}
template<typename Real>
Real SpMatrix<Real>::LogDet(Real *det_sign) const {
Real log_det;
SpMatrix<Real> tmp(*this);
  // false == inverse not needed (saves some computation).
tmp.Invert(&log_det, det_sign, false);
return log_det;
}
template<typename Real>
int SpMatrix<Real>::ApplyFloor(Real floor) {
MatrixIndexT Dim = this->NumRows();
int nfloored = 0;
Vector<Real> s(Dim);
Matrix<Real> P(Dim, Dim);
(*this).Eig(&s, &P);
for (MatrixIndexT i = 0; i < Dim; i++) {
if (s(i) < floor) {
nfloored++;
s(i) = floor;
}
}
(*this).AddMat2Vec(1.0, P, kNoTrans, s, 0.0);
return nfloored;
}
template<typename Real>
MatrixIndexT SpMatrix<Real>::LimitCond(Real maxCond, bool invert) { // e.g. maxCond = 1.0e+05.
MatrixIndexT Dim = this->NumRows();
Vector<Real> s(Dim);
Matrix<Real> P(Dim, Dim);
(*this).SymPosSemiDefEig(&s, &P);
KALDI_ASSERT(maxCond > 1);
Real floor = s.Max() / maxCond;
if (floor < 0) floor = 0;
if (floor < 1.0e-40) {
KALDI_WARN << "LimitCond: limiting " << floor << " to 1.0e-40";
floor = 1.0e-40;
}
MatrixIndexT nfloored = 0;
for (MatrixIndexT i = 0; i < Dim; i++) {
if (s(i) <= floor) nfloored++;
if (invert)
s(i) = 1.0 / std::sqrt(std::max(s(i), floor));
else
s(i) = std::sqrt(std::max(s(i), floor));
}
P.MulColsVec(s);
(*this).AddMat2(1.0, P, kNoTrans, 0.0); // (*this) = P*P^T. ... (*this) = P * floor(s) * P^T ... if P was original P.
return nfloored;
}
void SolverOptions::Check() const {
KALDI_ASSERT(K>10 && eps<1.0e-10);
}
template<> double SolveQuadraticProblem(const SpMatrix<double> &H,
const VectorBase<double> &g,
const SolverOptions &opts,
VectorBase<double> *x) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
opts.Check();
MatrixIndexT dim = x->Dim();
if (H.IsZero(0.0)) {
KALDI_WARN << "Zero quadratic term in quadratic vector problem for "
<< opts.name << ": leaving it unchanged.";
return 0.0;
}
if (opts.diagonal_precondition) {
// We can re-cast the problem with a diagonal preconditioner to
// make H better-conditioned.
Vector<double> H_diag(dim);
H_diag.CopyDiagFromSp(H);
H_diag.ApplyFloor(std::numeric_limits<double>::min() * 1.0E+3);
Vector<double> H_diag_sqrt(H_diag);
H_diag_sqrt.ApplyPow(0.5);
Vector<double> H_diag_inv_sqrt(H_diag_sqrt);
H_diag_inv_sqrt.InvertElements();
Vector<double> x_scaled(*x);
x_scaled.MulElements(H_diag_sqrt);
Vector<double> g_scaled(g);
g_scaled.MulElements(H_diag_inv_sqrt);
SpMatrix<double> H_scaled(dim);
H_scaled.AddVec2Sp(1.0, H_diag_inv_sqrt, H, 0.0);
double ans;
SolverOptions new_opts(opts);
new_opts.diagonal_precondition = false;
ans = SolveQuadraticProblem(H_scaled, g_scaled, new_opts, &x_scaled);
x->CopyFromVec(x_scaled);
x->MulElements(H_diag_inv_sqrt);
return ans;
}
Vector<double> gbar(g);
if (opts.optimize_delta) gbar.AddSpVec(-1.0, H, *x, 1.0); // gbar = g - H x
Matrix<double> U(dim, dim);
Vector<double> l(dim);
H.SymPosSemiDefEig(&l, &U); // does svd H = U L V^T and checks that H == U L U^T to within a tolerance.
// floor l.
double f = std::max(static_cast<double>(opts.eps), l.Max() / opts.K);
MatrixIndexT nfloored = 0;
for (MatrixIndexT i = 0; i < dim; i++) { // floor l.
if (l(i) < f) {
nfloored++;
l(i) = f;
}
}
if (nfloored != 0 && opts.print_debug_output) {
KALDI_LOG << "Solving quadratic problem for " << opts.name
<< ": floored " << nfloored<< " eigenvalues. ";
}
Vector<double> tmp(dim);
tmp.AddMatVec(1.0, U, kTrans, gbar, 0.0); // tmp = U^T \bar{g}
tmp.DivElements(l); // divide each element of tmp by l: tmp = \tilde{L}^{-1} U^T \bar{g}
Vector<double> delta(dim);
delta.AddMatVec(1.0, U, kNoTrans, tmp, 0.0); // delta = U tmp = U \tilde{L}^{-1} U^T \bar{g}
Vector<double> &xhat(tmp);
xhat.CopyFromVec(delta);
if (opts.optimize_delta) xhat.AddVec(1.0, *x); // xhat = x + delta.
double auxf_before = VecVec(g, *x) - 0.5 * VecSpVec(*x, H, *x),
auxf_after = VecVec(g, xhat) - 0.5 * VecSpVec(xhat, H, xhat);
if (auxf_after < auxf_before) { // Reject change.
if (auxf_after < auxf_before - 1.0e-10 && opts.print_debug_output)
KALDI_WARN << "Optimizing vector auxiliary function for "
                 << opts.name << ": auxf decreased " << auxf_before
<< " to " << auxf_after << ", change is "
<< (auxf_after-auxf_before);
return 0.0;
} else {
x->CopyFromVec(xhat);
return auxf_after - auxf_before;
}
}
template<> float SolveQuadraticProblem(const SpMatrix<float> &H,
const VectorBase<float> &g,
const SolverOptions &opts,
VectorBase<float> *x) {
KALDI_ASSERT(H.NumRows() == g.Dim() && g.Dim() == x->Dim() && x->Dim() != 0);
SpMatrix<double> Hd(H);
Vector<double> gd(g);
Vector<double> xd(*x);
float ans = static_cast<float>(SolveQuadraticProblem(Hd, gd, opts, &xd));
x->CopyFromVec(xd);
return ans;
}
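// Usage sketch (exposition only): maximize Q(x) = x^T g - 0.5 x^T H x, a
// numerically safer stand-in for computing x = H^{-1} g directly. Assumes
// SolverOptions has a name-only constructor, as suggested by opts.name above.
/*
  MatrixIndexT dim = 10;
  Matrix<double> R(dim, dim);
  R.SetRandn();
  SpMatrix<double> H(dim);
  H.AddMat2(1.0, R, kNoTrans, 0.0);  // H = R R^T, positive semidefinite.
  Vector<double> g(dim), x(dim);     // x starts at zero; it is also the output.
  g.SetRandn();
  SolverOptions opts("my-update");
  double impr = SolveQuadraticProblem(H, g, opts, &x);
  // impr >= 0 is the increase in Q(x); x now holds the regularized solution.
*/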
// Maximizes the auxiliary function Q(x) = tr(M^T SigmaInv Y) - 0.5 tr(SigmaInv M Q M^T).
// Like a numerically stable version of M := Y Q^{-1}.
template<typename Real>
Real
SolveQuadraticMatrixProblem(const SpMatrix<Real> &Q,
const MatrixBase<Real> &Y,
const SpMatrix<Real> &SigmaInv,
const SolverOptions &opts,
MatrixBase<Real> *M) {
KALDI_ASSERT(Q.NumRows() == M->NumCols() &&
SigmaInv.NumRows() == M->NumRows() && Y.NumRows() == M->NumRows()
&& Y.NumCols() == M->NumCols() && M->NumCols() != 0);
opts.Check();
MatrixIndexT rows = M->NumRows(), cols = M->NumCols();
if (Q.IsZero(0.0)) {
KALDI_WARN << "Zero quadratic term in quadratic matrix problem for "
<< opts.name << ": leaving it unchanged.";
return 0.0;
}
if (opts.diagonal_precondition) {
// We can re-cast the problem with a diagonal preconditioner in the space
// of Q (columns of M). Helps to improve the condition of Q.
Vector<Real> Q_diag(cols);
Q_diag.CopyDiagFromSp(Q);
Q_diag.ApplyFloor(std::numeric_limits<Real>::min() * 1.0E+3);
Vector<Real> Q_diag_sqrt(Q_diag);
Q_diag_sqrt.ApplyPow(0.5);
Vector<Real> Q_diag_inv_sqrt(Q_diag_sqrt);
Q_diag_inv_sqrt.InvertElements();
Matrix<Real> M_scaled(*M);
M_scaled.MulColsVec(Q_diag_sqrt);
Matrix<Real> Y_scaled(Y);
Y_scaled.MulColsVec(Q_diag_inv_sqrt);
SpMatrix<Real> Q_scaled(cols);
Q_scaled.AddVec2Sp(1.0, Q_diag_inv_sqrt, Q, 0.0);
Real ans;
SolverOptions new_opts(opts);
new_opts.diagonal_precondition = false;
ans = SolveQuadraticMatrixProblem(Q_scaled, Y_scaled, SigmaInv,
new_opts, &M_scaled);
M->CopyFromMat(M_scaled);
M->MulColsVec(Q_diag_inv_sqrt);
return ans;
}
Matrix<Real> Ybar(Y);
if (opts.optimize_delta) {
Matrix<Real> Qfull(Q);
Ybar.AddMatMat(-1.0, *M, kNoTrans, Qfull, kNoTrans, 1.0);
} // Ybar = Y - M Q.
Matrix<Real> U(cols, cols);
Vector<Real> l(cols);
Q.SymPosSemiDefEig(&l, &U); // does svd Q = U L V^T and checks that Q == U L U^T to within a tolerance.
// floor l.
Real f = std::max<Real>(static_cast<Real>(opts.eps), l.Max() / opts.K);
MatrixIndexT nfloored = 0;
for (MatrixIndexT i = 0; i < cols; i++) { // floor l.
if (l(i) < f) { nfloored++; l(i) = f; }
}
if (nfloored != 0 && opts.print_debug_output)
KALDI_LOG << "Solving matrix problem for " << opts.name
<< ": floored " << nfloored << " eigenvalues. ";
Matrix<Real> tmpDelta(rows, cols);
tmpDelta.AddMatMat(1.0, Ybar, kNoTrans, U, kNoTrans, 0.0); // tmpDelta = Ybar * U.
l.InvertElements(); KALDI_ASSERT(1.0/l.Max() != 0); // check not infinite. eps should take care of this.
tmpDelta.MulColsVec(l); // tmpDelta = Ybar * U * \tilde{L}^{-1}
Matrix<Real> Delta(rows, cols);
Delta.AddMatMat(1.0, tmpDelta, kNoTrans, U, kTrans, 0.0); // Delta = Ybar * U * \tilde{L}^{-1} * U^T
Real auxf_before, auxf_after;
SpMatrix<Real> MQM(rows);
Matrix<Real> &SigmaInvY(tmpDelta);
{ Matrix<Real> SigmaInvFull(SigmaInv); SigmaInvY.AddMatMat(1.0, SigmaInvFull, kNoTrans, Y, kNoTrans, 0.0); }
{ // get auxf_before. Q(x) = tr(M^T SigmaInv Y) - 0.5 tr(SigmaInv M Q M^T).
MQM.AddMat2Sp(1.0, *M, kNoTrans, Q, 0.0);
auxf_before = TraceMatMat(*M, SigmaInvY, kaldi::kTrans) - 0.5*TraceSpSp(SigmaInv, MQM);
}
Matrix<Real> Mhat(Delta);
if (opts.optimize_delta) Mhat.AddMat(1.0, *M); // Mhat = Delta + M.
{ // get auxf_after.
MQM.AddMat2Sp(1.0, Mhat, kNoTrans, Q, 0.0);
auxf_after = TraceMatMat(Mhat, SigmaInvY, kaldi::kTrans) - 0.5*TraceSpSp(SigmaInv, MQM);
}
if (auxf_after < auxf_before) {
if (auxf_after < auxf_before - 1.0e-10)
KALDI_WARN << "Optimizing matrix auxiliary function for "
<< opts.name << ", auxf decreased "
<< auxf_before << " to " << auxf_after << ", change is "
<< (auxf_after-auxf_before);
return 0.0;
} else {
M->CopyFromMat(Mhat);
return auxf_after - auxf_before;
}
}
template<typename Real>
Real SolveDoubleQuadraticMatrixProblem(const MatrixBase<Real> &G,
const SpMatrix<Real> &P1,
const SpMatrix<Real> &P2,
const SpMatrix<Real> &Q1,
const SpMatrix<Real> &Q2,
const SolverOptions &opts,
MatrixBase<Real> *M) {
KALDI_ASSERT(Q1.NumRows() == M->NumCols() && P1.NumRows() == M->NumRows() &&
G.NumRows() == M->NumRows() && G.NumCols() == M->NumCols() &&
M->NumCols() != 0 && Q2.NumRows() == M->NumCols() &&
P2.NumRows() == M->NumRows());
MatrixIndexT rows = M->NumRows(), cols = M->NumCols();
  // The following check should not fail, as we stipulate that P1, P2 and one
  // of Q1, Q2 must be +ve def, and the other of Q1, Q2 must be +ve semidef.
TpMatrix<Real> LInv(rows);
LInv.Cholesky(P1);
LInv.Invert(); // Will throw exception if fails.
SpMatrix<Real> S(rows);
Matrix<Real> LInvFull(LInv);
S.AddMat2Sp(1.0, LInvFull, kNoTrans, P2, 0.0); // S := L^{-1} P_2 L^{-T}
Matrix<Real> U(rows, rows);
Vector<Real> d(rows);
S.SymPosSemiDefEig(&d, &U);
Matrix<Real> T(rows, rows);
T.AddMatMat(1.0, U, kTrans, LInvFull, kNoTrans, 0.0); // T := U^T * L^{-1}
#ifdef KALDI_PARANOID // checking mainly for errors in the code or math.
{
SpMatrix<Real> P1Trans(rows);
P1Trans.AddMat2Sp(1.0, T, kNoTrans, P1, 0.0);
KALDI_ASSERT(P1Trans.IsUnit(0.01));
}
{
SpMatrix<Real> P2Trans(rows);
P2Trans.AddMat2Sp(1.0, T, kNoTrans, P2, 0.0);
KALDI_ASSERT(P2Trans.IsDiagonal(0.01));
}
#endif
Matrix<Real> TInv(T);
TInv.Invert();
Matrix<Real> Gdash(rows, cols);
Gdash.AddMatMat(1.0, T, kNoTrans, G, kNoTrans, 0.0); // G' = T G
Matrix<Real> MdashOld(rows, cols);
MdashOld.AddMatMat(1.0, TInv, kTrans, *M, kNoTrans, 0.0); // M' = T^{-T} M
Matrix<Real> MdashNew(MdashOld);
Real objf_impr = 0.0;
for (MatrixIndexT n = 0; n < rows; n++) {
SpMatrix<Real> Qsum(Q1);
Qsum.AddSp(d(n), Q2);
SubVector<Real> mdash_n = MdashNew.Row(n);
SubVector<Real> gdash_n = Gdash.Row(n);
Matrix<Real> QsumInv(Qsum);
try {
QsumInv.Invert();
Real old_objf = VecVec(mdash_n, gdash_n)
- 0.5 * VecSpVec(mdash_n, Qsum, mdash_n);
mdash_n.AddMatVec(1.0, QsumInv, kNoTrans, gdash_n, 0.0); // m'_n := g'_n * (Q_1 + d_n Q_2)^{-1}
Real new_objf = VecVec(mdash_n, gdash_n)
- 0.5 * VecSpVec(mdash_n, Qsum, mdash_n);
if (new_objf < old_objf) {
if (new_objf < old_objf - 1.0e-05) {
KALDI_WARN << "In double quadratic matrix problem: objective "
"function decreasing during optimization of " << opts.name
<< ", " << old_objf << "->" << new_objf << ", change is "
<< (new_objf - old_objf);
KALDI_ERR << "Auxiliary function decreasing."; // Will be caught.
} else { // Reset to old value, didn't improve (very close to optimum).
MdashNew.Row(n).CopyFromVec(MdashOld.Row(n));
}
}
objf_impr += new_objf - old_objf;
}
catch (...) {
KALDI_WARN << "Matrix inversion or optimization failed during double "
"quadratic problem, solving for" << opts.name
<< ": trying more stable approach.";
objf_impr += SolveQuadraticProblem(Qsum, gdash_n, opts, &mdash_n);
}
}
M->AddMatMat(1.0, T, kTrans, MdashNew, kNoTrans, 0.0); // M := T^T M'.
return objf_impr;
}
// rank-one update, this <-- this + alpha V V'
template<>
template<>
void SpMatrix<float>::AddVec2(const float alpha, const VectorBase<float> &v) {
KALDI_ASSERT(v.Dim() == this->NumRows());
cblas_Xspr(v.Dim(), alpha, v.Data(), 1,
this->data_);
}
template<class Real>
void SpMatrix<Real>::AddVec2Sp(const Real alpha, const VectorBase<Real> &v,
const SpMatrix<Real> &S, const Real beta) {
KALDI_ASSERT(v.Dim() == this->NumRows() && S.NumRows() == this->NumRows());
const Real *Sdata = S.Data();
const Real *vdata = v.Data();
Real *data = this->data_;
MatrixIndexT dim = this->num_rows_;
for (MatrixIndexT r = 0; r < dim; r++)
for (MatrixIndexT c = 0; c <= r; c++, Sdata++, data++)
*data = beta * *data + alpha * vdata[r] * vdata[c] * *Sdata;
}
// rank-one update, this <-- this + alpha V V'
template<>
template<>
void SpMatrix<double>::AddVec2(const double alpha, const VectorBase<double> &v) {
KALDI_ASSERT(v.Dim() == num_rows_);
cblas_Xspr(v.Dim(), alpha, v.Data(), 1, data_);
}
template<typename Real>
template<typename OtherReal>
void SpMatrix<Real>::AddVec2(const Real alpha, const VectorBase<OtherReal> &v) {
KALDI_ASSERT(v.Dim() == this->NumRows());
Real *data = this->data_;
const OtherReal *v_data = v.Data();
MatrixIndexT nr = this->num_rows_;
for (MatrixIndexT i = 0; i < nr; i++)
for (MatrixIndexT j = 0; j <= i; j++, data++)
*data += alpha * v_data[i] * v_data[j];
}
// instantiate the template above.
template
void SpMatrix<float>::AddVec2(const float alpha, const VectorBase<double> &v);
template
void SpMatrix<double>::AddVec2(const double alpha, const VectorBase<float> &v);
template<typename Real>
Real VecSpVec(const VectorBase<Real> &v1, const SpMatrix<Real> &M,
const VectorBase<Real> &v2) {
MatrixIndexT D = M.NumRows();
KALDI_ASSERT(v1.Dim() == D && v1.Dim() == v2.Dim());
Vector<Real> tmp_vec(D);
cblas_Xspmv(D, 1.0, M.Data(), v1.Data(), 1, 0.0, tmp_vec.Data(), 1);
return VecVec(tmp_vec, v2);
}
template
float VecSpVec(const VectorBase<float> &v1, const SpMatrix<float> &M,
const VectorBase<float> &v2);
template
double VecSpVec(const VectorBase<double> &v1, const SpMatrix<double> &M,
const VectorBase<double> &v2);
template<typename Real>
void SpMatrix<Real>::AddMat2Sp(
const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transM, const SpMatrix<Real> &A, const Real beta) {
if (transM == kNoTrans) {
KALDI_ASSERT(M.NumCols() == A.NumRows() && M.NumRows() == this->num_rows_);
} else {
KALDI_ASSERT(M.NumRows() == A.NumRows() && M.NumCols() == this->num_rows_);
}
Vector<Real> tmp_vec(A.NumRows());
Real *tmp_vec_data = tmp_vec.Data();
SpMatrix<Real> tmp_A;
const Real *p_A_data = A.Data();
Real *p_row_data = this->Data();
MatrixIndexT M_other_dim = (transM == kNoTrans ? M.NumCols() : M.NumRows()),
M_same_dim = (transM == kNoTrans ? M.NumRows() : M.NumCols()),
M_stride = M.Stride(), dim = this->NumRows();
KALDI_ASSERT(M_same_dim == dim);
const Real *M_data = M.Data();
if (this->Data() <= A.Data() + A.SizeInBytes() &&
this->Data() + this->SizeInBytes() >= A.Data()) {
// Matrices A and *this overlap. Make copy of A
tmp_A.Resize(A.NumRows());
tmp_A.CopyFromSp(A);
p_A_data = tmp_A.Data();
}
if (transM == kNoTrans) {
for (MatrixIndexT r = 0; r < dim; r++, p_row_data += r) {
cblas_Xspmv(A.NumRows(), 1.0, p_A_data, M.RowData(r), 1, 0.0, tmp_vec_data, 1);
cblas_Xgemv(transM, r+1, M_other_dim, alpha, M_data, M_stride,
tmp_vec_data, 1, beta, p_row_data, 1);
}
} else {
for (MatrixIndexT r = 0; r < dim; r++, p_row_data += r) {
cblas_Xspmv(A.NumRows(), 1.0, p_A_data, M.Data() + r, M.Stride(), 0.0, tmp_vec_data, 1);
cblas_Xgemv(transM, M_other_dim, r+1, alpha, M_data, M_stride,
tmp_vec_data, 1, beta, p_row_data, 1);
}
}
}
template<typename Real>
void SpMatrix<Real>::AddSmat2Sp(
const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transM, const SpMatrix<Real> &A,
const Real beta) {
KALDI_ASSERT((transM == kNoTrans && M.NumCols() == A.NumRows()) ||
(transM == kTrans && M.NumRows() == A.NumRows()));
if (transM == kNoTrans) {
KALDI_ASSERT(M.NumCols() == A.NumRows() && M.NumRows() == this->num_rows_);
} else {
KALDI_ASSERT(M.NumRows() == A.NumRows() && M.NumCols() == this->num_rows_);
}
MatrixIndexT Adim = A.NumRows(), dim = this->num_rows_;
Matrix<Real> temp_A(A); // represent A as full matrix.
Matrix<Real> temp_MA(dim, Adim);
temp_MA.AddSmatMat(1.0, M, transM, temp_A, kNoTrans, 0.0);
// Next-- we want to do *this = alpha * temp_MA * M^T + beta * *this.
// To make it sparse vector multiplies, since M is sparse, we'd like
// to do: for each column c, (*this column c) += temp_MA * (M^T's column c.)
// [ignoring the alpha and beta here.]
// It's not convenient to process columns in the symmetric
// packed format because they don't have a constant stride. However,
// we can use the fact that temp_MA * M is symmetric, to just assign
// each row of *this instead of each column.
// So the final iteration is:
// for i = 0... dim-1,
// [the i'th row of *this] = beta * [the i'th row of *this] + alpha *
// temp_MA * [the i'th column of M].
// Of course, we only process the first 0 ... i elements of this row,
// as that's all that are kept in the symmetric packed format.
Matrix<Real> temp_this(*this);
Real *data = this->data_;
const Real *Mdata = M.Data(), *MAdata = temp_MA.Data();
MatrixIndexT temp_MA_stride = temp_MA.Stride(), Mstride = M.Stride();
if (transM == kNoTrans) {
// The column of M^T corresponds to the rows of the supplied matrix.
for (MatrixIndexT i = 0; i < dim; i++, data += i) {
MatrixIndexT num_rows = i + 1, num_cols = Adim;
Xgemv_sparsevec(kNoTrans, num_rows, num_cols, alpha, MAdata,
temp_MA_stride, Mdata + (i * Mstride), 1, beta, data, 1);
}
} else {
// The column of M^T corresponds to the columns of the supplied matrix.
for (MatrixIndexT i = 0; i < dim; i++, data += i) {
MatrixIndexT num_rows = i + 1, num_cols = Adim;
Xgemv_sparsevec(kNoTrans, num_rows, num_cols, alpha, MAdata,
temp_MA_stride, Mdata + i, Mstride, beta, data, 1);
}
}
}
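// [Editor's sketch -- illustrative only.] AddSmat2Sp below is an
// algebraically identical specialization of AddMat2Sp for sparse M, so on any
// input the two should agree to rounding error; a hypothetical spot-check:
#if 0
void ExampleAddSmat2Sp() {
  Matrix<double> M(4, 5);
  M.SetRandn();  // in practice M would be mostly zeros for this to pay off
  SpMatrix<double> A(5);
  A.SetRandn();
  SpMatrix<double> S1(4), S2(4);
  S1.AddMat2Sp(1.0, M, kNoTrans, A, 0.0);   // S1 = M A M^T
  S2.AddSmat2Sp(1.0, M, kNoTrans, A, 0.0);  // same result, sparse-friendly path
  KALDI_ASSERT(S1.ApproxEqual(S2, 1e-6));
}
#endif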
template<typename Real>
void SpMatrix<Real>::AddMat2Vec(const Real alpha,
const MatrixBase<Real> &M,
MatrixTransposeType transM,
const VectorBase<Real> &v,
const Real beta) {
this->Scale(beta);
KALDI_ASSERT((transM == kNoTrans && this->NumRows() == M.NumRows() &&
M.NumCols() == v.Dim()) ||
(transM == kTrans && this->NumRows() == M.NumCols() &&
M.NumRows() == v.Dim()));
if (transM == kNoTrans) {
const Real *Mdata = M.Data(), *vdata = v.Data();
Real *data = this->data_;
MatrixIndexT dim = this->NumRows(), mcols = M.NumCols(),
mstride = M.Stride();
for (MatrixIndexT col = 0; col < mcols; col++, vdata++, Mdata += 1)
cblas_Xspr(dim, *vdata*alpha, Mdata, mstride, data);
} else {
const Real *Mdata = M.Data(), *vdata = v.Data();
Real *data = this->data_;
MatrixIndexT dim = this->NumRows(), mrows = M.NumRows(),
mstride = M.Stride();
for (MatrixIndexT row = 0; row < mrows; row++, vdata++, Mdata += mstride)
cblas_Xspr(dim, *vdata*alpha, Mdata, 1, data);
}
}
template<typename Real>
void SpMatrix<Real>::AddMat2(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transM, const Real beta) {
KALDI_ASSERT((transM == kNoTrans && this->NumRows() == M.NumRows())
|| (transM == kTrans && this->NumRows() == M.NumCols()));
// Cblas has no function *sprk (i.e. symmetric packed rank-k update), so we
// use a regular matrix as temporary storage, of which we only access the
// lower triangle.
MatrixIndexT this_dim = this->NumRows(),
m_other_dim = (transM == kNoTrans ? M.NumCols() : M.NumRows());
if (this_dim == 0) return;
if (alpha == 0.0) {
if (beta != 1.0) this->Scale(beta);
return;
}
Matrix<Real> temp_mat(*this); // wastefully copies upper triangle too, but this
// doesn't dominate O(N) time.
// This function call is hard-coded to update the lower triangle.
cblas_Xsyrk(transM, this_dim, m_other_dim, alpha, M.Data(),
M.Stride(), beta, temp_mat.Data(), temp_mat.Stride());
this->CopyFromMat(temp_mat, kTakeLower);
}
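// [Editor's sketch -- illustrative only.] The rank-N update above computes
// S = beta*S + alpha * M * M^T (or M^T * M for kTrans); a hypothetical check
// against an explicit full-matrix product:
#if 0
void ExampleAddMat2() {
  Matrix<double> M(3, 7);
  M.SetRandn();
  SpMatrix<double> S(3);
  S.AddMat2(1.0, M, kNoTrans, 0.0);      // S = M * M^T
  Matrix<double> P(3, 3);
  P.AddMatMat(1.0, M, kNoTrans, M, kTrans, 0.0);
  SpMatrix<double> S2(P, kTakeLower);    // P is symmetric already
  KALDI_ASSERT(S.ApproxEqual(S2, 1e-6));
}
#endif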
template<typename Real>
void SpMatrix<Real>::AddTp2Sp(const Real alpha, const TpMatrix<Real> &T,
MatrixTransposeType transM, const SpMatrix<Real> &A,
const Real beta) {
Matrix<Real> Tmat(T);
AddMat2Sp(alpha, Tmat, transM, A, beta);
}
template<typename Real>
void SpMatrix<Real>::AddVecVec(const Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &w) {
int32 dim = this->NumRows();
KALDI_ASSERT(dim == v.Dim() && dim == w.Dim() && dim > 0);
cblas_Xspr2(dim, alpha, v.Data(), 1, w.Data(), 1, this->data_);
}
template<typename Real>
void SpMatrix<Real>::AddTp2(const Real alpha, const TpMatrix<Real> &T,
MatrixTransposeType transM, const Real beta) {
Matrix<Real> Tmat(T);
AddMat2(alpha, Tmat, transM, beta);
}
// Explicit instantiation of the class.
// This needs to be after the definition of all the class member functions.
template class SpMatrix<float>;
template class SpMatrix<double>;
template<typename Real>
Real TraceSpSpLower(const SpMatrix<Real> &A, const SpMatrix<Real> &B) {
MatrixIndexT adim = A.NumRows();
KALDI_ASSERT(adim == B.NumRows());
MatrixIndexT dim = (adim*(adim+1))/2;
return cblas_Xdot(dim, A.Data(), 1, B.Data(), 1);
}
// Instantiate the template above.
template
double TraceSpSpLower(const SpMatrix<double> &A, const SpMatrix<double> &B);
template
float TraceSpSpLower(const SpMatrix<float> &A, const SpMatrix<float> &B);
// Instantiate the template above.
template float SolveQuadraticMatrixProblem(const SpMatrix<float> &Q,
const MatrixBase<float> &Y,
const SpMatrix<float> &SigmaInv,
const SolverOptions &opts,
MatrixBase<float> *M);
template double SolveQuadraticMatrixProblem(const SpMatrix<double> &Q,
const MatrixBase<double> &Y,
const SpMatrix<double> &SigmaInv,
const SolverOptions &opts,
MatrixBase<double> *M);
// Instantiate the template above.
template float SolveDoubleQuadraticMatrixProblem(
const MatrixBase<float> &G,
const SpMatrix<float> &P1,
const SpMatrix<float> &P2,
const SpMatrix<float> &Q1,
const SpMatrix<float> &Q2,
const SolverOptions &opts,
MatrixBase<float> *M);
template double SolveDoubleQuadraticMatrixProblem(
const MatrixBase<double> &G,
const SpMatrix<double> &P1,
const SpMatrix<double> &P2,
const SpMatrix<double> &Q1,
const SpMatrix<double> &Q2,
const SolverOptions &opts,
MatrixBase<double> *M);
} // namespace kaldi
// matrix/sp-matrix.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Lukas Burget;
// Saarland University; Ariya Rastrow; Yanmin Qian;
// Jan Silovsky
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_SP_MATRIX_H_
#define KALDI_MATRIX_SP_MATRIX_H_
#include <algorithm>
#include <vector>
#include "matrix/packed-matrix.h"
namespace kaldi {
/// \addtogroup matrix_group
/// @{
template<typename Real> class SpMatrix;
/**
 * @brief Packed symmetric matrix class
*/
template<typename Real>
class SpMatrix : public PackedMatrix<Real> {
friend class CuSpMatrix<Real>;
public:
// so it can use our assignment operator.
friend class std::vector<Matrix<Real> >;
SpMatrix(): PackedMatrix<Real>() {}
/// Copy constructor from CUDA version of SpMatrix
/// This is defined in ../cudamatrix/cu-sp-matrix.h
explicit SpMatrix(const CuSpMatrix<Real> &cu);
explicit SpMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero)
: PackedMatrix<Real>(r, resize_type) {}
SpMatrix(const SpMatrix<Real> &orig)
: PackedMatrix<Real>(orig) {}
template<typename OtherReal>
explicit SpMatrix(const SpMatrix<OtherReal> &orig)
: PackedMatrix<Real>(orig) {}
#ifdef KALDI_PARANOID
explicit SpMatrix(const MatrixBase<Real> & orig,
SpCopyType copy_type = kTakeMeanAndCheck)
: PackedMatrix<Real>(orig.NumRows(), kUndefined) {
CopyFromMat(orig, copy_type);
}
#else
explicit SpMatrix(const MatrixBase<Real> & orig,
SpCopyType copy_type = kTakeMean)
: PackedMatrix<Real>(orig.NumRows(), kUndefined) {
CopyFromMat(orig, copy_type);
}
#endif
/// Shallow swap.
void Swap(SpMatrix *other);
inline void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero) {
PackedMatrix<Real>::Resize(nRows, resize_type);
}
void CopyFromSp(const SpMatrix<Real> &other) {
PackedMatrix<Real>::CopyFromPacked(other);
}
template<typename OtherReal>
void CopyFromSp(const SpMatrix<OtherReal> &other) {
PackedMatrix<Real>::CopyFromPacked(other);
}
#ifdef KALDI_PARANOID
void CopyFromMat(const MatrixBase<Real> &orig,
SpCopyType copy_type = kTakeMeanAndCheck);
#else // different default arg if non-paranoid mode.
void CopyFromMat(const MatrixBase<Real> &orig,
SpCopyType copy_type = kTakeMean);
#endif
inline Real operator() (MatrixIndexT r, MatrixIndexT c) const {
// if the column index is greater than the row index, swap them, since
// only one triangle is stored... only allowed for const matrix object.
if (static_cast<UnsignedMatrixIndexT>(c) >
static_cast<UnsignedMatrixIndexT>(r))
std::swap(c, r);
// c<=r now so don't have to check c.
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(this->num_rows_));
return *(this->data_ + (r*(r+1)) / 2 + c);
// Duplicating code from PackedMatrix.h
}
inline Real &operator() (MatrixIndexT r, MatrixIndexT c) {
if (static_cast<UnsignedMatrixIndexT>(c) >
static_cast<UnsignedMatrixIndexT>(r))
std::swap(c, r);
// c<=r now so don't have to check c.
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(this->num_rows_));
return *(this->data_ + (r * (r + 1)) / 2 + c);
// Duplicating code from PackedMatrix.h
}
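// [Editor's note -- illustrative only.] For c <= r, element (r, c) lives at
// flat offset r*(r+1)/2 + c, i.e. the lower triangle laid out row by row; for
// a 3x3 matrix the packed order is (0,0), (1,0), (1,1), (2,0), (2,1), (2,2).
// A hypothetical helper showing the same mapping as the operators above:
#if 0
static inline MatrixIndexT PackedIndex(MatrixIndexT r, MatrixIndexT c) {
  if (c > r) std::swap(r, c);    // symmetry: (r,c) and (c,r) share storage
  return (r * (r + 1)) / 2 + c;  // triangular-number offset of row r, plus c
}
#endif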
SpMatrix<Real>& operator=(const SpMatrix<Real> &other) {
PackedMatrix<Real>::operator=(other);
return *this;
}
using PackedMatrix<Real>::Scale;
/// matrix inverse.
/// if inverse_needed = false, will fill matrix with garbage.
/// (only useful if logdet wanted).
void Invert(Real *logdet = NULL, Real *det_sign= NULL,
bool inverse_needed = true);
// Below routine does inversion in double precision,
// even for single-precision object.
void InvertDouble(Real *logdet = NULL, Real *det_sign = NULL,
bool inverse_needed = true);
/// Returns maximum ratio of singular values.
inline Real Cond() const {
Matrix<Real> tmp(*this);
return tmp.Cond();
}
/// Takes matrix to a fraction power via Svd.
/// Will throw exception if matrix is not positive semidefinite
/// (to within a tolerance)
void ApplyPow(Real exponent);
/// This is the version of SVD that we implement for symmetric positive
/// definite matrices. This exists for historical reasons; right now its
/// internal implementation is the same as Eig(). It computes the eigenvalue
/// decomposition (*this) = P * diag(s) * P^T with P orthogonal. Will throw
/// exception if input is not positive semidefinite to within a tolerance.
void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P,
Real tolerance = 0.001) const;
/// Solves the symmetric eigenvalue problem: at end we should have (*this) = P
/// * diag(s) * P^T. We solve the problem using the symmetric QR method.
/// P may be NULL.
/// Implemented in qr.cc.
/// If you need the eigenvalues sorted, the function SortSvd declared in
/// kaldi-matrix is suitable.
void Eig(VectorBase<Real> *s, MatrixBase<Real> *P = NULL) const;
/// This function gives you, approximately, the largest eigenvalues of the
/// symmetric matrix and the corresponding eigenvectors. (largest meaning,
/// further from zero). It does this by doing a SVD within the Krylov
/// subspace generated by this matrix and a random vector. This is
/// a form of the Lanczos method with complete reorthogonalization, followed
/// by SVD within a smaller dimension ("lanczos_dim").
///
/// If *this is m by m, s should be of dimension n and P should be of
/// dimension m by n, with n <= m. The *columns* of P are the approximate
/// eigenvectors; P * diag(s) * P^T would be a low-rank reconstruction of
/// *this. The columns of P will be orthogonal, and the elements of s will be
/// the eigenvalues of *this projected into that subspace, but beyond that
/// there are no exact guarantees. (This is because the convergence of this
/// method is statistical). Note: it only makes sense to use this
/// method if you are in very high dimension and n is substantially smaller
/// than m: for example, if you want the 100 top eigenvalues of a 10k by 10k
/// matrix. This function calls Rand() to initialize the lanczos
/// iterations and also for restarting.
/// If lanczos_dim is zero, it will default to the greater of:
/// s->Dim() + 50 or s->Dim() + s->Dim()/2, but not more than this->Dim().
/// If lanczos_dim == this->Dim(), you might as well just call the function
/// Eig() since the result will be the same, and Eig() would be faster; the
/// whole point of this function is to reduce the dimension of the SVD
/// computation.
void TopEigs(VectorBase<Real> *s, MatrixBase<Real> *P,
MatrixIndexT lanczos_dim = 0) const;
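// [Editor's sketch -- illustrative, hypothetical usage.] E.g., to get
// roughly the top 100 eigenpairs of a large S, following the dimensions
// described above:
#if 0
void ExampleTopEigs(const SpMatrix<double> &S) {  // say S is 10000 x 10000
  Vector<double> s(100);                 // approximate top eigenvalues
  Matrix<double> P(S.NumRows(), 100);    // columns: approximate eigenvectors
  S.TopEigs(&s, &P);                     // lanczos_dim left at its default
  // P * diag(s) * P^T is now a rank-100 approximation of S.
}
#endif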
/// Returns the maximum of the absolute values of any of the
/// eigenvalues.
Real MaxAbsEig() const;
void PrintEigs(const char *name) {
Vector<Real> s((*this).NumRows());
Matrix<Real> P((*this).NumRows(), (*this).NumCols());
SymPosSemiDefEig(&s, &P);
KALDI_LOG << "PrintEigs: " << name << ": " << s;
}
bool IsPosDef() const; // returns true if Cholesky succeeds.
void AddSp(const Real alpha, const SpMatrix<Real> &Ma) {
this->AddPacked(alpha, Ma);
}
/// Computes log determinant but only for +ve-def matrices
/// (it uses Cholesky).
/// If matrix is not +ve-def, it will throw an exception.
/// (This function was formerly named LogPDDeterminant().)
Real LogPosDefDet() const;
Real LogDet(Real *det_sign = NULL) const;
/// rank-one update, this <-- this + alpha v v'
template<typename OtherReal>
void AddVec2(const Real alpha, const VectorBase<OtherReal> &v);
/// rank-two update, this <-- this + alpha (v w' + w v').
void AddVecVec(const Real alpha, const VectorBase<Real> &v,
const VectorBase<Real> &w);
/// Does *this = beta * *this + alpha * diag(v) * S * diag(v)
void AddVec2Sp(const Real alpha, const VectorBase<Real> &v,
const SpMatrix<Real> &S, const Real beta);
/// diagonal update, this <-- this + diag(v)
template<typename OtherReal>
void AddDiagVec(const Real alpha, const VectorBase<OtherReal> &v);
/// rank-N update:
/// if (transM == kNoTrans)
/// (*this) = beta*(*this) + alpha * M * M^T,
/// or (if transM == kTrans)
/// (*this) = beta*(*this) + alpha * M^T * M
/// Note: beta used to default to 0.0.
void AddMat2(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transM, const Real beta);
/// Extension of rank-N update:
/// this <-- beta*this + alpha * M * A * M^T.
/// (*this) and A are allowed to be the same.
/// If transM == kTrans, then we do it as M^T * A * M.
void AddMat2Sp(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transM, const SpMatrix<Real> &A,
const Real beta = 0.0);
/// This is a version of AddMat2Sp specialized for when M is fairly sparse.
/// This was required for making the raw-fMLLR code efficient.
void AddSmat2Sp(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transM, const SpMatrix<Real> &A,
const Real beta = 0.0);
/// The following function does:
/// this <-- beta*this + alpha * T * A * T^T.
/// (*this) and A are allowed to be the same.
/// If transM == kTrans, then we do it as alpha * T^T * A * T.
/// Currently it just calls AddMat2Sp, but if needed we
/// can implement it more efficiently.
void AddTp2Sp(const Real alpha, const TpMatrix<Real> &T,
MatrixTransposeType transM, const SpMatrix<Real> &A,
const Real beta = 0.0);
/// The following function does:
/// this <-- beta*this + alpha * T * T^T.
/// If transM == kTrans, then we do it as alpha * T^T * T
/// Currently it just calls AddMat2, but if needed we
/// can implement it more efficiently.
void AddTp2(const Real alpha, const TpMatrix<Real> &T,
MatrixTransposeType transM, const Real beta = 0.0);
/// Extension of rank-N update:
/// this <-- beta*this + alpha * M * diag(v) * M^T.
/// if transM == kTrans, then
/// this <-- beta*this + alpha * M^T * diag(v) * M.
void AddMat2Vec(const Real alpha, const MatrixBase<Real> &M,
MatrixTransposeType transM, const VectorBase<Real> &v,
const Real beta = 0.0);
/// Floors this symmetric matrix to the matrix
/// alpha * Floor, where the matrix Floor is positive
/// definite.
/// It is floored in the sense that after flooring,
/// x^T (*this) x >= x^T (alpha*Floor) x.
/// This is accomplished using an Svd. It will crash
/// if Floor is not positive definite. Returns the number of
/// elements that were floored.
int ApplyFloor(const SpMatrix<Real> &Floor, Real alpha = 1.0,
bool verbose = false);
/// Floor: Given a positive semidefinite matrix, floors the eigenvalues
/// to the specified quantity. A previous version of this function had
/// a tolerance which is now no longer needed since we have code to
/// do the symmetric eigenvalue decomposition and no longer use the SVD
/// code for that purpose.
int ApplyFloor(Real floor);
bool IsDiagonal(Real cutoff = 1.0e-05) const;
bool IsUnit(Real cutoff = 1.0e-05) const;
bool IsZero(Real cutoff = 1.0e-05) const;
bool IsTridiagonal(Real cutoff = 1.0e-05) const;
/// sqrt of sum of square elements.
Real FrobeniusNorm() const;
/// Returns true if ((*this)-other).FrobeniusNorm() <=
/// tol*(*this).FrobeniusNorm()
bool ApproxEqual(const SpMatrix<Real> &other, float tol = 0.01) const;
// LimitCond:
// Limits the condition of symmetric positive semidefinite matrix to
// a specified value
// by flooring all eigenvalues to a positive number which is some multiple
// of the largest one (or zero if there are no positive eigenvalues).
// Takes the condition number we are willing to accept, and floors
// eigenvalues to the largest eigenvalue divided by this.
// Returns #eigs floored or already equal to the floor.
// Throws exception if input is not positive definite.
// returns #floored.
MatrixIndexT LimitCond(Real maxCond = 1.0e+5, bool invert = false);
// as LimitCond but all done in double precision. // returns #floored.
MatrixIndexT LimitCondDouble(Real maxCond = 1.0e+5, bool invert = false) {
SpMatrix<double> dmat(*this);
MatrixIndexT ans = dmat.LimitCond(maxCond, invert);
(*this).CopyFromSp(dmat);
return ans;
}
Real Trace() const;
/// Tridiagonalize the matrix with an orthogonal transformation. If
/// *this starts as S, produce T (and Q, if non-NULL) such that
/// T = Q S Q^T, i.e. S = Q^T T Q. Caution: this is the other way
/// round from most authors (it's more efficient in row-major indexing).
void Tridiagonalize(MatrixBase<Real> *Q);
/// The symmetric QR algorithm. This will mostly be useful in internal code.
/// Typically, you will call this after Tridiagonalize(), on the same object.
/// When called, *this (call it A at this point) must be tridiagonal; at exit,
/// *this will be a diagonal matrix D that is similar to A via orthogonal
/// transformations. This algorithm right-multiplies Q by orthogonal
/// transformations. It turns *this from a tridiagonal into a diagonal matrix
/// while maintaining that (Q *this Q^T) has the same value at entry and exit.
/// At entry Q should probably be either NULL or orthogonal, but we don't check
/// this.
void Qr(MatrixBase<Real> *Q);
private:
void EigInternal(VectorBase<Real> *s, MatrixBase<Real> *P,
Real tolerance, int recurse) const;
};
/// @} end of "addtogroup matrix_group"
/// \addtogroup matrix_funcs_scalar
/// @{
/// Returns tr(A B).
float TraceSpSp(const SpMatrix<float> &A, const SpMatrix<float> &B);
double TraceSpSp(const SpMatrix<double> &A, const SpMatrix<double> &B);
template<typename Real>
inline bool ApproxEqual(const SpMatrix<Real> &A,
const SpMatrix<Real> &B, Real tol = 0.01) {
return A.ApproxEqual(B, tol);
}
template<typename Real>
inline void AssertEqual(const SpMatrix<Real> &A,
const SpMatrix<Real> &B, Real tol = 0.01) {
KALDI_ASSERT(ApproxEqual(A, B, tol));
}
/// Returns tr(A B).
template<typename Real, typename OtherReal>
Real TraceSpSp(const SpMatrix<Real> &A, const SpMatrix<OtherReal> &B);
// TraceSpSpLower is the same as Trace(A B) except the lower-diagonal elements
// are counted only once not twice as they should be. It is useful in certain
// optimizations.
template<typename Real>
Real TraceSpSpLower(const SpMatrix<Real> &A, const SpMatrix<Real> &B);
/// Returns tr(A B).
/// No option to transpose B because it would make no difference.
template<typename Real>
Real TraceSpMat(const SpMatrix<Real> &A, const MatrixBase<Real> &B);
/// Returns tr(A B C)
/// (A and C may be transposed as specified by transA and transC).
template<typename Real>
Real TraceMatSpMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
const SpMatrix<Real> &B, const MatrixBase<Real> &C,
MatrixTransposeType transC);
/// Returns tr (A B C D)
/// (A and C may be transposed as specified by transA and transC).
template<typename Real>
Real TraceMatSpMatSp(const MatrixBase<Real> &A, MatrixTransposeType transA,
const SpMatrix<Real> &B, const MatrixBase<Real> &C,
MatrixTransposeType transC, const SpMatrix<Real> &D);
/// Returns \f$ v_1^T M v_2 \f$.
/// Not as efficient as it could be where v1 == v2
/// (but no suitable blas routines are available).
template<typename Real>
Real VecSpVec(const VectorBase<Real> &v1, const SpMatrix<Real> &M,
const VectorBase<Real> &v2);
/// @} \addtogroup matrix_funcs_scalar
/// \addtogroup matrix_funcs_misc
/// @{
/// This class describes the options for maximizing various quadratic objective
/// functions. It's mostly as described in the SGMM paper "the subspace
/// Gaussian mixture model -- a structured model for speech recognition", but
/// the diagonal_precondition option is newly added, to handle problems where
/// different dimensions have very different scaling (we recommend to use the
/// option but it's set false for backward compatibility).
struct SolverOptions {
BaseFloat K; // maximum condition number
BaseFloat eps;
std::string name;
bool optimize_delta;
bool diagonal_precondition;
bool print_debug_output;
explicit SolverOptions(const std::string &name):
K(1.0e+4), eps(1.0e-40), name(name),
optimize_delta(true), diagonal_precondition(false),
print_debug_output(true) { }
SolverOptions(): K(1.0e+4), eps(1.0e-40), name("[unknown]"),
optimize_delta(true), diagonal_precondition(false),
print_debug_output(true) { }
void Check() const;
};
/// Maximizes the auxiliary function
/// \f[ Q(x) = x.g - 0.5 x^T H x \f]
/// using a numerically stable method. Like a numerically stable version of
/// \f$ x := Q^{-1} g. \f$
/// Assumes H positive semidefinite.
/// Returns the objective-function change.
template<typename Real>
Real SolveQuadraticProblem(const SpMatrix<Real> &H,
const VectorBase<Real> &g,
const SolverOptions &opts,
VectorBase<Real> *x);
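// [Editor's sketch -- illustrative, hypothetical usage.] Maximizing
// Q(x) = x.g - 0.5 x^T H x for given H and g:
#if 0
void ExampleSolveQuadraticProblem(const SpMatrix<double> &H,
                                  const Vector<double> &g) {
  Vector<double> x(g.Dim());          // current value; overwritten with argmax
  SolverOptions opts("example");      // the name appears in debug output
  opts.diagonal_precondition = true;  // recommended when dims scale unevenly
  double objf_impr = SolveQuadraticProblem(H, g, opts, &x);
  KALDI_LOG << "Objective-function improvement: " << objf_impr;
}
#endif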
/// Maximizes the auxiliary function :
/// \f[ Q(x) = tr(M^T P Y) - 0.5 tr(P M Q M^T) \f]
/// Like a numerically stable version of \f$ M := Y Q^{-1} \f$.
/// Assumes Q and P positive semidefinite, and matrix dimensions match
/// enough to make expressions meaningful.
/// This is mostly as described in the SGMM paper "the subspace Gaussian mixture
/// model -- a structured model for speech recognition", but the
/// diagonal_precondition option is newly added, to handle problems
/// where different dimensions have very different scaling (we recommend to use
/// the option but it's set false for backward compatibility).
template<typename Real>
Real SolveQuadraticMatrixProblem(const SpMatrix<Real> &Q,
const MatrixBase<Real> &Y,
const SpMatrix<Real> &P,
const SolverOptions &opts,
MatrixBase<Real> *M);
/// Maximizes the auxiliary function :
/// \f[ Q(M) = tr(M^T G) -0.5 tr(P_1 M Q_1 M^T) -0.5 tr(P_2 M Q_2 M^T). \f]
/// Encountered in matrix update with a prior. We also apply a limit on the
/// condition but it should be less frequently necessary, and can be set larger.
template<typename Real>
Real SolveDoubleQuadraticMatrixProblem(const MatrixBase<Real> &G,
const SpMatrix<Real> &P1,
const SpMatrix<Real> &P2,
const SpMatrix<Real> &Q1,
const SpMatrix<Real> &Q2,
const SolverOptions &opts,
MatrixBase<Real> *M);
/// @} End of "addtogroup matrix_funcs_misc"
} // namespace kaldi
// Including the implementation (now actually just includes some
// template specializations).
#include "matrix/sp-matrix-inl.h"
#endif // KALDI_MATRIX_SP_MATRIX_H_
// matrix/sparse-matrix.cc
// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
// 2015 Guoguo Chen
// 2017 Shiyin Kang
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <limits>
#include <string>
#include "matrix/sparse-matrix.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
template <typename Real>
std::pair<MatrixIndexT, Real>* SparseVector<Real>::Data() {
if (pairs_.empty())
return NULL;
else
return &(pairs_[0]);
}
template <typename Real>
const std::pair<MatrixIndexT, Real>* SparseVector<Real>::Data() const {
if (pairs_.empty())
return NULL;
else
return &(pairs_[0]);
}
template <typename Real>
Real SparseVector<Real>::Sum() const {
Real sum = 0;
for (int32 i = 0; i < pairs_.size(); ++i) {
sum += pairs_[i].second;
}
return sum;
}
template<typename Real>
void SparseVector<Real>::Scale(Real alpha) {
for (int32 i = 0; i < pairs_.size(); ++i)
pairs_[i].second *= alpha;
}
template <typename Real>
template <typename OtherReal>
void SparseVector<Real>::CopyElementsToVec(VectorBase<OtherReal> *vec) const {
KALDI_ASSERT(vec->Dim() == this->dim_);
vec->SetZero();
OtherReal *other_data = vec->Data();
typename std::vector<std::pair<MatrixIndexT, Real> >::const_iterator
iter = pairs_.begin(), end = pairs_.end();
for (; iter != end; ++iter)
other_data[iter->first] = iter->second;
}
template
void SparseVector<float>::CopyElementsToVec(VectorBase<float> *vec) const;
template
void SparseVector<float>::CopyElementsToVec(VectorBase<double> *vec) const;
template
void SparseVector<double>::CopyElementsToVec(VectorBase<float> *vec) const;
template
void SparseVector<double>::CopyElementsToVec(VectorBase<double> *vec) const;
template <typename Real>
template <typename OtherReal>
void SparseVector<Real>::AddToVec(Real alpha,
VectorBase<OtherReal> *vec) const {
KALDI_ASSERT(vec->Dim() == dim_);
OtherReal *other_data = vec->Data();
typename std::vector<std::pair<MatrixIndexT, Real> >::const_iterator
iter = pairs_.begin(), end = pairs_.end();
if (alpha == 1.0) { // treat alpha==1.0 case specially.
for (; iter != end; ++iter)
other_data[iter->first] += iter->second;
} else {
for (; iter != end; ++iter)
other_data[iter->first] += alpha * iter->second;
}
}
template
void SparseVector<float>::AddToVec(float alpha, VectorBase<float> *vec) const;
template
void SparseVector<float>::AddToVec(float alpha, VectorBase<double> *vec) const;
template
void SparseVector<double>::AddToVec(double alpha, VectorBase<float> *vec) const;
template
void SparseVector<double>::AddToVec(double alpha,
VectorBase<double> *vec) const;
template <typename Real>
template <typename OtherReal>
void SparseVector<Real>::CopyFromSvec(const SparseVector<OtherReal> &other) {
dim_ = other.Dim();
pairs_.clear();
if (dim_ == 0) return;
for (int32 i = 0; i < other.NumElements(); ++i) {
pairs_.push_back(std::make_pair(
other.GetElement(i).first,
static_cast<Real>(other.GetElement(i).second)));
}
}
template
void SparseVector<float>::CopyFromSvec(const SparseVector<float> &svec);
template
void SparseVector<float>::CopyFromSvec(const SparseVector<double> &svec);
template
void SparseVector<double>::CopyFromSvec(const SparseVector<float> &svec);
template
void SparseVector<double>::CopyFromSvec(const SparseVector<double> &svec);
template <typename Real>
SparseVector<Real>& SparseVector<Real>::operator = (
const SparseVector<Real> &other) {
this->CopyFromSvec(other);
return *this;
}
template <typename Real>
void SparseVector<Real>::Swap(SparseVector<Real> *other) {
pairs_.swap(other->pairs_);
std::swap(dim_, other->dim_);
}
template <typename Real>
void SparseVector<Real>::Write(std::ostream &os, bool binary) const {
if (binary) {
WriteToken(os, binary, "SV");
WriteBasicType(os, binary, dim_);
MatrixIndexT num_elems = pairs_.size();
WriteBasicType(os, binary, num_elems);
typename std::vector<std::pair<MatrixIndexT, Real> >::const_iterator
iter = pairs_.begin(), end = pairs_.end();
for (; iter != end; ++iter) {
WriteBasicType(os, binary, iter->first);
WriteBasicType(os, binary, iter->second);
}
} else {
// In text-mode, use a human-friendly, script-friendly format;
// format is "dim=5 [ 0 0.2 3 0.9 ] "
os << "dim=" << dim_ << " [ ";
typename std::vector<std::pair<MatrixIndexT, Real> >::const_iterator
iter = pairs_.begin(), end = pairs_.end();
for (; iter != end; ++iter)
os << iter->first << ' ' << iter->second << ' ';
os << "] ";
}
}
template <typename Real>
void SparseVector<Real>::Read(std::istream &is, bool binary) {
if (binary) {
ExpectToken(is, binary, "SV");
ReadBasicType(is, binary, &dim_);
KALDI_ASSERT(dim_ >= 0);
int32 num_elems;
ReadBasicType(is, binary, &num_elems);
KALDI_ASSERT(num_elems >= 0 && num_elems <= dim_);
pairs_.resize(num_elems);
typename std::vector<std::pair<MatrixIndexT, Real> >::iterator
iter = pairs_.begin(), end = pairs_.end();
for (; iter != end; ++iter) {
ReadBasicType(is, binary, &(iter->first));
ReadBasicType(is, binary, &(iter->second));
}
} else {
// In text-mode, the format is "dim=5 [ 0 0.2 3 0.9 ] "
std::string str;
is >> str;
if (str.substr(0, 4) != "dim=")
KALDI_ERR << "Reading sparse vector, expected 'dim=xxx', got " << str;
std::string dim_str = str.substr(4, std::string::npos);
std::istringstream dim_istr(dim_str);
int32 dim = -1;
dim_istr >> dim;
if (dim < 0 || dim_istr.fail()) {
KALDI_ERR << "Reading sparse vector, expected 'dim=[int]', got " << str;
}
dim_ = dim;
is >> std::ws;
is >> str;
if (str != "[")
KALDI_ERR << "Reading sparse vector, expected '[', got " << str;
pairs_.clear();
while (1) {
is >> std::ws;
if (is.peek() == ']') {
is.get();
break;
}
MatrixIndexT i;
BaseFloat p;
is >> i >> p;
if (is.fail())
KALDI_ERR << "Error reading sparse vector, expecting numbers.";
KALDI_ASSERT(i >= 0 && i < dim
&& (pairs_.empty() || i > pairs_.back().first));
pairs_.push_back(std::pair<MatrixIndexT, BaseFloat>(i, p));
}
}
}
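// [Editor's sketch -- illustrative only; assumes <sstream>.] Round-tripping
// the text format documented above ("dim=5 [ 0 0.2 3 0.9 ] "):
#if 0
void ExampleSparseVectorIo() {
  std::vector<std::pair<MatrixIndexT, float> > pairs;
  pairs.push_back(std::make_pair(0, 0.2f));
  pairs.push_back(std::make_pair(3, 0.9f));
  SparseVector<float> v(5, pairs);   // dim 5, two stored elements
  std::ostringstream os;
  v.Write(os, false);                // text mode
  std::istringstream is(os.str());
  SparseVector<float> v2;
  v2.Read(is, false);
  KALDI_ASSERT(v2.Dim() == 5 && v2.NumElements() == 2);
}
#endif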
namespace sparse_vector_utils {
template <typename Real>
struct CompareFirst {
inline bool operator() (const std::pair<MatrixIndexT, Real> &p1,
const std::pair<MatrixIndexT, Real> &p2) const {
return p1.first < p2.first;
}
};
}
template <typename Real>
SparseVector<Real>::SparseVector(
MatrixIndexT dim, const std::vector<std::pair<MatrixIndexT, Real> > &pairs):
dim_(dim),
pairs_(pairs) {
std::sort(pairs_.begin(), pairs_.end(),
sparse_vector_utils::CompareFirst<Real>());
typename std::vector<std::pair<MatrixIndexT, Real> >::iterator
out = pairs_.begin(), in = out, end = pairs_.end();
// special case: while there is nothing to be changed, skip over
// initial input (avoids unnecessary copying).
while (in + 1 < end && in[0].first != in[1].first && in[0].second != 0.0) {
in++;
out++;
}
while (in < end) {
// We reach this point only at the first element of
// each stretch of identical .first elements.
*out = *in;
++in;
while (in < end && in->first == out->first) {
out->second += in->second; // this is the merge operation.
++in;
}
if (out->second != Real(0.0)) // Don't keep zero elements.
out++;
}
pairs_.erase(out, end);
if (!pairs_.empty()) {
// range check.
KALDI_ASSERT(pairs_.front().first >= 0 && pairs_.back().first < dim_);
}
}
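// [Editor's sketch -- illustrative only.] The constructor above sorts the
// pairs, sums duplicate indexes, and drops exact zeros; a hypothetical
// example:
#if 0
void ExampleDuplicateMerge() {
  std::vector<std::pair<MatrixIndexT, double> > pairs;
  pairs.push_back(std::make_pair(2, 1.5));
  pairs.push_back(std::make_pair(2, 0.5));  // duplicate index: summed
  pairs.push_back(std::make_pair(4, 0.0));  // exact zero: dropped
  SparseVector<double> v(6, pairs);
  KALDI_ASSERT(v.NumElements() == 1);       // only (2, 2.0) survives
}
#endif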
template <typename Real>
void SparseVector<Real>::SetRandn(BaseFloat zero_prob) {
pairs_.clear();
KALDI_ASSERT(zero_prob >= 0 && zero_prob <= 1.0);
for (MatrixIndexT i = 0; i < dim_; i++)
if (WithProb(1.0 - zero_prob))
pairs_.push_back(std::pair<MatrixIndexT, Real>(i, RandGauss()));
}
template <typename Real>
void SparseVector<Real>::Resize(MatrixIndexT dim,
MatrixResizeType resize_type) {
if (resize_type != kCopyData || dim == 0)
pairs_.clear();
KALDI_ASSERT(dim >= 0);
if (dim < dim_ && resize_type == kCopyData)
while (!pairs_.empty() && pairs_.back().first >= dim)
pairs_.pop_back();
dim_ = dim;
}
template <typename Real>
MatrixIndexT SparseMatrix<Real>::NumRows() const {
return rows_.size();
}
template <typename Real>
MatrixIndexT SparseMatrix<Real>::NumCols() const {
if (rows_.empty())
return 0;
else
return rows_[0].Dim();
}
template <typename Real>
MatrixIndexT SparseMatrix<Real>::NumElements() const {
int32 num_elements = 0;
for (int32 i = 0; i < rows_.size(); ++i) {
num_elements += rows_[i].NumElements();
}
return num_elements;
}
template <typename Real>
SparseVector<Real>* SparseMatrix<Real>::Data() {
if (rows_.empty())
return NULL;
else
return rows_.data();
}
template <typename Real>
const SparseVector<Real>* SparseMatrix<Real>::Data() const {
if (rows_.empty())
return NULL;
else
return rows_.data();
}
template <typename Real>
Real SparseMatrix<Real>::Sum() const {
Real sum = 0;
for (int32 i = 0; i < rows_.size(); ++i) {
sum += rows_[i].Sum();
}
return sum;
}
template<typename Real>
Real SparseMatrix<Real>::FrobeniusNorm() const {
Real squared_sum = 0;
for (int32 i = 0; i < rows_.size(); ++i) {
const std::pair<MatrixIndexT, Real> *row_data = rows_[i].Data();
for (int32 j = 0; j < rows_[i].NumElements(); ++j) {
squared_sum += row_data[j].second * row_data[j].second;
}
}
return std::sqrt(squared_sum);
}
template <typename Real>
template <typename OtherReal>
void SparseMatrix<Real>::CopyToMat(MatrixBase<OtherReal> *other,
MatrixTransposeType trans) const {
if (trans == kNoTrans) {
MatrixIndexT num_rows = rows_.size();
KALDI_ASSERT(other->NumRows() == num_rows);
for (MatrixIndexT i = 0; i < num_rows; i++) {
SubVector<OtherReal> vec(*other, i);
rows_[i].CopyElementsToVec(&vec);
}
} else {
OtherReal *other_col_data = other->Data();
MatrixIndexT other_stride = other->Stride(),
num_rows = NumRows(), num_cols = NumCols();
KALDI_ASSERT(num_rows == other->NumCols() && num_cols == other->NumRows());
other->SetZero();
for (MatrixIndexT row = 0; row < num_rows; row++, other_col_data++) {
const SparseVector<Real> &svec = rows_[row];
MatrixIndexT num_elems = svec.NumElements();
const std::pair<MatrixIndexT, Real> *sdata = svec.Data();
for (MatrixIndexT e = 0; e < num_elems; e++)
other_col_data[sdata[e].first * other_stride] = sdata[e].second;
}
}
}
template
void SparseMatrix<float>::CopyToMat(MatrixBase<float> *other,
MatrixTransposeType trans) const;
template
void SparseMatrix<float>::CopyToMat(MatrixBase<double> *other,
MatrixTransposeType trans) const;
template
void SparseMatrix<double>::CopyToMat(MatrixBase<float> *other,
MatrixTransposeType trans) const;
template
void SparseMatrix<double>::CopyToMat(MatrixBase<double> *other,
MatrixTransposeType trans) const;
template <typename Real>
void SparseMatrix<Real>::CopyElementsToVec(VectorBase<Real> *other) const {
KALDI_ASSERT(other->Dim() == NumElements());
Real *dst_data = other->Data();
int32 dst_index = 0;
for (int32 i = 0; i < rows_.size(); ++i) {
for (int32 j = 0; j < rows_[i].NumElements(); ++j) {
dst_data[dst_index] =
static_cast<Real>(rows_[i].GetElement(j).second);
dst_index++;
}
}
}
template<typename Real>
template<typename OtherReal>
void SparseMatrix<Real>::CopyFromSmat(const SparseMatrix<OtherReal> &other,
MatrixTransposeType trans) {
if (trans == kNoTrans) {
rows_.resize(other.NumRows());
if (rows_.size() == 0)
return;
for (int32 r = 0; r < rows_.size(); ++r) {
rows_[r].CopyFromSvec(other.Row(r));
}
} else {
std::vector<std::vector<std::pair<MatrixIndexT, Real> > > pairs(
other.NumCols());
for (MatrixIndexT i = 0; i < other.NumRows(); ++i) {
for (int id = 0; id < other.Row(i).NumElements(); ++id) {
MatrixIndexT j = other.Row(i).GetElement(id).first;
Real v = static_cast<Real>(other.Row(i).GetElement(id).second);
pairs[j].push_back( { i, v });
}
}
SparseMatrix<Real> temp(other.NumRows(), pairs);
Swap(&temp);
}
}
template
void SparseMatrix<float>::CopyFromSmat(const SparseMatrix<float> &other,
MatrixTransposeType trans);
template
void SparseMatrix<float>::CopyFromSmat(const SparseMatrix<double> &other,
MatrixTransposeType trans);
template
void SparseMatrix<double>::CopyFromSmat(const SparseMatrix<float> &other,
MatrixTransposeType trans);
template
void SparseMatrix<double>::CopyFromSmat(const SparseMatrix<double> &other,
MatrixTransposeType trans);
template <typename Real>
void SparseMatrix<Real>::Write(std::ostream &os, bool binary) const {
if (binary) {
// Note: we can use the same marker for float and double SparseMatrix,
// because internally we use WriteBasicType and ReadBasicType to read the
// floats and doubles, and this will automatically take care of type
// conversion.
WriteToken(os, binary, "SM");
int32 num_rows = rows_.size();
WriteBasicType(os, binary, num_rows);
for (int32 row = 0; row < num_rows; row++)
rows_[row].Write(os, binary);
} else {
// The format is "rows=10 dim=20 [ 1 0.4 9 1.2 ] dim=20 [ 3 1.7 19 0.6 ] ...".
// not 100% efficient, but easy to work with, and we can re-use the
// read/write code from SparseVector.
int32 num_rows = rows_.size();
os << "rows=" << num_rows << " ";
for (int32 row = 0; row < num_rows; row++)
rows_[row].Write(os, binary);
os << "\n"; // Might make it a little more readable.
}
}
template <typename Real>
void SparseMatrix<Real>::Read(std::istream &is, bool binary) {
if (binary) {
ExpectToken(is, binary, "SM");
int32 num_rows;
ReadBasicType(is, binary, &num_rows);
KALDI_ASSERT(num_rows >= 0 && num_rows < 10000000);
rows_.resize(num_rows);
for (int32 row = 0; row < num_rows; row++)
rows_[row].Read(is, binary);
} else {
std::string str;
is >> str;
if (str.substr(0, 5) != "rows=")
KALDI_ERR << "Reading sparse matrix, expected 'rows=xxx', got " << str;
std::string rows_str = str.substr(5, std::string::npos);
std::istringstream rows_istr(rows_str);
int32 num_rows = -1;
rows_istr >> num_rows;
if (num_rows < 0 || rows_istr.fail()) {
KALDI_ERR << "Reading sparse vector, expected 'rows=[int]', got " << str;
}
rows_.resize(num_rows);
for (int32 row = 0; row < num_rows; row++)
rows_[row].Read(is, binary);
}
}
template <typename Real>
void SparseMatrix<Real>::AddToMat(BaseFloat alpha,
MatrixBase<Real> *other,
MatrixTransposeType trans) const {
if (trans == kNoTrans) {
MatrixIndexT num_rows = rows_.size();
KALDI_ASSERT(other->NumRows() == num_rows);
for (MatrixIndexT i = 0; i < num_rows; i++) {
SubVector<Real> vec(*other, i);
rows_[i].AddToVec(alpha, &vec);
}
} else {
Real *other_col_data = other->Data();
MatrixIndexT other_stride = other->Stride(),
num_rows = NumRows(), num_cols = NumCols();
KALDI_ASSERT(num_rows == other->NumCols() && num_cols == other->NumRows());
for (MatrixIndexT row = 0; row < num_rows; row++, other_col_data++) {
const SparseVector<Real> &svec = rows_[row];
MatrixIndexT num_elems = svec.NumElements();
const std::pair<MatrixIndexT, Real> *sdata = svec.Data();
for (MatrixIndexT e = 0; e < num_elems; e++)
other_col_data[sdata[e].first * other_stride] +=
alpha * sdata[e].second;
}
}
}
template <typename Real>
Real VecSvec(const VectorBase<Real> &vec,
const SparseVector<Real> &svec) {
KALDI_ASSERT(vec.Dim() == svec.Dim());
MatrixIndexT n = svec.NumElements();
const std::pair<MatrixIndexT, Real> *sdata = svec.Data();
const Real *data = vec.Data();
Real ans = 0.0;
for (MatrixIndexT i = 0; i < n; i++)
ans += data[sdata[i].first] * sdata[i].second;
return ans;
}
template
float VecSvec(const VectorBase<float> &vec,
const SparseVector<float> &svec);
template
double VecSvec(const VectorBase<double> &vec,
const SparseVector<double> &svec);
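// [Editor's sketch -- illustrative only.] Only the stored indexes of the
// sparse vector contribute to the dot product; a hypothetical example:
#if 0
void ExampleVecSvec() {
  Vector<double> dense(4);
  dense(0) = 1.0; dense(1) = 2.0; dense(2) = 3.0; dense(3) = 4.0;
  std::vector<std::pair<MatrixIndexT, double> > pairs;
  pairs.push_back(std::make_pair(1, 10.0));
  pairs.push_back(std::make_pair(3, 100.0));
  SparseVector<double> sparse(4, pairs);
  KALDI_ASSERT(VecSvec(dense, sparse) == 2.0 * 10.0 + 4.0 * 100.0);  // 420
}
#endif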
template <typename Real>
const SparseVector<Real> &SparseMatrix<Real>::Row(MatrixIndexT r) const {
KALDI_ASSERT(static_cast<size_t>(r) < rows_.size());
return rows_[r];
}
template <typename Real>
void SparseMatrix<Real>::SetRow(int32 r, const SparseVector<Real> &vec) {
KALDI_ASSERT(static_cast<size_t>(r) < rows_.size() &&
vec.Dim() == rows_[0].Dim());
rows_[r] = vec;
}
template<typename Real>
void SparseMatrix<Real>::SelectRows(const std::vector<int32> &row_indexes,
const SparseMatrix<Real> &smat_other) {
Resize(row_indexes.size(), smat_other.NumCols());
for (int i = 0; i < row_indexes.size(); ++i) {
SetRow(i, smat_other.Row(row_indexes[i]));
}
}
template<typename Real>
SparseMatrix<Real>::SparseMatrix(const std::vector<int32> &indexes, int32 dim,
MatrixTransposeType trans) {
const std::vector<int32>& idx = indexes;
std::vector<std::vector<std::pair<MatrixIndexT, Real> > > pair(idx.size());
for (int i = 0; i < idx.size(); ++i) {
if (idx[i] >= 0) {
pair[i].push_back( { idx[i], Real(1) });
}
}
SparseMatrix<Real> smat_cpu(dim, pair);
if (trans == kNoTrans) {
this->Swap(&smat_cpu);
} else {
SparseMatrix<Real> tmp(smat_cpu, kTrans);
this->Swap(&tmp);
}
}
template<typename Real>
SparseMatrix<Real>::SparseMatrix(const std::vector<int32> &indexes,
const VectorBase<Real> &weights, int32 dim,
MatrixTransposeType trans) {
const std::vector<int32>& idx = indexes;
const VectorBase<Real>& w = weights;
std::vector<std::vector<std::pair<MatrixIndexT, Real> > > pair(idx.size());
for (int i = 0; i < idx.size(); ++i) {
if (idx[i] >= 0) {
pair[i].push_back( { idx[i], w(i) });
}
}
SparseMatrix<Real> smat_cpu(dim, pair);
if (trans == kNoTrans) {
this->Swap(&smat_cpu);
} else {
SparseMatrix<Real> tmp(smat_cpu, kTrans);
this->Swap(&tmp);
}
}
template <typename Real>
SparseMatrix<Real>& SparseMatrix<Real>::operator = (
const SparseMatrix<Real> &other) {
rows_ = other.rows_;
return *this;
}
template <typename Real>
void SparseMatrix<Real>::Swap(SparseMatrix<Real> *other) {
rows_.swap(other->rows_);
}
template<typename Real>
SparseMatrix<Real>::SparseMatrix(
MatrixIndexT dim,
const std::vector<std::vector<std::pair<MatrixIndexT, Real> > > &pairs):
rows_(pairs.size()) {
MatrixIndexT num_rows = pairs.size();
for (MatrixIndexT row = 0; row < num_rows; row++) {
SparseVector<Real> svec(dim, pairs[row]);
rows_[row].Swap(&svec);
}
}
template <typename Real>
void SparseMatrix<Real>::SetRandn(BaseFloat zero_prob) {
MatrixIndexT num_rows = rows_.size();
for (MatrixIndexT row = 0; row < num_rows; row++)
rows_[row].SetRandn(zero_prob);
}
template <typename Real>
void SparseMatrix<Real>::Resize(MatrixIndexT num_rows,
MatrixIndexT num_cols,
MatrixResizeType resize_type) {
KALDI_ASSERT(num_rows >= 0 && num_cols >= 0);
if (resize_type == kSetZero || resize_type == kUndefined) {
rows_.clear();
Resize(num_rows, num_cols, kCopyData);
} else {
// Assume resize_type == kCopyData from here.
int32 old_num_rows = rows_.size(), old_num_cols = NumCols();
SparseVector<Real> initializer(num_cols);
rows_.resize(num_rows, initializer);
if (num_cols != old_num_cols)
for (int32 row = 0; row < old_num_rows; row++)
rows_[row].Resize(num_cols, kCopyData);
}
}
template <typename Real>
void SparseMatrix<Real>::AppendSparseMatrixRows(
std::vector<SparseMatrix<Real> > *inputs) {
rows_.clear();
size_t num_rows = 0;
typename std::vector<SparseMatrix<Real> >::iterator
input_iter = inputs->begin(),
input_end = inputs->end();
for (; input_iter != input_end; ++input_iter)
num_rows += input_iter->rows_.size();
rows_.resize(num_rows);
typename std::vector<SparseVector<Real> >::iterator
row_iter = rows_.begin(),
row_end = rows_.end();
for (input_iter = inputs->begin(); input_iter != input_end; ++input_iter) {
typename std::vector<SparseVector<Real> >::iterator
input_row_iter = input_iter->rows_.begin(),
input_row_end = input_iter->rows_.end();
for (; input_row_iter != input_row_end; ++input_row_iter, ++row_iter)
row_iter->Swap(&(*input_row_iter));
}
KALDI_ASSERT(row_iter == row_end);
int32 num_cols = NumCols();
for (row_iter = rows_.begin(); row_iter != row_end; ++row_iter) {
if (row_iter->Dim() != num_cols)
KALDI_ERR << "Appending rows with inconsistent dimensions, "
<< row_iter->Dim() << " vs. " << num_cols;
}
inputs->clear();
}
template<typename Real>
void SparseMatrix<Real>::Scale(Real alpha) {
MatrixIndexT num_rows = rows_.size();
for (MatrixIndexT row = 0; row < num_rows; row++)
rows_[row].Scale(alpha);
}
template<typename Real>
SparseMatrix<Real>::SparseMatrix(const MatrixBase<Real> &mat) {
MatrixIndexT num_rows = mat.NumRows();
rows_.resize(num_rows);
for (int32 row = 0; row < num_rows; row++) {
SparseVector<Real> this_row(mat.Row(row));
rows_[row].Swap(&this_row);
}
}
template<typename Real>
Real TraceMatSmat(const MatrixBase<Real> &A,
const SparseMatrix<Real> &B,
MatrixTransposeType trans) {
Real sum = 0.0;
if (trans == kTrans) {
MatrixIndexT num_rows = A.NumRows();
KALDI_ASSERT(B.NumRows() == num_rows);
for (MatrixIndexT r = 0; r < num_rows; r++)
sum += VecSvec(A.Row(r), B.Row(r));
} else {
const Real *A_col_data = A.Data();
MatrixIndexT Astride = A.Stride(), Acols = A.NumCols(), Arows = A.NumRows();
KALDI_ASSERT(Arows == B.NumCols() && Acols == B.NumRows());
sum = 0.0;
for (MatrixIndexT i = 0; i < Acols; i++, A_col_data++) {
Real col_sum = 0.0;
const SparseVector<Real> &svec = B.Row(i);
MatrixIndexT num_elems = svec.NumElements();
const std::pair<MatrixIndexT, Real> *sdata = svec.Data();
for (MatrixIndexT e = 0; e < num_elems; e++)
col_sum += A_col_data[Astride * sdata[e].first] * sdata[e].second;
sum += col_sum;
}
}
return sum;
}
template
float TraceMatSmat(const MatrixBase<float> &A,
const SparseMatrix<float> &B,
MatrixTransposeType trans);
template
double TraceMatSmat(const MatrixBase<double> &A,
const SparseMatrix<double> &B,
MatrixTransposeType trans);
void GeneralMatrix::Clear() {
mat_.Resize(0, 0);
cmat_.Clear();
smat_.Resize(0, 0);
}
GeneralMatrix& GeneralMatrix::operator= (const MatrixBase<BaseFloat> &mat) {
Clear();
mat_ = mat;
return *this;
}
GeneralMatrix& GeneralMatrix::operator= (const CompressedMatrix &cmat) {
Clear();
cmat_ = cmat;
return *this;
}
GeneralMatrix& GeneralMatrix::operator= (const SparseMatrix<BaseFloat> &smat) {
Clear();
smat_ = smat;
return *this;
}
GeneralMatrix& GeneralMatrix::operator= (const GeneralMatrix &gmat) {
mat_ = gmat.mat_;
smat_ = gmat.smat_;
cmat_ = gmat.cmat_;
return *this;
}
GeneralMatrixType GeneralMatrix::Type() const {
if (smat_.NumRows() != 0)
return kSparseMatrix;
else if (cmat_.NumRows() != 0)
return kCompressedMatrix;
else
return kFullMatrix;
}
MatrixIndexT GeneralMatrix::NumRows() const {
MatrixIndexT r = smat_.NumRows();
if (r != 0)
return r;
r = cmat_.NumRows();
if (r != 0)
return r;
return mat_.NumRows();
}
MatrixIndexT GeneralMatrix::NumCols() const {
MatrixIndexT r = smat_.NumCols();
if (r != 0)
return r;
r = cmat_.NumCols();
if (r != 0)
return r;
return mat_.NumCols();
}
void GeneralMatrix::Compress() {
if (mat_.NumRows() != 0) {
cmat_.CopyFromMat(mat_);
mat_.Resize(0, 0);
}
}
void GeneralMatrix::Uncompress() {
if (cmat_.NumRows() != 0) {
mat_.Resize(cmat_.NumRows(), cmat_.NumCols(), kUndefined);
cmat_.CopyToMat(&mat_);
cmat_.Clear();
}
}
void GeneralMatrix::GetMatrix(Matrix<BaseFloat> *mat) const {
if (mat_.NumRows() !=0) {
*mat = mat_;
} else if (cmat_.NumRows() != 0) {
mat->Resize(cmat_.NumRows(), cmat_.NumCols(), kUndefined);
cmat_.CopyToMat(mat);
} else if (smat_.NumRows() != 0) {
mat->Resize(smat_.NumRows(), smat_.NumCols(), kUndefined);
smat_.CopyToMat(mat);
} else {
mat->Resize(0, 0);
}
}
void GeneralMatrix::CopyToMat(MatrixBase<BaseFloat> *mat,
MatrixTransposeType trans) const {
if (mat_.NumRows() !=0) {
mat->CopyFromMat(mat_, trans);
} else if (cmat_.NumRows() != 0) {
cmat_.CopyToMat(mat, trans);
} else if (smat_.NumRows() != 0) {
smat_.CopyToMat(mat, trans);
} else {
KALDI_ASSERT(mat->NumRows() == 0);
}
}
void GeneralMatrix::Scale(BaseFloat alpha) {
if (mat_.NumRows() != 0) {
mat_.Scale(alpha);
} else if (cmat_.NumRows() != 0) {
cmat_.Scale(alpha);
} else if (smat_.NumRows() != 0) {
smat_.Scale(alpha);
}
}
const SparseMatrix<BaseFloat>& GeneralMatrix::GetSparseMatrix() const {
if (mat_.NumRows() != 0 || cmat_.NumRows() != 0)
KALDI_ERR << "GetSparseMatrix called on GeneralMatrix of wrong type.";
return smat_;
}
void GeneralMatrix::SwapSparseMatrix(SparseMatrix<BaseFloat> *smat) {
if (mat_.NumRows() != 0 || cmat_.NumRows() != 0)
KALDI_ERR << "GetSparseMatrix called on GeneralMatrix of wrong type.";
smat->Swap(&smat_);
}
void GeneralMatrix::SwapCompressedMatrix(CompressedMatrix *cmat) {
if (mat_.NumRows() != 0 || smat_.NumRows() != 0)
KALDI_ERR << "GetSparseMatrix called on GeneralMatrix of wrong type.";
cmat->Swap(&cmat_);
}
const CompressedMatrix &GeneralMatrix::GetCompressedMatrix() const {
if (mat_.NumRows() != 0 || smat_.NumRows() != 0)
KALDI_ERR << "GetCompressedMatrix called on GeneralMatrix of wrong type.";
return cmat_;
}
const Matrix<BaseFloat> &GeneralMatrix::GetFullMatrix() const {
if (smat_.NumRows() != 0 || cmat_.NumRows() != 0)
KALDI_ERR << "GetFullMatrix called on GeneralMatrix of wrong type.";
return mat_;
}
void GeneralMatrix::SwapFullMatrix(Matrix<BaseFloat> *mat) {
if (cmat_.NumRows() != 0 || smat_.NumRows() != 0)
KALDI_ERR << "SwapMatrix called on GeneralMatrix of wrong type.";
mat->Swap(&mat_);
}
void GeneralMatrix::Write(std::ostream &os, bool binary) const {
if (smat_.NumRows() != 0) {
smat_.Write(os, binary);
} else if (cmat_.NumRows() != 0) {
cmat_.Write(os, binary);
} else {
mat_.Write(os, binary);
}
}
void GeneralMatrix::Read(std::istream &is, bool binary) {
Clear();
if (binary) {
int peekval = is.peek();
if (peekval == 'C') {
// Token CM for compressed matrix
cmat_.Read(is, binary);
} else if (peekval == 'S') {
// Token SM for sparse matrix
smat_.Read(is, binary);
} else {
mat_.Read(is, binary);
}
} else {
// note: in text mode we will only ever read regular
// or sparse matrices, because the compressed-matrix format just
// gets written as a regular matrix in text mode.
is >> std::ws; // Eat up white space.
int peekval = is.peek();
if (peekval == 'r') { // sparse format starts rows=[int].
smat_.Read(is, binary);
} else {
mat_.Read(is, binary);
}
}
}
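// [Editor's sketch -- illustrative, hypothetical usage.] Because Read()
// dispatches on the first byte of the stream, callers need not know which of
// the three representations was written:
#if 0
void ExampleGeneralMatrixRead(std::istream &is) {
  GeneralMatrix gmat;
  gmat.Read(is, true /* binary */);  // full, compressed, or sparse
  Matrix<BaseFloat> full;
  gmat.GetMatrix(&full);             // always yields an uncompressed copy
}
#endif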
void AppendGeneralMatrixRows(const std::vector<const GeneralMatrix *> &src,
GeneralMatrix *mat) {
mat->Clear();
int32 size = src.size();
if (size == 0)
return;
bool all_sparse = true;
for (int32 i = 0; i < size; i++) {
if (src[i]->Type() != kSparseMatrix && src[i]->NumRows() != 0) {
all_sparse = false;
break;
}
}
if (all_sparse) {
std::vector<SparseMatrix<BaseFloat> > sparse_mats(size);
for (int32 i = 0; i < size; i++)
sparse_mats[i] = src[i]->GetSparseMatrix();
SparseMatrix<BaseFloat> appended_mat;
appended_mat.AppendSparseMatrixRows(&sparse_mats);
mat->SwapSparseMatrix(&appended_mat);
} else {
int32 tot_rows = 0, num_cols = -1;
for (int32 i = 0; i < size; i++) {
const GeneralMatrix &src_mat = *(src[i]);
int32 src_rows = src_mat.NumRows(), src_cols = src_mat.NumCols();
if (src_rows != 0) {
tot_rows += src_rows;
if (num_cols == -1) num_cols = src_cols;
else if (num_cols != src_cols)
KALDI_ERR << "Appending rows of matrices with inconsistent num-cols: "
<< num_cols << " vs. " << src_cols;
}
}
Matrix<BaseFloat> appended_mat(tot_rows, num_cols, kUndefined);
int32 row_offset = 0;
for (int32 i = 0; i < size; i++) {
const GeneralMatrix &src_mat = *(src[i]);
int32 src_rows = src_mat.NumRows();
if (src_rows != 0) {
SubMatrix<BaseFloat> dest_submat(appended_mat, row_offset, src_rows,
0, num_cols);
src_mat.CopyToMat(&dest_submat);
row_offset += src_rows;
}
}
KALDI_ASSERT(row_offset == tot_rows);
mat->SwapFullMatrix(&appended_mat);
}
}
void FilterCompressedMatrixRows(const CompressedMatrix &in,
const std::vector<bool> &keep_rows,
Matrix<BaseFloat> *out) {
KALDI_ASSERT(keep_rows.size() == static_cast<size_t>(in.NumRows()));
int32 num_kept_rows = 0;
std::vector<bool>::const_iterator iter = keep_rows.begin(),
end = keep_rows.end();
for (; iter != end; ++iter)
if (*iter)
num_kept_rows++;
if (num_kept_rows == 0)
KALDI_ERR << "No kept rows";
if (num_kept_rows == static_cast<int32>(keep_rows.size())) {
out->Resize(in.NumRows(), in.NumCols(), kUndefined);
in.CopyToMat(out);
return;
}
const BaseFloat heuristic = 0.33;
// should be > 0 and < 1.0. represents the performance hit we get from
// iterating row-wise versus column-wise in compressed-matrix uncompression.
if (num_kept_rows > heuristic * in.NumRows()) {
// if quite a few of the rows are kept, it may be more efficient
// to uncompress the entire compressed matrix, since per-column operation
// is more efficient.
Matrix<BaseFloat> full_mat(in);
FilterMatrixRows(full_mat, keep_rows, out);
} else {
out->Resize(num_kept_rows, in.NumCols(), kUndefined);
iter = keep_rows.begin();
int32 out_row = 0;
for (int32 in_row = 0; iter != end; ++iter, ++in_row) {
if (*iter) {
SubVector<BaseFloat> dest(*out, out_row);
in.CopyRowToVec(in_row, &dest);
out_row++;
}
}
KALDI_ASSERT(out_row == num_kept_rows);
}
}
template <typename Real>
void FilterMatrixRows(const Matrix<Real> &in,
const std::vector<bool> &keep_rows,
Matrix<Real> *out) {
KALDI_ASSERT(keep_rows.size() == static_cast<size_t>(in.NumRows()));
int32 num_kept_rows = 0;
std::vector<bool>::const_iterator iter = keep_rows.begin(),
end = keep_rows.end();
for (; iter != end; ++iter)
if (*iter)
num_kept_rows++;
if (num_kept_rows == 0)
KALDI_ERR << "No kept rows";
if (num_kept_rows == static_cast<int32>(keep_rows.size())) {
*out = in;
return;
}
out->Resize(num_kept_rows, in.NumCols(), kUndefined);
iter = keep_rows.begin();
int32 out_row = 0;
for (int32 in_row = 0; iter != end; ++iter, ++in_row) {
if (*iter) {
SubVector<Real> src(in, in_row);
SubVector<Real> dest(*out, out_row);
dest.CopyFromVec(src);
out_row++;
}
}
KALDI_ASSERT(out_row == num_kept_rows);
}
template
void FilterMatrixRows(const Matrix<float> &in,
const std::vector<bool> &keep_rows,
Matrix<float> *out);
template
void FilterMatrixRows(const Matrix<double> &in,
const std::vector<bool> &keep_rows,
Matrix<double> *out);
template <typename Real>
void FilterSparseMatrixRows(const SparseMatrix<Real> &in,
const std::vector<bool> &keep_rows,
SparseMatrix<Real> *out) {
KALDI_ASSERT(keep_rows.size() == static_cast<size_t>(in.NumRows()));
int32 num_kept_rows = 0;
std::vector<bool>::const_iterator iter = keep_rows.begin(),
end = keep_rows.end();
for (; iter != end; ++iter)
if (*iter)
num_kept_rows++;
if (num_kept_rows == 0)
KALDI_ERR << "No kept rows";
if (num_kept_rows == static_cast<int32>(keep_rows.size())) {
*out = in;
return;
}
out->Resize(num_kept_rows, in.NumCols(), kUndefined);
iter = keep_rows.begin();
int32 out_row = 0;
for (int32 in_row = 0; iter != end; ++iter, ++in_row) {
if (*iter) {
out->SetRow(out_row, in.Row(in_row));
out_row++;
}
}
KALDI_ASSERT(out_row == num_kept_rows);
}
template
void FilterSparseMatrixRows(const SparseMatrix<float> &in,
const std::vector<bool> &keep_rows,
SparseMatrix<float> *out);
template
void FilterSparseMatrixRows(const SparseMatrix<double> &in,
const std::vector<bool> &keep_rows,
SparseMatrix<double> *out);
void FilterGeneralMatrixRows(const GeneralMatrix &in,
const std::vector<bool> &keep_rows,
GeneralMatrix *out) {
out->Clear();
KALDI_ASSERT(keep_rows.size() == static_cast<size_t>(in.NumRows()));
int32 num_kept_rows = 0;
std::vector<bool>::const_iterator iter = keep_rows.begin(),
end = keep_rows.end();
for (; iter != end; ++iter)
if (*iter)
num_kept_rows++;
if (num_kept_rows == 0)
KALDI_ERR << "No kept rows";
if (num_kept_rows == static_cast<int32>(keep_rows.size())) {
*out = in;
return;
}
switch (in.Type()) {
case kCompressedMatrix: {
const CompressedMatrix &cmat = in.GetCompressedMatrix();
Matrix<BaseFloat> full_mat;
FilterCompressedMatrixRows(cmat, keep_rows, &full_mat);
out->SwapFullMatrix(&full_mat);
return;
}
case kSparseMatrix: {
const SparseMatrix<BaseFloat> &smat = in.GetSparseMatrix();
SparseMatrix<BaseFloat> smat_out;
FilterSparseMatrixRows(smat, keep_rows, &smat_out);
out->SwapSparseMatrix(&smat_out);
return;
}
case kFullMatrix: {
const Matrix<BaseFloat> &full_mat = in.GetFullMatrix();
Matrix<BaseFloat> full_mat_out;
FilterMatrixRows(full_mat, keep_rows, &full_mat_out);
out->SwapFullMatrix(&full_mat_out);
return;
}
default:
KALDI_ERR << "Invalid general-matrix type.";
}
}
void GeneralMatrix::AddToMat(BaseFloat alpha, MatrixBase<BaseFloat> *mat,
MatrixTransposeType trans) const {
switch (this->Type()) {
case kFullMatrix: {
mat->AddMat(alpha, mat_, trans);
break;
}
case kSparseMatrix: {
smat_.AddToMat(alpha, mat, trans);
break;
}
case kCompressedMatrix: {
Matrix<BaseFloat> temp_mat(cmat_);
mat->AddMat(alpha, temp_mat, trans);
break;
}
default:
KALDI_ERR << "Invalid general-matrix type.";
}
}
template <class Real>
Real SparseVector<Real>::Max(int32 *index_out) const {
KALDI_ASSERT(dim_ > 0 && pairs_.size() <= static_cast<size_t>(dim_));
Real ans = -std::numeric_limits<Real>::infinity();
int32 index = 0;
typename std::vector<std::pair<MatrixIndexT, Real> >::const_iterator
iter = pairs_.begin(), end = pairs_.end();
for (; iter != end; ++iter) {
if (iter->second > ans) {
ans = iter->second;
index = iter->first;
}
}
if (ans >= 0 || pairs_.size() == dim_) {
// ans >= 0 will be the normal case.
// if pairs_.size() == dim_ then we need to return
// even a negative answer as there are no gaps (hence no unlisted zeros).
*index_out = index;
return ans;
}
// all the stored elements are < 0, but there are unlisted
// elements -> pick the first unlisted element.
// Note that this class requires that the indexes are sorted
// and unique.
index = 0; // "index" will always be the next index that
// we haven't yet seen listed.
iter = pairs_.begin();
for (; iter != end; ++iter) {
if (iter->first > index) { // index "index" is not listed.
*index_out = index;
return 0.0;
} else {
// index is the next potential gap in the indexes.
index = iter->first + 1;
}
}
// we can reach here if either pairs_.empty(), or
// pairs_ is nonempty but contains a sequence (0, 1, 2,...).
if (!pairs_.empty())
index = pairs_.back().first + 1;
// else leave index at zero
KALDI_ASSERT(index < dim_);
*index_out = index;
return 0.0;
}
template <typename Real>
SparseVector<Real>::SparseVector(const VectorBase<Real> &vec) {
MatrixIndexT dim = vec.Dim();
dim_ = dim;
if (dim == 0)
return;
const Real *ptr = vec.Data();
for (MatrixIndexT i = 0; i < dim; i++) {
Real val = ptr[i];
if (val != 0.0)
pairs_.push_back(std::pair<MatrixIndexT,Real>(i,val));
}
}
void GeneralMatrix::Swap(GeneralMatrix *other) {
mat_.Swap(&(other->mat_));
cmat_.Swap(&(other->cmat_));
smat_.Swap(&(other->smat_));
}
void ExtractRowRangeWithPadding(
const GeneralMatrix &in,
int32 row_offset,
int32 num_rows,
GeneralMatrix *out) {
// make sure 'out' is empty to start with.
Matrix<BaseFloat> empty_mat;
*out = empty_mat;
if (num_rows == 0) return;
switch (in.Type()) {
case kFullMatrix: {
const Matrix<BaseFloat> &mat_in = in.GetFullMatrix();
int32 num_rows_in = mat_in.NumRows(), num_cols = mat_in.NumCols();
KALDI_ASSERT(num_rows_in > 0); // we can't extract >0 rows from an empty
// matrix.
Matrix<BaseFloat> mat_out(num_rows, num_cols, kUndefined);
for (int32 row = 0; row < num_rows; row++) {
int32 row_in = row + row_offset;
if (row_in < 0) row_in = 0;
else if (row_in >= num_rows_in) row_in = num_rows_in - 1;
SubVector<BaseFloat> vec_in(mat_in, row_in),
vec_out(mat_out, row);
vec_out.CopyFromVec(vec_in);
}
out->SwapFullMatrix(&mat_out);
break;
}
case kSparseMatrix: {
const SparseMatrix<BaseFloat> &smat_in = in.GetSparseMatrix();
int32 num_rows_in = smat_in.NumRows(),
num_cols = smat_in.NumCols();
KALDI_ASSERT(num_rows_in > 0); // we can't extract >0 rows from an empty
// matrix.
SparseMatrix<BaseFloat> smat_out(num_rows, num_cols);
for (int32 row = 0; row < num_rows; row++) {
int32 row_in = row + row_offset;
if (row_in < 0) row_in = 0;
else if (row_in >= num_rows_in) row_in = num_rows_in - 1;
smat_out.SetRow(row, smat_in.Row(row_in));
}
out->SwapSparseMatrix(&smat_out);
break;
}
case kCompressedMatrix: {
const CompressedMatrix &cmat_in = in.GetCompressedMatrix();
bool allow_padding = true;
CompressedMatrix cmat_out(cmat_in, row_offset, num_rows,
0, cmat_in.NumCols(), allow_padding);
out->SwapCompressedMatrix(&cmat_out);
break;
}
default:
KALDI_ERR << "Bad matrix type.";
}
}
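// Editor's note (illustrative, not part of the original source): a worked
// example of the padding above. With in.NumRows() == 5, row_offset == -2 and
// num_rows == 4, the clamped source rows are {0, 0, 0, 1}: rows that fall
// outside the input are copies of its first (or last) row, not zeros.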
template class SparseVector<float>;
template class SparseVector<double>;
template class SparseMatrix<float>;
template class SparseMatrix<double>;
} // namespace kaldi
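// Editor's sketch (illustrative, not part of the original source): minimal
// use of the row-filtering helpers defined above. The function name and the
// "drop the last row" policy are hypothetical, chosen only to show the API.
static void DropLastRowSketch(const kaldi::GeneralMatrix &in,
kaldi::GeneralMatrix *out) {
// FilterGeneralMatrixRows requires at least one kept row.
KALDI_ASSERT(in.NumRows() >= 2);
std::vector<bool> keep(in.NumRows(), true);
keep.back() = false; // drop the final row
kaldi::FilterGeneralMatrixRows(in, keep, out);
KALDI_ASSERT(out->NumRows() == in.NumRows() - 1);
}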
// matrix/sparse-matrix.h
// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
// 2015 Guoguo Chen
// 2017 Shiyin Kang
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_SPARSE_MATRIX_H_
#define KALDI_MATRIX_SPARSE_MATRIX_H_ 1
#include <utility>
#include <vector>
#include "matrix/matrix-common.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/kaldi-vector.h"
#include "matrix/compressed-matrix.h"
namespace kaldi {
/// \addtogroup matrix_group
/// @{
template <typename Real>
class SparseVector {
public:
MatrixIndexT Dim() const { return dim_; }
Real Sum() const;
template <class OtherReal>
void CopyElementsToVec(VectorBase<OtherReal> *vec) const;
// *vec += alpha * *this.
template <class OtherReal>
void AddToVec(Real alpha,
VectorBase<OtherReal> *vec) const;
template <class OtherReal>
void CopyFromSvec(const SparseVector<OtherReal> &other);
SparseVector<Real> &operator = (const SparseVector<Real> &other);
SparseVector(const SparseVector<Real> &other) { *this = other; }
void Swap(SparseVector<Real> *other);
// Returns the maximum value in this vector and outputs the index associated
// with it. This is not the index into the Data() pointer; it is the index
// into the vector this object represents, i.e. the .first value in the pair.
// It is an error to call this function if this vector's Dim() is zero.
// If all the stored elements are negative and the underlying vector has
// indexes not listed among them (which are implicitly zero), or if no
// elements are stored, it returns the first unlisted index, whose value is
// (implicitly) zero.
Real Max(int32 *index) const;
/// Returns the number of nonzero elements.
MatrixIndexT NumElements() const { return pairs_.size(); }
/// get an indexed element (0 <= i < NumElements()).
const std::pair<MatrixIndexT, Real> &GetElement(MatrixIndexT i) const {
return pairs_[i];
}
// returns pointer to element data, or NULL if empty (use with NumElements()).
std::pair<MatrixIndexT, Real> *Data();
// returns pointer to element data, or NULL if empty (use with NumElements());
// const version
const std::pair<MatrixIndexT, Real> *Data() const;
/// Sets each element to zero with probability zero_prob, and otherwise to a
/// normally distributed value. Useful in testing.
void SetRandn(BaseFloat zero_prob);
SparseVector(): dim_(0) { }
explicit SparseVector(MatrixIndexT dim): dim_(dim) { KALDI_ASSERT(dim >= 0); }
// constructor from pairs; does not assume input pairs are sorted and unique
SparseVector(MatrixIndexT dim,
const std::vector<std::pair<MatrixIndexT, Real> > &pairs);
// constructor from a VectorBase that keeps only the nonzero elements of 'vec'.
explicit SparseVector(const VectorBase<Real> &vec);
/// Resizes to this dimension. resize_type == kUndefined
/// behaves the same as kSetZero.
void Resize(MatrixIndexT dim, MatrixResizeType resize_type = kSetZero);
void Write(std::ostream &os, bool binary) const;
void Read(std::istream &os, bool binary);
/// Scale all elements of sparse vector.
void Scale(Real alpha);
private:
MatrixIndexT dim_;
// pairs of (index, value). Stored in sorted order with no duplicates.
// For now we use std::vector, but we could change this.
std::vector<std::pair<MatrixIndexT, Real> > pairs_;
};
template <typename Real>
Real VecSvec(const VectorBase<Real> &vec,
const SparseVector<Real> &svec);
template <typename Real>
class SparseMatrix {
public:
MatrixIndexT NumRows() const;
MatrixIndexT NumCols() const;
MatrixIndexT NumElements() const;
Real Sum() const;
Real FrobeniusNorm() const;
/// This constructor creates a SparseMatrix that just contains the nonzero
/// elements of 'mat'.
explicit SparseMatrix(const MatrixBase<Real> &mat);
/// Copy to matrix. It must already have the correct size.
template <class OtherReal>
void CopyToMat(MatrixBase<OtherReal> *other,
MatrixTransposeType t = kNoTrans) const;
/// Copies the values of all the elements in SparseMatrix into a VectorBase
/// object.
void CopyElementsToVec(VectorBase<Real> *other) const;
/// Copies data from another sparse matrix.
template<class OtherReal>
void CopyFromSmat(const SparseMatrix<OtherReal> &other,
MatrixTransposeType trans = kNoTrans);
/// Does *other = *other + alpha * *this.
void AddToMat(BaseFloat alpha, MatrixBase<Real> *other,
MatrixTransposeType t = kNoTrans) const;
SparseMatrix<Real> &operator = (const SparseMatrix<Real> &other);
SparseMatrix(const SparseMatrix<Real> &other, MatrixTransposeType trans =
kNoTrans) {
this->CopyFromSmat(other, trans);
}
void Swap(SparseMatrix<Real> *other);
// returns pointer to element data, or NULL if empty (use with NumElements()).
SparseVector<Real> *Data();
// returns pointer to element data, or NULL if empty (use with NumElements());
// const version
const SparseVector<Real> *Data() const;
// Initializer from the type that elsewhere in Kaldi is referred to as
// Posterior: indexed first by row index; the pairs are (column-index, value),
// and the constructor does not require them to be sorted and unique.
SparseMatrix(
int32 dim,
const std::vector<std::vector<std::pair<MatrixIndexT, Real> > > &pairs);
/// Sets *this to a pseudo-randomly initialized matrix, with each element
/// zero with probability zero_prob and otherwise normally distributed;
/// mostly for purposes of testing.
void SetRandn(BaseFloat zero_prob);
void Write(std::ostream &os, bool binary) const;
void Read(std::istream &os, bool binary);
const SparseVector<Real> &Row(MatrixIndexT r) const;
/// Sets row r to "vec"; makes sure it has the correct dimension.
void SetRow(int32 r, const SparseVector<Real> &vec);
/// Select a subset of the rows of a SparseMatrix.
/// Sets *this to only the rows of 'smat_other' that are listed
/// in 'row_indexes'.
/// 'row_indexes' must satisfy 0 <= row_indexes[i] < smat_other.NumRows().
void SelectRows(const std::vector<int32> &row_indexes,
const SparseMatrix<Real> &smat_other);
/// Sets *this to all the rows of *inputs appended together; this
/// function is destructive of the inputs. Requires, obviously,
/// that the inputs all have the same dimension (although some may be
/// empty).
void AppendSparseMatrixRows(std::vector<SparseMatrix<Real> > *inputs);
SparseMatrix() { }
SparseMatrix(int32 num_rows, int32 num_cols) { Resize(num_rows, num_cols); }
/// Constructor from an array of indexes.
/// If trans == kNoTrans, construct a sparse matrix
/// with num-rows == indexes.Dim() and num-cols = 'dim'.
/// 'indexes' is expected to contain elements in the
/// range [0, dim - 1]. Each row 'i' of *this after
/// calling the constructor will contain a single
/// element at column-index indexes[i] with value 1.0.
///
/// If trans == kTrans, the result will be the transpose
/// of the sparse matrix described above.
SparseMatrix(const std::vector<int32> &indexes, int32 dim,
MatrixTransposeType trans = kNoTrans);
/// Constructor from an array of indexes and an array of
/// weights; requires indexes.Dim() == weights.Dim().
/// If trans == kNoTrans, construct a sparse matrix
/// with num-rows == indexes.Dim() and num-cols = 'dim'.
/// 'indexes' is expected to contain elements in the
/// range [0, dim - 1]. Each row 'i' of *this after
/// calling the constructor will contain a single
/// element at column-index indexes[i] with value weights[i].
/// If trans == kTrans, the result will be the transpose
/// of the sparse matrix described above.
SparseMatrix(const std::vector<int32> &indexes,
const VectorBase<Real> &weights, int32 dim,
MatrixTransposeType trans = kNoTrans);
/// Resizes the matrix; analogous to Matrix::Resize(). resize_type ==
/// kUndefined behaves the same as kSetZero.
void Resize(MatrixIndexT rows, MatrixIndexT cols,
MatrixResizeType resize_type = kSetZero);
/// Scale all elements in sparse matrix.
void Scale(Real alpha);
// Use the Matrix::CopyFromSmat() function to copy from this to Matrix. Also
// see Matrix::AddSmat(). There is not very extensive functionality for
// SparseMat just yet (e.g. no matrix multiply); we will add things as needed
// and as it seems necessary.
private:
// Vector of SparseVectors, all of the same Dim() (we use a std::vector for
// now; this could change).
std::vector<SparseVector<Real> > rows_;
};
template<typename Real>
Real TraceMatSmat(const MatrixBase<Real> &A,
const SparseMatrix<Real> &B,
MatrixTransposeType trans = kNoTrans);
enum GeneralMatrixType {
kFullMatrix,
kCompressedMatrix,
kSparseMatrix
};
/// This class is a wrapper that enables you to store a matrix
/// in one of three forms: either as a Matrix<BaseFloat>, or a CompressedMatrix,
/// or a SparseMatrix<BaseFloat>. It handles the I/O for you, i.e. you read
/// and write a single object type. It is useful for neural-net training
/// targets which might be sparse or not, and might be compressed or not.
class GeneralMatrix {
public:
/// Returns the type of the matrix: kSparseMatrix, kCompressedMatrix or
/// kFullMatrix. If this matrix is empty, returns kFullMatrix.
GeneralMatrixType Type() const;
void Compress(); // If it was a full matrix, compresses, changing Type() to
// kCompressedMatrix; otherwise does nothing.
void Uncompress(); // If it was a compressed matrix, uncompresses, changing
// Type() to kFullMatrix; otherwise does nothing.
void Write(std::ostream &os, bool binary) const;
/// Note: if you write a compressed matrix in text form, it will be read as
/// a regular full matrix.
void Read(std::istream &is, bool binary);
/// Returns the contents as a SparseMatrix. This will only work if
/// Type() returns kSparseMatrix, or NumRows() == 0; otherwise it will crash.
const SparseMatrix<BaseFloat> &GetSparseMatrix() const;
/// Swaps *this with the given SparseMatrix. This will only work if
/// Type() returns kSparseMatrix, or NumRows() == 0.
void SwapSparseMatrix(SparseMatrix<BaseFloat> *smat);
/// Returns the contents as a compressed matrix. This will only work if
/// Type() returns kCompressedMatrix, or NumRows() == 0; otherwise it will
/// crash.
const CompressedMatrix &GetCompressedMatrix() const;
/// Swaps *this with the given CompressedMatrix. This will only work if
/// Type() returns kCompressedMatrix, or NumRows() == 0.
void SwapCompressedMatrix(CompressedMatrix *cmat);
/// Returns the contents as a Matrix<BaseFloat>. This will only work if
/// Type() returns kFullMatrix, or NumRows() == 0; otherwise it will crash.
const Matrix<BaseFloat>& GetFullMatrix() const;
/// Outputs the contents as a matrix. This will work regardless of
/// Type(). Sizes its output, unlike CopyToMat().
void GetMatrix(Matrix<BaseFloat> *mat) const;
/// Swaps *this with the given Matrix. This will only work if
/// Type() returns kFullMatrix, or NumRows() == 0.
void SwapFullMatrix(Matrix<BaseFloat> *mat);
/// Copies contents, regardless of type, to "mat", which must be correctly
/// sized. See also GetMatrix(), which will size its output for you.
void CopyToMat(MatrixBase<BaseFloat> *mat,
MatrixTransposeType trans = kNoTrans) const;
/// Copies contents, regardless of type, to "cu_mat", which must be
/// correctly sized. Implemented in ../cudamatrix/cu-sparse-matrix.cc
void CopyToMat(CuMatrixBase<BaseFloat> *cu_mat,
MatrixTransposeType trans = kNoTrans) const;
/// Adds alpha times *this to mat.
void AddToMat(BaseFloat alpha, MatrixBase<BaseFloat> *mat,
MatrixTransposeType trans = kNoTrans) const;
/// Adds alpha times *this to cu_mat.
/// Implemented in ../cudamatrix/cu-sparse-matrix.cc
void AddToMat(BaseFloat alpha, CuMatrixBase<BaseFloat> *cu_mat,
MatrixTransposeType trans = kNoTrans) const;
/// Scale each element of matrix by alpha.
void Scale(BaseFloat alpha);
/// Assignment from regular matrix.
GeneralMatrix &operator= (const MatrixBase<BaseFloat> &mat);
/// Assignment from compressed matrix.
GeneralMatrix &operator= (const CompressedMatrix &mat);
/// Assignment from SparseMatrix<BaseFloat>
GeneralMatrix &operator= (const SparseMatrix<BaseFloat> &smat);
MatrixIndexT NumRows() const;
MatrixIndexT NumCols() const;
explicit GeneralMatrix(const MatrixBase<BaseFloat> &mat) { *this = mat; }
explicit GeneralMatrix(const CompressedMatrix &cmat) { *this = cmat; }
explicit GeneralMatrix(const SparseMatrix<BaseFloat> &smat) { *this = smat; }
GeneralMatrix() { }
// Assignment operator.
GeneralMatrix &operator =(const GeneralMatrix &other);
// Copy constructor
GeneralMatrix(const GeneralMatrix &other) { *this = other; }
// Sets to the empty matrix.
void Clear();
// shallow swap
void Swap(GeneralMatrix *other);
private:
// We don't explicitly store the type of the matrix. Rather, we make
// sure that only one of the matrices is ever nonempty, and the Type()
// returns that one, or kFullMatrix if all are empty.
Matrix<BaseFloat> mat_;
CompressedMatrix cmat_;
SparseMatrix<BaseFloat> smat_;
};
/// Appends all the matrix rows of a list of GeneralMatrix objects, to get a single
/// GeneralMatrix. Preserves sparsity if all inputs were sparse (or empty).
/// Does not preserve compression, if inputs were compressed; you have to
/// re-compress manually, if that's what you need.
void AppendGeneralMatrixRows(const std::vector<const GeneralMatrix *> &src,
GeneralMatrix *mat);
/// Outputs a SparseMatrix<Real> containing only the rows r of "in" such that
/// keep_rows[r] == true. keep_rows.size() must equal in.NumRows(), and
/// keep_rows must contain at least one "true" element.
template <typename Real>
void FilterSparseMatrixRows(const SparseMatrix<Real> &in,
const std::vector<bool> &keep_rows,
SparseMatrix<Real> *out);
/// Outputs a Matrix<Real> containing only the rows r of "in" such that
/// keep_rows[r] == true. keep_rows.size() must equal in.NumRows(), and
/// keep_rows must contain at least one "true" element.
template <typename Real>
void FilterMatrixRows(const Matrix<Real> &in,
const std::vector<bool> &keep_rows,
Matrix<Real> *out);
/// Outputs a Matrix<Real> containing only the rows r of "in" such that
/// keep_rows[r] == true. keep_rows.size() must equal in.NumRows(), and
/// keep_rows must contain at least one "true" element.
void FilterCompressedMatrixRows(const CompressedMatrix &in,
const std::vector<bool> &keep_rows,
Matrix<BaseFloat> *out);
/// Outputs a GeneralMatrix containing only the rows r of "in" such that
/// keep_rows[r] == true. keep_rows.size() must equal in.NumRows(), and
/// keep_rows must contain at least one "true" element. If in.Type() is
/// kCompressedMatrix, the result will not be compressed; otherwise, the type
/// is preserved.
void FilterGeneralMatrixRows(const GeneralMatrix &in,
const std::vector<bool> &keep_rows,
GeneralMatrix *out);
/// This function extracts a row-range of a GeneralMatrix and writes
/// as a GeneralMatrix containing the same type of underlying
/// matrix. If the row-range is partly outside the row-range of 'in'
/// (i.e. if row_offset < 0 or row_offset + num_rows > in.NumRows())
/// then it will pad with copies of the first and last row as
/// needed.
/// This is more efficient than un-compressing and
/// re-compressing the underlying CompressedMatrix, and causes
/// less accuracy loss due to re-compression (no loss in most cases).
void ExtractRowRangeWithPadding(
const GeneralMatrix &in,
int32 row_offset,
int32 num_rows,
GeneralMatrix *out);
/// @} end of \addtogroup matrix_group
} // namespace kaldi
#endif // KALDI_MATRIX_SPARSE_MATRIX_H_
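// Editor's sketch (illustrative, not part of the original source): building a
// one-hot SparseMatrix from integer labels with the constructor declared
// above, then accumulating it into a dense matrix. Names are hypothetical.
static kaldi::Matrix<kaldi::BaseFloat> OneHotSketch(
const std::vector<kaldi::int32> &labels, kaldi::int32 num_classes) {
using namespace kaldi;
// Row i contains a single element 1.0 at column labels[i].
SparseMatrix<BaseFloat> one_hot(labels, num_classes, kNoTrans);
Matrix<BaseFloat> dense(one_hot.NumRows(), one_hot.NumCols()); // zero-filled
one_hot.AddToMat(1.0, &dense, kNoTrans); // dense += 1.0 * one_hot
return dense;
}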
// matrix/srfft.cc
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// This file includes a modified version of code originally published in Malvar,
// H., "Signal processing with lapped transforms, " Artech House, Inc., 1992. The
// current copyright holder of the original code, Henrique S. Malvar, has given
// his permission for the release of this modified version under the Apache
// License v2.0.
#include "matrix/srfft.h"
#include "matrix/matrix-functions.h"
namespace kaldi {
template<typename Real>
SplitRadixComplexFft<Real>::SplitRadixComplexFft(MatrixIndexT N) {
if ( (N & (N-1)) != 0 || N <= 1)
KALDI_ERR << "SplitRadixComplexFft called with invalid number of points "
<< N;
N_ = N;
logn_ = 0;
while (N > 1) {
N >>= 1;
logn_ ++;
}
ComputeTables();
}
template <typename Real>
SplitRadixComplexFft<Real>::SplitRadixComplexFft(
const SplitRadixComplexFft<Real> &other):
N_(other.N_), logn_(other.logn_) {
// This code duplicates tables from a previously computed object.
// Compare with the code in ComputeTables().
MatrixIndexT lg2 = logn_ >> 1;
if (logn_ & 1) lg2++;
MatrixIndexT brseed_size = 1 << lg2;
brseed_ = new MatrixIndexT[brseed_size];
std::memcpy(brseed_, other.brseed_, sizeof(MatrixIndexT) * brseed_size);
if (logn_ < 4) {
tab_ = NULL;
} else {
tab_ = new Real*[logn_ - 3];
for (MatrixIndexT i = logn_; i >= 4 ; i--) {
MatrixIndexT m = 1 << i, m2 = m / 2, m4 = m2 / 2;
MatrixIndexT this_array_size = 6 * (m4 - 2);
tab_[i-4] = new Real[this_array_size];
std::memcpy(tab_[i-4], other.tab_[i-4],
sizeof(Real) * this_array_size);
}
}
}
template<typename Real>
void SplitRadixComplexFft<Real>::ComputeTables() {
MatrixIndexT imax, lg2, i, j;
MatrixIndexT m, m2, m4, m8, nel, n;
Real *cn, *spcn, *smcn, *c3n, *spc3n, *smc3n;
Real ang, c, s;
lg2 = logn_ >> 1;
if (logn_ & 1) lg2++;
brseed_ = new MatrixIndexT[1 << lg2];
brseed_[0] = 0;
brseed_[1] = 1;
for (j = 2; j <= lg2; j++) {
imax = 1 << (j - 1);
for (i = 0; i < imax; i++) {
brseed_[i] <<= 1;
brseed_[i + imax] = brseed_[i] + 1;
}
}
if (logn_ < 4) {
tab_ = NULL;
} else {
tab_ = new Real* [logn_-3];
for (i = logn_; i>=4 ; i--) {
/* Compute a few constants */
m = 1 << i; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
/* Allocate memory for tables */
nel = m4 - 2;
tab_[i-4] = new Real[6*nel];
/* Initialize pointers */
cn = tab_[i-4]; spcn = cn + nel; smcn = spcn + nel;
c3n = smcn + nel; spc3n = c3n + nel; smc3n = spc3n + nel;
/* Compute tables */
for (n = 1; n < m4; n++) {
if (n == m8) continue;
ang = n * M_2PI / m;
c = std::cos(ang); s = std::sin(ang);
*cn++ = c; *spcn++ = - (s + c); *smcn++ = s - c;
ang = 3 * n * M_2PI / m;
c = std::cos(ang); s = std::sin(ang);
*c3n++ = c; *spc3n++ = - (s + c); *smc3n++ = s - c;
}
}
}
}
template<typename Real>
SplitRadixComplexFft<Real>::~SplitRadixComplexFft() {
delete [] brseed_;
if (tab_ != NULL) {
for (MatrixIndexT i = 0; i < logn_-3; i++)
delete [] tab_[i];
delete [] tab_;
}
}
template<typename Real>
void SplitRadixComplexFft<Real>::Compute(Real *xr, Real *xi, bool forward) const {
if (!forward) { // reverse real and imaginary parts for complex FFT.
Real *tmp = xr;
xr = xi;
xi = tmp;
}
ComputeRecursive(xr, xi, logn_);
if (logn_ > 1) {
BitReversePermute(xr, logn_);
BitReversePermute(xi, logn_);
}
}
template<typename Real>
void SplitRadixComplexFft<Real>::Compute(Real *x, bool forward,
std::vector<Real> *temp_buffer) const {
KALDI_ASSERT(temp_buffer != NULL);
if (temp_buffer->size() != N_)
temp_buffer->resize(N_);
Real *temp_ptr = &((*temp_buffer)[0]);
for (MatrixIndexT i = 0; i < N_; i++) {
x[i] = x[i * 2]; // put the real part in the first half of x.
temp_ptr[i] = x[i * 2 + 1]; // put the imaginary part in temp_buffer.
}
// copy the imaginary part back to the second half of x.
memcpy(static_cast<void*>(x + N_),
static_cast<void*>(temp_ptr),
sizeof(Real) * N_);
Compute(x, x + N_, forward);
// Now change the format back to interleaved.
memcpy(static_cast<void*>(temp_ptr),
static_cast<void*>(x + N_),
sizeof(Real) * N_);
for (MatrixIndexT i = N_-1; i > 0; i--) { // Don't include i = 0: if
// MatrixIndexT were unsigned the loop would not terminate, so we treat
// it as a special case below.
x[i*2] = x[i];
x[i*2 + 1] = temp_ptr[i];
}
x[1] = temp_ptr[0]; // special case of i = 0.
}
template<typename Real>
void SplitRadixComplexFft<Real>::Compute(Real *x, bool forward) {
this->Compute(x, forward, &temp_buffer_);
}
template<typename Real>
void SplitRadixComplexFft<Real>::BitReversePermute(Real *x, MatrixIndexT logn) const {
MatrixIndexT i, j, lg2, n;
MatrixIndexT off, fj, gno, *brp;
Real tmp, *xp, *xq;
lg2 = logn >> 1;
n = 1 << lg2;
if (logn & 1) lg2++;
/* Unshuffling loop */
for (off = 1; off < n; off++) {
fj = n * brseed_[off]; i = off; j = fj;
tmp = x[i]; x[i] = x[j]; x[j] = tmp;
xp = &x[i];
brp = &(brseed_[1]);
for (gno = 1; gno < brseed_[off]; gno++) {
xp += n;
j = fj + *brp++;
xq = x + j;
tmp = *xp; *xp = *xq; *xq = tmp;
}
}
}
template<typename Real>
void SplitRadixComplexFft<Real>::ComputeRecursive(Real *xr, Real *xi, MatrixIndexT logn) const {
MatrixIndexT m, m2, m4, m8, nel, n;
Real *xr1, *xr2, *xi1, *xi2;
Real *cn = nullptr, *spcn = nullptr, *smcn = nullptr, *c3n = nullptr,
*spc3n = nullptr, *smc3n = nullptr;
Real tmp1, tmp2;
Real sqhalf = M_SQRT1_2;
/* Check range of logn */
if (logn < 0)
KALDI_ERR << "Error: logn is out of bounds in SRFFT";
/* Compute trivial cases */
if (logn < 3) {
if (logn == 2) { /* length m = 4 */
xr2 = xr + 2;
xi2 = xi + 2;
tmp1 = *xr + *xr2;
*xr2 = *xr - *xr2;
*xr = tmp1;
tmp1 = *xi + *xi2;
*xi2 = *xi - *xi2;
*xi = tmp1;
xr1 = xr + 1;
xi1 = xi + 1;
xr2++;
xi2++;
tmp1 = *xr1 + *xr2;
*xr2 = *xr1 - *xr2;
*xr1 = tmp1;
tmp1 = *xi1 + *xi2;
*xi2 = *xi1 - *xi2;
*xi1 = tmp1;
xr2 = xr + 1;
xi2 = xi + 1;
tmp1 = *xr + *xr2;
*xr2 = *xr - *xr2;
*xr = tmp1;
tmp1 = *xi + *xi2;
*xi2 = *xi - *xi2;
*xi = tmp1;
xr1 = xr + 2;
xi1 = xi + 2;
xr2 = xr + 3;
xi2 = xi + 3;
tmp1 = *xr1 + *xi2;
tmp2 = *xi1 + *xr2;
*xi1 = *xi1 - *xr2;
*xr2 = *xr1 - *xi2;
*xr1 = tmp1;
*xi2 = tmp2;
return;
}
else if (logn == 1) { /* length m = 2 */
xr2 = xr + 1;
xi2 = xi + 1;
tmp1 = *xr + *xr2;
*xr2 = *xr - *xr2;
*xr = tmp1;
tmp1 = *xi + *xi2;
*xi2 = *xi - *xi2;
*xi = tmp1;
return;
}
else if (logn == 0) return; /* length m = 1 */
}
/* Compute a few constants */
m = 1 << logn; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
/* Step 1 */
xr1 = xr; xr2 = xr1 + m2;
xi1 = xi; xi2 = xi1 + m2;
for (n = 0; n < m2; n++) {
tmp1 = *xr1 + *xr2;
*xr2 = *xr1 - *xr2;
xr2++;
*xr1++ = tmp1;
tmp2 = *xi1 + *xi2;
*xi2 = *xi1 - *xi2;
xi2++;
*xi1++ = tmp2;
}
/* Step 2 */
xr1 = xr + m2; xr2 = xr1 + m4;
xi1 = xi + m2; xi2 = xi1 + m4;
for (n = 0; n < m4; n++) {
tmp1 = *xr1 + *xi2;
tmp2 = *xi1 + *xr2;
*xi1 = *xi1 - *xr2;
xi1++;
*xr2++ = *xr1 - *xi2;
*xr1++ = tmp1;
*xi2++ = tmp2;
// xr1++; xr2++; xi1++; xi2++;
}
/* Steps 3 & 4 */
xr1 = xr + m2; xr2 = xr1 + m4;
xi1 = xi + m2; xi2 = xi1 + m4;
if (logn >= 4) {
nel = m4 - 2;
cn = tab_[logn-4]; spcn = cn + nel; smcn = spcn + nel;
c3n = smcn + nel; spc3n = c3n + nel; smc3n = spc3n + nel;
}
xr1++; xr2++; xi1++; xi2++;
// xr1++; xi1++;
for (n = 1; n < m4; n++) {
if (n == m8) {
tmp1 = sqhalf * (*xr1 + *xi1);
*xi1 = sqhalf * (*xi1 - *xr1);
*xr1 = tmp1;
tmp2 = sqhalf * (*xi2 - *xr2);
*xi2 = -sqhalf * (*xr2 + *xi2);
*xr2 = tmp2;
} else {
tmp2 = *cn++ * (*xr1 + *xi1);
tmp1 = *spcn++ * *xr1 + tmp2;
*xr1 = *smcn++ * *xi1 + tmp2;
*xi1 = tmp1;
tmp2 = *c3n++ * (*xr2 + *xi2);
tmp1 = *spc3n++ * *xr2 + tmp2;
*xr2 = *smc3n++ * *xi2 + tmp2;
*xi2 = tmp1;
}
xr1++; xr2++; xi1++; xi2++;
}
/* Call ssrec again with half DFT length */
ComputeRecursive(xr, xi, logn-1);
/* Call ssrec again twice with one quarter DFT length.
Constants have to be recomputed, because they are static! */
// m = 1 << logn; m2 = m / 2;
ComputeRecursive(xr + m2, xi + m2, logn - 2);
// m = 1 << logn;
m4 = 3 * (m / 4);
ComputeRecursive(xr + m4, xi + m4, logn - 2);
}
template<typename Real>
void SplitRadixRealFft<Real>::Compute(Real *data, bool forward) {
Compute(data, forward, &this->temp_buffer_);
}
// This code is mostly the same as the RealFft function. It would be
// possible to replace it with more efficient code from Rico's book.
template<typename Real>
void SplitRadixRealFft<Real>::Compute(Real *data, bool forward,
std::vector<Real> *temp_buffer) const {
MatrixIndexT N = N_, N2 = N/2;
KALDI_ASSERT(N%2 == 0);
if (forward) // call to base class
SplitRadixComplexFft<Real>::Compute(data, true, temp_buffer);
Real rootN_re, rootN_im; // exp(-2pi/N), forward; exp(2pi/N), backward
int forward_sign = forward ? -1 : 1;
ComplexImExp(static_cast<Real>(M_2PI/N *forward_sign), &rootN_re, &rootN_im);
Real kN_re = -forward_sign, kN_im = 0.0; // exp(-2pi k/N) for forward; -exp(2pi k/N) for backward.
// kN starts out as 1.0 for forward algorithm but -1.0 for backward.
for (MatrixIndexT k = 1; 2*k <= N2; k++) {
ComplexMul(rootN_re, rootN_im, &kN_re, &kN_im);
Real Ck_re, Ck_im, Dk_re, Dk_im;
// C_k = 1/2 (B_k + B_{N/2 - k}^*) :
Ck_re = 0.5 * (data[2*k] + data[N - 2*k]);
Ck_im = 0.5 * (data[2*k + 1] - data[N - 2*k + 1]);
// re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})):
Dk_re = 0.5 * (data[2*k + 1] + data[N - 2*k + 1]);
// im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k}))
Dk_im =-0.5 * (data[2*k] - data[N - 2*k]);
// A_k = C_k + 1^(k/N) D_k:
data[2*k] = Ck_re; // A_k <-- C_k
data[2*k+1] = Ck_im;
// now A_k += D_k 1^(k/N)
ComplexAddProduct(Dk_re, Dk_im, kN_re, kN_im, &(data[2*k]), &(data[2*k+1]));
MatrixIndexT kdash = N2 - k;
if (kdash != k) {
// Next we handle the index k' = N/2 - k. This is necessary
// to do now, to avoid invalidating data that we will later need.
// The quantities C_{k'} and D_{k'} are just the conjugates of C_k
// and D_k, so the equations are simple modifications of the above,
// replacing Ck_im and Dk_im with their negatives.
data[2*kdash] = Ck_re; // A_k' <-- C_k'
data[2*kdash+1] = -Ck_im;
// now A_k' += D_k' 1^(k'/N)
// We use 1^(k'/N) = 1^((N/2 - k) / N) = 1^(1/2) 1^(-k/N) = -1 * (1^(k/N))^*
// so it's the same as 1^(k/N) but with the real part negated.
ComplexAddProduct(Dk_re, -Dk_im, -kN_re, kN_im, &(data[2*kdash]), &(data[2*kdash+1]));
}
}
{ // Now handle k = 0.
// In simple terms: after the complex fft, data[0] becomes the sum of real
// parts input[0], input[2]... and data[1] becomes the sum of imaginary
// parts input[1], input[3]...
// "zeroth" [A_0] is just the sum of input[0]+input[1]+input[2]..
// and "n2th" [A_{N/2}] is input[0]-input[1]+input[2]... .
Real zeroth = data[0] + data[1],
n2th = data[0] - data[1];
data[0] = zeroth;
data[1] = n2th;
if (!forward) {
data[0] /= 2;
data[1] /= 2;
}
}
if (!forward) { // call to base class
SplitRadixComplexFft<Real>::Compute(data, false, temp_buffer);
for (MatrixIndexT i = 0; i < N; i++)
data[i] *= 2.0;
// This is so we get a factor of N increase, rather than N/2 which we would
// otherwise get from [ComplexFft, forward] + [ComplexFft, backward] in dimension N/2.
// It's for consistency with our normal FFT conventions.
}
}
template class SplitRadixComplexFft<float>;
template class SplitRadixComplexFft<double>;
template class SplitRadixRealFft<float>;
template class SplitRadixRealFft<double>;
} // end namespace kaldi
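// Editor's sketch (illustrative, not part of the original source): a forward
// / inverse roundtrip with SplitRadixComplexFft on split real and imaginary
// arrays. As noted above, the inverse omits the 1/N factor, so we rescale to
// recover the input. The size must be a power of two; names are hypothetical.
static void ComplexFftRoundtripSketch(std::vector<float> *re,
std::vector<float> *im) {
using namespace kaldi;
KALDI_ASSERT(re->size() == im->size() && re->size() >= 2);
SplitRadixComplexFft<float> fft(re->size());
fft.Compute(re->data(), im->data(), true); // forward FFT, in place
fft.Compute(re->data(), im->data(), false); // inverse FFT (unnormalized)
float scale = 1.0f / re->size();
for (size_t i = 0; i < re->size(); i++) { // undo the factor of N
(*re)[i] *= scale;
(*im)[i] *= scale;
}
}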
// matrix/srfft.h
// Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.
// 2014 Daniel Povey
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
//
// This file includes a modified version of code originally published in Malvar,
// H., "Signal processing with lapped transforms, " Artech House, Inc., 1992. The
// current copyright holder of the original code, Henrique S. Malvar, has given
// his permission for the release of this modified version under the Apache
// License v2.0.
#ifndef KALDI_MATRIX_SRFFT_H_
#define KALDI_MATRIX_SRFFT_H_
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// @addtogroup matrix_funcs_misc
/// @{
// This class is based on code by Henrique (Rico) Malvar, from his book
// "Signal Processing with Lapped Transforms" (1992). Copied with
// permission, optimized by Go Vivace Inc., and converted into C++ by
// Microsoft Corporation.
// This is a more efficient way of doing the complex FFT than ComplexFft
// (declared in matrix-functions.h), but it only works for powers of 2.
// Note: in multi-threaded code, you would need to have one of these objects per
// thread, because multiple calls to Compute in parallel would not work.
template<typename Real>
class SplitRadixComplexFft {
public:
typedef MatrixIndexT Integer;
// N is the number of complex points (must be a power of two, or this
// will crash). Note that the constructor does some work so it's best to
// initialize the object once and do the computation many times.
SplitRadixComplexFft(Integer N);
// Copy constructor
SplitRadixComplexFft(const SplitRadixComplexFft &other);
// Does the FFT computation, given pointers to the real and
// imaginary parts. If "forward", do the forward FFT; else
// do the inverse FFT (without the 1/N factor).
// xr and xi are pointers to zero-based arrays of size N,
// containing the real and imaginary parts
// respectively.
void Compute(Real *xr, Real *xi, bool forward) const;
// This version of Compute takes a single array of size N*2,
// containing [ r0 im0 r1 im1 ... ]. Otherwise its behavior is the
// same as the version above.
void Compute(Real *x, bool forward);
// This version of Compute is const; it operates on an array of size N*2
// containing [ r0 im0 r1 im1 ... ], but it uses the argument "temp_buffer" as
// temporary storage instead of a class-member variable. It will allocate it if
// needed.
void Compute(Real *x, bool forward, std::vector<Real> *temp_buffer) const;
~SplitRadixComplexFft();
protected:
// temp_buffer_ is allocated only if someone calls Compute with only one Real*
// argument and we need a temporary buffer while creating interleaved data.
std::vector<Real> temp_buffer_;
private:
void ComputeTables();
void ComputeRecursive(Real *xr, Real *xi, Integer logn) const;
void BitReversePermute(Real *x, Integer logn) const;
Integer N_;
Integer logn_; // log2(N)
Integer *brseed_;
// brseed_ is Evans' seed table (Ref: D. M. W. Evans, "An improved
// digit-reversal permutation algorithm ...", IEEE Trans. ASSP, Aug. 1987,
// pp. 1120-1125).
Real **tab_; // Tables of butterfly coefficients.
// Disallow assignment.
SplitRadixComplexFft &operator =(const SplitRadixComplexFft<Real> &other);
};
template<typename Real>
class SplitRadixRealFft: private SplitRadixComplexFft<Real> {
public:
SplitRadixRealFft(MatrixIndexT N): // will fail unless N>=4 and N is a power of 2.
SplitRadixComplexFft<Real> (N/2), N_(N) { }
// Copy constructor
SplitRadixRealFft(const SplitRadixRealFft<Real> &other):
SplitRadixComplexFft<Real>(other), N_(other.N_) { }
/// If forward == true, this function transforms from a sequence of N real points to its complex Fourier
/// transform; otherwise it goes in the reverse direction. If you call it
/// in the forward and then reverse direction and multiply by 1.0/N, you
/// will get back the original data.
/// The interpretation of the complex-FFT data is as follows: the array is a
/// sequence of N/2 complex numbers C_n in (real, im) format, except that the
/// slot that would hold the always-zero im0 instead stores real_{N/2}:
/// [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...].
void Compute(Real *x, bool forward);
/// This is as the other Compute() function, but it is a const version that
/// uses a user-supplied buffer.
void Compute(Real *x, bool forward, std::vector<Real> *temp_buffer) const;
private:
// Disallow assignment.
SplitRadixRealFft &operator =(const SplitRadixRealFft<Real> &other);
int N_;
};
/// @} end of "addtogroup matrix_funcs_misc"
} // end namespace kaldi
#endif
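// Editor's sketch (illustrative, not part of the original source): a real-FFT
// roundtrip in the packed format documented above. Per the Compute() comment,
// forward followed by inverse scales the data by N, so we multiply by 1.0/N.
// Assumes data->Dim() is a power of two and >= 4; the name is hypothetical.
static void RealFftRoundtripSketch(kaldi::Vector<float> *data) {
using namespace kaldi;
SplitRadixRealFft<float> fft(data->Dim());
fft.Compute(data->Data(), true); // forward: packed spectrum, in place
fft.Compute(data->Data(), false); // inverse: unnormalized
data->Scale(1.0f / data->Dim()); // undo the factor of N
}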
// matrix/tp-matrix.cc
// Copyright 2009-2011 Ondrej Glembek; Lukas Burget; Microsoft Corporation
// Saarland University; Yanmin Qian; Haihua Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "matrix/tp-matrix.h"
#include "matrix/sp-matrix.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/cblas-wrappers.h"
namespace kaldi {
#ifndef HAVE_ATLAS
template<typename Real>
void TpMatrix<Real>::Invert() {
// these are CLAPACK types
KaldiBlasInt result;
KaldiBlasInt rows = static_cast<int>(this->num_rows_);
// clapack call
// NOTE: Even though "U" is for upper, LAPACK assumes column-wise storage
// of the data. We have row-wise storage, so the upper/lower sense is
// inverted: our row-major lower triangle is what LAPACK sees as a
// column-major upper triangle.
clapack_Xtptri(&rows, this->data_, &result);
if (result < 0) {
KALDI_ERR << "Call to CLAPACK stptri_ function failed";
} else if (result > 0) {
KALDI_ERR << "Matrix is singular";
}
}
#else
template<typename Real>
void TpMatrix<Real>::Invert() {
// ATLAS doesn't implement triangular matrix inversion in packed
// format, so we temporarily put in non-packed format.
Matrix<Real> tmp(*this);
int rows = static_cast<int>(this->num_rows_);
// ATLAS call. It's really row-major ordering and a lower triangular matrix,
// but there is some weirdness with Fortran-style indexing that we need to
// take account of, so everything gets swapped.
int result = clapack_Xtrtri( rows, tmp.Data(), tmp.Stride());
// Let's hope ATLAS has the same return value conventions as clapack.
// I couldn't find any documentation online.
if (result < 0) {
KALDI_ERR << "Call to ATLAS strtri function failed";
} else if (result > 0) {
KALDI_ERR << "Matrix is singular";
}
(*this).CopyFromMat(tmp);
}
#endif
template<typename Real>
Real TpMatrix<Real>::Determinant() {
double det = 1.0;
for (MatrixIndexT i = 0; i<this->NumRows(); i++) {
det *= (*this)(i, i);
}
return static_cast<Real>(det);
}
template<typename Real>
void TpMatrix<Real>::Swap(TpMatrix<Real> *other) {
std::swap(this->data_, other->data_);
std::swap(this->num_rows_, other->num_rows_);
}
template<typename Real>
void TpMatrix<Real>::Cholesky(const SpMatrix<Real> &orig) {
KALDI_ASSERT(orig.NumRows() == this->NumRows());
MatrixIndexT n = this->NumRows();
this->SetZero();
Real *data = this->data_, *jdata = data; // start of j'th row of matrix.
const Real *orig_jdata = orig.Data(); // start of j'th row of matrix.
for (MatrixIndexT j = 0; j < n; j++, jdata += j, orig_jdata += j) {
Real *kdata = data; // start of k'th row of matrix.
Real d(0.0);
for (MatrixIndexT k = 0; k < j; k++, kdata += k) {
Real s = cblas_Xdot(k, kdata, 1, jdata, 1);
// (*this)(j, k) = s = (orig(j, k) - s)/(*this)(k, k);
jdata[k] = s = (orig_jdata[k] - s)/kdata[k];
d = d + s*s;
}
// d = orig(j, j) - d;
d = orig_jdata[j] - d;
if (d >= 0.0) {
// (*this)(j, j) = std::sqrt(d);
jdata[j] = std::sqrt(d);
} else {
KALDI_ERR << "Cholesky decomposition failed. Maybe matrix "
"is not positive definite.";
}
}
}
template<typename Real>
void TpMatrix<Real>::CopyFromMat(const MatrixBase<Real> &M,
MatrixTransposeType Trans) {
if (Trans == kNoTrans) {
KALDI_ASSERT(this->NumRows() == M.NumRows() && M.NumRows() == M.NumCols());
MatrixIndexT D = this->NumRows();
const Real *in_i = M.Data();
MatrixIndexT stride = M.Stride();
Real *out_i = this->data_;
for (MatrixIndexT i = 0; i < D; i++, in_i += stride, out_i += i)
for (MatrixIndexT j = 0; j <= i; j++)
out_i[j] = in_i[j];
} else {
KALDI_ASSERT(this->NumRows() == M.NumRows() && M.NumRows() == M.NumCols());
MatrixIndexT D = this->NumRows();
const Real *in_i = M.Data();
MatrixIndexT stride = M.Stride();
Real *out_i = this->data_;
for (MatrixIndexT i = 0; i < D; i++, in_i++, out_i += i) {
for (MatrixIndexT j = 0; j <= i; j++)
out_i[j] = in_i[stride*j];
}
}
}
template class TpMatrix<float>;
template class TpMatrix<double>;
} // namespace kaldi
// matrix/tp-matrix.h
// Copyright 2009-2011 Ondrej Glembek; Lukas Burget; Microsoft Corporation;
// Saarland University; Yanmin Qian; Haihua Xu
// 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_TP_MATRIX_H_
#define KALDI_MATRIX_TP_MATRIX_H_
#include "matrix/packed-matrix.h"
namespace kaldi {
/// \addtogroup matrix_group
/// @{
template<typename Real> class TpMatrix;
/// @brief Packed triangular matrix class
template<typename Real>
class TpMatrix : public PackedMatrix<Real> {
friend class CuTpMatrix<float>;
friend class CuTpMatrix<double>;
public:
TpMatrix() : PackedMatrix<Real>() {}
explicit TpMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero)
: PackedMatrix<Real>(r, resize_type) {}
TpMatrix(const TpMatrix<Real>& orig) : PackedMatrix<Real>(orig) {}
/// Copy constructor from CUDA TpMatrix
/// This is defined in ../cudamatrix/cu-tp-matrix.cc
explicit TpMatrix(const CuTpMatrix<Real> &cu);
template<typename OtherReal> explicit TpMatrix(const TpMatrix<OtherReal>& orig)
: PackedMatrix<Real>(orig) {}
Real operator() (MatrixIndexT r, MatrixIndexT c) const {
if (static_cast<UnsignedMatrixIndexT>(c) >
static_cast<UnsignedMatrixIndexT>(r)) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(this->num_rows_));
return 0;
}
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(this->num_rows_));
// c<=r now so don't have to check c.
return *(this->data_ + (r*(r+1)) / 2 + c);
// Duplicating code from PackedMatrix.h
}
Real &operator() (MatrixIndexT r, MatrixIndexT c) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(this->num_rows_));
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(c) <=
static_cast<UnsignedMatrixIndexT>(r) &&
"you cannot access the upper triangle of TpMatrix using "
"a non-const matrix object.");
return *(this->data_ + (r*(r+1)) / 2 + c);
// Duplicating code from PackedMatrix.h
}
// Note: Cholesky may throw KaldiFatalError.
void Cholesky(const SpMatrix<Real>& orig);
void Invert();
// Inverts in double precision.
void InvertDouble() {
TpMatrix<double> dmat(*this);
dmat.Invert();
(*this).CopyFromTp(dmat);
}
/// Shallow swap
void Swap(TpMatrix<Real> *other);
/// Returns the determinant of the matrix (product of diagonals)
Real Determinant();
/// CopyFromMat copies the lower triangle of M into *this
/// (or the upper triangle, if Trans == kTrans).
void CopyFromMat(const MatrixBase<Real> &M,
MatrixTransposeType Trans = kNoTrans);
/// This is implemented in ../cudamatrix/cu-tp-matrix.cc
void CopyFromMat(const CuTpMatrix<Real> &other);
/// CopyFromTp copies another triangular matrix into this one.
void CopyFromTp(const TpMatrix<Real> &other) {
PackedMatrix<Real>::CopyFromPacked(other);
}
template<typename OtherReal> void CopyFromTp(const TpMatrix<OtherReal> &other) {
PackedMatrix<Real>::CopyFromPacked(other);
}
/// AddTp does *this += alpha * M.
void AddTp(const Real alpha, const TpMatrix<Real> &M) {
this->AddPacked(alpha, M);
}
TpMatrix<Real>& operator=(const TpMatrix<Real> &other) {
PackedMatrix<Real>::operator=(other);
return *this;
}
using PackedMatrix<Real>::Scale;
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero) {
PackedMatrix<Real>::Resize(nRows, resize_type);
}
};
/// @} end of "addtogroup matrix_group".
} // namespace kaldi
#endif
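// Editor's sketch (illustrative, not part of the original source): Cholesky
// factorization of a symmetric positive-definite SpMatrix via TpMatrix, plus
// the determinant identity det(S) == det(L)^2. Assumes matrix/sp-matrix.h is
// available (as in tp-matrix.cc above); the function name is hypothetical.
static float SpdDeterminantSketch(const kaldi::SpMatrix<float> &sp) {
using namespace kaldi;
TpMatrix<float> chol(sp.NumRows());
chol.Cholesky(sp); // sp == chol * chol^T, with chol lower-triangular
// The determinant of a triangular matrix is the product of its diagonal,
// so det(sp) == chol.Determinant() squared.
float det_chol = chol.Determinant();
return det_chol * det_chol;
}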
...@@ -754,53 +754,53 @@ class TokenVectorHolder { ...@@ -754,53 +754,53 @@ class TokenVectorHolder {
}; };
class HtkMatrixHolder { //class HtkMatrixHolder {
public: //public:
typedef std::pair<Matrix<BaseFloat>, HtkHeader> T; //typedef std::pair<Matrix<BaseFloat>, HtkHeader> T;
HtkMatrixHolder() {} //HtkMatrixHolder() {}
static bool Write(std::ostream &os, bool binary, const T &t) { //static bool Write(std::ostream &os, bool binary, const T &t) {
if (!binary) //if (!binary)
KALDI_ERR << "Non-binary HTK-format write not supported."; //KALDI_ERR << "Non-binary HTK-format write not supported.";
bool ans = WriteHtk(os, t.first, t.second); //bool ans = WriteHtk(os, t.first, t.second);
if (!ans) //if (!ans)
KALDI_WARN << "Error detected writing HTK-format matrix."; //KALDI_WARN << "Error detected writing HTK-format matrix.";
return ans; //return ans;
} //}
void Clear() { t_.first.Resize(0, 0); } //void Clear() { t_.first.Resize(0, 0); }
// Reads into the holder. //// Reads into the holder.
bool Read(std::istream &is) { //bool Read(std::istream &is) {
bool ans = ReadHtk(is, &t_.first, &t_.second); //bool ans = ReadHtk(is, &t_.first, &t_.second);
if (!ans) { //if (!ans) {
KALDI_WARN << "Error detected reading HTK-format matrix."; //KALDI_WARN << "Error detected reading HTK-format matrix.";
return false; //return false;
} //}
return ans; //return ans;
} //}
// HTK-format matrices only read in binary. //// HTK-format matrices only read in binary.
static bool IsReadInBinary() { return true; } //static bool IsReadInBinary() { return true; }
T &Value() { return t_; } //T &Value() { return t_; }
void Swap(HtkMatrixHolder *other) { //void Swap(HtkMatrixHolder *other) {
t_.first.Swap(&(other->t_.first)); //t_.first.Swap(&(other->t_.first));
std::swap(t_.second, other->t_.second); //std::swap(t_.second, other->t_.second);
} //}
bool ExtractRange(const HtkMatrixHolder &other, //bool ExtractRange(const HtkMatrixHolder &other,
const std::string &range) { //const std::string &range) {
KALDI_ERR << "ExtractRange is not defined for this type of holder."; //KALDI_ERR << "ExtractRange is not defined for this type of holder.";
return false; //return false;
} //}
// Default destructor. //// Default destructor.
private: //private:
KALDI_DISALLOW_COPY_AND_ASSIGN(HtkMatrixHolder); //KALDI_DISALLOW_COPY_AND_ASSIGN(HtkMatrixHolder);
T t_; //T t_;
}; //};
// SphinxMatrixHolder can be used to read and write feature files in // SphinxMatrixHolder can be used to read and write feature files in
// CMU Sphinx format. 13-dimensional big-endian features are assumed. // CMU Sphinx format. 13-dimensional big-endian features are assumed.
...@@ -813,104 +813,104 @@ class HtkMatrixHolder { ...@@ -813,104 +813,104 @@ class HtkMatrixHolder {
// be no problem, because the usage help of Sphinx' "wave2feat" for example // be no problem, because the usage help of Sphinx' "wave2feat" for example
// says that Sphinx features are always big endian. // says that Sphinx features are always big endian.
// Note: the kFeatDim defaults to 13, see forward declaration in kaldi-holder.h // Note: the kFeatDim defaults to 13, see forward declaration in kaldi-holder.h
template<int kFeatDim> class SphinxMatrixHolder { //template<int kFeatDim> class SphinxMatrixHolder {
public: //public:
typedef Matrix<BaseFloat> T; //typedef Matrix<BaseFloat> T;
SphinxMatrixHolder() {} //SphinxMatrixHolder() {}
void Clear() { feats_.Resize(0, 0); } //void Clear() { feats_.Resize(0, 0); }
// Writes Sphinx-format features //// Writes Sphinx-format features
static bool Write(std::ostream &os, bool binary, const T &m) { //static bool Write(std::ostream &os, bool binary, const T &m) {
if (!binary) { //if (!binary) {
KALDI_WARN << "SphinxMatrixHolder can't write Sphinx features in text "; //KALDI_WARN << "SphinxMatrixHolder can't write Sphinx features in text ";
return false; //return false;
} //}
int32 size = m.NumRows() * m.NumCols(); //int32 size = m.NumRows() * m.NumCols();
if (MachineIsLittleEndian()) //if (MachineIsLittleEndian())
KALDI_SWAP4(size); //KALDI_SWAP4(size);
// write the header //// write the header
os.write(reinterpret_cast<char*> (&size), sizeof(size)); //os.write(reinterpret_cast<char*> (&size), sizeof(size));
for (MatrixIndexT i = 0; i < m.NumRows(); i++) { //for (MatrixIndexT i = 0; i < m.NumRows(); i++) {
std::vector<float32> tmp(m.NumCols()); //std::vector<float32> tmp(m.NumCols());
for (MatrixIndexT j = 0; j < m.NumCols(); j++) { //for (MatrixIndexT j = 0; j < m.NumCols(); j++) {
tmp[j] = static_cast<float32>(m(i, j)); //tmp[j] = static_cast<float32>(m(i, j));
if (MachineIsLittleEndian()) //if (MachineIsLittleEndian())
KALDI_SWAP4(tmp[j]); //KALDI_SWAP4(tmp[j]);
} //}
os.write(reinterpret_cast<char*>(&(tmp[0])), //os.write(reinterpret_cast<char*>(&(tmp[0])),
tmp.size() * 4); //tmp.size() * 4);
} //}
return true; //return true;
} //}
// Reads the features into a Kaldi Matrix //// Reads the features into a Kaldi Matrix
bool Read(std::istream &is) { //bool Read(std::istream &is) {
int32 nmfcc; //int32 nmfcc;
is.read(reinterpret_cast<char*> (&nmfcc), sizeof(nmfcc)); //is.read(reinterpret_cast<char*> (&nmfcc), sizeof(nmfcc));
if (MachineIsLittleEndian()) //if (MachineIsLittleEndian())
KALDI_SWAP4(nmfcc); //KALDI_SWAP4(nmfcc);
KALDI_VLOG(2) << "#feats: " << nmfcc; //KALDI_VLOG(2) << "#feats: " << nmfcc;
int32 nfvec = nmfcc / kFeatDim; //int32 nfvec = nmfcc / kFeatDim;
if ((nmfcc % kFeatDim) != 0) { //if ((nmfcc % kFeatDim) != 0) {
KALDI_WARN << "Sphinx feature count is inconsistent with vector length "; //KALDI_WARN << "Sphinx feature count is inconsistent with vector length ";
return false; //return false;
} //}
feats_.Resize(nfvec, kFeatDim); //feats_.Resize(nfvec, kFeatDim);
for (MatrixIndexT i = 0; i < feats_.NumRows(); i++) { //for (MatrixIndexT i = 0; i < feats_.NumRows(); i++) {
if (sizeof(BaseFloat) == sizeof(float32)) { //if (sizeof(BaseFloat) == sizeof(float32)) {
is.read(reinterpret_cast<char*> (feats_.RowData(i)), //is.read(reinterpret_cast<char*> (feats_.RowData(i)),
kFeatDim * sizeof(float32)); //kFeatDim * sizeof(float32));
if (!is.good()) { //if (!is.good()) {
KALDI_WARN << "Unexpected error/EOF while reading Sphinx features "; //KALDI_WARN << "Unexpected error/EOF while reading Sphinx features ";
return false; //return false;
} //}
if (MachineIsLittleEndian()) { //if (MachineIsLittleEndian()) {
for (MatrixIndexT j = 0; j < kFeatDim; j++) //for (MatrixIndexT j = 0; j < kFeatDim; j++)
KALDI_SWAP4(feats_(i, j)); //KALDI_SWAP4(feats_(i, j));
} //}
} else { // KALDI_DOUBLEPRECISION=1 //} else { // KALDI_DOUBLEPRECISION=1
float32 tmp[kFeatDim]; //float32 tmp[kFeatDim];
is.read(reinterpret_cast<char*> (tmp), sizeof(tmp)); //is.read(reinterpret_cast<char*> (tmp), sizeof(tmp));
if (!is.good()) { //if (!is.good()) {
KALDI_WARN << "Unexpected error/EOF while reading Sphinx features "; //KALDI_WARN << "Unexpected error/EOF while reading Sphinx features ";
return false; //return false;
} //}
for (MatrixIndexT j = 0; j < kFeatDim; j++) { //for (MatrixIndexT j = 0; j < kFeatDim; j++) {
if (MachineIsLittleEndian()) //if (MachineIsLittleEndian())
KALDI_SWAP4(tmp[j]); //KALDI_SWAP4(tmp[j]);
feats_(i, j) = static_cast<BaseFloat>(tmp[j]); //feats_(i, j) = static_cast<BaseFloat>(tmp[j]);
} //}
} //}
} //}
return true; //return true;
} //}
// Only read in binary //// Only read in binary
static bool IsReadInBinary() { return true; } //static bool IsReadInBinary() { return true; }
T &Value() { return feats_; } //T &Value() { return feats_; }
void Swap(SphinxMatrixHolder *other) { //void Swap(SphinxMatrixHolder *other) {
feats_.Swap(&(other->feats_)); //feats_.Swap(&(other->feats_));
} //}
bool ExtractRange(const SphinxMatrixHolder &other, //bool ExtractRange(const SphinxMatrixHolder &other,
const std::string &range) { //const std::string &range) {
KALDI_ERR << "ExtractRange is not defined for this type of holder."; //KALDI_ERR << "ExtractRange is not defined for this type of holder.";
return false; //return false;
} //}
private: //private:
KALDI_DISALLOW_COPY_AND_ASSIGN(SphinxMatrixHolder); //KALDI_DISALLOW_COPY_AND_ASSIGN(SphinxMatrixHolder);
T feats_; //T feats_;
}; //};
/// @} end "addtogroup holders" /// @} end "addtogroup holders"
......
...@@ -85,7 +85,7 @@ bool ParseMatrixRangeSpecifier(const std::string &range, ...@@ -85,7 +85,7 @@ bool ParseMatrixRangeSpecifier(const std::string &range,
return status; return status;
} }
bool ExtractObjectRange(const GeneralMatrix &input, const std::string &range, /*bool ExtractObjectRange(const GeneralMatrix &input, const std::string &range,
GeneralMatrix *output) { GeneralMatrix *output) {
// We just inspect input's type and forward to the correct implementation // We just inspect input's type and forward to the correct implementation
// if available. For kSparseMatrix, we do just fairly inefficient conversion // if available. For kSparseMatrix, we do just fairly inefficient conversion
...@@ -135,6 +135,7 @@ template bool ExtractObjectRange(const CompressedMatrix &, const std::string &, ...@@ -135,6 +135,7 @@ template bool ExtractObjectRange(const CompressedMatrix &, const std::string &,
template bool ExtractObjectRange(const CompressedMatrix &, const std::string &, template bool ExtractObjectRange(const CompressedMatrix &, const std::string &,
Matrix<double> *); Matrix<double> *);
*/
template<class Real> template<class Real>
bool ExtractObjectRange(const Matrix<Real> &input, const std::string &range, bool ExtractObjectRange(const Matrix<Real> &input, const std::string &range,
Matrix<Real> *output) { Matrix<Real> *output) {
......
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
#include "util/kaldi-io.h" #include "util/kaldi-io.h"
#include "util/text-utils.h" #include "util/text-utils.h"
#include "matrix/kaldi-vector.h" #include "matrix/kaldi-vector.h"
#include "matrix/sparse-matrix.h"
namespace kaldi { namespace kaldi {
...@@ -214,10 +213,10 @@ class TokenVectorHolder; ...@@ -214,10 +213,10 @@ class TokenVectorHolder;
/// A class for reading/writing HTK-format matrices. /// A class for reading/writing HTK-format matrices.
/// T == std::pair<Matrix<BaseFloat>, HtkHeader> /// T == std::pair<Matrix<BaseFloat>, HtkHeader>
class HtkMatrixHolder; //class HtkMatrixHolder;
/// A class for reading/writing Sphinx format matrices. /// A class for reading/writing Sphinx format matrices.
template<int kFeatDim = 13> class SphinxMatrixHolder; //template<int kFeatDim = 13> class SphinxMatrixHolder;
/// This templated function exists so that we can write .scp files with /// This templated function exists so that we can write .scp files with
/// 'object ranges' specified: the canonical example is a [first:last] range /// 'object ranges' specified: the canonical example is a [first:last] range
...@@ -249,15 +248,15 @@ bool ExtractObjectRange(const Vector<Real> &input, const std::string &range, ...@@ -249,15 +248,15 @@ bool ExtractObjectRange(const Vector<Real> &input, const std::string &range,
Vector<Real> *output); Vector<Real> *output);
/// GeneralMatrix is always of type BaseFloat /// GeneralMatrix is always of type BaseFloat
bool ExtractObjectRange(const GeneralMatrix &input, const std::string &range, //bool ExtractObjectRange(const GeneralMatrix &input, const std::string &range,
GeneralMatrix *output); // GeneralMatrix *output);
/// CompressedMatrix is always of the type BaseFloat but it is more /// CompressedMatrix is always of the type BaseFloat but it is more
/// efficient to provide template as it uses CompressedMatrix's own /// efficient to provide template as it uses CompressedMatrix's own
/// conversion to Matrix<Real> /// conversion to Matrix<Real>
template <class Real> //template <class Real>
bool ExtractObjectRange(const CompressedMatrix &input, const std::string &range, //bool ExtractObjectRange(const CompressedMatrix &input, const std::string &range,
Matrix<Real> *output); // Matrix<Real> *output);
// In SequentialTableReaderScriptImpl and RandomAccessTableReaderScriptImpl, for // In SequentialTableReaderScriptImpl and RandomAccessTableReaderScriptImpl, for
// cases where the scp contained 'range specifiers' (things in square brackets // cases where the scp contained 'range specifiers' (things in square brackets
......
@@ -23,7 +23,8 @@
 #include "base/kaldi-common.h"
 #include "util/kaldi-table.h"
 #include "util/kaldi-holder.h"
-#include "matrix/matrix-lib.h"
+#include "matrix/kaldi-matrix.h"
+#include "matrix/kaldi-vector.h"
 namespace kaldi {
@@ -51,8 +52,8 @@ typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<double> > >
 typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<double> > >
     RandomAccessDoubleMatrixReaderMapped;
-typedef TableWriter<KaldiObjectHolder<CompressedMatrix> >
-    CompressedMatrixWriter;
+//typedef TableWriter<KaldiObjectHolder<CompressedMatrix> >
+//    CompressedMatrixWriter;
 typedef TableWriter<KaldiObjectHolder<Vector<BaseFloat> > >
     BaseFloatVectorWriter;
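With CompressedMatrixWriter commented out here, any caller that previously wrote compressed features would fall back to the uncompressed writer this header keeps. A hypothetical migration sketch (the wspecifier and utterance key are placeholders, not taken from the diff):

```cpp
// Sketch: write plain BaseFloat matrices via the surviving typedef.
#include "kaldi/util/table-types.h"
#include "matrix/kaldi-matrix.h"

void WriteFeats(const kaldi::Matrix<kaldi::BaseFloat> &feats) {
    // "ark,scp:..." writes an archive plus an index; both paths and the
    // key "utt_0001" are invented for illustration.
    kaldi::BaseFloatMatrixWriter writer("ark,scp:feats.ark,feats.scp");
    writer.Write("utt_0001", feats);
}
```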
@@ -70,39 +71,39 @@ typedef SequentialTableReader<KaldiObjectHolder<Vector<double> > >
 typedef RandomAccessTableReader<KaldiObjectHolder<Vector<double> > >
     RandomAccessDoubleVectorReader;
-typedef TableWriter<KaldiObjectHolder<CuMatrix<BaseFloat> > >
-    BaseFloatCuMatrixWriter;
+//typedef TableWriter<KaldiObjectHolder<CuMatrix<BaseFloat> > >
+//    BaseFloatCuMatrixWriter;
-typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > >
-    SequentialBaseFloatCuMatrixReader;
+//typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > >
+//    SequentialBaseFloatCuMatrixReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > >
-    RandomAccessBaseFloatCuMatrixReader;
+//typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > >
+//    RandomAccessBaseFloatCuMatrixReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<BaseFloat> > >
-    RandomAccessBaseFloatCuMatrixReaderMapped;
+//typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<BaseFloat> > >
+//    RandomAccessBaseFloatCuMatrixReaderMapped;
-typedef TableWriter<KaldiObjectHolder<CuMatrix<double> > >
-    DoubleCuMatrixWriter;
+//typedef TableWriter<KaldiObjectHolder<CuMatrix<double> > >
+//    DoubleCuMatrixWriter;
-typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<double> > >
-    SequentialDoubleCuMatrixReader;
+//typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<double> > >
+//    SequentialDoubleCuMatrixReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<double> > >
-    RandomAccessDoubleCuMatrixReader;
+//typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<double> > >
+//    RandomAccessDoubleCuMatrixReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<double> > >
-    RandomAccessDoubleCuMatrixReaderMapped;
+//typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<double> > >
+//    RandomAccessDoubleCuMatrixReaderMapped;
-typedef TableWriter<KaldiObjectHolder<CuVector<BaseFloat> > >
-    BaseFloatCuVectorWriter;
+//typedef TableWriter<KaldiObjectHolder<CuVector<BaseFloat> > >
+//    BaseFloatCuVectorWriter;
-typedef SequentialTableReader<KaldiObjectHolder<CuVector<BaseFloat> > >
-    SequentialBaseFloatCuVectorReader;
+//typedef SequentialTableReader<KaldiObjectHolder<CuVector<BaseFloat> > >
+//    SequentialBaseFloatCuVectorReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<BaseFloat> > >
-    RandomAccessBaseFloatCuVectorReader;
+//typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<BaseFloat> > >
+//    RandomAccessBaseFloatCuVectorReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuVector<BaseFloat> > >
-    RandomAccessBaseFloatCuVectorReaderMapped;
+//typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuVector<BaseFloat> > >
+//    RandomAccessBaseFloatCuVectorReaderMapped;
-typedef TableWriter<KaldiObjectHolder<CuVector<double> > >
-    DoubleCuVectorWriter;
+//typedef TableWriter<KaldiObjectHolder<CuVector<double> > >
+//    DoubleCuVectorWriter;
-typedef SequentialTableReader<KaldiObjectHolder<CuVector<double> > >
-    SequentialDoubleCuVectorReader;
+//typedef SequentialTableReader<KaldiObjectHolder<CuVector<double> > >
+//    SequentialDoubleCuVectorReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<double> > >
-    RandomAccessDoubleCuVectorReader;
+//typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<double> > >
+//    RandomAccessDoubleCuVectorReader;
 typedef TableWriter<BasicHolder<int32> > Int32Writer;
@@ -150,8 +151,6 @@ typedef TableWriter<BasicHolder<bool> > BoolWriter;
 typedef SequentialTableReader<BasicHolder<bool> > SequentialBoolReader;
 typedef RandomAccessTableReader<BasicHolder<bool> > RandomAccessBoolReader;
 /// TokenWriter is a writer specialized for std::string where the strings
 /// are nonempty and whitespace-free. T == std::string
 typedef TableWriter<TokenHolder> TokenWriter;
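As the comment above notes, TokenWriter is restricted to nonempty, whitespace-free strings. A small usage sketch (the wspecifier, keys, and labels are invented for illustration):

```cpp
// Sketch: write one token per utterance with the TokenWriter typedef.
#include "kaldi/util/table-types.h"

void WriteLabels() {
    kaldi::TokenWriter writer("ark,t:labels.ark");  // text-mode archive
    writer.Write("utt_0001", "speech");  // tokens may not contain whitespace
    writer.Write("utt_0002", "noise");
}
```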
@@ -169,14 +168,14 @@ typedef RandomAccessTableReader<TokenVectorHolder>
     RandomAccessTokenVectorReader;
-typedef TableWriter<KaldiObjectHolder<GeneralMatrix> >
-    GeneralMatrixWriter;
+//typedef TableWriter<KaldiObjectHolder<GeneralMatrix> >
+//    GeneralMatrixWriter;
-typedef SequentialTableReader<KaldiObjectHolder<GeneralMatrix> >
-    SequentialGeneralMatrixReader;
+//typedef SequentialTableReader<KaldiObjectHolder<GeneralMatrix> >
+//    SequentialGeneralMatrixReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<GeneralMatrix> >
-    RandomAccessGeneralMatrixReader;
+//typedef RandomAccessTableReader<KaldiObjectHolder<GeneralMatrix> >
+//    RandomAccessGeneralMatrixReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<GeneralMatrix> >
-    RandomAccessGeneralMatrixReaderMapped;
+//typedef RandomAccessTableReaderMapped<KaldiObjectHolder<GeneralMatrix> >
+//    RandomAccessGeneralMatrixReaderMapped;
...
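Taken together, these hunks leave only the CPU-side Matrix and Vector table I/O in place; the CuMatrix, CuVector, CompressedMatrix, and GeneralMatrix typedefs are all disabled. For reference, a minimal read loop over the surviving typedefs (the rspecifier is a placeholder) might look like:

```cpp
// Sketch: sequentially read a feature archive with the typedefs kept by
// this change; no CuMatrix/CompressedMatrix/GeneralMatrix involved.
#include "kaldi/base/kaldi-common.h"
#include "kaldi/util/table-types.h"

int main() {
    kaldi::SequentialBaseFloatMatrixReader reader("ark:feats.ark");
    for (; !reader.Done(); reader.Next()) {
        const kaldi::Matrix<kaldi::BaseFloat> &feats = reader.Value();
        KALDI_LOG << reader.Key() << ": " << feats.NumRows() << " frames";
    }
    return 0;
}
```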