提交 f9cd0e85 编写于 作者: Z zhouyang 提交者: SmileGoat

add feat of kaldi

上级 49076280
......@@ -49,29 +49,32 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(libsndfile)
add_subdirectory(speechx)
#openblas
#set(OpenBLAS_INSTALL_PREFIX ${fc_patch}/OpenBLAS)
#set(OpenBLAS_SOURCE_DIR ${fc_patch}/OpenBLAS-src)
#ExternalProject_Add(
# OpenBLAS
# GIT_REPOSITORY https://github.com/xianyi/OpenBLAS
# GIT_TAG v0.3.13
# GIT_SHALLOW TRUE
# GIT_PROGRESS TRUE
# CONFIGURE_COMMAND ""
# BUILD_IN_SOURCE TRUE
# BUILD_COMMAND make USE_LOCKING=1 USE_THREAD=0
# INSTALL_COMMAND make PREFIX=${OpenBLAS_INSTALL_PREFIX} install
# UPDATE_DISCONNECTED TRUE
#)
###############################################################################
# Add local library
###############################################################################
# system lib
find_package()
#find_package()
# if dir have CmakeLists.txt
add_subdirectory()
#add_subdirectory(speechx)
# if dir do not have CmakeLists.txt
add_library(lib_name STATIC file.cc)
target_link_libraries(lib_name item0 item1)
add_dependencies(lib_name depend-target)
###############################################################################
# Library installation
###############################################################################
install()
###############################################################################
# Build binary file
###############################################################################
add_executable()
target_link_libraries()
#add_library(lib_name STATIC file.cc)
#target_link_libraries(lib_name item0 item1)
#add_dependencies(lib_name depend-target)
\ No newline at end of file
# Build script for the speechx sources: pulls in the kaldi subdirectory and
# builds the MFCC feature test executable from codelab/feat_test.
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(speechx LANGUAGES CXX)

# Adds third_party/openblas to the linker search path for the targets
# created below this point (presumably where OpenBLAS binaries are placed --
# confirm against the repository layout).
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/openblas)

# Kept directory-scoped on purpose: directories added afterwards with
# add_subdirectory() inherit these paths, which is what lets the kaldi
# sources resolve includes written relative to the kaldi/ directory
# (for example "base/io-funcs.h").
include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/kaldi
)

add_subdirectory(kaldi)

add_executable(mfcc-test codelab/feat_test/feature-mfcc-test.cc)
# Use the keyword signature; an executable has no consumers, so its
# dependencies are PRIVATE.
target_link_libraries(mfcc-test PRIVATE kaldi-mfcc)
# codelab
This directory is here for temporarily testing some functions.
此差异已折叠。
# Aggregating build script for the kaldi directory: it declares the project
# and delegates to one subdirectory per component. add_subdirectory()
# requires each listed directory to contain its own CMakeLists.txt.
project(kaldi)
# Subdirectories are processed in the order listed here.
add_subdirectory(base)
add_subdirectory(util)
add_subdirectory(feat)
add_subdirectory(matrix)
# kaldi-base library, built from the five source files listed below.
# No STATIC/SHARED keyword is given, so the library type follows
# BUILD_SHARED_LIBS.
add_library(kaldi-base
  io-funcs.cc
  kaldi-error.cc
  kaldi-math.cc
  kaldi-utils.cc
  timer.cc
)
\ No newline at end of file
// base/io-funcs-inl.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian;
// Johns Hopkins University (Author: Daniel Povey)
// 2016 Xiaohui Zhang
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_IO_FUNCS_INL_H_
#define KALDI_BASE_IO_FUNCS_INL_H_ 1
// Do not include this file directly. It is included by base/io-funcs.h
#include <limits>
#include <vector>
namespace kaldi {
// Integer implementation of WriteBasicType.
//   binary == true : writes one marker byte holding sizeof(T) (negated for
//                    unsigned types) followed by the raw bytes of the value.
//   binary == false: writes the decimal value followed by one space; one-byte
//                    types are printed as numbers rather than as characters.
// Raises KALDI_ERR if the stream is in a failed state afterwards.
template<class T> void WriteBasicType(std::ostream &os,
                                      bool binary, T t) {
  // Compile time assertion that this is not called with a wrong type.
  KALDI_ASSERT_IS_INTEGER_TYPE(T);
  if (!binary) {
    if (sizeof(t) != 1)
      os << t << " ";
    else
      os << static_cast<int16>(t) << " ";  // avoid emitting a raw character
  } else {
    const char sign = std::numeric_limits<T>::is_signed ? 1 : -1;
    const char marker = sign * static_cast<char>(sizeof(t));
    os.put(marker);
    os.write(reinterpret_cast<const char *>(&t), sizeof(t));
  }
  if (os.fail()) {
    KALDI_ERR << "Write failure in WriteBasicType.";
  }
}
// Template that covers integers.
template<class T> inline void ReadBasicType(std::istream &is,
bool binary, T *t) {
KALDI_PARANOID_ASSERT(t != NULL);
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
int len_c_in = is.get();
if (len_c_in == -1)
KALDI_ERR << "ReadBasicType: encountered end of stream.";
char len_c = static_cast<char>(len_c_in), len_c_expected
= (std::numeric_limits<T>::is_signed ? 1 : -1)
* static_cast<char>(sizeof(*t));
if (len_c != len_c_expected) {
KALDI_ERR << "ReadBasicType: did not get expected integer type, "
<< static_cast<int>(len_c)
<< " vs. " << static_cast<int>(len_c_expected)
<< ". You can change this code to successfully"
<< " read it later, if needed.";
// insert code here to read "wrong" type. Might have a switch statement.
}
is.read(reinterpret_cast<char *>(t), sizeof(*t));
} else {
if (sizeof(*t) == 1) {
int16 i;
is >> i;
*t = i;
} else {
is >> *t;
}
}
if (is.fail()) {
KALDI_ERR << "Read failure in ReadBasicType, file position is "
<< is.tellg() << ", next char is " << is.peek();
}
}
// Template that covers integers.
template<class T>
inline void WriteIntegerPairVector(std::ostream &os, bool binary,
const std::vector<std::pair<T, T> > &v) {
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
char sz = sizeof(T); // this is currently just a check.
os.write(&sz, 1);
int32 vecsz = static_cast<int32>(v.size());
KALDI_ASSERT((size_t)vecsz == v.size());
os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
if (vecsz != 0) {
os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T) * vecsz * 2);
}
} else {
// focus here is on prettiness of text form rather than
// efficiency of reading-in.
// reading-in is dominated by low-level operations anyway:
// for efficiency use binary.
os << "[ ";
typename std::vector<std::pair<T, T> >::const_iterator iter = v.begin(),
end = v.end();
for (; iter != end; ++iter) {
if (sizeof(T) == 1)
os << static_cast<int16>(iter->first) << ','
<< static_cast<int16>(iter->second) << ' ';
else
os << iter->first << ','
<< iter->second << ' ';
}
os << "]\n";
}
if (os.fail()) {
KALDI_ERR << "Write failure in WriteIntegerPairVector.";
}
}
// Reads a vector of (T, T) integer pairs from `is` into `*v`.
//   binary == true : expects one byte equal to sizeof(T), then an int32
//                    element count, then the raw bytes of the pairs.
//   binary == false: expects "[ a,b c,d ... ]"; one-byte types are parsed as
//                    numbers (through int16), not as characters.
// Format errors and stream failures are reported through KALDI_ERR; the
// `bad` label at the end is the common exit for stream failures.
template<class T>
inline void ReadIntegerPairVector(std::istream &is, bool binary,
std::vector<std::pair<T, T> > *v) {
KALDI_ASSERT_IS_INTEGER_TYPE(T);
KALDI_ASSERT(v != NULL);
if (binary) {
// The leading byte is only peeked so that, on a mismatch, the error message
// can report the unexpected value with the stream still positioned on it.
int sz = is.peek();
if (sz == sizeof(T)) {
is.get();
} else { // this is currently just a check.
KALDI_ERR << "ReadIntegerPairVector: expected to see type of size "
<< sizeof(T) << ", saw instead " << sz << ", at file position "
<< is.tellg();
}
int32 vecsz;
is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
// A negative count can only come from a corrupt or mismatched stream.
if (is.fail() || vecsz < 0) goto bad;
v->resize(vecsz);
if (vecsz > 0) {
// Reads straight into the vector's storage: 2 * sizeof(T) bytes per pair.
is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz*2);
}
} else {
std::vector<std::pair<T, T> > tmp_v; // use temporary so v doesn't use extra memory
// due to resizing.
is >> std::ws;
if (is.peek() != static_cast<int>('[')) {
KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw "
<< is.peek() << ", at file position " << is.tellg();
}
is.get(); // consume the '['.
is >> std::ws; // consume whitespace.
// Each iteration parses one "first,second" pair and the whitespace after it.
while (is.peek() != static_cast<int>(']')) {
if (sizeof(T) == 1) { // read/write chars as numbers.
int16 next_t1, next_t2;
is >> next_t1;
if (is.fail()) goto bad;
if (is.peek() != static_cast<int>(','))
KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
<< is.peek() << ", at file position " << is.tellg();
is.get(); // consume the ','.
is >> next_t2 >> std::ws;
if (is.fail()) goto bad;
else
tmp_v.push_back(std::make_pair<T, T>((T)next_t1, (T)next_t2));
} else {
T next_t1, next_t2;
is >> next_t1;
if (is.fail()) goto bad;
if (is.peek() != static_cast<int>(','))
KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
<< is.peek() << ", at file position " << is.tellg();
is.get(); // consume the ','.
is >> next_t2 >> std::ws;
if (is.fail()) goto bad;
else
tmp_v.push_back(std::pair<T, T>(next_t1, next_t2));
}
}
is.get(); // get the final ']'.
*v = tmp_v; // could use std::swap to use less temporary memory, but this
// uses less permanent memory.
}
// Success path: return unless one of the reads above left the stream failed.
if (!is.fail()) return;
bad:
KALDI_ERR << "ReadIntegerPairVector: read failure at file position "
<< is.tellg();
}
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
const std::vector<T> &v) {
// Compile time assertion that this is not called with a wrong type.
KALDI_ASSERT_IS_INTEGER_TYPE(T);
if (binary) {
char sz = sizeof(T); // this is currently just a check.
os.write(&sz, 1);
int32 vecsz = static_cast<int32>(v.size());
KALDI_ASSERT((size_t)vecsz == v.size());
os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
if (vecsz != 0) {
os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T)*vecsz);
}
} else {
// focus here is on prettiness of text form rather than
// efficiency of reading-in.
// reading-in is dominated by low-level operations anyway:
// for efficiency use binary.
os << "[ ";
typename std::vector<T>::const_iterator iter = v.begin(), end = v.end();
for (; iter != end; ++iter) {
if (sizeof(T) == 1)
os << static_cast<int16>(*iter) << " ";
else
os << *iter << " ";
}
os << "]\n";
}
if (os.fail()) {
KALDI_ERR << "Write failure in WriteIntegerVector.";
}
}
// Reads a vector of integers from `is` into `*v`.
//   binary == true : expects one byte equal to sizeof(T), then an int32
//                    element count, then the raw bytes of the elements.
//   binary == false: expects "[ a b c ... ]"; one-byte types are parsed as
//                    numbers (through int16), not as characters.
// Format errors and stream failures are reported through KALDI_ERR; the
// `bad` label at the end is the common exit for stream failures.
template<class T> inline void ReadIntegerVector(std::istream &is,
bool binary,
std::vector<T> *v) {
KALDI_ASSERT_IS_INTEGER_TYPE(T);
KALDI_ASSERT(v != NULL);
if (binary) {
// The leading byte is only peeked so that, on a mismatch, the error message
// can report the unexpected value with the stream still positioned on it.
int sz = is.peek();
if (sz == sizeof(T)) {
is.get();
} else { // this is currently just a check.
KALDI_ERR << "ReadIntegerVector: expected to see type of size "
<< sizeof(T) << ", saw instead " << sz << ", at file position "
<< is.tellg();
}
int32 vecsz;
is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
// A negative count can only come from a corrupt or mismatched stream.
if (is.fail() || vecsz < 0) goto bad;
v->resize(vecsz);
if (vecsz > 0) {
// Reads straight into the vector's storage.
is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz);
}
} else {
std::vector<T> tmp_v; // use temporary so v doesn't use extra memory
// due to resizing.
is >> std::ws;
if (is.peek() != static_cast<int>('[')) {
KALDI_ERR << "ReadIntegerVector: expected to see [, saw "
<< is.peek() << ", at file position " << is.tellg();
}
is.get(); // consume the '['.
is >> std::ws; // consume whitespace.
// Each iteration parses one element and the whitespace after it.
while (is.peek() != static_cast<int>(']')) {
if (sizeof(T) == 1) { // read/write chars as numbers.
int16 next_t;
is >> next_t >> std::ws;
if (is.fail()) goto bad;
else
tmp_v.push_back((T)next_t);
} else {
T next_t;
is >> next_t >> std::ws;
if (is.fail()) goto bad;
else
tmp_v.push_back(next_t);
}
}
is.get(); // get the final ']'.
*v = tmp_v; // could use std::swap to use less temporary memory, but this
// uses less permanent memory.
}
// Success path: return unless one of the reads above left the stream failed.
if (!is.fail()) return;
bad:
KALDI_ERR << "ReadIntegerVector: read failure at file position "
<< is.tellg();
}
// Prepares an already-open output stream for Kaldi data. In binary mode the
// two-byte header '\0' 'B' is written first. In either mode the stream's
// floating-point precision is raised to at least 7 digits (slightly more than
// a float can represent). Stream errors are not checked and nothing is thrown.
inline void InitKaldiOutputStream(std::ostream &os, bool binary) {
  if (binary) {
    const char kBinaryHeader[2] = {'\0', 'B'};
    os.put(kBinaryHeader[0]);
    os.put(kBinaryHeader[1]);
  }
  // Never lower a precision the caller has already raised.
  const bool precision_too_low = os.precision() < 7;
  if (precision_too_low)
    os.precision(7);
}
/// Prepares an already-open input stream for reading by detecting the binary
/// header '\0' 'B'.
///  - Stream does not start with '\0': nothing is consumed, *binary is set to
///    false and true is returned.
///  - Stream starts with '\0' 'B': both bytes are consumed, *binary is set to
///    true and true is returned.
///  - Stream starts with '\0' followed by anything else: the '\0' is consumed,
///    *binary is left untouched and false is returned (no exception).
inline bool InitKaldiInputStream(std::istream &is, bool *binary) {
  if (is.peek() != '\0') {  // no header: text mode
    *binary = false;
    return true;
  }
  is.get();  // consume the '\0'
  if (is.peek() != 'B') {
    return false;  // malformed header
  }
  is.get();  // consume the 'B'
  *binary = true;
  return true;
}
} // end namespace kaldi.
#endif // KALDI_BASE_IO_FUNCS_INL_H_
// base/io-funcs.cc
// Copyright 2009-2011 Microsoft Corporation; Saarland University
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
namespace kaldi {
// bool specialization: writes the single character 'T' or 'F', followed by a
// space in text mode only. Raises KALDI_ERR if the stream has failed.
template<>
void WriteBasicType<bool>(std::ostream &os, bool binary, bool b) {
  const char *symbol = b ? "T" : "F";
  os << symbol;
  if (!binary) os << " ";
  if (os.fail())
    KALDI_ERR << "Write failure in WriteBasicType<bool>";
}
// bool specialization: skips leading whitespace in text mode, then expects
// the character 'T' or 'F' and consumes it. Any other next character
// (including end of stream) is reported through KALDI_ERR.
template<>
void ReadBasicType<bool>(std::istream &is, bool binary, bool *b) {
  KALDI_PARANOID_ASSERT(b != NULL);
  if (!binary) is >> std::ws;  // eat up whitespace.
  const char next = is.peek();
  const bool is_true = (next == 'T');
  if (is_true || next == 'F') {
    *b = is_true;
    is.get();  // consume the character that was just interpreted
  } else {
    KALDI_ERR << "Read failure in ReadBasicType<bool>, file position is "
              << is.tellg() << ", next char is " << CharToString(next);
  }
}
// float specialization.
//   binary == true : writes one byte holding sizeof(float), then the raw
//                    bytes of the value.
//   binary == false: writes the value using the stream's current formatting,
//                    followed by one space.
// Raises KALDI_ERR if the stream is in a failed state afterwards.
template<>
void WriteBasicType<float>(std::ostream &os, bool binary, float f) {
  if (binary) {
    char c = sizeof(f);
    os.put(c);
    os.write(reinterpret_cast<const char *>(&f), sizeof(f));
  } else {
    os << f << " ";
  }
  // Report write failures the same way the integer and bool writers do,
  // instead of silently dropping data.
  if (os.fail()) {
    KALDI_ERR << "Write failure in WriteBasicType<float>.";
  }
}
// double specialization.
//   binary == true : writes one byte holding sizeof(double), then the raw
//                    bytes of the value.
//   binary == false: writes the value using the stream's current formatting,
//                    followed by one space.
// Raises KALDI_ERR if the stream is in a failed state afterwards.
template<>
void WriteBasicType<double>(std::ostream &os, bool binary, double f) {
  if (binary) {
    char c = sizeof(f);
    os.put(c);
    os.write(reinterpret_cast<const char *>(&f), sizeof(f));
  } else {
    os << f << " ";
  }
  // Report write failures the same way the integer and bool writers do,
  // instead of silently dropping data.
  if (os.fail()) {
    KALDI_ERR << "Write failure in WriteBasicType<double>.";
  }
}
// float specialization.
//   binary == true : peeks at the size byte. If it equals sizeof(float) the
//                    value is read directly; if it equals sizeof(double) the
//                    value is read as a double and converted. Anything else is
//                    reported through KALDI_ERR.
//   binary == false: parses the value with operator>>.
// Raises KALDI_ERR if the stream is in a failed state afterwards.
template<>
void ReadBasicType<float>(std::istream &is, bool binary, float *f) {
  KALDI_PARANOID_ASSERT(f != NULL);
  if (!binary) {
    is >> *f;
  } else {
    const int size_marker = is.peek();
    if (size_marker == sizeof(*f)) {
      is.get();  // consume the size byte
      is.read(reinterpret_cast<char*>(f), sizeof(*f));
    } else if (size_marker == sizeof(double)) {
      // The double reader consumes the size byte itself.
      double wide;
      ReadBasicType(is, binary, &wide);
      *f = wide;
    } else {
      KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek()
                << ", at file position " << is.tellg();
    }
  }
  if (is.fail()) {
    KALDI_ERR << "ReadBasicType: failed to read, at file position "
              << is.tellg();
  }
}
// double specialization.
//   binary == true : peeks at the size byte. If it equals sizeof(double) the
//                    value is read directly; if it equals sizeof(float) the
//                    value is read as a float and widened. Anything else is
//                    reported through KALDI_ERR.
//   binary == false: parses the value with operator>>.
// Raises KALDI_ERR if the stream is in a failed state afterwards.
template<>
void ReadBasicType<double>(std::istream &is, bool binary, double *d) {
  KALDI_PARANOID_ASSERT(d != NULL);
  if (!binary) {
    is >> *d;
  } else {
    const int size_marker = is.peek();
    if (size_marker == sizeof(*d)) {
      is.get();  // consume the size byte
      is.read(reinterpret_cast<char*>(d), sizeof(*d));
    } else if (size_marker == sizeof(float)) {
      // The float reader consumes the size byte itself.
      float narrow;
      ReadBasicType(is, binary, &narrow);
      *d = narrow;
    } else {
      KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek()
                << ", at file position " << is.tellg();
    }
  }
  if (is.fail()) {
    KALDI_ERR << "ReadBasicType: failed to read, at file position "
              << is.tellg();
  }
}
// Validates that `token` can be written and read back as a token: it must be
// non-empty and must not contain any whitespace character. Violations are
// reported through KALDI_ERR. `token` must not be NULL (callers assert this
// before calling).
void CheckToken(const char *token) {
  if (*token == '\0')
    KALDI_ERR << "Token is empty (not a valid token)";
  for (const char *p = token; *p != '\0'; ++p) {
    // isspace() is only defined for EOF and values representable as unsigned
    // char; plain char may be signed, so bytes >= 0x80 must be converted
    // first to avoid undefined behaviour.
    if (::isspace(static_cast<unsigned char>(*p)))
      KALDI_ERR << "Token is not a valid token (contains space): '"
                << token << "'";
  }
}
// Writes `token` followed by a single space. The `binary` flag is ignored:
// the space acts as the terminator in both modes. The token is validated with
// CheckToken() first so that it can be read back; a failed stream is reported
// through KALDI_ERR.
void WriteToken(std::ostream &os, bool binary, const char *token) {
  KALDI_ASSERT(token != NULL);
  CheckToken(token);  // make sure it's valid (can be read back)
  os << token;
  os << " ";
  if (os.fail()) {
    KALDI_ERR << "Write failure in WriteToken.";
  }
}
// Returns the stream's peek() value. In text mode leading whitespace is
// consumed first; in binary mode the stream is left untouched.
int Peek(std::istream &is, bool binary) {
  if (binary)
    return is.peek();
  is >> std::ws;  // eat up whitespace.
  return is.peek();
}
// std::string convenience overload; forwards to the const char* version.
void WriteToken(std::ostream &os, bool binary, const std::string & token) {
  const char *c_token = token.c_str();
  WriteToken(os, binary, c_token);
}
// Reads the next whitespace-delimited token into *str and consumes the single
// whitespace character that follows it. In text mode leading whitespace is
// skipped first. A failed extraction, or a token that is not followed by
// whitespace (for example because the stream ended), is reported through
// KALDI_ERR.
void ReadToken(std::istream &is, bool binary, std::string *str) {
  KALDI_ASSERT(str != NULL);
  if (!binary) is >> std::ws;  // consume whitespace.
  is >> *str;
  if (is.fail()) {
    KALDI_ERR << "ReadToken, failed to read token at file position "
              << is.tellg();
  }
  const int following = is.peek();
  if (!isspace(following)) {
    KALDI_ERR << "ReadToken, expected space after token, saw instead "
              << CharToString(static_cast<char>(is.peek()))
              << ", at file position " << is.tellg();
  }
  is.get();  // consume the space.
}
// Returns the first character of the next token without (normally) consuming
// it. In text mode leading whitespace is skipped. If the token starts with
// '<', that character is stepped over, the following character is returned,
// and the '<' is put back with unget(). Should unget() fail, the stream's
// error state is cleared and the '<' stays consumed.
int PeekToken(std::istream &is, bool binary) {
  if (!binary) is >> std::ws;  // consume whitespace.
  const bool skipped_bracket = (static_cast<char>(is.peek()) == '<');
  if (skipped_bracket)
    is.get();
  const int first_char = is.peek();
  if (skipped_bracket && !is.unget()) {
    // Clear the bad bit.  This code can be (and is in fact) reached, since the
    // C++ standard does not guarantee that a call to unget() must succeed.
    is.clear();
  }
  return first_char;
}
// Reads the next token from `is` and checks that it equals `token`; raises
// KALDI_ERR on a stream failure or on a mismatch. In text mode leading
// whitespace is skipped first, and the character following the token is
// always consumed. When `token` starts with '<', the same token without the
// leading '<' is also accepted (see the comment in the body).
void ExpectToken(std::istream &is, bool binary, const char *token) {
  // Only used for the error message. Kept as std::streamoff rather than int
  // so that offsets beyond INT_MAX (large files) are not truncated.
  const std::streamoff pos_at_start = is.tellg();
  KALDI_ASSERT(token != NULL);
  CheckToken(token);  // make sure it's valid (can be read back)
  if (!binary) is >> std::ws;  // consume whitespace.
  std::string str;
  is >> str;
  is.get();  // consume the space.
  if (is.fail()) {
    KALDI_ERR << "Failed to read token [started at file position "
              << pos_at_start << "], expected " << token;
  }
  // The second half of the '&&' expression below is so that if we're expecting
  // "<Foo>", we will accept "Foo>" instead.  This is so that the model-reading
  // code will tolerate errors in PeekToken where is.unget() failed; search for
  // is.clear() in PeekToken() for an explanation.
  if (strcmp(str.c_str(), token) != 0 &&
      !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) {
    KALDI_ERR << "Expected token \"" << token << "\", got instead \""
              << str <<"\".";
  }
}
// std::string convenience overload; forwards to the const char* version.
void ExpectToken(std::istream &is, bool binary, const std::string &token) {
  const char *c_token = token.c_str();
  ExpectToken(is, binary, c_token);
}
} // end namespace kaldi
// base/io-funcs.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian
// 2016 Xiaohui Zhang
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_IO_FUNCS_H_
#define KALDI_BASE_IO_FUNCS_H_
// This header only contains some relatively low-level I/O functions.
// The full Kaldi I/O declarations are in ../util/kaldi-io.h
// and ../util/kaldi-table.h
// They were put in util/ in order to avoid making the Matrix library
// dependent on them.
#include <cctype>
#include <vector>
#include <string>
#include "base/kaldi-common.h"
#include "base/io-funcs-inl.h"
namespace kaldi {
/*
This comment describes the Kaldi approach to I/O. All objects can be written
and read in two modes: binary and text. In addition we want to make the I/O
work if we redefine the typedef "BaseFloat" between floats and doubles.
We also want to have control over whitespace in text mode without affecting
the meaning of the file, for pretty-printing purposes.
Errors are handled by throwing a KaldiFatalError exception.
For integer and floating-point types (and boolean values):
WriteBasicType(std::ostream &, bool binary, const T&);
ReadBasicType(std::istream &, bool binary, T*);
[and we expect these functions to be defined in such a way that they work when
the type T changes between float and double, so you can read float into double
and vice versa]. Note that for efficiency and space-saving reasons, the Vector
and Matrix classes do not use these functions [but they preserve the type
interchangeability in their own way]
For a class (or struct) C:
class C {
..
Write(std::ostream &, bool binary, [possibly extra optional args for specific classes]) const;
Read(std::istream &, bool binary, [possibly extra optional args for specific classes]);
..
}
NOTE: The only actual optional args we used are the "add" arguments in
Vector/Matrix classes, which specify whether we should sum the data already
in the class with the data being read.
For types which are typedef's involving stl classes, I/O is as follows:
typedef std::vector<std::pair<A, B> > MyTypedefName;
The user should define something like:
WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t);
ReadMyTypedefName(std::istream &, bool binary, MyTypedefName *t);
The user would have to write these functions.
For a type std::vector<T>:
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector<T> &v);
void ReadIntegerVector(std::istream &is, bool binary, std::vector<T> *v);
For other types, e.g. vectors of pairs, the user should create a routine of the
type WriteMyTypedefName. This is to avoid introducing confusing templated functions;
we could easily create templated functions to handle most of these cases but they
would have to share the same name.
It also often happens that the user needs to write/read special tokens as part
of a file. These might be class headers, or separators/identifiers in the class.
We provide special functions for manipulating these. These special tokens must
be nonempty and must not contain any whitespace.
void WriteToken(std::ostream &os, bool binary, const char*);
void WriteToken(std::ostream &os, bool binary, const std::string & token);
int Peek(std::istream &is, bool binary);
void ReadToken(std::istream &is, bool binary, std::string *str);
int PeekToken(std::istream &is, bool binary);
WriteToken writes the token and one space (whether in binary or text mode).
Peek returns the first character of the next token, by consuming whitespace
(in text mode) and then returning the peek() character. It returns -1 at EOF;
it doesn't throw. It's useful if a class can have various forms based on
typedefs and virtual classes, and wants to know which version to read.
ReadToken allows the caller to obtain the next token. PeekToken returns the
first character of the next token (skipping over a leading '<', which it then
tries to put back), so that a subsequent call to ReadToken will normally read
the same token. This is useful when different object types are written to the
same file; using PeekToken one can decide which of the objects to read.
There is currently no special functionality for writing/reading strings (where the strings
contain data rather than "special tokens" that are whitespace-free and nonempty). This is
because Kaldi is structured in such a way that strings don't appear, except as OpenFst symbol
table entries (and these have their own format).
NOTE: you should not call ReadBasicType and WriteBasicType with types,
such as int and size_t, that are machine-dependent -- at least not
if you want your file formats to port between machines. Use int32 and
int64 where necessary. There is no way to detect this using compile-time
assertions because C++ only keeps track of the internal representation of
the type.
*/
/// \addtogroup io_funcs_basic
/// @{
/// WriteBasicType is the name of the write function for bool, integer types,
/// and floating-point types. They all throw on error.
/// The generic (integer) implementation is in base/io-funcs-inl.h, which this
/// header includes above.
template<class T> void WriteBasicType(std::ostream &os, bool binary, T t);
/// ReadBasicType is the name of the read function for bool, integer types,
/// and floating-point types. They all throw on error.
/// The generic (integer) implementation is in base/io-funcs-inl.h.
template<class T> void ReadBasicType(std::istream &is, bool binary, T *t);
// Declare specialization for bool.
// The explicit specializations below are defined in base/io-funcs.cc; they
// are declared here so that every user of this header selects them instead
// of instantiating the generic integer template.
template<>
void WriteBasicType<bool>(std::ostream &os, bool binary, bool b);
template <>
void ReadBasicType<bool>(std::istream &is, bool binary, bool *b);
// Declare specializations for float and double.
template<>
void WriteBasicType<float>(std::ostream &os, bool binary, float f);
template<>
void WriteBasicType<double>(std::ostream &os, bool binary, double f);
template<>
void ReadBasicType<float>(std::istream &is, bool binary, float *f);
template<>
void ReadBasicType<double>(std::istream &is, bool binary, double *f);
// ReadBasicType overload with an "add" flag. With add == false it behaves
// exactly like the three-argument ReadBasicType. With add == true the value is
// read into a zero-initialised temporary and added to *t, so *t must already
// hold a meaningful value (when used in Read functions, initialise the members
// concerned to zero in the default constructor).
template<class T>
inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) {
  if (add) {
    T increment = T(0);
    ReadBasicType(is, binary, &increment);
    *t += increment;
    return;
  }
  ReadBasicType(is, binary, t);
}
// The four function templates declared below are defined in
// base/io-funcs-inl.h. Each supports a binary and a text format, selected by
// the `binary` argument.
/// Function for writing STL vectors of integer types.
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
const std::vector<T> &v);
/// Function for reading STL vector of integer types.
template<class T> inline void ReadIntegerVector(std::istream &is, bool binary,
std::vector<T> *v);
/// Function for writing STL vectors of pairs of integer types.
template<class T>
inline void WriteIntegerPairVector(std::ostream &os, bool binary,
const std::vector<std::pair<T, T> > &v);
/// Function for reading STL vector of pairs of integer types.
template<class T>
inline void ReadIntegerPairVector(std::istream &is, bool binary,
std::vector<std::pair<T, T> > *v);
/// The WriteToken functions are for writing nonempty sequences of non-space
/// characters. They are not for general strings.
/// The token is followed by a single space in both binary and text mode.
void WriteToken(std::ostream &os, bool binary, const char *token);
void WriteToken(std::ostream &os, bool binary, const std::string & token);
/// Peek consumes whitespace (if binary == false) and then returns the peek()
/// value of the stream.
int Peek(std::istream &is, bool binary);
/// ReadToken gets the next token and puts it in str (exception on failure). If
/// PeekToken() had been previously called, it is possible that the stream had
/// failed to unget the starting '<' character. In this case ReadToken() returns
/// the token string without the leading '<'. You must be prepared to handle
/// this case. ExpectToken() handles this internally, and is not affected.
void ReadToken(std::istream &is, bool binary, std::string *token);
/// PeekToken will return the first character of the next token, or -1 if end of
/// file. It's the same as Peek(), except if the first character is '<' it will
/// skip over it and will return the next character. It will attempt to unget
/// the '<' so the stream is where it was before you did PeekToken(), however,
/// this is not guaranteed (see ReadToken()).
int PeekToken(std::istream &is, bool binary);
/// ExpectToken tries to read in the given token, and throws an exception
/// on failure.
void ExpectToken(std::istream &is, bool binary, const char *token);
void ExpectToken(std::istream &is, bool binary, const std::string & token);
/// ExpectPretty attempts to read the text in "token", but only in non-binary
/// mode. Throws exception on failure. It expects an exact match except that
/// arbitrary whitespace matches arbitrary whitespace.
// NOTE(review): base/io-funcs.cc as added in this change does not appear to
// define ExpectPretty; confirm a definition exists elsewhere, otherwise any
// caller will fail at link time.
void ExpectPretty(std::istream &is, bool binary, const char *token);
void ExpectPretty(std::istream &is, bool binary, const std::string & token);
/// @} end "addtogroup io_funcs_basic"
// Both functions below are defined inline in base/io-funcs-inl.h, which is
// included near the top of this header; these are re-declarations that carry
// the user-facing documentation.
/// InitKaldiOutputStream initializes an opened stream for writing by writing an
/// optional binary header and modifying the floating-point precision; it will
/// typically not be called by users directly.
inline void InitKaldiOutputStream(std::ostream &os, bool binary);
/// InitKaldiInputStream initializes an opened stream for reading by detecting
/// the binary header and setting the "binary" value appropriately;
/// It will typically not be called by users directly.
/// Returns false when the stream starts with '\0' that is not followed by 'B'.
inline bool InitKaldiInputStream(std::istream &is, bool *binary);
} // end namespace kaldi.
#endif // KALDI_BASE_IO_FUNCS_H_
// base/kaldi-common.h
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_COMMON_H_
#define KALDI_BASE_KALDI_COMMON_H_ 1
#include <cstddef>
#include <cstdlib>
#include <cstring> // C string stuff like strcpy
#include <string>
#include <sstream>
#include <stdexcept>
#include <cassert>
#include <vector>
#include <iostream>
#include <fstream>
#include "base/kaldi-utils.h"
#include "base/kaldi-error.h"
#include "base/kaldi-types.h"
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
#include "base/timer.h"
#endif // KALDI_BASE_KALDI_COMMON_H_
// base/kaldi-error.cc
// Copyright 2019 LAIX (Yi Sun)
// Copyright 2019 SmartAction LLC (kkm)
// Copyright 2016 Brno University of Technology (author: Karel Vesely)
// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; Ondrej Glembek
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifdef HAVE_EXECINFO_H
#include <execinfo.h> // To get stack trace in error messages.
// If this #include fails there is an error in the Makefile, it does not
// support your platform well. Make sure HAVE_EXECINFO_H is undefined,
// and the code will compile.
#ifdef HAVE_CXXABI_H
#include <cxxabi.h> // For name demangling.
// Useful to decode the stack trace, but only used if we have execinfo.h
#endif // HAVE_CXXABI_H
#endif // HAVE_EXECINFO_H
#include "base/kaldi-common.h"
#include "base/kaldi-error.h"
#include "base/version.h"
namespace kaldi {
/***** GLOBAL VARIABLES FOR LOGGING *****/
// Verbosity level; has external linkage and starts at 0.
int32 g_kaldi_verbose_level = 0;
// Program name; file-local, assigned by SetProgramName().
static std::string program_name;
// Optional custom log handler; file-local and NULL until one is installed
// (the code that installs or calls it is outside this excerpt).
static LogHandler log_handler = NULL;
// Records the program name in the file-local `program_name` string.
// A NULL `basename` is treated as an empty name.
void SetProgramName(const char *basename) {
  // Using the 'static std::string' for the program name is mostly harmless,
  // because (a) Kaldi logging is undefined before main(), and (b) no stdc++
  // string implementation has been found in the wild that would not be just
  // an empty string when zero-initialized but not yet constructed.
  //
  // Assigning a null pointer to std::string is undefined behaviour, so map
  // NULL to the empty string instead.
  program_name = (basename != nullptr) ? basename : "";
}
/***** HELPER FUNCTIONS *****/
// Shortens a path to its last component plus at most one parent directory,
// e.g. "/a/b/c/d/e/f.cc" becomes "e/f.cc". Both '/' and '\' count as path
// separators. The result points into `path` (no copy is made); a null `path`
// yields the empty string.
static const char *GetShortFileName(const char *path) {
  if (path == nullptr)
    return "";
  const char *parent_start = path;  // where the second-to-last component begins
  const char *leaf_start = path;    // where the last component begins
  for (const char *sep = std::strpbrk(path, "\\/"); sep != nullptr;
       sep = std::strpbrk(sep + 1, "\\/")) {
    parent_start = leaf_start;
    leaf_start = sep + 1;
  }
  return parent_start;
}
/***** STACK TRACE *****/
namespace internal {
// Locates a symbol inside one line of backtrace text. The symbol is taken to
// start at the first '_' that directly follows a space or '(' and to end just
// before the next space or '+'. On success returns true with the half-open
// range [*begin, *end). Returns false if no such start exists (*begin is then
// std::string::npos and *end is not written) or if no terminating character
// follows the start.
bool LocateSymbolRange(const std::string &trace_name, size_t *begin,
                       size_t *end) {
  // Find the first '_' with leading ' ' or '('. The search starts at index 1
  // because a match needs a preceding character.
  size_t pos = trace_name.find('_', 1);
  while (pos != std::string::npos) {
    const char before = trace_name[pos - 1];
    if (before == ' ' || before == '(')
      break;
    pos = trace_name.find('_', pos + 1);
  }
  *begin = pos;
  if (pos == std::string::npos) {
    return false;
  }
  *end = trace_name.find_first_of(" +", *begin);
  return *end != std::string::npos;
}
}  // namespace internal
#ifdef HAVE_EXECINFO_H
// Returns `trace_name` with the mangled C++ symbol it contains replaced by its
// demangled form. The line is returned unchanged when <cxxabi.h> is not
// available or when no symbol can be located in it; the symbol is left as-is
// when demangling does not succeed.
static std::string Demangle(std::string trace_name) {
#ifndef HAVE_CXXABI_H
  return trace_name;
#else   // HAVE_CXXABI_H
  // Try to demangle the symbol. We are trying to support the following formats
  // produced by different platforms:
  //
  // Linux:
  //   ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
  //
  // Mac:
  //   0 server 0x000000010f67614d _ZNK5kaldi13MessageLogger10LogMessageEv + 813
  //
  // We want to extract the name e.g., '_ZN5kaldi13UnitTestErrorEv' and
  // demangle it into a readable name like kaldi::UnitTestError.
  size_t sym_begin, sym_end;
  if (!internal::LocateSymbolRange(trace_name, &sym_begin, &sym_end)) {
    return trace_name;
  }
  std::string name = trace_name.substr(sym_begin, sym_end - sym_begin);
  int rc;
  char *readable = abi::__cxa_demangle(name.c_str(), 0, 0, &rc);
  if (rc == 0 && readable != nullptr) {
    name = readable;
    free(readable);  // __cxa_demangle hands back a malloc()ed buffer
  }
  // Stitch the text before the symbol, the symbol, and the text after it.
  std::string result = trace_name.substr(0, sym_begin);
  result += name;
  result += trace_name.substr(sym_end, std::string::npos);
  return result;
#endif  // HAVE_CXXABI_H
}
#endif  // HAVE_EXECINFO_H
// Builds a human-readable, demangled stack trace of the calling thread.
// Returns an empty string when HAVE_EXECINFO_H is not defined or when
// backtrace_symbols() fails.
static std::string KaldiGetStackTrace() {
std::string ans;
#ifdef HAVE_EXECINFO_H
const size_t KALDI_MAX_TRACE_SIZE = 50;
const size_t KALDI_MAX_TRACE_PRINT = 50; // Must be even.
// Buffer for the trace.
void *trace[KALDI_MAX_TRACE_SIZE];
// Get the trace.
size_t size = backtrace(trace, KALDI_MAX_TRACE_SIZE);
// Get the trace symbols.
char **trace_symbol = backtrace_symbols(trace, size);
if (trace_symbol == NULL)
return ans;
// Compose a human-readable backtrace string.
ans += "[ Stack-Trace: ]\n";
if (size <= KALDI_MAX_TRACE_PRINT) {
for (size_t i = 0; i < size; i++) {
ans += Demangle(trace_symbol[i]) + "\n";
}
} else { // Print out the first and last KALDI_MAX_TRACE_PRINT / 2 frames.
// NOTE(review): with KALDI_MAX_TRACE_PRINT == KALDI_MAX_TRACE_SIZE this
// branch looks unreachable, assuming backtrace() never returns more than the
// buffer size it was given -- confirm before relying on the elision.
for (size_t i = 0; i < KALDI_MAX_TRACE_PRINT / 2; i++) {
ans += Demangle(trace_symbol[i]) + "\n";
}
ans += ".\n.\n.\n";
for (size_t i = size - KALDI_MAX_TRACE_PRINT / 2; i < size; i++) {
ans += Demangle(trace_symbol[i]) + "\n";
}
if (size == KALDI_MAX_TRACE_SIZE)
ans += ".\n.\n.\n"; // Stack was too long, probably a bug.
}
// We must free the array of pointers allocated by backtrace_symbols(),
// but not the strings themselves.
free(trace_symbol);
#endif // HAVE_EXECINFO_H
return ans;
}
/***** KALDI LOGGING *****/
MessageLogger::MessageLogger(LogMessageEnvelope::Severity severity,
const char *func, const char *file, int32 line) {
// Obviously, we assume the strings survive the destruction of this object.
envelope_.severity = severity;
envelope_.func = func;
envelope_.file = GetShortFileName(file); // Points inside 'file'.
envelope_.line = line;
}
// Emits the accumulated message: hands it to the installed log handler when
// there is one, otherwise formats a header (severity tag, program name,
// version, function, file, line), appends a stack trace for errors and
// failed assertions, and writes the result to stderr.
void MessageLogger::LogMessage() const {
  // An installed third-party handler takes over the message entirely.
  if (log_handler != NULL) {
    log_handler(envelope_, GetMessage().c_str());
    return;
  }

  // Default Kaldi logging: choose the header tag from the severity.
  const int level = envelope_.severity;
  std::stringstream out;
  if (level > LogMessageEnvelope::kInfo) {
    out << "VLOG[" << level << "] (";
  } else if (level == LogMessageEnvelope::kInfo) {
    out << "LOG (";
  } else if (level == LogMessageEnvelope::kWarning) {
    out << "WARNING (";
  } else if (level == LogMessageEnvelope::kAssertFailed) {
    out << "ASSERTION_FAILED (";
  } else {
    // kError, and anything else below kInfo, is reported as an error.
    out << "ERROR (";
  }

  // Source location followed by the message text.
  out << program_name.c_str() << "[" KALDI_VERSION "]" << ':'
      << envelope_.func << "():" << envelope_.file << ':'
      << envelope_.line << ") " << GetMessage().c_str();

  // Errors and assertion failures also get a stack trace, when available.
  if (level < LogMessageEnvelope::kWarning) {
    const std::string trace = KaldiGetStackTrace();
    if (!trace.empty()) {
      out << "\n\n" << trace;
    }
  }

  // Write the whole message to stderr in a single insertion.
  out << "\n";
  std::cerr << out.str();
}
/***** KALDI ASSERTS *****/
// Reports a failed assertion and terminates the process.
// Logs "Assertion failed: (<cond_str>)" with severity kAssertFailed and the
// given source location, flushes all C output streams, then calls
// std::abort(); it never returns.
void KaldiAssertFailure_(const char *func, const char *file, int32 line,
const char *cond_str) {
MessageLogger::Log() =
MessageLogger(LogMessageEnvelope::kAssertFailed, func, file, line)
<< "Assertion failed: (" << cond_str << ")";
fflush(NULL); // Flush all pending buffers, abort() may not flush stderr.
std::abort();
}
/***** THIRD-PARTY LOG-HANDLER *****/
// Installs 'handler' as the destination for all log messages and returns the
// handler that was installed before (NULL if there was none). Passing NULL
// restores the default logging to stderr.
LogHandler SetLogHandler(LogHandler handler) {
  const LogHandler previous = log_handler;
  log_handler = handler;
  return previous;
}
} // namespace kaldi
// base/kaldi-error.h
// Copyright 2019 LAIX (Yi Sun)
// Copyright 2019 SmartAction LLC (kkm)
// Copyright 2016 Brno University of Technology (author: Karel Vesely)
// Copyright 2009-2011 Microsoft Corporation; Ondrej Glembek; Lukas Burget;
// Saarland University
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_ERROR_H_
#define KALDI_BASE_KALDI_ERROR_H_ 1
#include <cstdio>
#include <cstring>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include "base/kaldi-types.h"
#include "base/kaldi-utils.h"
/* Important that this file does not depend on any other kaldi headers. */
#ifdef _MSC_VER
#define __func__ __FUNCTION__
#endif
namespace kaldi {
/// \addtogroup error_group
/// @{
/***** PROGRAM NAME AND VERBOSITY LEVEL *****/
/// Called by ParseOptions to set base name (no directory) of the executing
/// program. The name is printed in logging code along with every message,
/// because in our scripts, we often mix together the stderr of many programs.
/// This function is very thread-unsafe.
void SetProgramName(const char *basename);
/// This is set by util/parse-options.{h,cc} if you set --verbose=? option.
/// Do not use directly, prefer {Get,Set}VerboseLevel().
extern int32 g_kaldi_verbose_level;
/// Get verbosity level, usually set via command line '--verbose=' switch.
/// This is a plain, unsynchronized read of g_kaldi_verbose_level.
inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; }
/// This should be rarely used, except by programs using Kaldi as library;
/// command-line programs set the verbose level automatically from ParseOptions.
/// \param i  New verbosity level; stored in g_kaldi_verbose_level as is.
inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; }
/***** KALDI LOGGING *****/
/// Log message severity and source location info.
struct LogMessageEnvelope {
/// Message severity. In addition to these levels, positive values (1 to 6)
/// specify verbose logging level. Verbose messages are produced only when
/// SetVerboseLevel() has been called to set logging level to at least the
/// corresponding value.
enum Severity {
kAssertFailed = -3, //!< Assertion failure. abort() will be called.
kError = -2, //!< Fatal error. KaldiFatalError will be thrown.
kWarning = -1, //!< Indicates a recoverable but abnormal condition.
kInfo = 0, //!< Informational message.
};
int severity; //!< A Severity value, or positive verbosity level.
const char *func; //!< Name of the function invoking the logging.
const char *file; //!< Source file name with up to 1 leading directory.
int32 line; //!< Line number in the source file.
};
/// Kaldi fatal runtime error exception. This exception is thrown from any use
/// of the KALDI_ERR logging macro after the logging function, either set by
/// SetLogHandler(), or the Kaldi's internal one, has returned.
///
/// Note that what() returns a fixed string; the text that was logged is
/// available through KaldiMessage().
class KaldiFatalError : public std::runtime_error {
public:
/// Constructs the exception, keeping \p message as the Kaldi error message.
explicit KaldiFatalError(const std::string &message)
: std::runtime_error(message) {}
/// Same as above, for a C string.
explicit KaldiFatalError(const char *message) : std::runtime_error(message) {}
/// Returns the exception name, "kaldi::KaldiFatalError".
virtual const char *what() const noexcept override {
return "kaldi::KaldiFatalError";
}
/// Returns the Kaldi error message logged by KALDI_ERR.
const char *KaldiMessage() const { return std::runtime_error::what(); }
};
// Class MessageLogger is the workhorse behind the KALDI_ASSERT, KALDI_ERR,
// KALDI_WARN, KALDI_LOG and KALDI_VLOG macros. It formats the message, then
// either prints it to stderr or passes to the custom logging handler if
// provided. Then, in case of the error, throws a KaldiFatalError exception, or
// in case of failed KALDI_ASSERT, calls std::abort().
class MessageLogger {
public:
/// The constructor stores the message's "envelope", a set of data which
/// identifies the location in source which is sending the message to log.
/// The pointers to strings are stored internally, and not owned or copied,
/// so that their storage must outlive this object.
MessageLogger(LogMessageEnvelope::Severity severity, const char *func,
const char *file, int32 line);
// The stream insertion operator, used in e.g. 'KALDI_LOG << "Message"'.
// Appends 'val' to the buffered message text and returns *this so that
// insertions can be chained.
template <typename T> MessageLogger &operator<<(const T &val) {
ss_ << val;
return *this;
}
// When assigned a MessageLogger, log its contents.
// Assignment binds more loosely than operator<<, so the whole streamed
// message has been built by the time operator= runs.
struct Log final {
void operator=(const MessageLogger &logger) { logger.LogMessage(); }
};
// When assigned a MessageLogger, log its contents and then throw
// a KaldiFatalError.
struct LogAndThrow final {
[[noreturn]] void operator=(const MessageLogger &logger) {
logger.LogMessage();
throw KaldiFatalError(logger.GetMessage());
}
};
private:
// Returns a copy of the message text accumulated so far.
std::string GetMessage() const { return ss_.str(); }
// Sends the message to the log handler, or formats it and prints to stderr.
void LogMessage() const;
LogMessageEnvelope envelope_; // Severity and source location.
std::ostringstream ss_; // Accumulates the streamed message text.
};
// Logging macros.
// Each expands to '<sink> = MessageLogger(severity, function, file, line)' so
// that text streamed after the macro with operator<< is collected by the
// MessageLogger first and then handed to the sink's operator=.
// KALDI_ERR logs the message and then throws KaldiFatalError.
#define KALDI_ERR \
::kaldi::MessageLogger::LogAndThrow() = ::kaldi::MessageLogger( \
::kaldi::LogMessageEnvelope::kError, __func__, __FILE__, __LINE__)
// KALDI_WARN logs the message with severity kWarning.
#define KALDI_WARN \
::kaldi::MessageLogger::Log() = ::kaldi::MessageLogger( \
::kaldi::LogMessageEnvelope::kWarning, __func__, __FILE__, __LINE__)
// KALDI_LOG logs the message with severity kInfo.
#define KALDI_LOG \
::kaldi::MessageLogger::Log() = ::kaldi::MessageLogger( \
::kaldi::LogMessageEnvelope::kInfo, __func__, __FILE__, __LINE__)
// KALDI_VLOG(v) logs the streamed message only when the current verbose level
// is at least 'v'; otherwise the streamed operands are not evaluated at all.
//
// The macro is a single conditional *expression* rather than an 'if'
// statement, so it cannot capture a following 'else':
//
//   if (condition)
//     KALDI_VLOG(1) << "message";
//   else
//     SomethingElse();   // pairs with 'if (condition)', as written.
//
// Both arms have type void. The third operand of ?: is an
// assignment-expression, so everything streamed after the macro is still part
// of the MessageLogger that gets assigned to Log().
#define KALDI_VLOG(v)                                                       \
  !((v) <= ::kaldi::GetVerboseLevel())                                      \
      ? (void)0                                                             \
      : ::kaldi::MessageLogger::Log() = ::kaldi::MessageLogger(             \
            (::kaldi::LogMessageEnvelope::Severity)(v), __func__, __FILE__, \
            __LINE__)
/***** KALDI ASSERTS *****/
[[noreturn]] void KaldiAssertFailure_(const char *func, const char *file,
int32 line, const char *cond_str);
// Note on KALDI_ASSERT and KALDI_PARANOID_ASSERT:
//
// A single block {} around if /else does not work, because it causes
// syntax error (unmatched else block) in the following code:
//
// if (condition)
// KALDI_ASSERT(condition2);
// else
// SomethingElse();
//
// do {} while(0) -- note there is no semicolon at the end! -- works nicely,
// and compilers will be able to optimize the loop away (as the condition
// is always false).
//
// Also see KALDI_COMPILE_TIME_ASSERT, defined in base/kaldi-utils.h, and
// KALDI_ASSERT_IS_INTEGER_TYPE and KALDI_ASSERT_IS_FLOATING_TYPE, also defined
// there.
// KALDI_ASSERT(cond): when NDEBUG is not defined, calls KaldiAssertFailure_()
// with the current function, file, line and the text of 'cond' if 'cond' is
// false. When NDEBUG is defined it expands to '(void)0' and 'cond' is not
// evaluated.
#ifndef NDEBUG
#define KALDI_ASSERT(cond) \
do { \
if (cond) \
(void)0; \
else \
::kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond); \
} while (0)
#else
#define KALDI_ASSERT(cond) (void)0
#endif
// Some more expensive asserts are only checked if KALDI_PARANOID is defined.
// Unlike KALDI_ASSERT, the active form does not depend on NDEBUG.
#ifdef KALDI_PARANOID
#define KALDI_PARANOID_ASSERT(cond) \
do { \
if (cond) \
(void)0; \
else \
::kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond); \
} while (0)
#else
#define KALDI_PARANOID_ASSERT(cond) (void)0
#endif
/***** THIRD-PARTY LOG-HANDLER *****/
/// Type of third-party logging function.
typedef void (*LogHandler)(const LogMessageEnvelope &envelope,
const char *message);
/// Set logging handler. If called with a non-NULL function pointer, the
/// function pointed by it is called to send messages to a caller-provided log.
/// If called with a NULL pointer, restores default Kaldi error logging to
/// stderr. This function is obviously not thread safe; the log handler must be.
/// Returns a previously set logging handler pointer, or NULL.
LogHandler SetLogHandler(LogHandler);
/// @} end "addtogroup error_group"
// Functions within internal is exported for testing only, do not use.
namespace internal {
bool LocateSymbolRange(const std::string &trace_name, size_t *begin,
size_t *end);
} // namespace internal
} // namespace kaldi
#endif // KALDI_BASE_KALDI_ERROR_H_
// base/kaldi-math.cc
// Copyright 2009-2011 Microsoft Corporation; Yanmin Qian;
// Saarland University; Jan Silovsky
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-math.h"
#ifndef _MSC_VER
#include <stdlib.h>
#include <unistd.h>
#endif
#include <string>
#include <mutex>
namespace kaldi {
// These routines are tested in matrix/matrix-test.cc
// Returns the smallest power of two that is >= n. Asserts that n > 0.
int32 RoundUpToNearestPowerOfTwo(int32 n) {
  KALDI_ASSERT(n > 0);
  n--;
  // Copy the highest set bit into every lower position (shifts of 1, 2, 4, 8
  // and 16 cover all 32 bits), giving 2^k - 1.
  for (int shift = 1; shift <= 16; shift <<= 1)
    n |= n >> shift;
  return n + 1;
}
// Guards the calls to rand() made by Rand() when no RandomState is supplied.
static std::mutex _RandMutex;
// Returns the next pseudo-random integer.
// Where _POSIX_THREAD_SAFE_FUNCTIONS is defined: uses rand_r() on
// state->seed when 'state' is non-NULL, otherwise calls rand() under
// _RandMutex. Elsewhere 'state' is ignored and rand() is called directly,
// without the lock.
int Rand(struct RandomState* state) {
#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS)
// On Windows and Cygwin, just call rand()
return rand();
#else
if (state) {
return rand_r(&(state->seed));
} else {
std::lock_guard<std::mutex> lock(_RandMutex);
return rand();
}
#endif
}
// Seeds the state from the global generator: seed = Rand() + 27437.
RandomState::RandomState() {
// we initialize it as Rand() + 27437 instead of just Rand(), because on some
// systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be
// the case that rand_r when initialized with rand() will give you the exact
// same sequence of numbers that rand() will give if you keep calling rand()
// after that initial call. This can cause problems with repeated sequences.
// For example if you initialize two RandomState structs one after the other
// without calling rand() in between, they would give you the same sequence
// offset by one (if we didn't have the "+ 27437" in the code). 27437 is just
// a randomly chosen prime number.
seed = Rand() + 27437;
}
// Returns true with probability 'prob'.
//   prob   Probability in [0, 1]; values up to 1.1 are tolerated because
//          roundoff in earlier calculations can push it slightly above 1.
//   state  Optional random-number state, handed to every Rand() call made
//          here, including the one in the recursive small-probability case.
bool WithProb(BaseFloat prob, struct RandomState* state) {
  KALDI_ASSERT(prob >= 0 && prob <= 1.1);  // prob should be <= 1.0,
  // but we allow slightly larger values that could arise from roundoff in
  // previous calculations.
  KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128);
  if (prob == 0) {
    return false;
  } else if (prob == 1.0) {
    return true;
  } else if (prob * RAND_MAX < 128.0) {
    // prob is very small but nonzero, and the "main algorithm"
    // wouldn't work that well.  So: with probability 1/128, we
    // return WithProb(prob * 128), else return false.
    if (Rand(state) < RAND_MAX / 128) {  // with probability 1/128...
      // Note: we know that prob * 128.0 < 1.0, because
      // we asserted RAND_MAX > 128 * 128.
      // Pass 'state' along so the recursion keeps using the caller's
      // generator instead of silently switching to the global one.
      return WithProb(prob * 128.0, state);
    } else {
      return false;
    }
  } else {
    return (Rand(state) < ((RAND_MAX + static_cast<BaseFloat>(1.0)) * prob));
  }
}
// Returns a random integer in [min_val, max_val], drawing from Rand(state).
// Asserts that max_val >= min_val. The result comes from a modulus over the
// range size, so the distribution is only approximately uniform.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) {
// This is not exact.
KALDI_ASSERT(max_val >= min_val);
if (max_val == min_val) return min_val;
#ifdef _MSC_VER
// RAND_MAX is quite small on Windows -> may need to handle larger numbers.
if (RAND_MAX > (max_val-min_val)*8) {
// *8 to avoid large inaccuracies in probability, from the modulus...
return min_val +
((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val));
} else {
if ((unsigned int)(RAND_MAX*RAND_MAX) >
(unsigned int)((max_val+1-min_val)*8)) {
// *8 to avoid inaccuracies in probability, from the modulus...
// Combine two draws to widen the range of the random value.
return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state)))
% (unsigned int)(max_val+1-min_val));
} else {
KALDI_ERR << "rand_int failed because we do not support such large "
"random numbers. (Extend this function).";
}
}
#else
return min_val +
(static_cast<int32>(Rand(state)) % static_cast<int32>(max_val+1-min_val));
#endif
}
// Draws a Poisson-distributed random number with mean 'lambda' using Knuth's
// algorithm. Take care: the running time is proportional to lambda. Faster
// algorithms exist but are more complex. Asserts that lambda >= 0.
int32 RandPoisson(float lambda, struct RandomState* state) {
  KALDI_ASSERT(lambda >= 0);
  const float threshold = expf(-lambda);
  // Multiply uniform draws together until the product falls to the threshold;
  // the answer is the number of draws needed beyond the first.
  int32 extra_draws = 0;
  float product = RandUniform(state);
  while (product > threshold) {
    extra_draws++;
    product *= RandUniform(state);
  }
  return extra_draws;
}
// Writes two Gaussian random numbers to *a and *b, computed from two uniform
// draws as sqrt(-2 log u1) * cos(2 pi u2) and sqrt(-2 log u1) * sin(2 pi u2).
// Both pointers must be non-NULL.
void RandGauss2(float *a, float *b, RandomState *state) {
  KALDI_ASSERT(a);
  KALDI_ASSERT(b);
  // Draw order matters: the first draw gives the radius, the second the angle.
  const float first_draw = RandUniform(state);
  const float second_draw = RandUniform(state);
  const float radius = sqrtf(-2.0f * logf(first_draw));
  const float angle = 2.0f * M_PI * second_draw;
  *a = radius * cosf(angle);
  *b = radius * sinf(angle);
}
// Double-precision interface to the function above.
void RandGauss2(double *a, double *b, RandomState *state) {
  KALDI_ASSERT(a);
  KALDI_ASSERT(b);
  // Just because we're using doubles doesn't mean we need super-high-quality
  // random numbers, so the single-precision version does the work.
  float first = 0, second = 0;
  RandGauss2(&first, &second, state);
  *a = first;
  *b = second;
}
} // end namespace kaldi
// base/kaldi-math.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian;
// Jan Silovsky; Saarland University
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_MATH_H_
#define KALDI_BASE_KALDI_MATH_H_ 1
#ifdef _MSC_VER
#include <float.h>
#endif
#include <cmath>
#include <limits>
#include <vector>
#include "base/kaldi-types.h"
#include "base/kaldi-common.h"
// Fallback definitions for numeric constants, each used only when the macro
// is not already defined by the headers included above.
#ifndef DBL_EPSILON
#define DBL_EPSILON 2.2204460492503131e-16
#endif
#ifndef FLT_EPSILON
#define FLT_EPSILON 1.19209290e-7f
#endif
// pi
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif
// sqrt(2)
#ifndef M_SQRT2
#define M_SQRT2 1.4142135623730950488016887
#endif
// 2 * pi
#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif
// 1 / sqrt(2)
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.7071067811865475244008443621048490
#endif
// natural log of (2 * pi)
#ifndef M_LOG_2PI
#define M_LOG_2PI 1.8378770664093454835606594728112
#endif
// natural log of 2
#ifndef M_LN2
#define M_LN2 0.693147180559945309417232121458
#endif
// natural log of 10
#ifndef M_LN10
#define M_LN10 2.302585092994045684017991454684
#endif
// Floating-point classification helpers; thin aliases for the <cmath>
// functions.
#define KALDI_ISNAN std::isnan
#define KALDI_ISINF std::isinf
#define KALDI_ISFINITE(x) std::isfinite(x)
// Squares its argument. The argument is evaluated twice, so it must not have
// side effects.
#if !defined(KALDI_SQR)
# define KALDI_SQR(x) ((x) * (x))
#endif
namespace kaldi {
// Exp(x): overloaded wrappers around exp()/expf().
// The float overload goes through the double-precision exp() when
// KALDI_NO_EXPF is defined, or on 64-bit Microsoft CL v18.0 (see below).
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
inline double Exp(double x) { return exp(x); }
#ifndef KALDI_NO_EXPF
inline float Exp(float x) { return expf(x); }
#else
inline float Exp(float x) { return exp(static_cast<double>(x)); }
#endif // KALDI_NO_EXPF
#else
inline double Exp(double x) { return exp(x); }
#if !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64)
// Microsoft CL v18.0 buggy 64-bit implementation of
// expf() incorrectly returns -inf for exp(-inf).
inline float Exp(float x) { return exp(static_cast<double>(x)); }
#else
inline float Exp(float x) { return expf(x); }
#endif // !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64)
#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
// Log(x): overloaded wrappers around log()/logf() (natural logarithm).
inline double Log(double x) { return log(x); }
inline float Log(float x) { return logf(x); }
// Log1p(x) returns log(1 + x), accurately for x close to zero.
// Uses log1p()/log1pf() where available. For Microsoft compilers older than
// _MSC_VER 1700 a fallback is provided: within 'cutoff' of zero it uses the
// series x - x^2/2, elsewhere it evaluates Log(1.0 + x) in double precision.
#if !defined(_MSC_VER) || (_MSC_VER >= 1700)
inline double Log1p(double x) { return log1p(x); }
inline float Log1p(float x) { return log1pf(x); }
#else
inline double Log1p(double x) {
  const double cutoff = 1.0e-08;
  // The series only holds for small |x|, so the test must be two-sided;
  // a one-sided 'x < cutoff' would send every negative x (e.g. -0.5) here.
  if (x > -cutoff && x < cutoff)
    return x - 0.5 * x * x;
  else
    return Log(1.0 + x);
}
inline float Log1p(float x) {
  const float cutoff = 1.0e-07;
  // Two-sided test, as in the double overload.
  if (x > -cutoff && x < cutoff)
    return x - 0.5 * x * x;
  else
    return Log(1.0 + x);
}
#endif
// Log of the machine epsilon for each precision. LogAdd() returns the larger
// argument unchanged when the difference of its arguments is below this.
static const double kMinLogDiffDouble = Log(DBL_EPSILON); // negative!
static const float kMinLogDiffFloat = Log(FLT_EPSILON); // negative!
// -infinity, i.e. the log of zero, for each floating-point type.
const float kLogZeroFloat = -std::numeric_limits<float>::infinity();
const double kLogZeroDouble = -std::numeric_limits<double>::infinity();
const BaseFloat kLogZeroBaseFloat = -std::numeric_limits<BaseFloat>::infinity();
// Returns a random integer between 0 and RAND_MAX, inclusive
int Rand(struct RandomState* state = NULL);
// State for thread-safe random number generator
struct RandomState {
RandomState();
unsigned seed; // Seed advanced in place by Rand() when it calls rand_r().
};
// Returns a random integer between first and last inclusive.
int32 RandInt(int32 first, int32 last, struct RandomState* state = NULL);
// Returns true with probability "prob", with 0 <= prob <= 1 [we check this].
// Internally calls Rand(). This function is carefully implemented so
// that it should work even if prob is very small.
bool WithProb(BaseFloat prob, struct RandomState* state = NULL);
/// Returns a random number strictly between 0 and 1.
/// Rand() yields an integer in [0, RAND_MAX]; shifting it by one and dividing
/// by RAND_MAX + 2 keeps both end points out of the result.
inline float RandUniform(struct RandomState* state = NULL) {
return static_cast<float>((Rand(state) + 1.0) / (RAND_MAX+2.0));
}
/// Returns a Gaussian random number computed from two uniform draws as
/// sqrt(-2 log u1) * cos(2 pi u2).
/// NOTE(review): both RandUniform() calls are operands of a single
/// multiplication, so which draw is taken first is up to the compiler.
inline float RandGauss(struct RandomState* state = NULL) {
return static_cast<float>(sqrtf (-2 * Log(RandUniform(state)))
* cosf(2*M_PI*RandUniform(state)));
}
// Returns poisson-distributed random number. Uses Knuth's algorithm.
// Take care: this takes time proportional
// to lambda. Faster algorithms exist but are more complex.
int32 RandPoisson(float lambda, struct RandomState* state = NULL);
// Returns a pair of gaussian random numbers. Uses Box-Muller transform
void RandGauss2(float *a, float *b, RandomState *state = NULL);
void RandGauss2(double *a, double *b, RandomState *state = NULL);
// Also see Vector<float,double>::RandCategorical().
// Randomized pruning that preserves expectations; typically used to prune
// posteriors. A value that is zero, or whose magnitude is at least
// 'prune_thresh', is returned unchanged. Any other value becomes
// +/-prune_thresh with probability |post| / prune_thresh and (signed) zero
// otherwise. Asserts that prune_thresh >= 0.
template<class Float>
inline Float RandPrune(Float post, BaseFloat prune_thresh,
                       struct RandomState* state = NULL) {
  KALDI_ASSERT(prune_thresh >= 0.0);
  const bool keep_as_is = (post == 0.0 || std::abs(post) >= prune_thresh);
  if (keep_as_is)
    return post;
  const double sign = (post >= 0 ? 1.0 : -1.0);
  const double magnitude =
      (RandUniform(state) <= fabs(post)/prune_thresh ? prune_thresh : 0.0);
  return sign * magnitude;
}
// Returns log(exp(x) + exp(y)), computed as
// max + log1p(exp(min - max)) so that the exponential cannot overflow.
inline double LogAdd(double x, double y) {
  const bool y_is_larger = (x < y);
  const double larger = y_is_larger ? y : x;
  // Non-positive gap between the smaller and the larger argument.
  const double gap = y_is_larger ? (x - y) : (y - x);
  // If the gap is below log(epsilon) (or is NaN) the smaller term cannot
  // contribute, so the larger argument is the answer.
  if (!(gap >= kMinLogDiffDouble))
    return larger;
  return larger + Log1p(Exp(gap));
}
// Single-precision version of the above.
inline float LogAdd(float x, float y) {
  const bool y_is_larger = (x < y);
  const float larger = y_is_larger ? y : x;
  // Non-positive gap between the smaller and the larger argument.
  const float gap = y_is_larger ? (x - y) : (y - x);
  // Same cut-off as above, with the float threshold.
  if (!(gap >= kMinLogDiffFloat))
    return larger;
  return larger + Log1p(Exp(gap));
}
// Returns log(exp(x) - exp(y)). Returns kLogZeroDouble when x == y and
// raises a Kaldi error (KALDI_ERR) when y > x.
inline double LogSub(double x, double y) {
  if (y == x)
    return kLogZeroDouble;
  if (y > x)
    KALDI_ERR << "Cannot subtract a larger from a smaller number.";
  // From here on y - x is negative.
  const double result = x + Log(1.0 - Exp(y - x));
  // The result can be NaN when y - x is ~0.0 and 1.0 - exp(y - x) is zero to
  // machine precision; treat that as log(0).
  return KALDI_ISNAN(result) ? kLogZeroDouble : result;
}
// Returns log(exp(x) - exp(y)) in single precision. Returns kLogZeroFloat
// when x == y and raises a Kaldi error (KALDI_ERR) when y > x.
inline float LogSub(float x, float y) {
  if (y >= x) {  // Throws exception if y>=x.
    if (y == x)
      return kLogZeroFloat;  // float constant, matching the NaN path below.
    else
      KALDI_ERR << "Cannot subtract a larger from a smaller number.";
  }
  float diff = y - x;  // Will be negative.
  float res = x + Log(1.0f - Exp(diff));
  // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision
  if (KALDI_ISNAN(res))
    return kLogZeroFloat;
  return res;
}
/// Returns true if abs(a - b) <= relative_tolerance * (abs(a) + abs(b)).
/// Exactly equal values, including equal infinities, always compare equal; an
/// infinite or NaN difference never does.
static inline bool ApproxEqual(float a, float b,
                               float relative_tolerance = 0.001) {
  // Exact equality also covers a == b == +/-inf.
  if (a == b)
    return true;
  const float gap = std::abs(a - b);
  // The gap is non-negative, so "not finite" means +inf or NaN.
  if (!std::isfinite(gap))
    return false;
  const float scale = std::abs(a) + std::abs(b);
  return gap <= relative_tolerance * scale;
}
/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
/// The check is delegated to ApproxEqual() and wrapped in KALDI_ASSERT.
static inline void AssertEqual(float a, float b,
float relative_tolerance = 0.001) {
// Infinities are handled inside ApproxEqual() by its a == b test.
KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance));
}
// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0.
int32 RoundUpToNearestPowerOfTwo(int32 n);
/// Returns a / b, rounding towards negative infinity in all cases.
/// Asserts that b != 0.
static inline int32 DivideRoundingDown(int32 a, int32 b) {
  KALDI_ASSERT(b != 0);
  // C++ integer division truncates towards zero. The truncated quotient is
  // one too large exactly when the division is inexact and the operands have
  // opposite signs; the remainder carries the sign of 'a'. Working from the
  // quotient and remainder avoids the intermediate products and sums
  // (a * b, a - b +/- 1) that can overflow int32.
  int32 quotient = a / b;
  const int32 remainder = a % b;
  if (remainder != 0 && ((remainder < 0) != (b < 0)))
    --quotient;
  return quotient;
}
// Returns the greatest common divisor of m and n as a non-negative value.
// If exactly one argument is zero, the absolute value of the other is
// returned; Gcd(0, 0) is undefined and raises a Kaldi error.
template<class I> I Gcd(I m, I n) {
  if (m == 0 && n == 0) {  // gcd not defined, as all integers are divisors.
    KALDI_ERR << "Undefined GCD since m = 0, n = 0.";
  }
  // Exactly one argument is zero: the answer is |the other one|.
  if (m == 0) return (n > 0 ? n : -n);
  if (n == 0) return (m > 0 ? m : -m);
  // could use compile-time assertion
  // but involves messing with complex template stuff.
  KALDI_ASSERT(std::numeric_limits<I>::is_integer);
  // Euclid's algorithm, reducing the two arguments alternately.
  for (;;) {
    m %= n;
    if (m == 0) return (n > 0 ? n : -n);
    n %= m;
    if (n == 0) return (m > 0 ? m : -m);
  }
}
/// Returns the least common multiple of two integers. Will
/// crash (assertion failure) unless both inputs are positive.
template<class I> I Lcm(I m, I n) {
  KALDI_ASSERT(m > 0 && n > 0);
  const I divisor = Gcd(m, n);
  // lcm = gcd * (m / gcd) * (n / gcd); dividing first keeps the
  // intermediate values small.
  const I m_reduced = m / divisor;
  const I n_reduced = n / divisor;
  return divisor * m_reduced * n_reduced;
}
// Splits m into its prime factors, written to *factors in sorted order from
// least to greatest, with duplication (e.g. 12 -> {2, 2, 3}; 1 -> {}).
// A simple trial-division algorithm, mainly intended for use in the
// mixed-radix FFT computation (where we assume most factors are small).
// Asserts that factors != NULL and m >= 1.
template<class I> void Factorize(I m, std::vector<I> *factors) {
  KALDI_ASSERT(factors != NULL);
  KALDI_ASSERT(m >= 1);  // Doesn't work for zero or negative numbers.
  factors->clear();
  const I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 };
  // First try small factors.
  for (I i = 0; i < 10; i++) {
    if (m == 1) return;  // We're done.
    while (m % small_factors[i] == 0) {
      m /= small_factors[i];
      factors->push_back(small_factors[i]);
    }
  }
  // Next try all odd numbers starting from 31.
  for (I j = 31; m != 1; j += 2) {
    // Every candidate below j has already been divided out, so once
    // j * j > m the remainder must itself be prime. Written as j > m / j so
    // the comparison cannot overflow. Without this, a large prime factor
    // would cost a number of iterations proportional to its value.
    if (j > m / j) {
      factors->push_back(m);
      return;
    }
    while (m % j == 0) {
      m /= j;
      factors->push_back(j);
    }
  }
}
// Hypot(x, y): overloaded wrappers around hypot()/hypotf().
inline double Hypot(double x, double y) { return hypot(x, y); }
inline float Hypot(float x, float y) { return hypotf(x, y); }
} // namespace kaldi
#endif // KALDI_BASE_KALDI_MATH_H_
// base/kaldi-types.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Jan Silovsky; Yanmin Qian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_TYPES_H_
#define KALDI_BASE_KALDI_TYPES_H_ 1
namespace kaldi {
// TYPEDEFS ..................................................................
// BaseFloat is double when KALDI_DOUBLEPRECISION is defined to a nonzero
// value, and float otherwise. An undefined macro evaluates as 0 inside #if,
// so float is the result when nothing is configured.
#if (KALDI_DOUBLEPRECISION != 0)
typedef double BaseFloat;
#else
typedef float BaseFloat;
#endif
}
#ifdef _MSC_VER
#include <basetsd.h>
#define ssize_t SSIZE_T
#endif
// we can do this a different way if some platform
// we find in the future lacks stdint.h
#include <stdint.h>
// For discussion on what to do if you need to compile Kaldi
// without OpenFST, see the bottom of this file.
/*
#include <fst/types.h>
namespace kaldi {
using ::int16;
using ::int32;
using ::int64;
using ::uint16;
using ::uint32;
using ::uint64;
typedef float float32;
typedef double double64;
} // end namespace kaldi
*/
// If you decide to compile Kaldi without OpenFST, keep the namespace block
// above commented out and use the following one instead. It defines the
// fixed-width integer aliases from <stdint.h> and the float32/double64
// aliases.
namespace kaldi {
typedef int8_t int8;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
typedef float float32;
typedef double double64;
} // end namespace kaldi
#endif // KALDI_BASE_KALDI_TYPES_H_
// base/kaldi-utils.cc
// Copyright 2009-2011 Karel Vesely; Yanmin Qian; Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifdef _WIN32_WINNT_WIN8
#include <Synchapi.h>
#elif defined(_WIN32) || defined(_MSC_VER) || defined(MINGW)
#include <Windows.h>
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif /* _MSC_VER < 1900 */
#else
#include <unistd.h>
#endif
#include <string>
#include "base/kaldi-common.h"
namespace kaldi {
// Renders a character in human-readable form for debugging: printable
// characters are returned in single quotes (e.g. 'a'), anything else as
// "[character N]" where N is the character's integer value.
std::string CharToString(const char &c) {
  char buf[20];
  // The <cctype> classification functions require a value representable as
  // unsigned char (or EOF); passing a negative plain char is undefined
  // behaviour, hence the cast.
  if (std::isprint(static_cast<unsigned char>(c)))
    snprintf(buf, sizeof(buf), "\'%c\'", c);
  else
    snprintf(buf, sizeof(buf), "[character %d]", static_cast<int>(c));
  return std::string(buf);
}
// Suspends the calling thread for approximately 'seconds' seconds.
// The duration is converted to an integer count in the unit of the platform
// call that is used: milliseconds for ::Sleep() (MSVC/MinGW), whole seconds
// for sleep() (Cygwin, so the fractional part is dropped), microseconds for
// usleep() elsewhere.
void Sleep(float seconds) {
#if defined(_MSC_VER) || defined(MINGW)
::Sleep(static_cast<int>(seconds * 1000.0));
#elif defined(__CYGWIN__)
sleep(static_cast<int>(seconds));
#else
usleep(static_cast<int>(seconds * 1000000.0));
#endif
}
} // end namespace kaldi
// base/kaldi-utils.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation;
// Saarland University; Karel Vesely; Yanmin Qian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_KALDI_UTILS_H_
#define KALDI_BASE_KALDI_UTILS_H_ 1
#if defined(_MSC_VER)
# define WIN32_LEAN_AND_MEAN
# define NOMINMAX
# include <windows.h>
#endif
#ifdef _MSC_VER
#include <stdio.h>
#define unlink _unlink
#else
#include <unistd.h>
#endif
#include <limits>
#include <string>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661)
#if _MSC_VER < 1400
#define __restrict__
#else
#define __restrict__ __restrict
#endif
#endif
// KALDI_MEMALIGN(align, size, pp_orig): allocates `size` bytes aligned to
// `align`, stores the resulting pointer in *pp_orig, and evaluates to that
// pointer.  The default (posix_memalign) variant evaluates to NULL when
// posix_memalign reports failure.
// KALDI_MEMALIGN_FREE(x): releases memory obtained from KALDI_MEMALIGN with
// the deallocator that matches the allocator chosen for the platform.
#if defined(_MSC_VER)
// MSVC: _aligned_malloc / _aligned_free (note the swapped argument order).
# define KALDI_MEMALIGN(align, size, pp_orig) \
(*(pp_orig) = _aligned_malloc(size, align))
# define KALDI_MEMALIGN_FREE(x) _aligned_free(x)
#elif defined(__CYGWIN__)
// Cygwin: aligned_alloc / free.
# define KALDI_MEMALIGN(align, size, pp_orig) \
(*(pp_orig) = aligned_alloc(align, size))
# define KALDI_MEMALIGN_FREE(x) free(x)
#else
// Everything else: posix_memalign (returns 0 on success) / free.
# define KALDI_MEMALIGN(align, size, pp_orig) \
(!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL)
# define KALDI_MEMALIGN_FREE(x) free(x)
#endif
// Intel compiler only (__ICC): disable a fixed list of remarks by number.
// NOTE(review): 981 appears twice in the list below; the repeat is redundant.
#ifdef __ICC
#pragma warning(disable: 383) // ICPC remark we don't want.
#pragma warning(disable: 810) // ICPC remark we don't want.
#pragma warning(disable: 981) // ICPC remark we don't want.
#pragma warning(disable: 1418) // ICPC remark we don't want.
#pragma warning(disable: 444) // ICPC remark we don't want.
#pragma warning(disable: 869) // ICPC remark we don't want.
#pragma warning(disable: 1287) // ICPC remark we don't want.
#pragma warning(disable: 279) // ICPC remark we don't want.
#pragma warning(disable: 981) // ICPC remark we don't want.
#endif
namespace kaldi {

// Renders a single character in a human-readable form; intended for
// debugging output.
std::string CharToString(const char &c);

// Returns nonzero when the lowest-addressed byte of an int holding the value
// 1 is itself nonzero, i.e. when the machine stores ints little-endian.
inline int MachineIsLittleEndian() {
  int probe = 1;
  const char *lowest_byte = reinterpret_cast<char*>(&probe);
  return (*lowest_byte != 0);
}

// kaldi::Sleep() is a portable way to sleep for a possibly fractional number
// of seconds.  On Windows the duration is handed to the OS in whole
// milliseconds.
void Sleep(float seconds);

}
// KALDI_SWAP8(a): reverses, in place, the order of the first 8 bytes of the
// object `a` (bytes 0<->7, 1<->6, 2<->5, 3<->4).
// `a` must be an lvalue; it is evaluated many times.  The expansion is a
// brace-enclosed block that declares a local named `t`, so the argument must
// not itself be called `t`.
#define KALDI_SWAP8(a) { \
int t = (reinterpret_cast<char*>(&a))[0];\
(reinterpret_cast<char*>(&a))[0]=(reinterpret_cast<char*>(&a))[7];\
(reinterpret_cast<char*>(&a))[7]=t;\
t = (reinterpret_cast<char*>(&a))[1];\
(reinterpret_cast<char*>(&a))[1]=(reinterpret_cast<char*>(&a))[6];\
(reinterpret_cast<char*>(&a))[6]=t;\
t = (reinterpret_cast<char*>(&a))[2];\
(reinterpret_cast<char*>(&a))[2]=(reinterpret_cast<char*>(&a))[5];\
(reinterpret_cast<char*>(&a))[5]=t;\
t = (reinterpret_cast<char*>(&a))[3];\
(reinterpret_cast<char*>(&a))[3]=(reinterpret_cast<char*>(&a))[4];\
(reinterpret_cast<char*>(&a))[4]=t;}
// KALDI_SWAP4(a): reverses, in place, the order of the first 4 bytes of the
// object `a` (bytes 0<->3, 1<->2).  `a` must be an lvalue and is evaluated
// several times; the expansion declares a local named `t`.
#define KALDI_SWAP4(a) { \
int t = (reinterpret_cast<char*>(&a))[0];\
(reinterpret_cast<char*>(&a))[0]=(reinterpret_cast<char*>(&a))[3];\
(reinterpret_cast<char*>(&a))[3]=t;\
t = (reinterpret_cast<char*>(&a))[1];\
(reinterpret_cast<char*>(&a))[1]=(reinterpret_cast<char*>(&a))[2];\
(reinterpret_cast<char*>(&a))[2]=t;}
// KALDI_SWAP2(a): exchanges, in place, the first two bytes of the object `a`.
// `a` must be an lvalue and is evaluated several times; the expansion
// declares a local named `t`.
#define KALDI_SWAP2(a) { \
int t = (reinterpret_cast<char*>(&a))[0];\
(reinterpret_cast<char*>(&a))[0]=(reinterpret_cast<char*>(&a))[1];\
(reinterpret_cast<char*>(&a))[1]=t;}
// Declares (without defining) the copy constructor and operator= of `type`.
// The macro itself adds no access specifier: place it in the private section
// of the class so that copying and assignment become inaccessible.  The
// expansion has no trailing semicolon, so the caller supplies one.
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \
type(const type&); \
void operator = (const type&)
// Compile-time assertion helper.  Only the <true> specialization provides
// Check(), so KaldiCompileTimeAssert<false>::Check() fails to compile.
template<bool B> class KaldiCompileTimeAssert { };
template<> class KaldiCompileTimeAssert<true> {
public:
static inline void Check() { }
};
// Fails compilation unless the constant expression `b` is true.  Expands to a
// call expression, so it is used as a statement inside a function body.
#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check()
// Fails compilation unless std::numeric_limits is specialized for I and
// reports it as an integer type.
#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \
KaldiCompileTimeAssert<std::numeric_limits<I>::is_specialized \
&& std::numeric_limits<I>::is_integer>::Check()
// Fails compilation unless std::numeric_limits is specialized for F and
// reports it as a non-integer type.
#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \
KaldiCompileTimeAssert<std::numeric_limits<F>::is_specialized \
&& !std::numeric_limits<F>::is_integer>::Check()
// KALDI_STRCASECMP: name of the platform's case-insensitive C-string compare
// (_stricmp on MSVC, strcasecmp elsewhere; Cygwin additionally needs
// <strings.h> for the declaration).
#if defined(_MSC_VER)
#define KALDI_STRCASECMP _stricmp
#elif defined(__CYGWIN__)
#include <strings.h>
#define KALDI_STRCASECMP strcasecmp
#else
#define KALDI_STRCASECMP strcasecmp
#endif
// KALDI_STRTOLL(cur_cstr, end_cstr): base-10 string to 64-bit integer
// conversion (_strtoi64 on MSVC, strtoll elsewhere).
// NOTE(review): both expansions end in ';', so the macro cannot be used
// inside a larger expression; callers may depend on this, so it is left as is.
// NOTE(review): this header does not itself include a header declaring
// strtoll -- presumably callers include <cstdlib>; confirm.
#ifdef _MSC_VER
# define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10);
#else
# define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10);
#endif
#endif // KALDI_BASE_KALDI_UTILS_H_
// base/timer.cc
// Copyright 2018 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/timer.h"
#include "base/kaldi-error.h"
#include <algorithm>
#include <iomanip>
#include <map>
#include <unordered_map>
namespace kaldi {
// Accumulates, per profiled function, the total time reported through
// AccStats() and logs one summary line per function name when destroyed.
class ProfileStats {
 public:
  // Adds `elapsed` seconds to the running total kept for `function_name`.
  // The pointer value itself is the lookup key (see the note on map_), so
  // callers are expected to pass string constants.
  void AccStats(const char *function_name, double elapsed) {
    std::unordered_map<const char*, ProfileStatsEntry>::iterator
        iter = map_.find(function_name);
    if (iter == map_.end()) {
      // First sighting of this key: insert a zeroed entry with one lookup.
      iter = map_.emplace(function_name,
                          ProfileStatsEntry(function_name)).first;
    }
    iter->second.total_time += elapsed;
  }

  // Logs the accumulated totals, largest first.
  ~ProfileStats() {
    // This map makes sure we agglomerate the time if there were any duplicate
    // addresses of strings.
    std::unordered_map<std::string, double> total_time;
    for (auto iter = map_.begin(); iter != map_.end(); ++iter)
      total_time[iter->second.name] += iter->second.total_time;

    ReverseSecondComparator comp;
    std::vector<std::pair<std::string, double> > pairs(total_time.begin(),
                                                       total_time.end());
    std::sort(pairs.begin(), pairs.end(), comp);
    for (size_t i = 0; i < pairs.size(); i++) {
      KALDI_LOG << "Time taken in " << pairs[i].first << " is "
                << std::fixed << std::setprecision(2) << pairs[i].second << "s.";
    }
  }

 private:
  // Name and accumulated time for one map key.  total_time always starts at
  // zero so that no indeterminate value is ever copied or read.
  struct ProfileStatsEntry {
    std::string name;
    double total_time;
    ProfileStatsEntry(): total_time(0.0) { }
    explicit ProfileStatsEntry(const char *name):
        name(name), total_time(0.0) { }
  };

  // Orders (name, time) pairs by descending time.
  struct ReverseSecondComparator {
    bool operator () (const std::pair<std::string, double> &a,
                      const std::pair<std::string, double> &b) const {
      return a.second > b.second;
    }
  };

  // Note: this map is keyed on the address of the string, there is no proper
  // hash function.  The assumption is that the strings are compile-time
  // constants.
  std::unordered_map<const char*, ProfileStatsEntry> map_;
};
// Single accumulator shared by every Profiler; its destructor logs the
// totals when this global object is destroyed.
ProfileStats g_profile_stats;

// Hands the time measured by tim_ to the global statistics under name_.
Profiler::~Profiler() {
  const double elapsed_seconds = tim_.Elapsed();
  g_profile_stats.AccStats(name_, elapsed_seconds);
}
} // namespace kaldi
// base/timer.h
// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_TIMER_H_
#define KALDI_BASE_TIMER_H_
#include "base/kaldi-utils.h"
#include "base/kaldi-error.h"
#if defined(_MSC_VER) || defined(MINGW)
namespace kaldi {
// Stopwatch based on the Win32 performance counter.
class Timer {
 public:
  // Starts timing immediately.
  Timer() { Reset(); }

  // You can initialize with bool to control whether or not you want the time
  // to be set when the object is created.  When `set_timer` is false the
  // start point is zeroed (instead of being left uninitialized) so that a
  // later Elapsed() without Reset() still reads a well-defined value.
  explicit Timer(bool set_timer) {
    if (set_timer) {
      Reset();
    } else {
      time_start_.QuadPart = 0;
    }
  }

  // Restarts the measurement from the current counter value.
  void Reset() {
    QueryPerformanceCounter(&time_start_);
  }

  // Returns the time in seconds since the last Reset(), or 0.0 when
  // QueryPerformanceFrequency() reports failure.
  double Elapsed() const {
    LARGE_INTEGER time_end;
    LARGE_INTEGER freq;
    QueryPerformanceCounter(&time_end);

    if (QueryPerformanceFrequency(&freq) == 0) {
      // Hardware does not support this.
      return 0.0;
    }
    return (static_cast<double>(time_end.QuadPart) -
            static_cast<double>(time_start_.QuadPart)) /
           (static_cast<double>(freq.QuadPart));
  }

 private:
  LARGE_INTEGER time_start_;
};
#else
#include <sys/time.h>
#include <unistd.h>
namespace kaldi {
// Stopwatch based on gettimeofday().
class Timer {
 public:
  // Starts timing immediately.
  Timer(): time_start_() { Reset(); }

  // You can initialize with bool to control whether or not you want the time
  // to be set when the object is created.  When `set_timer` is false the
  // start point is zero-initialized (instead of being left uninitialized) so
  // that a later Elapsed() without Reset() still reads a well-defined value.
  explicit Timer(bool set_timer): time_start_() {
    if (set_timer) Reset();
  }

  // Restarts the measurement from the current time.  The timezone argument
  // is NULL: POSIX leaves the behaviour unspecified for a non-null pointer.
  void Reset() { gettimeofday(&time_start_, NULL); }

  /// Returns time in seconds.
  double Elapsed() const {
    struct timeval time_end;
    gettimeofday(&time_end, NULL);
    const double t1 = static_cast<double>(time_start_.tv_sec) +
        static_cast<double>(time_start_.tv_usec) / (1000 * 1000);
    const double t2 = static_cast<double>(time_end.tv_sec) +
        static_cast<double>(time_end.tv_usec) / (1000 * 1000);
    return t2 - t1;
  }

 private:
  struct timeval time_start_;
};
#endif
class Profiler {
public:
// Caution: the 'const char' should always be a string constant; for speed,
// internally the profiling code uses the address of it as a lookup key.
Profiler(const char *function_name): name_(function_name) { }
~Profiler();
private:
Timer tim_;
const char *name_;
};
// To add timing info for a function, you just put
// KALDI_PROFILE;
// at the beginning of the function. Caution: this doesn't
// include the class name.
#define KALDI_PROFILE Profiler _profiler(__func__)
} // namespace kaldi
#endif // KALDI_BASE_TIMER_H_
// This file was automatically created by ./get_version.sh.
// It is only included by ./kaldi-error.cc.
// Do not edit by hand; regenerate it with the script named above.
// KALDI_VERSION: version string literal.
#define KALDI_VERSION "5.5.544~2-f21d7"
// KALDI_GIT_HEAD: presumably the full git commit hash the version was
// generated from -- confirm against get_version.sh.
#define KALDI_GIT_HEAD "f21d7e768635ca98aeeb43f30e2c6a9f14ab8f0f"
# Sources shared by every feature front end in this directory.
add_library(kaldi-feat-common
  wave-reader.cc
  signal.cc
  feature-functions.cc
  feature-window.cc
  resample.cc
  mel-computations.cc
)
target_link_libraries(kaldi-feat-common
  PUBLIC
    kaldi-base
    kaldi-matrix
    kaldi-util
)

# MFCC front end; picks up the common code (and its dependencies) publicly.
add_library(kaldi-mfcc feature-mfcc.cc)
target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)

# Filterbank front end; picks up the common code (and its dependencies) publicly.
add_library(fbank feature-fbank.cc)
target_link_libraries(fbank PUBLIC kaldi-feat-common)
\ No newline at end of file
// feat/feature-common-inl.h
// Copyright 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_
#define KALDI_FEAT_FEATURE_COMMON_INL_H_
#include "feat/resample.h"
// Do not include this file directly. It is included by feat/feature-common.h
namespace kaldi {
// Computes features for `wave`, which was sampled at `sample_freq`.  When that
// differs from the frame options' samp_freq the waveform is first resampled;
// an error is raised when the required direction (down/up) is not enabled in
// the frame options.
template <class F>
void OfflineFeatureTpl<F>::ComputeFeatures(
    const VectorBase<BaseFloat> &wave,
    BaseFloat sample_freq,
    BaseFloat vtln_warp,
    Matrix<BaseFloat> *output) {
  KALDI_ASSERT(output != NULL);
  const BaseFloat target_freq = computer_.GetFrameOptions().samp_freq;

  // Fast path: rates already agree, no resampling needed.
  if (sample_freq == target_freq) {
    Compute(wave, vtln_warp, output);
    return;
  }

  const bool needs_downsample = target_freq < sample_freq;
  const bool needs_upsample = target_freq > sample_freq;
  if (needs_downsample && !computer_.GetFrameOptions().allow_downsample) {
    KALDI_ERR << "Waveform and config sample Frequency mismatch: "
              << sample_freq << " .vs " << target_freq
              << " (use --allow-downsample=true to allow "
              << " downsampling the waveform).";
  } else if (needs_upsample && !computer_.GetFrameOptions().allow_upsample) {
    KALDI_ERR << "Waveform and config sample Frequency mismatch: "
              << sample_freq << " .vs " << target_freq
              << " (use --allow-upsample=true option to allow "
              << " upsampling the waveform).";
  }

  // Convert the waveform to the configured rate, then compute as usual.
  Vector<BaseFloat> resampled_wave(wave);
  ResampleWaveform(sample_freq, wave, target_freq, &resampled_wave);
  Compute(resampled_wave, vtln_warp, output);
}
// Computes one feature row per frame of `wave` into `output`.  `output` is
// resized to (number of frames) x (feature dimension), or to 0 x 0 when the
// waveform yields no frames.
template <class F>
void OfflineFeatureTpl<F>::Compute(
    const VectorBase<BaseFloat> &wave,
    BaseFloat vtln_warp,
    Matrix<BaseFloat> *output) {
  KALDI_ASSERT(output != NULL);
  const int32 num_frames = NumFrames(wave.Dim(), computer_.GetFrameOptions());
  const int32 feat_dim = computer_.Dim();
  if (num_frames == 0) {
    output->Resize(0, 0);
    return;
  }
  output->Resize(num_frames, feat_dim);

  Vector<BaseFloat> frame;  // windowed samples of the current frame.
  const bool want_raw_energy = computer_.NeedRawLogEnergy();
  for (int32 t = 0; t < num_frames; ++t) {  // t is the frame index.
    BaseFloat raw_log_energy = 0.0;
    // Only request the raw log-energy when the computer says it needs it.
    BaseFloat *energy_out = want_raw_energy ? &raw_log_energy : NULL;
    ExtractWindow(0, wave, t, computer_.GetFrameOptions(),
                  feature_window_function_, &frame, energy_out);

    SubVector<BaseFloat> feature_row(*output, t);
    computer_.Compute(raw_log_energy, vtln_warp, &frame, &feature_row);
  }
}
// Const overload: runs the non-const Compute() on a temporary copy of this
// object.  This works around const-ness (which may sometimes be useful in
// multi-threaded code) at the price of the copy, so it is not optimally
// efficient.
template <class F>
void OfflineFeatureTpl<F>::Compute(
    const VectorBase<BaseFloat> &wave,
    BaseFloat vtln_warp,
    Matrix<BaseFloat> *output) const {
  OfflineFeatureTpl<F> scratch_copy(*this);
  scratch_copy.Compute(wave, vtln_warp, output);
}
} // end namespace kaldi
#endif
// feat/feature-common.h
// Copyright 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_COMMON_H_
#define KALDI_FEAT_FEATURE_COMMON_H_
#include <map>
#include <string>
#include "feat/feature-window.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// This class is only added for documentation, it is not intended to ever be
/// used.
// Options type used by ExampleFeatureComputer; it carries the
// frame-extraction options that GetFrameOptions() hands back.
struct ExampleFeatureComputerOptions {
FrameExtractionOptions frame_opts;
// .. more would go here.
};
/// This class is only added for documentation, it is not intended to ever be
/// used. It documents the interface of the *Computer classes which wrap the
/// low-level feature extraction. The template argument F of OfflineFeatureTpl must
/// follow this interface. This interface is intended for features such as
/// MFCCs and PLPs which can be computed frame by frame.
class ExampleFeatureComputer {
public:
/// OfflineFeatureTpl<F> reads this typedef as F::Options.
typedef ExampleFeatureComputerOptions Options;
/// Returns a reference to the frame-extraction options class, which
/// will be part of our own options class.
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
/// Returns the feature dimension
int32 Dim() const;
/// Returns true if this function may inspect the raw log-energy of the signal
/// (before windowing and pre-emphasis); it's safe to always return true, but
/// setting it to false enables an optimization.
bool NeedRawLogEnergy() const { return true; }
/// constructor from options class; it should not store a reference or pointer
/// to the options class but should copy it.
explicit ExampleFeatureComputer(const ExampleFeatureComputerOptions &opts):
opts_(opts) { }
/// Copy constructor; all of these classes must have one.
ExampleFeatureComputer(const ExampleFeatureComputer &other);
/**
Function that computes one frame of features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater. Must be
ignored by this function if this class returns false from
this->NeedRawLogEnergy().
@param [in] vtln_warp The VTLN warping factor that the user wants
to be applied when computing features for this utterance. Will
normally be 1.0, meaning no warping is to be done. The value will
be ignored for feature types that don't support VTLN, such as
spectrogram features.
@param [in] signal_frame One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions(). The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
private:
// disallow assignment.
ExampleFeatureComputer &operator = (const ExampleFeatureComputer &in);
// Own copy of the options passed to the constructor.
Options opts_;
};
/// This templated class is intended for offline feature extraction, i.e. where
/// you have access to the entire signal at the start. It exists mainly to be
/// drop-in replacement for the old (pre-2016) classes Mfcc, Plp and so on, for
/// use in the offline case. In April 2016 we reorganized the online
/// feature-computation code for greater modularity and to have correct support
/// for the snip-edges=false option.
template <class F>
class OfflineFeatureTpl {
public:
typedef typename F::Options Options;
// Note: feature_window_function_ is the windowing function, which is
// initialized using the options class, and which we cache at this level.
// computer_ is declared before feature_window_function_, so it is already
// constructed when its frame options are read here.
OfflineFeatureTpl(const Options &opts):
computer_(opts),
feature_window_function_(computer_.GetFrameOptions()) { }
// Internal (and back-compatibility) interface for computing features, which
// requires that the user has already checked that the sampling frequency
// of the waveform is equal to the sampling frequency specified in
// the frame-extraction options.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output);
// This const version of Compute() is a wrapper that
// calls the non-const version on a temporary object.
// It's less efficient than the non-const version.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output) const;
/**
Computes the features for one file (one sequence of features).
This is the newer interface where you specify the sample frequency
of the input waveform.
@param [in] wave The input waveform
@param [in] sample_freq The sampling frequency with which
'wave' was sampled.
If sample_freq differs from the frequency
specified in the config, the waveform is
resampled to the config frequency, provided
the frame options enable that direction
(allow_downsample when sample_freq is higher,
allow_upsample when it is lower); otherwise
it's an error.
@param [in] vtln_warp The VTLN warping factor (will normally
be 1.0)
@param [out] output The matrix of features, where the row-index
is the frame index.
*/
void ComputeFeatures(const VectorBase<BaseFloat> &wave,
BaseFloat sample_freq,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output);
// Feature dimension, as reported by the wrapped computer.
int32 Dim() const { return computer_.Dim(); }
// Copy constructor.
OfflineFeatureTpl(const OfflineFeatureTpl<F> &other):
computer_(other.computer_),
feature_window_function_(other.feature_window_function_) { }
private:
// Disallow assignment.
OfflineFeatureTpl<F> &operator =(const OfflineFeatureTpl<F> &other);
// The per-frame feature computer (template argument F).
F computer_;
// Window function built once from the computer's frame options.
FeatureWindowFunction feature_window_function_;
};
/// @} End of "addtogroup feat"
} // namespace kaldi
#include "feat/feature-common-inl.h"
#endif // KALDI_FEAT_FEATURE_COMMON_H_
// feat/feature-fbank.cc
// Copyright 2009-2012 Karel Vesely
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-fbank.h"
namespace kaldi {
// Builds a filterbank computer from `opts` (which is copied).
FbankComputer::FbankComputer(const FbankOptions &opts):
    // log_energy_floor_ gets a definite value up front: it is otherwise left
    // unset when energy_floor <= 0, and the copy constructor reads it
    // unconditionally.
    opts_(opts), log_energy_floor_(0.0), srfft_(NULL) {
  if (opts.energy_floor > 0.0)
    log_energy_floor_ = Log(opts.energy_floor);

  // The split-radix FFT object is only created for power-of-two window
  // sizes; otherwise srfft_ stays NULL.
  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);

  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
  // [note: this call caches it.]
  GetMelBanks(1.0);
}
// Copy constructor: the map of pointers is copied first, then every entry is
// replaced by a freshly allocated copy of the MelBanks it pointed to, so the
// two objects never share MelBanks or FFT instances.
FbankComputer::FbankComputer(const FbankComputer &other):
    opts_(other.opts_), log_energy_floor_(other.log_energy_floor_),
    mel_banks_(other.mel_banks_), srfft_(NULL) {
  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
  BankIter entry = mel_banks_.begin();
  while (entry != mel_banks_.end()) {
    const MelBanks *source_banks = entry->second;
    entry->second = new MelBanks(*source_banks);
    ++entry;
  }
  if (other.srfft_ != NULL) {
    srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
  }
}
// Frees every cached MelBanks object and the FFT helper (which may be NULL).
FbankComputer::~FbankComputer() {
  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
  BankIter entry = mel_banks_.begin();
  while (entry != mel_banks_.end()) {
    delete entry->second;
    ++entry;
  }
  delete srfft_;
}
// Returns the MelBanks cached for `vtln_warp`, creating and caching a new
// one on first use.  The returned object stays owned by this class.
const MelBanks* FbankComputer::GetMelBanks(BaseFloat vtln_warp) {
  std::map<BaseFloat, MelBanks*>::iterator cached = mel_banks_.find(vtln_warp);
  if (cached != mel_banks_.end())
    return cached->second;

  MelBanks *created = new MelBanks(opts_.mel_opts,
                                   opts_.frame_opts,
                                   vtln_warp);
  mel_banks_[vtln_warp] = created;
  return created;
}
// Computes one frame of filterbank features.  `signal_frame` is used as a
// workspace and is overwritten; `feature` must have this->Dim() elements.
void FbankComputer::Compute(BaseFloat signal_raw_log_energy,
                            BaseFloat vtln_warp,
                            VectorBase<BaseFloat> *signal_frame,
                            VectorBase<BaseFloat> *feature) {
  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));

  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
               feature->Dim() == this->Dim());

  const bool with_energy = opts_.use_energy;

  // Energy taken after the window function (i.e. not the raw one) replaces
  // the value passed in by the caller.
  if (with_energy && !opts_.raw_energy) {
    const BaseFloat frame_energy = VecVec(*signal_frame, *signal_frame);
    signal_raw_log_energy = Log(std::max<BaseFloat>(
        frame_energy, std::numeric_limits<float>::epsilon()));
  }

  // Forward FFT in place: generic routine when no split-radix object exists
  // (non-power-of-two sizes), split-radix otherwise.
  if (srfft_ == NULL) {
    RealFft(signal_frame, true);
  } else {
    srfft_->Compute(signal_frame->Data(), true);
  }

  // Turn the FFT output into a power spectrum, kept in the leading
  // Dim()/2 + 1 entries of the frame.
  ComputePowerSpectrum(signal_frame);
  const int32 num_spectrum_bins = signal_frame->Dim() / 2 + 1;
  SubVector<BaseFloat> power_spectrum(*signal_frame, 0, num_spectrum_bins);

  // Magnitude instead of power, if requested.
  if (!opts_.use_power) {
    power_spectrum.ApplyPow(0.5);
  }

  // The mel energies start at index 1 when energy occupies slot 0.
  int32 first_mel_index = 0;
  if (with_energy && !opts_.htk_compat) {
    first_mel_index = 1;
  }
  SubVector<BaseFloat> mel_energies(*feature,
                                    first_mel_index,
                                    opts_.mel_opts.num_bins);

  // Apply the mel filterbanks to the spectrum.
  mel_banks.Compute(power_spectrum, &mel_energies);

  if (opts_.use_log_fbank) {
    // Floor before the log to avoid log of zero (which dithering should
    // prevent anyway).
    mel_energies.ApplyFloor(std::numeric_limits<float>::epsilon());
    mel_energies.ApplyLog();
  }

  // Energy goes first, or last when htk_compat is set.
  if (with_energy) {
    const bool below_floor = opts_.energy_floor > 0.0 &&
                             signal_raw_log_energy < log_energy_floor_;
    if (below_floor) {
      signal_raw_log_energy = log_energy_floor_;
    }
    const int32 energy_index =
        opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
    (*feature)(energy_index) = signal_raw_log_energy;
  }
}
} // namespace kaldi
// feat/feature-fbank.h
// Copyright 2009-2012 Karel Vesely
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_FBANK_H_
#define KALDI_FEAT_FEATURE_FBANK_H_
#include <map>
#include <string>
#include "feat/feature-common.h"
#include "feat/feature-functions.h"
#include "feat/feature-window.h"
#include "feat/mel-computations.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// FbankOptions contains basic options for computing filterbank features.
/// It only includes things that can be done in a "stateless" way, i.e.
/// it does not include energy max-normalization.
/// It does not include delta computation.
struct FbankOptions {
FrameExtractionOptions frame_opts;
MelBanksOptions mel_opts;
bool use_energy; // append an extra dimension with energy to the filter banks
BaseFloat energy_floor; // floor on energy; default 0.0 (see "energy-floor" below)
bool raw_energy; // If true, compute energy before preemphasis and windowing
bool htk_compat; // If true, put energy last (if using energy)
bool use_log_fbank; // if true (default), produce log-filterbank, else linear
bool use_power; // if true (default), use power in filterbank analysis, else magnitude.
// Sets the defaults listed in the initializer list below; frame_opts is
// default-constructed.
FbankOptions(): mel_opts(23),
// defaults the #mel-banks to 23 for the FBANK computations.
// this seems to be common for 16khz-sampled data,
// but for 8khz-sampled data, 15 may be better.
use_energy(false),
energy_floor(0.0),
raw_energy(true),
htk_compat(false),
use_log_fbank(true),
use_power(true) {}
// Registers the frame and mel options, then each option of this struct
// under its command-line name, with `opts`.
void Register(OptionsItf *opts) {
frame_opts.Register(opts);
mel_opts.Register(opts);
opts->Register("use-energy", &use_energy,
"Add an extra dimension with energy to the FBANK output.");
opts->Register("energy-floor", &energy_floor,
"Floor on energy (absolute, not relative) in FBANK computation. "
"Only makes a difference if --use-energy=true; only necessary if "
"--dither=0.0. Suggested values: 0.1 or 1.0");
opts->Register("raw-energy", &raw_energy,
"If true, compute energy before preemphasis and windowing");
opts->Register("htk-compat", &htk_compat, "If true, put energy last. "
"Warning: not sufficient to get HTK compatible features (need "
"to change other parameters).");
opts->Register("use-log-fbank", &use_log_fbank,
"If true, produce log-filterbank, else produce linear.");
opts->Register("use-power", &use_power,
"If true, use power, else use magnitude.");
}
};
/// Class for computing mel-filterbank features; see \ref feat_mfcc for more
/// information.
class FbankComputer {
public:
typedef FbankOptions Options;
// Copies `opts`; see the definition for what is precomputed.
explicit FbankComputer(const FbankOptions &opts);
FbankComputer(const FbankComputer &other);
// Feature dimension: one value per mel bin, plus one when use_energy is set.
int32 Dim() const {
return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
}
// The raw log-energy is only needed when energy is both used and taken
// from the raw (pre-windowing) signal.
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
/**
Function that computes one frame of features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater. Must be
ignored by this function if this class returns false from
this->NeedRawLogEnergy().
@param [in] vtln_warp The VTLN warping factor that the user wants
to be applied when computing features for this utterance. Will
normally be 1.0, meaning no warping is to be done. The value will
be ignored for feature types that don't support VTLN, such as
spectrogram features.
@param [in] signal_frame One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions(). The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
~FbankComputer();
private:
// Returns the cached MelBanks for `vtln_warp`, creating it on first use.
const MelBanks *GetMelBanks(BaseFloat vtln_warp);
FbankOptions opts_;
// Log of opts_.energy_floor; Compute() only consults it when
// opts_.energy_floor > 0.
BaseFloat log_energy_floor_;
std::map<BaseFloat, MelBanks*> mel_banks_; // BaseFloat is VTLN coefficient.
// Split-radix FFT object; NULL when the padded window size is not a power
// of two.  Owned by this class.
SplitRadixRealFft<BaseFloat> *srfft_;
// Disallow assignment.
FbankComputer &operator =(const FbankComputer &other);
};
// Offline (whole-waveform) filterbank feature extractor.
typedef OfflineFeatureTpl<FbankComputer> Fbank;
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_FBANK_H_
// feat/feature-functions.cc
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Microsoft Corporation
// 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-functions.h"
#include "matrix/matrix-functions.h"
namespace kaldi {
// Converts, in place, the packed output of a real FFT into a power spectrum.
// On entry `waveform` holds the first half of the complex spectrum stored as
// [real0, realN/2, real1, im1, real2, im2, ...]; on exit entries 0..dim/2
// hold the squared magnitudes.  The dimension is not required to be a power
// of two.
void ComputePowerSpectrum(VectorBase<BaseFloat> *waveform) {
  VectorBase<BaseFloat> &spectrum = *waveform;
  const int32 dim = spectrum.Dim();
  const int32 last_bin = dim / 2;

  // The two purely real terms share slots 0 and 1; save their energies
  // before the loop below starts overwriting slot 1.
  const BaseFloat first_energy = spectrum(0) * spectrum(0);
  const BaseFloat last_energy = spectrum(1) * spectrum(1);

  for (int32 bin = 1; bin < last_bin; ++bin) {
    const BaseFloat re = spectrum(bin * 2);
    const BaseFloat im = spectrum(bin * 2 + 1);
    spectrum(bin) = re * re + im * im;
  }

  spectrum(0) = first_energy;
  // Will actually never be used, and anyway if the signal has been
  // bandlimited sensibly this should be zero.
  spectrum(last_bin) = last_energy;
}
// Precomputes, for every order 0..opts.order, the vector of scales applied to
// neighbouring frames.  Order 0 is the single scale 1.0; each higher order is
// built from the previous one with a window of half-width opts.window.
DeltaFeatures::DeltaFeatures(const DeltaFeaturesOptions &opts): opts_(opts) {
  // Basic sanity checks so that binary junk is caught early; the order will
  // normally be 2 or 3 and the window size two.
  KALDI_ASSERT(opts.order >= 0 && opts.order < 1000);
  KALDI_ASSERT(opts.window > 0 && opts.window < 1000);

  scales_.resize(opts.order+1);
  // Trivial window for the 0th order [i.e. the baseline features].
  scales_[0].Resize(1);
  scales_[0](0) = 1.0;

  for (int32 order = 1; order <= opts.order; ++order) {
    Vector<BaseFloat> &lower = scales_[order-1];
    Vector<BaseFloat> &current = scales_[order];
    // Kept as a per-order local so the code still works if the window later
    // becomes an array indexed by order.  "window" specifies a delta window
    // whose actual width is 2*window + 1.
    const int32 window = opts.window;
    KALDI_ASSERT(window != 0);
    const int32 lower_half = (static_cast<int32>(lower.Dim()-1))/2;
    const int32 current_half = lower_half + window;
    current.Resize(lower.Dim() + 2*window);  // also zeros it.

    BaseFloat normalizer = 0.0;
    for (int32 j = -window; j <= window; ++j) {
      normalizer += j*j;
      for (int32 k = -lower_half; k <= lower_half; ++k) {
        current(j+k+current_half) +=
            static_cast<BaseFloat>(j) * lower(k+lower_half);
      }
    }
    current.Scale(1.0 / normalizer);
  }
}
void DeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
VectorBase<BaseFloat> *output_frame) const {
KALDI_ASSERT(frame < input_feats.NumRows());
int32 num_frames = input_feats.NumRows(),
feat_dim = input_feats.NumCols();
KALDI_ASSERT(static_cast<int32>(output_frame->Dim()) == feat_dim * (opts_.order+1));
output_frame->SetZero();
for (int32 i = 0; i <= opts_.order; i++) {
const Vector<BaseFloat> &scales = scales_[i];
int32 max_offset = (scales.Dim() - 1) / 2;
SubVector<BaseFloat> output(*output_frame, i*feat_dim, feat_dim);
for (int32 j = -max_offset; j <= max_offset; j++) {
// if asked to read
int32 offset_frame = frame + j;
if (offset_frame < 0) offset_frame = 0;
else if (offset_frame >= num_frames)
offset_frame = num_frames - 1;
BaseFloat scale = scales(j + max_offset);
if (scale != 0.0)
output.AddVec(scale, input_feats.Row(offset_frame));
}
}
}
// Builds the single delta window shared by all blocks: a ramp of
// 2*window + 1 taps (-window .. window) divided by its sum of squares.
ShiftedDeltaFeatures::ShiftedDeltaFeatures(
    const ShiftedDeltaFeaturesOptions &opts): opts_(opts) {
  KALDI_ASSERT(opts.window > 0 && opts.window < 1000);
  const int32 half_width = opts.window;
  KALDI_ASSERT(half_width != 0);
  scales_.Resize(1 + 2 * half_width);  // Resize() also zeroes.
  BaseFloat sum_sq = 0.0;
  for (int32 idx = 0; idx <= 2 * half_width; idx++) {
    const int32 tap = idx - half_width;
    sum_sq += tap * tap;
    scales_(idx) += static_cast<BaseFloat>(tap);
  }
  scales_.Scale(1.0 / sum_sq);
}
void ShiftedDeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
SubVector<BaseFloat> *output_frame) const {
KALDI_ASSERT(frame < input_feats.NumRows());
int32 num_frames = input_feats.NumRows(),
feat_dim = input_feats.NumCols();
KALDI_ASSERT(static_cast<int32>(output_frame->Dim())
== feat_dim * (opts_.num_blocks + 1));
output_frame->SetZero();
// The original features
SubVector<BaseFloat> output(*output_frame, 0, feat_dim);
output.AddVec(1.0, input_feats.Row(frame));
// Concatenate the delta-blocks. Each block is block_shift
// (usually 3) frames apart.
for (int32 i = 0; i < opts_.num_blocks; i++) {
int32 max_offset = (scales_.Dim() - 1) / 2;
SubVector<BaseFloat> output(*output_frame, (i + 1) * feat_dim, feat_dim);
for (int32 j = -max_offset; j <= max_offset; j++) {
int32 offset_frame = frame + j + i * opts_.block_shift;
if (offset_frame < 0) offset_frame = 0;
else if (offset_frame >= num_frames)
offset_frame = num_frames - 1;
BaseFloat scale = scales_(j + max_offset);
if (scale != 0.0)
output.AddVec(scale, input_feats.Row(offset_frame));
}
}
}
void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features) {
output_features->Resize(input_features.NumRows(),
input_features.NumCols()
*(delta_opts.order + 1));
DeltaFeatures delta(delta_opts);
for (int32 r = 0; r < static_cast<int32>(input_features.NumRows()); r++) {
SubVector<BaseFloat> row(*output_features, r);
delta.Process(input_features, r, &row);
}
}
void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts,
const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features) {
output_features->Resize(input_features.NumRows(),
input_features.NumCols()
* (delta_opts.num_blocks + 1));
ShiftedDeltaFeatures delta(delta_opts);
for (int32 r = 0; r < static_cast<int32>(input_features.NumRows()); r++) {
SubVector<BaseFloat> row(*output_features, r);
delta.Process(input_features, r, &row);
}
}
// Fills *mat_out (resized to n_bases x dimension) with cosine bases.  With
// angle = pi / (dimension - 1) and scale = 1 / (2 * (dimension - 1)):
//   column 0             : scale
//   columns 1..dimension-2: 2 * scale * cos(angle * row * col)
//   column dimension-1   : scale * cos(angle * row * (dimension - 1))
// Both angle and scale divide by (dimension - 1), so dimension must be > 1.
void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out) {
  const int32 last_col = dimension - 1;
  BaseFloat angle = M_PI / static_cast<BaseFloat>(last_col);
  BaseFloat scale = 1.0f / (2.0 * static_cast<BaseFloat>(last_col));
  mat_out->Resize(n_bases, dimension);
  for (int32 row = 0; row < n_bases; row++) {
    BaseFloat row_fl = static_cast<BaseFloat>(row);
    (*mat_out)(row, 0) = 1.0 * scale;
    for (int32 col = 1; col < last_col; col++) {
      BaseFloat col_fl = static_cast<BaseFloat>(col);
      (*mat_out)(row, col) = 2.0 * scale * cos(angle * row_fl * col_fl);
    }
    (*mat_out)(row, last_col)
        = scale * cos(angle * row_fl * static_cast<BaseFloat>(last_col));
  }
}
void SpliceFrames(const MatrixBase<BaseFloat> &input_features,
int32 left_context,
int32 right_context,
Matrix<BaseFloat> *output_features) {
int32 T = input_features.NumRows(), D = input_features.NumCols();
if (T == 0 || D == 0)
KALDI_ERR << "SpliceFrames: empty input";
KALDI_ASSERT(left_context >= 0 && right_context >= 0);
int32 N = 1 + left_context + right_context;
output_features->Resize(T, D*N);
for (int32 t = 0; t < T; t++) {
SubVector<BaseFloat> dst_row(*output_features, t);
for (int32 j = 0; j < N; j++) {
int32 t2 = t + j - left_context;
if (t2 < 0) t2 = 0;
if (t2 >= T) t2 = T-1;
SubVector<BaseFloat> dst(dst_row, j*D, D),
src(input_features, t2);
dst.CopyFromVec(src);
}
}
}
void ReverseFrames(const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features) {
int32 T = input_features.NumRows(), D = input_features.NumCols();
if (T == 0 || D == 0)
KALDI_ERR << "ReverseFrames: empty input";
output_features->Resize(T, D);
for (int32 t = 0; t < T; t++) {
SubVector<BaseFloat> dst_row(*output_features, t);
SubVector<BaseFloat> src_row(input_features, T-1-t);
dst_row.CopyFromVec(src_row);
}
}
// Asserts that the option values are usable: cmn_window must be positive and,
// when center is true, min_window must lie in (0, cmn_window].
// NOTE(review): SlidingWindowCmnInternal only reads min_window when center is
// false, which is the case this function does not validate -- confirm whether
// the condition below is intended.
void SlidingWindowCmnOptions::Check() const {
  KALDI_ASSERT(cmn_window > 0);
  if (!center)
    return;  // min_window is not validated in this mode.
  KALDI_ASSERT(min_window > 0 && min_window <= cmn_window);
}
// Internal version of SlidingWindowCmn with double-precision arguments.
// For every frame t it picks a window [window_start, window_end) of input
// rows, subtracts the mean of that window from row t and, if
// opts.normalize_variance is set, also divides by the window's standard
// deviation.  The per-window sum (and sum of squares) is kept as a running
// total that is updated by one frame at a time as the window slides.
// "input" and "output" are indexed with the same row index t.
void SlidingWindowCmnInternal(const SlidingWindowCmnOptions &opts,
const MatrixBase<double> &input,
MatrixBase<double> *output) {
opts.Check();
int32 num_frames = input.NumRows(), dim = input.NumCols(),
last_window_start = -1, last_window_end = -1,
warning_count = 0;
// Running sum and sum-of-squares over the rows of the current window.
Vector<double> cur_sum(dim), cur_sumsq(dim);
for (int32 t = 0; t < num_frames; t++) {
int32 window_start, window_end; // note: window_end will be one
// past the end of the window we use for normalization.
if (opts.center) {
// window of cmn_window frames placed around t.
window_start = t - (opts.cmn_window / 2);
window_end = window_start + opts.cmn_window;
} else {
// window ends at (and includes) the current frame.
window_start = t - opts.cmn_window;
window_end = t + 1;
}
if (window_start < 0) { // shift window right if starts <0.
window_end -= window_start;
window_start = 0; // or: window_start -= window_start
}
if (!opts.center) {
// never look further ahead than min_window frames from the start.
if (window_end > t)
window_end = std::max(t + 1, opts.min_window);
}
if (window_end > num_frames) {
// shift window left if it runs past the last frame.
window_start -= (window_end - num_frames);
window_end = num_frames;
if (window_start < 0) window_start = 0;
}
if (last_window_start == -1) {
// First frame: compute the sums over the whole window from scratch.
SubMatrix<double> input_part(input,
window_start, window_end - window_start,
0, dim);
cur_sum.AddRowSumMat(1.0, input_part , 0.0);
if (opts.normalize_variance)
cur_sumsq.AddDiagMat2(1.0, input_part, kTrans, 0.0);
} else {
// Later frames: the window may only advance by one row at either end,
// so drop the row that left it and add the row that entered it.
if (window_start > last_window_start) {
KALDI_ASSERT(window_start == last_window_start + 1);
SubVector<double> frame_to_remove(input, last_window_start);
cur_sum.AddVec(-1.0, frame_to_remove);
if (opts.normalize_variance)
cur_sumsq.AddVec2(-1.0, frame_to_remove);
}
if (window_end > last_window_end) {
KALDI_ASSERT(window_end == last_window_end + 1);
SubVector<double> frame_to_add(input, last_window_end);
cur_sum.AddVec(1.0, frame_to_add);
if (opts.normalize_variance)
cur_sumsq.AddVec2(1.0, frame_to_add);
}
}
int32 window_frames = window_end - window_start;
last_window_start = window_start;
last_window_end = window_end;
KALDI_ASSERT(window_frames > 0);
SubVector<double> input_frame(input, t),
output_frame(*output, t);
// output = input - (window mean).
output_frame.CopyFromVec(input_frame);
output_frame.AddVec(-1.0 / window_frames, cur_sum);
if (opts.normalize_variance) {
if (window_frames == 1) {
// a single-frame window has no variance estimate; output zeros.
output_frame.Set(0.0);
} else {
// variance = E[x^2] - (E[x])^2 over the window.
Vector<double> variance(cur_sumsq);
variance.Scale(1.0 / window_frames);
variance.AddVec2(-1.0 / (window_frames * window_frames), cur_sum);
// now "variance" is the variance of the features in the window,
// around their own mean.
int32 num_floored;
variance.ApplyFloor(1.0e-10, &num_floored);
if (num_floored > 0 && num_frames > 1) {
if (opts.max_warnings == warning_count) {
KALDI_WARN << "Suppressing the remaining variance flooring "
<< "warnings. Run program with --max-warnings=-1 to "
<< "see all warnings.";
}
// If opts.max_warnings is a negative number, we won't restrict the
// number of times that the warning is printed out.
else if (opts.max_warnings < 0
|| opts.max_warnings > warning_count) {
KALDI_WARN << "Flooring when normalizing variance, floored "
<< num_floored << " elements; num-frames was "
<< window_frames;
}
warning_count++;
}
variance.ApplyPow(-0.5); // get inverse standard deviation.
output_frame.MulElements(variance);
}
}
}
}
void SlidingWindowCmn(const SlidingWindowCmnOptions &opts,
const MatrixBase<BaseFloat> &input,
MatrixBase<BaseFloat> *output) {
KALDI_ASSERT(SameDim(input, *output) && input.NumRows() > 0);
Matrix<double> input_dbl(input), output_dbl(input.NumRows(), input.NumCols());
// call double-precision version
SlidingWindowCmnInternal(opts, input_dbl, &output_dbl);
output->CopyFromMat(output_dbl);
}
} // namespace kaldi
// feat/feature-functions.h
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Microsoft Corporation
// 2014 IMSL, PKU-HKUST (author: Wei Shi)
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_FUNCTIONS_H_
#define KALDI_FEAT_FEATURE_FUNCTIONS_H_
#include <string>
#include <vector>
#include "matrix/matrix-lib.h"
#include "util/common-utils.h"
#include "base/kaldi-error.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
// ComputePowerSpectrum takes a complex FFT (as produced by the FFT
// functions in matrix/matrix-functions.h) and converts it, in place, into
// a power spectrum.  If the complex FFT is a vector of size n (representing
// half the complex FFT of a real signal of size n, as described there),
// this function computes in the first (n/2) + 1 elements of it, the
// energies of the fft bins from zero to the Nyquist frequency.  Contents of the
// remaining (n/2) - 1 elements are undefined at output.
void ComputePowerSpectrum(VectorBase<BaseFloat> *complex_fft);
// Options for DeltaFeatures / ComputeDeltas.
struct DeltaFeaturesOptions {
int32 order;  // highest delta order to compute; the output holds orders
// 0..order, i.e. (order + 1) blocks of the input dimension.
int32 window;  // e.g. 2; controls window size (window size is 2*window + 1)
// the behavior at the edges is to replicate the first or last frame.
// this is not configurable.
DeltaFeaturesOptions(int32 order = 2, int32 window = 2):
order(order), window(window) { }
// Registers "delta-order" and "delta-window" with the options parser.
void Register(OptionsItf *opts) {
opts->Register("delta-order", &order, "Order of delta computation");
opts->Register("delta-window", &window,
"Parameter controlling window for delta computation (actual window"
" size for each delta order is 1 + 2*delta-window-size)");
}
};
class DeltaFeatures {
public:
// This class provides a low-level function to compute delta features.
// The function takes as input a matrix of features and a frame index
// that it should compute the deltas on.  It puts its output in an object
// of type VectorBase, of size (original-feature-dimension) * (opts.order+1).
// This is not the most efficient way to do the computation, but it's
// state-free and thus easier to understand
explicit DeltaFeatures(const DeltaFeaturesOptions &opts);
// Computes all delta orders for row "frame" of input_feats.  Rows outside
// the matrix are replaced by the first or last row.
void Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
VectorBase<BaseFloat> *output_frame) const;
private:
DeltaFeaturesOptions opts_;
std::vector<Vector<BaseFloat> > scales_;  // a scaling window for each
// of the orders, including zero: multiply the features for each
// dimension by this window.
};
// Options for ShiftedDeltaFeatures / ComputeShiftedDeltas.
struct ShiftedDeltaFeaturesOptions {
int32 window,           // The time delay and advance
num_blocks,       // Number of delta blocks appended to each frame
block_shift;      // Distance between consecutive blocks
ShiftedDeltaFeaturesOptions():
window(1), num_blocks(7), block_shift(3) { }
// Registers "delta-window", "num-blocks" and "block-shift" with the
// options parser.
void Register(OptionsItf *opts) {
opts->Register("delta-window", &window, "Size of delta advance and delay.");
opts->Register("num-blocks", &num_blocks, "Number of delta blocks in advance"
" of each frame to be concatenated");
opts->Register("block-shift", &block_shift, "Distance between each block");
}
};
class ShiftedDeltaFeatures {
public:
// This class provides a low-level function to compute shifted
// delta cepstra (SDC).
// The function takes as input a matrix of features and a frame index
// that it should compute the deltas on.  It puts its output in an object
// of type SubVector, of size
// (original-feature-dimension) * (num_blocks + 1): the original features
// followed by num_blocks delta blocks.
explicit ShiftedDeltaFeatures(const ShiftedDeltaFeaturesOptions &opts);
void Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
SubVector<BaseFloat> *output_frame) const;
private:
ShiftedDeltaFeaturesOptions opts_;
Vector<BaseFloat> scales_;  // the delta window (2*window + 1 weights)
// that is applied to the frames around each block's centre; the same
// window is used for every block.
};
// ComputeDeltas is a convenience function that computes deltas on a feature
// file.  If you want to deal with features coming in bit by bit you would have
// to use the DeltaFeatures class directly, and do the computation frame by
// frame.  Later we will have to come up with a nice mechanism to do this for
// features coming in.
// output_features is resized to NumRows x NumCols * (delta_opts.order + 1).
void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features);
// ComputeShiftedDeltas computes deltas from a feature file by applying
// ShiftedDeltaFeatures over the frames. This function is provided for
// convenience, however, ShiftedDeltaFeatures can be used directly.
// output_features is resized to NumRows x NumCols * (delta_opts.num_blocks + 1).
void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts,
const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features);
// SpliceFrames will normally be used together with LDA.
// It splices frames together to make a window.  At the
// start and end of an utterance, it duplicates the first
// and last frames.
// Will throw if input features are empty.
// left_context and right_context must be nonnegative.
// these both represent a number of frames (e.g. 4, 4 is
// a good choice).
void SpliceFrames(const MatrixBase<BaseFloat> &input_features,
int32 left_context,
int32 right_context,
Matrix<BaseFloat> *output_features);
// ReverseFrames reverses the frames in time (used for backwards decoding).
// Like SpliceFrames, it reports an error (KALDI_ERR) if the input is empty.
void ReverseFrames(const MatrixBase<BaseFloat> &input_features,
Matrix<BaseFloat> *output_features);
// InitIdftBases resizes *mat_out to n_bases x dimension and fills it with
// cosine bases: element (i, j) is proportional to
// cos(pi * i * j / (dimension - 1)), scaled by 1 / (2 * (dimension - 1)) for
// the first and last column and by 1 / (dimension - 1) for the others.
// dimension must be greater than 1 (the code divides by dimension - 1).
void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out);
// This is used for speaker-id.  Also see OnlineCmnOptions in ../online2/, which
// is online CMN with no latency, for online speech recognition.
struct SlidingWindowCmnOptions {
int32 cmn_window;  // number of frames in the normalization window.
int32 min_window;  // minimum window at the start; read only if !center.
int32 max_warnings;  // cap on variance-flooring warnings; negative = no cap.
bool normalize_variance;  // if true, also scale to unit variance.
bool center;  // if true, window is centered on the current frame;
// otherwise it extends to the left of it.
SlidingWindowCmnOptions():
cmn_window(600),
min_window(100),
max_warnings(5),
normalize_variance(false),
center(false) { }
// Registers the options above with the options parser.
void Register(OptionsItf *opts) {
opts->Register("cmn-window", &cmn_window, "Window in frames for running "
"average CMN computation");
opts->Register("min-cmn-window", &min_window, "Minimum CMN window "
"used at start of decoding (adds latency only at start). "
"Only applicable if center == false, ignored if center==true");
opts->Register("max-warnings", &max_warnings, "Maximum warnings to report "
"per utterance. 0 to disable, -1 to show all.");
opts->Register("norm-vars", &normalize_variance, "If true, normalize "
"variance to one."); // naming this as in apply-cmvn.cc
opts->Register("center", &center, "If true, use a window centered on the "
"current frame (to the extent possible, modulo end effects). "
"If false, window is to the left.");
}
// Asserts that cmn_window > 0 and, when center is true, that
// 0 < min_window <= cmn_window.
void Check() const;
};
/// Applies sliding-window cepstral mean and/or variance normalization.  See the
/// strings registering the options in the options class for information on how
/// this works and what the options are.  input and output must have the same
/// dimension, and input must have at least one row.  The computation is
/// carried out internally in double precision.
void SlidingWindowCmn(const SlidingWindowCmnOptions &opts,
const MatrixBase<BaseFloat> &input,
MatrixBase<BaseFloat> *output);
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_FUNCTIONS_H_
// feat/feature-mfcc.cc
// Copyright 2009-2011 Karel Vesely; Petr Motlicek
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-mfcc.h"
namespace kaldi {
// Computes one frame of MFCCs.  *signal_frame (of size PaddedWindowSize()) is
// used as workspace and is overwritten by its FFT / power spectrum; the
// Dim() output coefficients are written to *feature.
void MfccComputer::Compute(BaseFloat signal_raw_log_energy,
                           BaseFloat vtln_warp,
                           VectorBase<BaseFloat> *signal_frame,
                           VectorBase<BaseFloat> *feature) {
  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
               feature->Dim() == this->Dim());

  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));

  // If energy is requested but not the "raw" one, measure it on the frame as
  // passed in; this has to happen before the FFT overwrites the samples.
  if (opts_.use_energy && !opts_.raw_energy) {
    const BaseFloat frame_energy = VecVec(*signal_frame, *signal_frame);
    signal_raw_log_energy = Log(std::max<BaseFloat>(
        frame_energy, std::numeric_limits<float>::epsilon()));
  }

  // Forward FFT, in place.
  if (srfft_ == NULL) {
    // Generic algorithm that also works for non-powers-of-two.
    RealFft(signal_frame, true);
  } else {
    // Split-radix algorithm.
    srfft_->Compute(signal_frame->Data(), true);
  }

  // FFT -> power spectrum -> mel filterbank energies -> log.
  ComputePowerSpectrum(signal_frame);
  const int32 num_spectrum_bins = signal_frame->Dim() / 2 + 1;
  SubVector<BaseFloat> power_spectrum(*signal_frame, 0, num_spectrum_bins);
  mel_banks.Compute(power_spectrum, &mel_energies_);
  // Floor first so that the log of zero cannot occur.
  mel_energies_.ApplyFloor(std::numeric_limits<float>::epsilon());
  mel_energies_.ApplyLog();

  // feature = dct_matrix_ * log-mel-energies.
  feature->SetZero();  // in case the output buffer held NaNs.
  feature->AddMatVec(1.0, dct_matrix_, kNoTrans, mel_energies_, 0.0);

  if (opts_.cepstral_lifter != 0.0)
    feature->MulElements(lifter_coeffs_);

  // Optionally replace C0 by the (floored) log energy.
  if (opts_.use_energy) {
    const bool below_floor = opts_.energy_floor > 0.0 &&
                             signal_raw_log_energy < log_energy_floor_;
    (*feature)(0) = below_floor ? log_energy_floor_ : signal_raw_log_energy;
  }

  // HTK ordering: move energy/C0 from the first to the last position.
  if (opts_.htk_compat) {
    const int32 last = opts_.num_ceps - 1;
    BaseFloat first_coeff = (*feature)(0);
    for (int32 i = 0; i < last; i++)
      (*feature)(i) = (*feature)(i + 1);
    // For C0, remove a scale that the cosine transform introduced earlier.
    if (!opts_.use_energy)
      first_coeff *= M_SQRT2;
    (*feature)(last) = first_coeff;
  }
}
// Builds everything that does not depend on the frame being processed: the
// truncated DCT matrix, the lifter coefficients, the log energy floor, the
// split-radix FFT object (only if the padded window size is a power of two)
// and the mel filterbank for VTLN warp factor 1.0.
// Raises KALDI_ERR if opts.num_ceps exceeds the number of mel bins.
MfccComputer::MfccComputer(const MfccOptions &opts):
    opts_(opts),
    // Always give the floor a defined value: the copy constructor reads it
    // even when no energy floor is configured.
    log_energy_floor_(0.0),
    srfft_(NULL),
    mel_energies_(opts.mel_opts.num_bins) {

  int32 num_bins = opts.mel_opts.num_bins;
  if (opts.num_ceps > num_bins)
    KALDI_ERR << "num-ceps cannot be larger than num-mel-bins."
              << " It should be smaller or equal. You provided num-ceps: "
              << opts.num_ceps << "  and num-mel-bins: "
              << num_bins;

  Matrix<BaseFloat> dct_matrix(num_bins, num_bins);
  ComputeDctMatrix(&dct_matrix);
  // Note that we include zeroth dct in either case.  If using the
  // energy we replace this with the energy.  This means a different
  // ordering of features than HTK.
  SubMatrix<BaseFloat> dct_rows(dct_matrix, 0, opts.num_ceps, 0, num_bins);
  dct_matrix_.Resize(opts.num_ceps, num_bins);
  dct_matrix_.CopyFromMat(dct_rows);  // subset of rows.

  if (opts.cepstral_lifter != 0.0) {
    lifter_coeffs_.Resize(opts.num_ceps);
    ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_);
  }

  // Only consulted by Compute() when energy_floor > 0.
  if (opts.energy_floor > 0.0)
    log_energy_floor_ = Log(opts.energy_floor);

  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);

  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
  // [note: this call caches it.]
  GetMelBanks(1.0);
}
// Copy constructor.  Plain members are copied; the objects owned through raw
// pointers (the cached MelBanks and the split-radix FFT) are cloned so that
// the copy does not share them with "other".  mel_energies_ is only sized,
// not copied.
MfccComputer::MfccComputer(const MfccComputer &other):
    opts_(other.opts_), lifter_coeffs_(other.lifter_coeffs_),
    dct_matrix_(other.dct_matrix_),
    log_energy_floor_(other.log_energy_floor_),
    mel_banks_(other.mel_banks_),
    srfft_(NULL),
    mel_energies_(other.mel_energies_.Dim(), kUndefined) {
  // mel_banks_ currently holds other's pointers; replace each by a clone.
  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
  for (BankIter it = mel_banks_.begin(); it != mel_banks_.end(); ++it) {
    const MelBanks *shared = it->second;
    it->second = new MelBanks(*shared);
  }
  if (other.srfft_ != NULL) {
    srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
  }
}
// Releases the cached MelBanks objects and the FFT object owned by this
// computer.
MfccComputer::~MfccComputer() {
  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
  BankIter it = mel_banks_.begin();
  while (it != mel_banks_.end()) {
    delete it->second;
    ++it;
  }
  delete srfft_;
}
// Returns the mel filterbank for the given VTLN warp factor, creating it and
// caching it in mel_banks_ on first use.  The returned object remains owned
// by this computer.
const MelBanks *MfccComputer::GetMelBanks(BaseFloat vtln_warp) {
  std::map<BaseFloat, MelBanks*>::iterator cached = mel_banks_.find(vtln_warp);
  if (cached != mel_banks_.end())
    return cached->second;
  MelBanks *created = new MelBanks(opts_.mel_opts,
                                   opts_.frame_opts,
                                   vtln_warp);
  mel_banks_[vtln_warp] = created;
  return created;
}
} // namespace kaldi
// feat/feature-mfcc.h
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Saarland University
// 2014-2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_MFCC_H_
#define KALDI_FEAT_FEATURE_MFCC_H_
#include <map>
#include <string>
#include "feat/feature-common.h"
#include "feat/feature-functions.h"
#include "feat/feature-window.h"
#include "feat/mel-computations.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// MfccOptions contains basic options for computing MFCC features.
/// It bundles the frame-extraction and mel-filterbank options with the
/// MFCC-specific ones and registers all of them in Register().
struct MfccOptions {
FrameExtractionOptions frame_opts;
MelBanksOptions mel_opts;
int32 num_ceps;  // e.g. 13: num cepstral coeffs, counting zero.
bool use_energy;  // use energy; else C0
BaseFloat energy_floor;  // 0 by default; set to a value like 1.0 or 0.1 if
// you disable dithering.
bool raw_energy;  // If true, compute energy before preemphasis and windowing
BaseFloat cepstral_lifter;  // Scaling factor on cepstra for HTK compatibility.
// if 0.0, no liftering is done.
bool htk_compat;  // if true, put energy/C0 last and introduce a factor of
// sqrt(2) on C0 to be the same as HTK.
MfccOptions() : mel_opts(23),
// defaults the #mel-banks to 23 for the MFCC computations.
// this seems to be common for 16khz-sampled data,
// but for 8khz-sampled data, 15 may be better.
num_ceps(13),
use_energy(true),
energy_floor(0.0),
raw_energy(true),
cepstral_lifter(22.0),
htk_compat(false) {}
// Registers the frame and mel options first, then the MFCC-specific ones.
void Register(OptionsItf *opts) {
frame_opts.Register(opts);
mel_opts.Register(opts);
opts->Register("num-ceps", &num_ceps,
"Number of cepstra in MFCC computation (including C0)");
opts->Register("use-energy", &use_energy,
"Use energy (not C0) in MFCC computation");
opts->Register("energy-floor", &energy_floor,
"Floor on energy (absolute, not relative) in MFCC computation. "
"Only makes a difference if --use-energy=true; only necessary if "
"--dither=0.0.  Suggested values: 0.1 or 1.0");
opts->Register("raw-energy", &raw_energy,
"If true, compute energy before preemphasis and windowing");
opts->Register("cepstral-lifter", &cepstral_lifter,
"Constant that controls scaling of MFCCs");
opts->Register("htk-compat", &htk_compat,
"If true, put energy or C0 last and use a factor of sqrt(2) on "
"C0.  Warning: not sufficient to get HTK compatible features "
"(need to change other parameters).");
}
};
// This is the new-style interface to the MFCC computation.
class MfccComputer {
public:
typedef MfccOptions Options;
explicit MfccComputer(const MfccOptions &opts);
// Copy constructor; clones the cached MelBanks and the FFT object.
MfccComputer(const MfccComputer &other);
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
// Dimension of the output feature vector (the number of cepstra).
int32 Dim() const { return opts_.num_ceps; }
// True if Compute() needs the log-energy measured before windowing.
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
/**
Function that computes one frame of features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater.  Must be
ignored by this function if this class returns false from
this->NeedRawLogEnergy().
@param [in] vtln_warp  The VTLN warping factor that the user wants
to be applied when computing features for this utterance.  Will
normally be 1.0, meaning no warping is to be done.  The value will
be ignored for feature types that don't support VTLN, such as
spectrogram features.
@param [in] signal_frame  One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions().  The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature  Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
~MfccComputer();
private:
// disallow assignment.
MfccComputer &operator = (const MfccComputer &in);
protected:
// Returns the (cached) mel filterbank for this warp factor, creating it
// on first use; the object stays owned by this class.
const MelBanks *GetMelBanks(BaseFloat vtln_warp);
MfccOptions opts_;
Vector<BaseFloat> lifter_coeffs_;
Matrix<BaseFloat> dct_matrix_;  // matrix we left-multiply by to perform DCT.
BaseFloat log_energy_floor_;  // log of opts_.energy_floor; only consulted
// by Compute() when opts_.energy_floor > 0.
std::map<BaseFloat, MelBanks*> mel_banks_;  // BaseFloat is VTLN coefficient.
SplitRadixRealFft<BaseFloat> *srfft_;  // NULL unless the padded window size
// is a power of two.
// note: mel_energies_ is specific to the frame we're processing, it's
// just a temporary workspace.
Vector<BaseFloat> mel_energies_;
};
typedef OfflineFeatureTpl<MfccComputer> Mfcc;
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_MFCC_H_
// feat/feature-plp.cc
// Copyright 2009-2011 Petr Motlicek; Karel Vesely
// 2016 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-plp.h"
namespace kaldi {
// Builds everything that does not depend on the frame being processed: the
// lifter coefficients, the IDFT bases, the log energy floor, the split-radix
// FFT object (only if the padded window size is a power of two) and the mel
// filterbank for VTLN warp factor 1.0.  The per-frame workspaces are only
// sized here (kUndefined), not initialized.
PlpComputer::PlpComputer(const PlpOptions &opts):
    opts_(opts), srfft_(NULL),
    mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined),
    autocorr_coeffs_(opts_.lpc_order + 1, kUndefined),
    lpc_coeffs_(opts_.lpc_order, kUndefined),
    raw_cepstrum_(opts_.lpc_order, kUndefined) {

  if (opts.cepstral_lifter != 0.0) {
    lifter_coeffs_.Resize(opts.num_ceps);
    ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_);
  }
  InitIdftBases(opts_.lpc_order + 1, opts_.mel_opts.num_bins + 2,
                &idft_bases_);

  // Always give the floor a defined value: the copy constructor reads it
  // even when no energy floor is configured.  Compute() only consults it
  // when energy_floor > 0.
  if (opts.energy_floor > 0.0)
    log_energy_floor_ = Log(opts.energy_floor);
  else
    log_energy_floor_ = 0.0;

  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);

  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
  // [note: this call caches it.]
  GetMelBanks(1.0);
}
// Copy constructor.  Plain members are copied; the objects owned through raw
// pointers (cached MelBanks, cached equal-loudness vectors and the
// split-radix FFT) are cloned so that the copy does not share them with
// "other".  The per-frame workspaces are only sized, not copied.
PlpComputer::PlpComputer(const PlpComputer &other):
    opts_(other.opts_), lifter_coeffs_(other.lifter_coeffs_),
    idft_bases_(other.idft_bases_), log_energy_floor_(other.log_energy_floor_),
    mel_banks_(other.mel_banks_), equal_loudness_(other.equal_loudness_),
    srfft_(NULL),
    mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined),
    autocorr_coeffs_(opts_.lpc_order + 1, kUndefined),
    lpc_coeffs_(opts_.lpc_order, kUndefined),
    raw_cepstrum_(opts_.lpc_order, kUndefined) {
  // Both maps currently hold other's pointers; replace each by a clone.
  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
  for (BankIter it = mel_banks_.begin(); it != mel_banks_.end(); ++it) {
    const MelBanks *shared = it->second;
    it->second = new MelBanks(*shared);
  }
  typedef std::map<BaseFloat, Vector<BaseFloat>*>::iterator LoudnessIter;
  for (LoudnessIter it = equal_loudness_.begin();
       it != equal_loudness_.end(); ++it) {
    const Vector<BaseFloat> *shared = it->second;
    it->second = new Vector<BaseFloat>(*shared);
  }
  if (other.srfft_ != NULL) {
    srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
  }
}
// Releases the cached MelBanks objects, the cached equal-loudness vectors and
// the FFT object owned by this computer.
PlpComputer::~PlpComputer() {
  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
  BankIter bank = mel_banks_.begin();
  while (bank != mel_banks_.end()) {
    delete bank->second;
    ++bank;
  }
  typedef std::map<BaseFloat, Vector<BaseFloat>* >::iterator LoudnessIter;
  LoudnessIter loudness = equal_loudness_.begin();
  while (loudness != equal_loudness_.end()) {
    delete loudness->second;
    ++loudness;
  }
  delete srfft_;
}
// Returns the mel filterbank for the given VTLN warp factor, creating it and
// caching it in mel_banks_ on first use.  The returned object remains owned
// by this computer.
const MelBanks *PlpComputer::GetMelBanks(BaseFloat vtln_warp) {
  std::map<BaseFloat, MelBanks*>::iterator cached = mel_banks_.find(vtln_warp);
  if (cached != mel_banks_.end())
    return cached->second;
  MelBanks *created = new MelBanks(opts_.mel_opts,
                                   opts_.frame_opts,
                                   vtln_warp);
  mel_banks_[vtln_warp] = created;
  return created;
}
// Returns the equal-loudness vector for the given VTLN warp factor, deriving
// it from the matching mel filterbank and caching it in equal_loudness_ on
// first use.  The returned object remains owned by this computer.
const Vector<BaseFloat> *PlpComputer::GetEqualLoudness(BaseFloat vtln_warp) {
  // Looked up first, unconditionally, so the filterbank cache is populated
  // for this warp factor as well.
  const MelBanks *banks = GetMelBanks(vtln_warp);
  std::map<BaseFloat, Vector<BaseFloat>*>::iterator cached
      = equal_loudness_.find(vtln_warp);
  if (cached != equal_loudness_.end())
    return cached->second;
  Vector<BaseFloat> *created = new Vector<BaseFloat>;
  GetEqualLoudnessVector(*banks, created);
  equal_loudness_[vtln_warp] = created;
  return created;
}
// Computes one frame of PLP features.  *signal_frame (of size
// PaddedWindowSize()) is used as workspace and is overwritten by its FFT /
// power spectrum; the Dim() output coefficients are written to *feature.
// The member vectors mel_energies_duplicated_, autocorr_coeffs_, lpc_coeffs_
// and raw_cepstrum_ are per-frame scratch buffers that are overwritten here.
void PlpComputer::Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature) {
KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
feature->Dim() == this->Dim());
const MelBanks &mel_banks = *GetMelBanks(vtln_warp);
const Vector<BaseFloat> &equal_loudness = *GetEqualLoudness(vtln_warp);
KALDI_ASSERT(opts_.num_ceps <= opts_.lpc_order+1);  // our num-ceps includes C0.
// If energy is requested but not the "raw" one, measure it on the frame as
// passed in; this must happen before the FFT overwrites the samples.
if (opts_.use_energy && !opts_.raw_energy)
signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
std::numeric_limits<float>::min()));
if (srfft_ != NULL)  // Compute FFT using split-radix algorithm.
srfft_->Compute(signal_frame->Data(), true);
else  // An alternative algorithm that works for non-powers-of-two.
RealFft(signal_frame, true);
// Convert the FFT into a power spectrum.
ComputePowerSpectrum(signal_frame);  // elements 0 ... signal_frame->Dim()/2
SubVector<BaseFloat> power_spectrum(*signal_frame,
0, signal_frame->Dim() / 2 + 1);
int32 num_mel_bins = opts_.mel_opts.num_bins;
// mel_energies is a view onto elements 1..num_mel_bins of the duplicated
// buffer, leaving one slot free at each end.
SubVector<BaseFloat> mel_energies(mel_energies_duplicated_, 1, num_mel_bins);
mel_banks.Compute(power_spectrum, &mel_energies);
mel_energies.MulElements(equal_loudness);
mel_energies.ApplyPow(opts_.compress_factor);
// duplicate first and last elements
mel_energies_duplicated_(0) = mel_energies_duplicated_(1);
mel_energies_duplicated_(num_mel_bins + 1) =
mel_energies_duplicated_(num_mel_bins);
autocorr_coeffs_.SetZero();  // In case of NaNs or infs
// autocorr_coeffs_ = idft_bases_ * mel_energies_duplicated_.
autocorr_coeffs_.AddMatVec(1.0, idft_bases_, kNoTrans,
mel_energies_duplicated_, 0.0);
BaseFloat residual_log_energy = ComputeLpc(autocorr_coeffs_, &lpc_coeffs_);
// NOTE(review): this floors the value at numeric_limits<float>::min(), a
// tiny positive number.  If ComputeLpc returns a log-energy (as the variable
// name suggests), any negative log-energy is clamped to ~0 here -- confirm
// that this is intended.
residual_log_energy = std::max<BaseFloat>(residual_log_energy,
std::numeric_limits<float>::min());
Lpc2Cepstrum(opts_.lpc_order, lpc_coeffs_.Data(), raw_cepstrum_.Data());
// feature[1 .. num_ceps-1] = first num_ceps-1 raw cepstral coefficients;
// feature[0] = residual log energy.
feature->Range(1, opts_.num_ceps - 1).CopyFromVec(
raw_cepstrum_.Range(0, opts_.num_ceps - 1));
(*feature)(0) = residual_log_energy;
if (opts_.cepstral_lifter != 0.0)
feature->MulElements(lifter_coeffs_);
if (opts_.cepstral_scale != 1.0)
feature->Scale(opts_.cepstral_scale);
// Optionally replace element 0 by the (floored) signal log energy.
if (opts_.use_energy) {
if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
signal_raw_log_energy = log_energy_floor_;
(*feature)(0) = signal_raw_log_energy;
}
if (opts_.htk_compat) {  // reorder the features.
// move element 0 (energy / C0) to the last position.
BaseFloat log_energy = (*feature)(0);
for (int32 i = 0; i < opts_.num_ceps-1; i++)
(*feature)(i) = (*feature)(i+1);
(*feature)(opts_.num_ceps-1) = log_energy;
}
}
} // namespace kaldi
// feat/feature-plp.h
// Copyright 2009-2011 Petr Motlicek; Karel Vesely
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_PLP_H_
#define KALDI_FEAT_FEATURE_PLP_H_
#include <map>
#include <string>
#include "feat/feature-common.h"
#include "feat/feature-functions.h"
#include "feat/feature-window.h"
#include "feat/mel-computations.h"
#include "itf/options-itf.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// Options for PLP feature computation.
/// Only settings that can be applied frame by frame without state live here:
/// energy max-normalization and delta computation are not covered.
struct PlpOptions {
  // Framing / windowing options.
  FrameExtractionOptions frame_opts;
  // Mel filterbank options.
  MelBanksOptions mel_opts;
  // Order of the LPC analysis.
  int32 lpc_order;
  // Number of cepstra, counting the zeroth one.
  int32 num_ceps;
  // If true the zeroth feature is the energy, otherwise it is C0.
  bool use_energy;
  // Absolute (not relative) floor on the energy.
  BaseFloat energy_floor;
  // If true, compute energy before preemphasis and windowing.
  bool raw_energy;
  // Exponent applied to the mel energies.
  BaseFloat compress_factor;
  // Liftering constant; PlpComputer::Compute skips liftering when it is 0.
  int32 cepstral_lifter;
  // Overall scale on the features; no scaling is done when it is 1.0.
  BaseFloat cepstral_scale;
  // If true, the energy/C0 term is moved from the first to the last position.
  // NOTE(review): an earlier comment here also promised a sqrt(2) factor on
  // C0; PlpComputer::Compute only performs the reordering -- confirm whether
  // any such scaling is applied elsewhere.
  bool htk_compat;

  // Defaults. The mel-bank count of 23 seems to be common for 16kHz-sampled
  // data; for 8kHz-sampled data, 15 may be better.
  PlpOptions()
      : mel_opts(23),
        lpc_order(12),
        num_ceps(13),
        use_energy(true),
        energy_floor(0.0),
        raw_energy(true),
        compress_factor(0.33333),
        cepstral_lifter(22),
        cepstral_scale(1.0),
        htk_compat(false) {}

  // Registers the nested frame and mel options first, then every PLP-specific
  // option, with the given options interface.
  void Register(OptionsItf *opts) {
    frame_opts.Register(opts);
    mel_opts.Register(opts);

    opts->Register(
        "lpc-order", &lpc_order,
        "Order of LPC analysis in PLP computation");
    opts->Register(
        "num-ceps", &num_ceps,
        "Number of cepstra in PLP computation (including C0)");
    opts->Register(
        "use-energy", &use_energy,
        "Use energy (not C0) for zeroth PLP feature");
    opts->Register(
        "energy-floor", &energy_floor,
        "Floor on energy (absolute, not relative) in PLP computation. "
        "Only makes a difference if --use-energy=true; only necessary if "
        "--dither=0.0. Suggested values: 0.1 or 1.0");
    opts->Register(
        "raw-energy", &raw_energy,
        "If true, compute energy before preemphasis and windowing");
    opts->Register(
        "compress-factor", &compress_factor,
        "Compression factor in PLP computation");
    opts->Register(
        "cepstral-lifter", &cepstral_lifter,
        "Constant that controls scaling of PLPs");
    opts->Register(
        "cepstral-scale", &cepstral_scale,
        "Scaling constant in PLP computation");
    opts->Register(
        "htk-compat", &htk_compat,
        "If true, put energy or C0 last.  Warning: not sufficient "
        "to get HTK compatible features (need to change other "
        "parameters).");
  }
};
/// This is the new-style interface to the PLP computation.
/// A PlpComputer holds the options plus per-object caches and work buffers,
/// and turns one frame of signal into one PLP feature vector via Compute().
class PlpComputer {
public:
// Options type associated with this computer.
typedef PlpOptions Options;
explicit PlpComputer(const PlpOptions &opts);
PlpComputer(const PlpComputer &other);
// Frame-extraction options taken from the stored PlpOptions.
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
// Feature dimension: the configured number of cepstra.
int32 Dim() const { return opts_.num_ceps; }
// True only when both use_energy and raw_energy are set.
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
/**
Function that computes one frame of features from
one frame of signal.
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
prior to windowing and pre-emphasis, or
log(numeric_limits<float>::min()), whichever is greater. The value
passed in only reaches the output when this->NeedRawLogEnergy()
is true.
@param [in] vtln_warp The VTLN warping factor that the user wants
to be applied when computing features for this utterance. Will
normally be 1.0, meaning no warping is to be done. The value will
be ignored for feature types that don't support VTLN, such as
spectrogram features.
@param [in] signal_frame One frame of the signal,
as extracted using the function ExtractWindow() using the options
returned by this->GetFrameOptions(). The function will use the
vector as a workspace, which is why it's a non-const pointer.
@param [out] feature Pointer to a vector of size this->Dim(), to which
the computed feature will be written.
*/
void Compute(BaseFloat signal_raw_log_energy,
BaseFloat vtln_warp,
VectorBase<BaseFloat> *signal_frame,
VectorBase<BaseFloat> *feature);
~PlpComputer();
private:
// Return the mel banks / equal-loudness vector for a warp factor, creating
// and caching them on first use.
const MelBanks *GetMelBanks(BaseFloat vtln_warp);
const Vector<BaseFloat> *GetEqualLoudness(BaseFloat vtln_warp);
PlpOptions opts_;
// Multiplied element-wise into the feature when cepstral_lifter is non-zero.
Vector<BaseFloat> lifter_coeffs_;
// Matrix applied to mel_energies_duplicated_ to obtain autocorr_coeffs_.
Matrix<BaseFloat> idft_bases_;
// Floor applied to the log-energy in Compute() when energy_floor > 0.
BaseFloat log_energy_floor_;
std::map<BaseFloat, MelBanks*> mel_banks_; // BaseFloat is VTLN coefficient.
// Equal-loudness vectors, keyed by VTLN warp factor like mel_banks_.
std::map<BaseFloat, Vector<BaseFloat>* > equal_loudness_;
// When non-NULL, Compute() uses this for the FFT; otherwise it calls RealFft().
SplitRadixRealFft<BaseFloat> *srfft_;
// temporary vector used inside Compute; size is opts_.mel_opts.num_bins + 2
Vector<BaseFloat> mel_energies_duplicated_;
// temporary vector used inside Compute; size is opts_.lpc_order + 1
Vector<BaseFloat> autocorr_coeffs_;
// temporary vector used inside Compute; size is opts_.lpc_order
Vector<BaseFloat> lpc_coeffs_;
// temporary vector used inside Compute; size is opts_.lpc_order
Vector<BaseFloat> raw_cepstrum_;
// Disallow assignment.
PlpComputer &operator =(const PlpComputer &other);
};
// Plp is the OfflineFeatureTpl template instantiated with PlpComputer.
typedef OfflineFeatureTpl<PlpComputer> Plp;
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_PLP_H_
// feat/feature-spectrogram.cc
// Copyright 2009-2012 Karel Vesely
// Copyright 2012 Navdeep Jaitly
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/feature-spectrogram.h"
namespace kaldi {
// Builds a spectrogram computer from `opts`.
// Precomputes the log of the energy floor when a positive floor is configured
// and allocates a split-radix FFT object when the padded window size is a
// power of two (otherwise srfft_ stays NULL and Compute() falls back to
// RealFft()).
SpectrogramComputer::SpectrogramComputer(const SpectrogramOptions &opts)
    : opts_(opts),
      // Always give the floor a defined value: the copy constructor reads
      // log_energy_floor_ unconditionally, so leaving it uninitialized when
      // the floor is disabled would copy an indeterminate value. The value is
      // never used by Compute() unless opts_.energy_floor > 0.0.
      log_energy_floor_(0.0),
      srfft_(NULL) {
  if (opts.energy_floor > 0.0)
    log_energy_floor_ = Log(opts.energy_floor);

  const int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
  // x & (x - 1) clears the lowest set bit, so the result is zero exactly when
  // at most one bit is set, i.e. the size is a power of two.
  if ((padded_window_size & (padded_window_size - 1)) == 0)
    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
}
// Copy constructor: copies the options and the stored log-energy floor, and
// gives the new object its own copy of the source's split-radix FFT object
// when the source has one (srfft_ is NULL otherwise).
SpectrogramComputer::SpectrogramComputer(const SpectrogramComputer &other)
    : opts_(other.opts_),
      log_energy_floor_(other.log_energy_floor_),
      srfft_(other.srfft_ == NULL
                 ? NULL
                 : new SplitRadixRealFft<BaseFloat>(*other.srfft_)) {}
// Releases the split-radix FFT object allocated by the constructors.
// srfft_ may be NULL (no FFT object was created); delete on NULL is a no-op.
SpectrogramComputer::~SpectrogramComputer() {
delete srfft_;
}
// Computes one frame of log power-spectrum features.
//   signal_raw_log_energy  log-energy supplied by the caller; replaced by the
//                          energy of `signal_frame` when raw_energy is off.
//   vtln_warp              not used by this function.
//   signal_frame           input frame; overwritten (FFT, power spectrum and
//                          log are all computed in place).
//   feature                output vector of dimension this->Dim().
void SpectrogramComputer::Compute(BaseFloat signal_raw_log_energy,
                                  BaseFloat vtln_warp,
                                  VectorBase<BaseFloat> *signal_frame,
                                  VectorBase<BaseFloat> *feature) {
  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
               feature->Dim() == this->Dim());

  // Floor used both for the frame energy and for the power spectrum.
  const BaseFloat tiny = std::numeric_limits<float>::epsilon();

  // Energy of the frame as passed in (not the raw one). This must be measured
  // before the FFT below overwrites signal_frame.
  if (!opts_.raw_energy) {
    const BaseFloat frame_energy = VecVec(*signal_frame, *signal_frame);
    signal_raw_log_energy = Log(std::max<BaseFloat>(frame_energy, tiny));
  }

  if (srfft_ != NULL) {
    // Split-radix FFT; only available for power-of-two sizes.
    srfft_->Compute(signal_frame->Data(), true);
  } else {
    // General algorithm that also handles non-powers-of-two.
    RealFft(signal_frame, true);
  }

  // Turn the FFT into a power spectrum, then take the floored log of it.
  ComputePowerSpectrum(signal_frame);
  SubVector<BaseFloat> log_power(*signal_frame,
                                 0, signal_frame->Dim() / 2 + 1);
  log_power.ApplyFloor(tiny);
  log_power.ApplyLog();
  feature->CopyFromVec(log_power);

  // Apply the configured energy floor, if any.
  if (opts_.energy_floor > 0.0) {
    signal_raw_log_energy =
        std::max<BaseFloat>(signal_raw_log_energy, log_energy_floor_);
  }

  // Element 0 always carries the signal energy rather than the square of the
  // constant component of the signal.
  (*feature)(0) = signal_raw_log_energy;
}
} // namespace kaldi
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
# kaldi-util library target, built from the sources listed below.
# No STATIC/SHARED keyword is given, so the library type follows
# BUILD_SHARED_LIBS.
add_library(kaldi-util)

target_sources(kaldi-util
  PRIVATE
    kaldi-holder.cc
    kaldi-io.cc
    kaldi-semaphore.cc
    kaldi-table.cc
    kaldi-thread.cc
    parse-options.cc
    simple-io-funcs.cc
    simple-options.cc
    text-utils.cc
)

# PUBLIC: anything linking kaldi-util also links kaldi-base and kaldi-matrix.
target_link_libraries(kaldi-util
  PUBLIC
    kaldi-base
    kaldi-matrix
)
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册