add tlg decoder

ad8ec177 · Yang Zhou · b5315657 · ad8ec177 · ad8ec177 · ad8ec177
41 changed file
--- a/speechx/examples/decoder/CMakeLists.txt
+++ b/speechx/examples/decoder/CMakeLists.txt
@@ -3,3 +3,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
 add_executable(offline_decoder_main ${CMAKE_CURRENT_SOURCE_DIR}/offline_decoder_main.cc)
 target_include_directories(offline_decoder_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
 target_link_libraries(offline_decoder_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util ${DEPS})
+add_executable(offline_wfst_decoder_main ${CMAKE_CURRENT_SOURCE_DIR}/offline_wfst_decoder_main.cc)
+target_include_directories(offline_wfst_decoder_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
+target_link_libraries(offline_wfst_decoder_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util kaldi-decoder ${DEPS})
--- a/speechx/examples/decoder/offline_wfst_decoder_main.cc
+++ b/speechx/examples/decoder/offline_wfst_decoder_main.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// TODO: refactor, replace with gtest
+#include "base/flags.h"
+#include "base/log.h"
+#include "decoder/ctc_tlg_decoder.h"
+#include "frontend/raw_audio.h"
+#include "kaldi/util/table-types.h"
+#include "nnet/decodable.h"
+#include "nnet/paddle_nnet.h"
+// Command-line flags (gflags) consumed by main() below.
+// Rspecifier handed to kaldi::SequentialBaseFloatMatrixReader.
+DEFINE_string(feature_respecifier, "", "test feature rspecifier");
+// Copied into ppspeech::ModelOptions::model_path.
+DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
+// Copied into ppspeech::ModelOptions::params_path.
+DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param");
+// Copied into ppspeech::TLGDecoderOptions::word_symbol_table.
+DEFINE_string(word_symbol_table, "vocab.txt", "word symbol table");
+// Copied into ppspeech::TLGDecoderOptions::fst_path.
+DEFINE_string(graph_path, "TLG", "decoder graph");
+using kaldi::BaseFloat;
+using kaldi::Matrix;
+using std::vector;
+// Decodes every utterance of the feature archive with the TLG (WFST) decoder
+// and logs the best path found for each one.
+//
+// Features are fed to the network in fixed-size chunks of `chunk_size` rows.
+// Utterances with fewer rows than one chunk cannot be fed at all; they are
+// reported and counted in `num_err` instead of being logged as an (empty)
+// successful result.
+//
+// Returns 0 if at least one utterance was decoded, 1 otherwise.
+int main(int argc, char* argv[]) {
+    gflags::ParseCommandLineFlags(&argc, &argv, false);
+    google::InitGoogleLogging(argv[0]);
+
+    kaldi::SequentialBaseFloatMatrixReader feature_reader(
+        FLAGS_feature_respecifier);
+
+    int32 num_done = 0, num_err = 0;
+
+    // Decoder: decoding graph + word symbol table.
+    ppspeech::TLGDecoderOptions opts;
+    opts.word_symbol_table = FLAGS_word_symbol_table;
+    opts.fst_path = FLAGS_graph_path;
+    ppspeech::TLGDecoder decoder(opts);
+
+    // Acoustic model and the decodable that pulls features from `raw_data`.
+    ppspeech::ModelOptions model_opts;
+    model_opts.model_path = FLAGS_model_path;
+    model_opts.params_path = FLAGS_param_path;
+    auto nnet = std::make_shared<ppspeech::PaddleNnet>(model_opts);
+    auto raw_data = std::make_shared<ppspeech::RawDataCache>();
+    auto decodable = std::make_shared<ppspeech::Decodable>(nnet, raw_data);
+
+    const int32 chunk_size = 35;
+    decoder.InitDecoder();
+
+    for (; !feature_reader.Done(); feature_reader.Next()) {
+        const std::string utt = feature_reader.Key();
+        // Bind by reference instead of deep-copying the whole matrix; the
+        // reader is not advanced until the end of this iteration.
+        const kaldi::Matrix<BaseFloat>& feature = feature_reader.Value();
+
+        // NOTE(review): integer division - the trailing
+        // (NumRows() % chunk_size) rows are never fed to the decoder.
+        const int32 num_chunks = feature.NumRows() / chunk_size;
+        if (num_chunks == 0) {
+            // Nothing would be accepted and SetFinished() would never be
+            // called, so there is no meaningful result for this utterance.
+            KALDI_LOG << " skip " << utt << ": " << feature.NumRows()
+                      << " frames is fewer than one chunk of " << chunk_size;
+            ++num_err;
+            continue;
+        }
+
+        const int32 feat_dim = feature.NumCols();
+        raw_data->SetDim(feat_dim);
+
+        int32 row_idx = 0;
+        for (int32 chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
+            // Flatten `chunk_size` consecutive rows into one contiguous
+            // vector, row after row.
+            kaldi::Vector<kaldi::BaseFloat> feature_chunk(chunk_size *
+                                                          feat_dim);
+            for (int32 row_id = 0; row_id < chunk_size; ++row_id, ++row_idx) {
+                kaldi::SubVector<kaldi::BaseFloat> src_row(feature, row_idx);
+                kaldi::SubVector<kaldi::BaseFloat> dst_row(
+                    feature_chunk.Data() + row_id * feat_dim, feat_dim);
+                dst_row.CopyFromVec(src_row);
+            }
+            raw_data->Accept(feature_chunk);
+            // Signal end of input before decoding the final chunk.
+            if (chunk_idx == num_chunks - 1) {
+                raw_data->SetFinished();
+            }
+            decoder.AdvanceDecode(decodable);
+        }
+
+        const std::string result = decoder.GetFinalBestPath();
+        KALDI_LOG << " the result of " << utt << " is " << result;
+
+        // Clear per-utterance state before the next one.
+        decodable->Reset();
+        decoder.Reset();
+        ++num_done;
+    }
+
+    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
+              << " with errors.";
+    return (num_done != 0 ? 0 : 1);
+}
--- a/speechx/speechx/decoder/CMakeLists.txt
+++ b/speechx/speechx/decoder/CMakeLists.txt
@@ -6,5 +6,6 @@ add_library(decoder STATIC
  ctc_decoders/decoder_utils.cpp
  ctc_decoders/path_trie.cpp
  ctc_decoders/scorer.cpp
+  ctc_tlg_decoder.cc
 )
 target_link_libraries(decoder PUBLIC kenlm utils fst)
\ No newline at end of file
--- a/speechx/speechx/decoder/ctc_beam_search_decoder.h
+++ b/speechx/speechx/decoder/ctc_beam_search_decoder.h
@@ -15,7 +15,7 @@
 #include "base/common.h"
 #include "decoder/ctc_decoders/path_trie.h"
 #include "decoder/ctc_decoders/scorer.h"
-#include "nnet/decodable-itf.h"
+#include "kaldi/decoder/decodable-itf.h"
 #include "util/parse-options.h"
 #pragma once

--- a/speechx/speechx/decoder/ctc_tlg_decoder.cc
+++ b/speechx/speechx/decoder/ctc_tlg_decoder.cc
+#include "decoder/ctc_tlg_decoder.h"
+namespace ppspeech {
+// Loads the decoding graph and the word symbol table named in `opts`, builds
+// the lattice decoder on top of the graph and starts a decoding session.
+// Aborts via CHECK if either file cannot be read.
+TLGDecoder::TLGDecoder(TLGDecoderOptions opts) : num_frame_decoded_(0) {
+    fst_.reset(fst::Fst<fst::StdArc>::Read(opts.fst_path));
+    CHECK(fst_ != nullptr);
+    word_symbol_table_.reset(
+        fst::SymbolTable::ReadText(opts.word_symbol_table));
+    // GetFinalBestPath() dereferences the table unconditionally, so fail here
+    // rather than crash after decoding.
+    CHECK(word_symbol_table_ != nullptr);
+    decoder_.reset(new kaldi::LatticeFasterOnlineDecoder(*fst_, opts.opts));
+    decoder_->InitDecoding();
+}
+// Starts a fresh decoding session: re-initializes the underlying lattice
+// decoder and rewinds this wrapper's own frame counter so the two agree.
+void TLGDecoder::InitDecoder() {
+    decoder_->InitDecoding();
+    num_frame_decoded_ = 0;
+}
+// Keeps advancing the decoder one step at a time until `decodable` reports
+// that the current frame count is the last frame. At least one step is always
+// attempted before the check.
+void TLGDecoder::AdvanceDecode(const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
+    kaldi::DecodableInterface* const source = decodable.get();
+    do {
+        AdvanceDecoding(source);
+    } while (!decodable->IsLastFrame(num_frame_decoded_));
+}
+// Asks the lattice decoder to advance with a frame limit of one, then bumps
+// the wrapper's frame counter.
+void TLGDecoder::AdvanceDecoding(kaldi::DecodableInterface* decodable) {
+    // skip blank frame?
+    const int32 max_frames_per_call = 1;
+    decoder_->AdvanceDecoding(decodable, max_frames_per_call);
+    ++num_frame_decoded_;
+}
+// Prepares the decoder for the next utterance.
+// The frame counter must be rewound together with the lattice decoder;
+// otherwise AdvanceDecode() would query IsLastFrame() with the frame count
+// left over from the previous utterance.
+void TLGDecoder::Reset() {
+    decoder_->InitDecoding();
+    num_frame_decoded_ = 0;
+}
+// Finalizes decoding and returns the best path as text.
+//
+// The best path is extracted as a linear lattice; its output labels are looked
+// up in the word symbol table and concatenated without any separator.
+std::string TLGDecoder::GetFinalBestPath() {
+    decoder_->FinalizeDecoding();
+
+    kaldi::Lattice lat;
+    decoder_->GetBestPath(&lat, true);
+
+    kaldi::LatticeWeight weight;
+    std::vector<int> alignment;
+    std::vector<int> words_id;
+    fst::GetLinearSymbolSequence(lat, &alignment, &words_id, &weight);
+
+    std::string words;
+    // Range-for avoids the signed/unsigned index comparison and the
+    // per-word temporary string.
+    for (const int word_id : words_id) {
+        words += word_symbol_table_->Find(word_id);
+    }
+    return words;
+}
+}
\ No newline at end of file
--- a/speechx/speechx/decoder/ctc_tlg_decoder.h
+++ b/speechx/speechx/decoder/ctc_tlg_decoder.h
+#pragma once
+#include "kaldi/decoder/lattice-faster-online-decoder.h"
+#include "kaldi/decoder/decodable-itf.h"
+#include "util/parse-options.h"
+#include "base/basic_types.h"
+namespace ppspeech {
+// Configuration bundle for TLGDecoder.
+struct TLGDecoderOptions {
+    // Options handed to the Kaldi lattice decoder.
+    kaldi::LatticeFasterDecoderConfig opts;
+    // todo remove later, add into decode resource
+    // Path of the word symbol table (read in text form by TLGDecoder).
+    std::string word_symbol_table;
+    // Path of the decoding graph FST.
+    std::string fst_path;
+
+    // Both paths start out empty.
+    TLGDecoderOptions() : word_symbol_table(), fst_path() {}
+};
+// Wrapper around kaldi::LatticeFasterOnlineDecoder that owns the decoding
+// graph and the word symbol table, and keeps its own count of decoded frames.
+class TLGDecoder {
+  public:
+    // Loads the graph and symbol table named in `opts` and builds the decoder.
+    explicit TLGDecoder(TLGDecoderOptions opts);
+    // (Re)starts a decoding session on the underlying lattice decoder.
+    void InitDecoder();
+    // NOTE(review): Decode, GetBestPath, GetNBestPath, NumFrameDecoded and
+    // DecodeLikelihoods are declared here, but no definition is visible in
+    // ctc_tlg_decoder.cc in this change; calling them would presumably fail
+    // at link time - confirm before use.
+    void Decode();
+    std::string GetBestPath();
+    std::vector<std::pair<double, std::string>> GetNBestPath();
+    // Finalizes decoding and returns the best path as concatenated words.
+    std::string GetFinalBestPath();
+    int NumFrameDecoded();
+    int DecodeLikelihoods(const std::vector<std::vector<BaseFloat>>& probs,
+                          std::vector<std::string>& nbest_words);
+    // Advances decoding until `decodable` reports the last frame.
+    void AdvanceDecode(
+        const std::shared_ptr<kaldi::DecodableInterface>& decodable);
+    // Re-initializes the underlying decoder for the next utterance.
+    void Reset();
+
+  private:
+    // Advances the underlying decoder with a frame limit of one.
+    void AdvanceDecoding(kaldi::DecodableInterface* decodable);
+
+    std::shared_ptr<kaldi::LatticeFasterOnlineDecoder> decoder_;
+    std::shared_ptr<fst::Fst<fst::StdArc>> fst_;
+    std::shared_ptr<fst::SymbolTable> word_symbol_table_;
+    // Frames pushed through AdvanceDecoding() so far. Initialized here so the
+    // counter is never read while indeterminate.
+    int32 num_frame_decoded_ = 0;
+};
+}  // namespace ppspeech
\ No newline at end of file
--- a/speechx/speechx/kaldi/CMakeLists.txt
+++ b/speechx/speechx/kaldi/CMakeLists.txt
@@ -4,3 +4,6 @@ add_subdirectory(base)
 add_subdirectory(util)
 add_subdirectory(feat)
 add_subdirectory(matrix)
+add_subdirectory(lat)
+add_subdirectory(fstext)
+add_subdirectory(decoder)
--- a/speechx/speechx/kaldi/decoder/CMakeLists.txt
+++ b/speechx/speechx/kaldi/decoder/CMakeLists.txt
+# Kaldi lattice-faster decoders (plain and online variants).
+add_library(kaldi-decoder
+lattice-faster-decoder.cc
+lattice-faster-online-decoder.cc
+)
+# kaldi-lat is linked PUBLIC, so targets linking kaldi-decoder inherit it.
+target_link_libraries(kaldi-decoder PUBLIC kaldi-lat)
--- a/speechx/speechx/nnet/decodable-itf.h
+++ b/speechx/speechx/nnet/decodable-itf.h
@@ -121,7 +121,7 @@ class DecodableInterface {
    /// decoding-from-matrix setting where we want to allow the last delta or
    /// LDA
    /// features to be flushed out for compatibility with the baseline setup.
-    virtual bool IsLastFrame(int32 frame) const = 0;
+    virtual bool IsLastFrame(int32 frame) = 0;
    /// The call NumFramesReady() will return the number of frames currently
    /// available

--- a/speechx/speechx/kaldi/decoder/lattice-faster-decoder.cc
+++ b/speechx/speechx/kaldi/decoder/lattice-faster-decoder.cc
@@ -1007,14 +1007,10 @@ template class LatticeFasterDecoderTpl<fst::Fst<fst::StdArc>, decoder::StdToken>
 template class LatticeFasterDecoderTpl<fst::VectorFst<fst::StdArc>, decoder::StdToken >;
 template class LatticeFasterDecoderTpl<fst::ConstFst<fst::StdArc>, decoder::StdToken >;
-template class LatticeFasterDecoderTpl<fst::ConstGrammarFst, decoder::StdToken>;
-template class LatticeFasterDecoderTpl<fst::VectorGrammarFst, decoder::StdToken>;
 template class LatticeFasterDecoderTpl<fst::Fst<fst::StdArc> , decoder::BackpointerToken>;
 template class LatticeFasterDecoderTpl<fst::VectorFst<fst::StdArc>, decoder::BackpointerToken >;
 template class LatticeFasterDecoderTpl<fst::ConstFst<fst::StdArc>, decoder::BackpointerToken >;
-template class LatticeFasterDecoderTpl<fst::ConstGrammarFst, decoder::BackpointerToken>;
-template class LatticeFasterDecoderTpl<fst::VectorGrammarFst, decoder::BackpointerToken>;
 } // end namespace kaldi.
--- a/speechx/speechx/kaldi/decoder/lattice-faster-decoder.h
+++ b/speechx/speechx/kaldi/decoder/lattice-faster-decoder.h
@@ -23,11 +23,10 @@
 #ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_
 #define KALDI_DECODER_LATTICE_FASTER_DECODER_H_
-#include "decoder/grammar-fst.h"
 #include "fst/fstlib.h"
 #include "fst/memory.h"
 #include "fstext/fstext-lib.h"
-#include "itf/decodable-itf.h"
+#include "decoder/decodable-itf.h"
 #include "lat/determinize-lattice-pruned.h"
 #include "lat/kaldi-lattice.h"
 #include "util/hash-list.h"

--- a/speechx/speechx/kaldi/decoder/lattice-faster-online-decoder.cc
+++ b/speechx/speechx/kaldi/decoder/lattice-faster-online-decoder.cc
@@ -278,8 +278,8 @@ bool LatticeFasterOnlineDecoderTpl<FST>::GetRawLatticePruned(
 template class LatticeFasterOnlineDecoderTpl<fst::Fst<fst::StdArc> >;
 template class LatticeFasterOnlineDecoderTpl<fst::VectorFst<fst::StdArc> >;
 template class LatticeFasterOnlineDecoderTpl<fst::ConstFst<fst::StdArc> >;
-template class LatticeFasterOnlineDecoderTpl<fst::ConstGrammarFst >;
+//template class LatticeFasterOnlineDecoderTpl<fst::ConstGrammarFst >;
-template class LatticeFasterOnlineDecoderTpl<fst::VectorGrammarFst >;
+//template class LatticeFasterOnlineDecoderTpl<fst::VectorGrammarFst >;
 } // end namespace kaldi.
--- a/speechx/speechx/kaldi/decoder/lattice-faster-online-decoder.h
+++ b/speechx/speechx/kaldi/decoder/lattice-faster-online-decoder.h
@@ -30,7 +30,7 @@
 #include "util/stl-utils.h"
 #include "util/hash-list.h"
 #include "fst/fstlib.h"
-#include "itf/decodable-itf.h"
+#include "decoder/decodable-itf.h"
 #include "fstext/fstext-lib.h"
 #include "lat/determinize-lattice-pruned.h"
 #include "lat/kaldi-lattice.h"

--- a/speechx/speechx/kaldi/fstext/CMakeLists.txt
+++ b/speechx/speechx/kaldi/fstext/CMakeLists.txt
+# Kaldi fstext library; kaldi-fst-io.cc is its only compiled source here.
+add_library(kaldi-fstext
+kaldi-fst-io.cc
+)
+# kaldi-util is linked PUBLIC, so targets linking kaldi-fstext inherit it.
+target_link_libraries(kaldi-fstext PUBLIC kaldi-util)
--- a/speechx/speechx/kaldi/fstext/determinize-lattice-inl.h
+++ b/speechx/speechx/kaldi/fstext/determinize-lattice-inl.h
--- a/speechx/speechx/kaldi/fstext/determinize-lattice.h
+++ b/speechx/speechx/kaldi/fstext/determinize-lattice.h
+// fstext/determinize-lattice.h
+// Copyright 2009-2011  Microsoft Corporation
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_H_
+#define KALDI_FSTEXT_DETERMINIZE_LATTICE_H_
+#include <fst/fst-decl.h>
+#include <fst/fstlib.h>
+#include <algorithm>
+#include <map>
+#include <set>
+#include <vector>
+#include "fstext/lattice-weight.h"
+namespace fst {
+/// \addtogroup fst_extensions
+///  @{
+// For example of usage, see test-determinize-lattice.cc
+/*
+   DeterminizeLattice implements a special form of determinization
+   with epsilon removal, optimized for a phase of lattice generation.
+   Its input is an FST with weight-type BaseWeightType (usually a pair of
+   floats, with a lexicographical type of order, such as
+   LatticeWeightTpl<float>). Typically this would be a state-level lattice, with
+   input symbols equal to words, and output-symbols equal to p.d.f's (so like
+   the inverse of HCLG).  Imagine representing this as an acceptor of type
+   CompactLatticeWeightTpl<float>, in which the input/output symbols are words,
+   and the weights contain the original weights together with strings (with zero
+   or one symbol in them) containing the original output labels (the p.d.f.'s).
+   We determinize this using acceptor determinization with epsilon removal.
+   Remember (from lattice-weight.h) that CompactLatticeWeightTpl has a special
+   kind of semiring where we always take the string corresponding to the best
+   cost (of type BaseWeightType), and discard the other.  This corresponds to
+   taking the best output-label sequence (of p.d.f.'s) for each input-label
+   sequence (of words).  We couldn't use the Gallic weight for this, or it would
+   die as soon as it detected that the input FST was non-functional.  In our
+   case, any acyclic FST (and many cyclic ones) can be determinized. We assume
+   that there is a function Compare(const BaseWeightType &a, const
+   BaseWeightType &b) that returns (-1, 0, 1) according to whether (a < b, a ==
+   b, a > b) in the total order on the BaseWeightType... this information should
+   be the same as NaturalLess would give, but it's more efficient to do it this
+   way. You can define this for things like TropicalWeight if you need to
+   instantiate this class for that weight type.
+   We implement this determinization in a special way to make it efficient for
+   the types of FSTs that we will apply it to.  One issue is that if we
+   explicitly represent the strings (in CompactLatticeWeightTpl) as vectors of
+   type vector<IntType>, the algorithm takes time quadratic in the length of
+   words (in states), because propagating each arc involves copying a whole
+   vector (of integers representing p.d.f.'s).  Instead we use a hash structure
+   where each string is a pointer (Entry*), and uses a hash from (Entry*,
+   IntType), to the successor string (and a way to get the latest IntType and
+   the ancestor Entry*).  [this is the class LatticeStringRepository].
+   Another issue is that rather than representing a determinized-state as a
+   collection of (state, weight), we represent it in a couple of reduced forms.
+   Suppose a determinized-state is a collection of (state, weight) pairs; call
+   this the "canonical representation".  Note: these collections are always
+   normalized to remove any common weight and string part.  Define end-states as
+   the subset of states that have an arc out of them with a label on, or are
+   final.  If we represent a determinized-state as the set of just its
+   (end-state, weight) pairs, this will be a valid and more compact
+   representation, and will lead to a smaller set of determinized states (like
+   early minimization).  Call this collection of (end-state, weight) pairs the
+   "minimal representation".  As a mechanism to reduce compute, we can also
+   consider another representation. In the determinization algorithm, we start
+   off with a set of (begin-state, weight) pairs (where the "begin-states" are
+   initial or have a label on the transition into them), and the "canonical
+   representation" consists of the epsilon-closure of this set (i.e. follow
+   epsilons).  Call this set of (begin-state, weight) pairs, appropriately
+   normalized, the "initial representation".  If two initial representations are
+   the same, the "canonical representation" and hence the "minimal
+   representation" will be the same.  We can use this to reduce compute.  Note
+   that if two initial representations are different, this does not preclude the
+   other representations from being the same.
+*/
+/// Tuning knobs for DeterminizeLattice.
+struct DeterminizeLatticeOptions {
+  /// A small offset used to measure equality of weights.
+  float delta;
+  /// If >0, determinization will fail and return false when the algorithm's
+  /// (approximate) memory consumption crosses this threshold.
+  int max_mem;
+  /// If >0, can be used to detect non-determinizable input (a case that
+  /// wouldn't be caught by max_mem).
+  int max_loop;
+
+  /// Defaults: delta = kDelta, both limits disabled (-1).
+  DeterminizeLatticeOptions()
+      : delta(kDelta),
+        max_mem(-1),
+        max_loop(-1) {}
+};
+/**
+    This function implements the normal version of DeterminizeLattice, in which
+    the output strings are represented using sequences of arcs, where all but
+    the first one has an epsilon on the input side.  The debug_ptr argument is
+    an optional pointer to a bool that, if it becomes true while the algorithm
+    is executing, the algorithm will print a traceback and terminate (used in
+    fstdeterminizestar.cc to debug non-terminating determinization).  More
+    efficient if ifst is arc-sorted on input label.  If the number of arcs gets
+    more than max_states, it will throw std::runtime_error (otherwise this code
+    does not use exceptions).  This is mainly useful for debug.
+
+    NOTE(review): no max_states parameter appears in this declaration; the
+    limits visible here are opts.max_mem and opts.max_loop.  Confirm which
+    limit the sentence above refers to.  */
+template <class Weight, class IntType>
+bool DeterminizeLattice(
+    const Fst<ArcTpl<Weight> > &ifst, MutableFst<ArcTpl<Weight> > *ofst,
+    DeterminizeLatticeOptions opts = DeterminizeLatticeOptions(),
+    bool *debug_ptr = NULL);
+/*  This is a version of DeterminizeLattice with a slightly more "natural"
+   output format, where the output sequences are encoded using the
+   CompactLatticeArcTpl template (i.e. the sequences of output symbols are
+   represented directly as strings).  More efficient if ifst is arc-sorted on
+   input label.  If the #arcs gets more than max_arcs, it will throw
+   std::runtime_error (otherwise this code does not use exceptions).  This is
+   mainly useful for debug.
+
+   NOTE(review): no max_arcs parameter appears in this declaration; the
+   limits visible here are opts.max_mem and opts.max_loop.  Confirm which
+   limit the sentence above refers to.
+*/
+template <class Weight, class IntType>
+bool DeterminizeLattice(
+    const Fst<ArcTpl<Weight> > &ifst,
+    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > > *ofst,
+    DeterminizeLatticeOptions opts = DeterminizeLatticeOptions(),
+    bool *debug_ptr = NULL);
+/// @} end "addtogroup fst_extensions"
+}  // end namespace fst
+#include "fstext/determinize-lattice-inl.h"
+#endif  // KALDI_FSTEXT_DETERMINIZE_LATTICE_H_
--- a/speechx/speechx/kaldi/fstext/determinize-star-inl.h
+++ b/speechx/speechx/kaldi/fstext/determinize-star-inl.h
--- a/speechx/speechx/kaldi/fstext/determinize-star.h
+++ b/speechx/speechx/kaldi/fstext/determinize-star.h
+// fstext/determinize-star.h
+// Copyright 2009-2011  Microsoft Corporation
+//                2014  Guoguo Chen
+//                2015  Hainan Xu
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_FSTEXT_DETERMINIZE_STAR_H_
+#define KALDI_FSTEXT_DETERMINIZE_STAR_H_
+#include <fst/fst-decl.h>
+#include <fst/fstlib.h>
+#include <algorithm>
+#include <map>
+#include <set>
+#include <stdexcept>  // this algorithm uses exceptions
+#include <vector>
+namespace fst {
+/// \addtogroup fst_extensions
+///  @{
+// For example of usage, see test-determinize-star.cc
+/*
+   DeterminizeStar implements determinization with epsilon removal, which we
+   distinguish with a star.
+   We define a determinized* FST as one in which no state has more than one
+   transition with the same input-label.  Epsilon input labels are not allowed
+   except starting from states that have exactly one arc exiting them (and are
+   not final).  [In the normal definition of determinized, epsilon-input labels
+   are not allowed at all, whereas in Mohri's definition, epsilons are treated
+   as ordinary symbols].  The determinized* definition is intended to simulate
+   the effect of allowing strings of output symbols at each state.
+   The algorithm implemented here takes an Fst<Arc>, and a pointer to a
+   MutableFst<Arc> where it puts its output.  The weight type is assumed to be a
+   float-weight.  It does epsilon removal and determinization.
+   This algorithm may fail if the input has epsilon cycles under
+   certain circumstances (i.e. the semiring is non-idempotent, e.g. the log
+   semiring, or there are negative cost epsilon cycles).
+   This implementation is much less fancy than the one in fst/determinize.h, and
+   does not have an "on-demand" version.
+   The algorithm is a fairly normal determinization algorithm.  We keep in
+   memory the subsets of states, together with their leftover strings and their
+   weights.  The only difference is we detect input epsilon transitions and
+   treat them "specially".
+*/
+// This algorithm will be slightly faster if you sort the input fst on input
+// label.
+/**
+    This function implements the normal version of DeterminizeStar, in which the
+    output strings are represented using sequences of arcs, where all but the
+    first one has an epsilon on the input side.  The debug_ptr argument is an
+    optional pointer to a bool that, if it becomes true while the algorithm is
+    executing, the algorithm will print a traceback and terminate (used in
+    fstdeterminizestar.cc to debug non-terminating determinization).
+    If max_states is positive, it will stop determinization and throw an
+    exception as soon as max_states is reached.  This can be useful in tests.
+    If allow_partial is true, the algorithm will output partial results when the
+    specified max_states is reached (when larger than zero), instead of throwing
+    an error.
+    Caution, the return status is unintuitive: this function will return false
+    if determinization completed normally, and true if it was stopped early by
+    reaching the max_states limit, and a partial FST was generated.
+*/
+template <class F>
+bool DeterminizeStar(F &ifst, MutableFst<typename F::Arc> *ofst,  // NOLINT
+                     float delta = kDelta, bool *debug_ptr = NULL,
+                     int max_states = -1, bool allow_partial = false);
+/*  This is a version of DeterminizeStar with a slightly more "natural" output
+   format, where the output sequences are encoded using the GallicArc (i.e. the
+   output symbols are strings).  If max_states is positive, it will stop
+   determinization and throw an exception as soon as max_states is reached.
+   This can be useful in tests.  If allow_partial is true, the algorithm will
+   output partial results when the specified max_states is reached (when larger
+   than zero), instead of throwing an error.
+   Caution, the return status is unintuitive: this function will return false
+   if determinization completed normally, and true if it was stopped early by
+   reaching the max_states limit, and a partial FST was generated.
+*/
+template <class F>
+bool DeterminizeStar(F &ifst,  // NOLINT
+                     MutableFst<GallicArc<typename F::Arc> > *ofst,
+                     float delta = kDelta, bool *debug_ptr = NULL,
+                     int max_states = -1, bool allow_partial = false);
+/// @} end "addtogroup fst_extensions"
+}  // end namespace fst
+#include "fstext/determinize-star-inl.h"
+#endif  // KALDI_FSTEXT_DETERMINIZE_STAR_H_
--- a/speechx/speechx/kaldi/fstext/fstext-lib.h
+++ b/speechx/speechx/kaldi/fstext/fstext-lib.h
+// fstext/fstext-lib.h
+// Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (author:
+// Daniel Povey)
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_FSTEXT_FSTEXT_LIB_H_
+#define KALDI_FSTEXT_FSTEXT_LIB_H_
+#include "fst/fstlib.h"
+#include "fstext/determinize-lattice.h"
+#include "fstext/determinize-star.h"
+#include "fstext/fstext-utils.h"
+#include "fstext/kaldi-fst-io.h"
+#include "fstext/lattice-utils.h"
+#include "fstext/lattice-weight.h"
+#include "fstext/pre-determinize.h"
+#include "fstext/table-matcher.h"
+#endif  // KALDI_FSTEXT_FSTEXT_LIB_H_
--- a/speechx/speechx/kaldi/fstext/fstext-utils-inl.h
+++ b/speechx/speechx/kaldi/fstext/fstext-utils-inl.h
--- a/speechx/speechx/kaldi/fstext/fstext-utils.h
+++ b/speechx/speechx/kaldi/fstext/fstext-utils.h
+// fstext/fstext-utils.h
+// Copyright 2009-2011  Microsoft Corporation
+//           2012-2013  Johns Hopkins University (Author: Daniel Povey)
+//                2013  Guoguo Chen
+//                2014  Telepoint Global Hosting Service, LLC. (Author: David
+//                Snyder)
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_FSTEXT_FSTEXT_UTILS_H_
+#define KALDI_FSTEXT_FSTEXT_UTILS_H_
+#include <fst/fst-decl.h>
+#include <fst/fstlib.h>
+#include <algorithm>
+#include <map>
+#include <set>
+#include <vector>
+#include "fstext/determinize-star.h"
+#include "fstext/remove-eps-local.h"
+#include "base/kaldi-common.h"  // for error reporting macros.
+#include "util/text-utils.h"  // for SplitStringToVector
+#include "fst/script/print-impl.h"
+namespace fst {
+/// Returns the highest numbered output symbol id of the FST (or zero
+/// for an empty FST).
+template <class Arc>
+typename Arc::Label HighestNumberedOutputSymbol(const Fst<Arc> &fst);
+/// Returns the highest numbered input symbol id of the FST (or zero
+/// for an empty FST).
+template <class Arc>
+typename Arc::Label HighestNumberedInputSymbol(const Fst<Arc> &fst);
+/// Returns the total number of arcs in an FST.
+template <class Arc>
+typename Arc::StateId NumArcs(const ExpandedFst<Arc> &fst);
+/// GetInputSymbols gets the list of symbols on the input of fst
+/// (including epsilon, if include_eps == true), as a sorted, unique
+/// list.
+template <class Arc, class I>
+void GetInputSymbols(const Fst<Arc> &fst, bool include_eps,
+                     std::vector<I> *symbols);
+/// GetOutputSymbols gets the list of symbols on the output of fst
+/// (including epsilon, if include_eps == true).
+template <class Arc, class I>
+void GetOutputSymbols(const Fst<Arc> &fst, bool include_eps,
+                      std::vector<I> *symbols);
+/// ClearSymbols sets all the symbols on the input and/or
+/// output side of the FST to zero, as specified.
+/// It does not alter the symbol tables.
+template <class Arc>
+void ClearSymbols(bool clear_input, bool clear_output, MutableFst<Arc> *fst);
+/// NOTE(review): undocumented in this header.  Presumably writes the symbol
+/// ids found in symtab to syms_out, with include_eps treated as in
+/// GetInputSymbols above - confirm against the definition.
+template <class I>
+void GetSymbols(const SymbolTable &symtab, bool include_eps,
+                std::vector<I> *syms_out);
+/// NOTE(review): undocumented in this header, and the definition is not
+/// visible here.  Judging by the name, presumably applies DeterminizeStar to
+/// fst in the log semiring, in place - confirm against the definition.
+inline void DeterminizeStarInLog(VectorFst<StdArc> *fst, float delta = kDelta,
+                                 bool *debug_ptr = NULL, int max_states = -1);
+/// Pushes `fst` in the log semiring, in place.
+///
+/// The FST is cast to LogArc, pushed with Push<LogArc, rtype>, and the result
+/// is cast back into `*fst`; nothing is returned.
+/// e.g. of using this function: PushInLog<REWEIGHT_TO_INITIAL>(fst,
+/// kPushWeights|kPushLabels);
+///
+/// rtype: REWEIGHT_TO_INITIAL or REWEIGHT_TO_FINAL.
+/// fst:   FST to push; overwritten with the pushed result.
+/// ptype: push flags, forwarded to Push.
+/// delta: forwarded to Push.
+template <ReweightType rtype>  // == REWEIGHT_TO_{INITIAL, FINAL}
+void PushInLog(VectorFst<StdArc> *fst, uint32 ptype, float delta = kDelta) {
+  // Automatic storage instead of new/delete, so nothing leaks if Cast or Push
+  // throws.
+  VectorFst<LogArc> fst_log;  // Want to push in log semiring.
+  Cast(*fst, &fst_log);
+  VectorFst<StdArc> tmp;
+  *fst = tmp;  // free up memory.
+  VectorFst<LogArc> fst_pushed_log;
+  Push<LogArc, rtype>(fst_log, &fst_pushed_log, ptype, delta);
+  Cast(fst_pushed_log, fst);
+}
+// Minimizes after encoding; applicable to all FSTs.  It is like what you get
+// from the Minimize() function, except it will not push the weights, or the
+// symbols.  This is better for our recipes, as we avoid ever pushing the
+// weights.  However, it will only minimize optimally if your graphs are such
+// that the symbols are as far to the left as they can go, and the weights
+// in combinable paths are the same... hard to formalize this, but it's
+// something that is satisified by our normal FSTs.
+template <class Arc>
+void MinimizeEncoded(VectorFst<Arc> *fst, float delta = kDelta) {
+  // Step 1: quantize the weights using `delta`.
+  QuantizeMapper<Arc> quantizer(delta);
+  Map(fst, quantizer);
+
+  // Step 2: encode labels and weights, minimize the result as an acceptor,
+  // then decode with the same encoder.
+  const auto encode_flags = kEncodeLabels | kEncodeWeights;
+  EncodeMapper<Arc> codec(encode_flags, ENCODE);
+  Encode(fst, &codec);
+  internal::AcceptorMinimize(fst);
+  Decode(fst, codec);
+}
+/// GetLinearSymbolSequence gets the symbol sequence from a linear FST.
+/// If the FST is not just a linear sequence, it returns false.  If it is
+/// a linear sequence (including the empty FST), it returns true.  In this
+/// case it outputs the input- and output-symbol
+/// sequences as "isymbols_out" and "osymbols_out" (removing epsilons), and
+/// the total weight as "tot_weight_out".  The total weight will be
+/// Weight::Zero() if the FST is empty.  If any of the output pointers are
+/// NULL, it does not create that output.
+template <class Arc, class I>
+bool GetLinearSymbolSequence(const Fst<Arc> &fst, std::vector<I> *isymbols_out,
+                             std::vector<I> *osymbols_out,
+                             typename Arc::Weight *tot_weight_out);
+/// This function takes an FST with a special structure, which is
+/// output by the OpenFst functions ShortestPath and RandGen, and converts
+/// it into a std::vector of separate FSTs.  This special structure is that
+/// the only state that has more than one (arcs-out or final-prob) is the
+/// start state.  fsts_out is resized to the appropriate size.
+template <class Arc>
+void ConvertNbestToVector(const Fst<Arc> &fst,
+                          std::vector<VectorFst<Arc> > *fsts_out);
+/// Takes the n-shortest-paths of "fst" (using ShortestPath), but outputs
+/// the result as a vector of up to n FSTs.  This function will
+/// resize the "fsts_out" vector to however many paths it got
+/// (which will not exceed n).  n must be >= 1.
+template <class Arc>
+void NbestAsFsts(const Fst<Arc> &fst, size_t n,
+                 std::vector<VectorFst<Arc> > *fsts_out);
+/// Creates an unweighted linear acceptor, *ofst, from the sequence "labels".
+template <class Arc, class I>
+void MakeLinearAcceptor(const std::vector<I> &labels, MutableFst<Arc> *ofst);
+/// Creates an unweighted acceptor, *ofst, with a linear structure and a set of
+/// alternative labels at each position.  Epsilon is treated like a normal
+/// symbol here.  Each position in "labels" must have at least one alternative.
+template <class Arc, class I>
+void MakeLinearAcceptorWithAlternatives(
+    const std::vector<std::vector<I> > &labels, MutableFst<Arc> *ofst);
+/// Does PreDeterminize and DeterminizeStar and then removes the disambiguation
+/// symbols.  This is a form of determinization that will never blow up.  Note
+/// that ifst is non-const and can be considered to be destroyed by this
+/// operation.
+/// Does not do epsilon removal (RemoveEpsLocal) -- this is so it's safe to
+/// cast to log and do this, and maintain equivalence in tropical.
+template <class Arc>
+void SafeDeterminizeWrapper(MutableFst<Arc> *ifst, MutableFst<Arc> *ofst,
+                            float delta = kDelta);
+/// SafeDeterminizeMinimizeWrapper is as SafeDeterminizeWrapper except that it
+/// also minimizes (encoded minimization, which is safe).  This algorithm will
+/// destroy "ifst".
+template <class Arc>
+void SafeDeterminizeMinimizeWrapper(MutableFst<Arc> *ifst, VectorFst<Arc> *ofst,
+                                    float delta = kDelta);
+/// SafeDeterminizeMinimizeWrapperInLog is as SafeDeterminizeMinimizeWrapper
+/// except it first casts to the log semiring.
+void SafeDeterminizeMinimizeWrapperInLog(VectorFst<StdArc> *ifst,
+                                         VectorFst<StdArc> *ofst,
+                                         float delta = kDelta);
+/// RemoveSomeInputSymbols removes any symbol that appears in "to_remove" from
+/// the input side of *fst, replacing each such symbol with epsilon.
+template <class Arc, class I>
+void RemoveSomeInputSymbols(const std::vector<I> &to_remove,
+                            MutableFst<Arc> *fst);
+// MapInputSymbols will replace any input symbol i that is between 0 and
+// symbol_map.size()-1 (inclusive) with symbol_map[i].  It also removes the
+// input symbol table of the FST.
+template <class Arc, class I>
+void MapInputSymbols(const std::vector<I> &symbol_map, MutableFst<Arc> *fst);
+/// NOTE(review): undocumented in this header.  Presumably replaces the
+/// weights stored in *fst with Weight::One(), in place -- confirm against
+/// fstext-utils-inl.h.
+template <class Arc>
+void RemoveWeights(MutableFst<Arc> *fst);
+/// Returns true if and only if the FST is such that the input symbols
+/// on arcs entering any given state all have the same value.
+/// If "start_is_epsilon", treat the start-state as having an epsilon input arc
+/// [i.e. ensure only epsilon can enter start-state].
+template <class Arc>
+bool PrecedingInputSymbolsAreSame(bool start_is_epsilon, const Fst<Arc> &fst);
+/// This is as PrecedingInputSymbolsAreSame, but with a functor f that maps
+/// labels to classes. The function tests whether the symbols preceding any
+/// given state are in the same class. Formally, f is of a type F that has an
+/// operator of type F::Result F::operator() (F::Arg a) const; where F::Result
+/// is an integer type and F::Arg can be constructed from Arc::Label. This must
+/// apply to valid labels and also to kNoLabel (so we can have a marker for the
+/// invalid labels).
+template <class Arc, class F>
+bool PrecedingInputSymbolsAreSameClass(bool start_is_epsilon,
+                                       const Fst<Arc> &fst, const F &f);
+/// Returns true if and only if the FST is such that the input symbols
+/// on arcs exiting any given state all have the same value.
+/// If "end_is_epsilon", treat being an end-state as an epsilon output arc
+/// [i.e. ensure end-states cannot have non-epsilon output transitions].
+template <class Arc>
+bool FollowingInputSymbolsAreSame(bool end_is_epsilon, const Fst<Arc> &fst);
+/// NOTE(review): undocumented in this header.  By analogy with
+/// PrecedingInputSymbolsAreSameClass, this is presumably as
+/// FollowingInputSymbolsAreSame but with a functor f that maps labels to
+/// classes -- confirm against fstext-utils-inl.h.
+template <class Arc, class F>
+bool FollowingInputSymbolsAreSameClass(bool end_is_epsilon, const Fst<Arc> &fst,
+                                       const F &f);
+/// MakePrecedingInputSymbolsSame ensures that all arcs entering any given fst
+/// state have the same input symbol.  It does this by detecting states
+/// that have differing input symbols going in, and inserting, for each of
+/// the preceding arcs with non-epsilon input symbol, a new dummy state that
+/// has an epsilon link to the fst state.
+/// If "start_is_epsilon", ensure that the start-state can have only
+/// epsilon-links into it.
+template <class Arc>
+void MakePrecedingInputSymbolsSame(bool start_is_epsilon, MutableFst<Arc> *fst);
+/// As MakePrecedingInputSymbolsSame, but takes a functor object "f" that maps
+/// labels to classes.
+template <class Arc, class F>
+void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon,
+                                        MutableFst<Arc> *fst, const F &f);
+/// MakeFollowingInputSymbolsSame ensures that all arcs exiting any given fst
+/// state have the same input symbol.  It does this by detecting states that
+/// have differing input symbols on exiting arcs, and inserting, for each
+/// of the following arcs with non-epsilon input symbol, a new dummy state that
+/// has an input-epsilon link from the fst state.  The output symbol and weight
+/// stay on the link to the dummy state (in order to keep the FST
+/// output-deterministic and stochastic, if it already was).  If end_is_epsilon,
+/// treat "being a final-state" like having an epsilon output link.
+template <class Arc>
+void MakeFollowingInputSymbolsSame(bool end_is_epsilon, MutableFst<Arc> *fst);
+/// As MakeFollowingInputSymbolsSame, but takes a functor object "f" that maps
+/// labels to classes.
+template <class Arc, class F>
+void MakeFollowingInputSymbolsSameClass(bool end_is_epsilon,
+                                        MutableFst<Arc> *fst, const F &f);
+/// MakeLoopFst creates an FST that has a state that is both initial and
+/// final (weight == Weight::One()), and for each non-NULL pointer fsts[i],
+/// it has an arc out whose output-symbol is i and which goes to a
+/// sub-graph whose input language is equivalent to fsts[i], where the
+/// final-state becomes a transition to the loop-state.  Each fst in "fsts"
+/// should be an acceptor.  The fst MakeLoopFst returns is output-deterministic,
+/// but not necessarily output-epsilon free, and arcs are sorted on output
+/// label.  Note: if some of the pointers in the input vector "fsts" have the
+/// same value, "MakeLoopFst" uses this to speed up the computation.
+/// Formally: suppose I is the set of indexes i such that fsts[i] != NULL.
+/// Let L[i] be the language that the acceptor fsts[i] accepts.
+/// Let the language K be the set of input-output pairs i:l such
+/// that i in I and l in L[i].  Then the FST returned by MakeLoopFst
+/// accepts the language K*, where * is the Kleene closure (CLOSURE_STAR)
+/// of K.
+/// We could have implemented this via a combination of "project",
+/// "concat", "union" and "closure".  But that FST would have been
+/// less well optimized and would have a lot of final-states.
+/// NOTE(review): the result is a raw pointer; presumably the caller is
+/// responsible for deleting it -- confirm against fstext-utils-inl.h.
+template <class Arc>
+VectorFst<Arc> *MakeLoopFst(const std::vector<const ExpandedFst<Arc> *> &fsts);
+/// ApplyProbabilityScale is applicable to FSTs in the log or tropical semiring.
+/// It multiplies the arc and final weights of *fst by "scale" [this is not the
+/// Mul operation of the semiring, it's actual multiplication, which is
+/// equivalent to taking a power in the semiring].
+template <class Arc>
+void ApplyProbabilityScale(float scale, MutableFst<Arc> *fst);
+/// EqualAlign is similar to RandGen, but it generates a sequence with exactly
+/// "length" input symbols.  It returns true on success, false on failure
+/// (failure is partly random but should never happen in practice for normal
+/// speech models.) It generates a random path through the input FST, finds out
+/// which subset of the states it visits along the way have self-loops with
+/// input symbols on them, and outputs a path with exactly enough self-loops to
+/// have the requested number of input symbols. Note that EqualAlign does not
+/// use the probabilities on the FST.  It just uses equal probabilities in the
+/// first stage of selection (since the output will anyway not be a truly random
+/// sample from the FST). The input fst "ifst" must be connected or this may
+/// enter an infinite loop.
+template <class Arc>
+bool EqualAlign(const Fst<Arc> &ifst, typename Arc::StateId length,
+                int rand_seed, MutableFst<Arc> *ofst, int num_retries = 10);
+// RemoveUselessArcs removes from *fst those arcs for which there is no input
+// symbol sequence whose best path through the FST would contain
+// them [for these purposes, epsilon is not treated as a real symbol].
+// This is mainly geared towards decoding-graph FSTs which may contain
+// transitions that have less likely words on them that would never be
+// taken.  We do not claim that this algorithm removes all such arcs;
+// it just does the best job it can.
+// Only works for the tropical (not log) semiring as it uses
+// NaturalLess.
+template <class Arc>
+void RemoveUselessArcs(MutableFst<Arc> *fst);
+// PhiCompose is a version of composition where
+// the right hand FST (fst2) is treated as a backoff
+// LM, with the phi symbol "phi_label" (e.g. #0) treated as a
+// "failure transition", only taken when we don't
+// have a match for the requested symbol.
+template <class Arc>
+void PhiCompose(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
+                typename Arc::Label phi_label, MutableFst<Arc> *fst);
+// PropagateFinal propagates final-probs through
+// "phi" transitions (note that here, phi_label may
+// be epsilon if you want).  If you have a backoff LM
+// with special symbols ("phi") on the backoff arcs
+// instead of epsilon, you may use PhiCompose to compose
+// with it, but this won't do the right thing w.r.t.
+// final probabilities.  You should first call PropagateFinal
+// on the FST with phi's in it (fst2 in PhiCompose above),
+// to fix this.  If a state does not have a final-prob,
+// but has a phi transition, it makes the state's final-prob
+// (phi-prob * final-prob-of-dest-state), and does this
+// recursively i.e. follows phi transitions on the dest state
+// first.  It behaves as if there were a super-final state
+// with a special symbol leading to it, from each currently
+// final state.  Note that this may not behave as desired
+// if there are epsilons in your FST; it might be better
+// to remove those before calling this function.
+template <class Arc>
+void PropagateFinal(typename Arc::Label phi_label, MutableFst<Arc> *fst);
+// RhoCompose is a version of composition where
+// the right hand FST (fst2) has special "rho transitions"
+// which are taken whenever no normal transition matches; these
+// transitions will be rewritten with whatever symbol was on
+// the first FST.
+template <class Arc>
+void RhoCompose(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
+                typename Arc::Label rho_label, MutableFst<Arc> *fst);
+/** This function returns true if, in the semiring of the FST, the sum (within
+    the semiring) of all the arcs out of each state in the FST is one, to within
+    delta.  After MakeStochasticFst, this should be true (for a connected FST).
+    @param fst [in] the FST that we are testing.
+    @param delta [in] the tolerance to within which we test equality to 1.
+    @param min_sum [out] if non-NULL, contents will be set to the minimum sum
+   of weights.
+    @param max_sum [out] if non-NULL, contents will be set to the maximum sum
+   of weights.
+    @return Returns true if the FST is stochastic, and false otherwise.
+*/
+template <class Arc>
+bool IsStochasticFst(const Fst<Arc> &fst,
+                     float delta = kDelta,  // kDelta = 1.0/1024.0 by default.
+                     typename Arc::Weight *min_sum = NULL,
+                     typename Arc::Weight *max_sum = NULL);
+// IsStochasticFstInLog tests whether "fst" is stochastic after casting to log.
+// It takes the same parameters as IsStochasticFst, with StdArc weights.
+inline bool IsStochasticFstInLog(
+    const Fst<StdArc> &fst,
+    float delta = kDelta,  // kDelta = 1.0/1024.0 by default.
+    StdArc::Weight *min_sum = NULL, StdArc::Weight *max_sum = NULL);
+}  // end namespace fst
+#include "fstext/fstext-utils-inl.h"
+#endif  // KALDI_FSTEXT_FSTEXT_UTILS_H_
--- a/speechx/speechx/kaldi/fstext/kaldi-fst-io-inl.h
+++ b/speechx/speechx/kaldi/fstext/kaldi-fst-io-inl.h
--- a/speechx/speechx/kaldi/fstext/kaldi-fst-io.cc
+++ b/speechx/speechx/kaldi/fstext/kaldi-fst-io.cc
+// fstext/kaldi-fst-io.cc
+// Copyright 2009-2011  Microsoft Corporation
+//           2012-2015  Johns Hopkins University (Author: Daniel Povey)
+//                2013  Guoguo Chen
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#include "fstext/kaldi-fst-io.h"
+#include <string>
+#include "base/kaldi-error.h"
+#include "base/kaldi-math.h"
+#include "util/kaldi-io.h"
+namespace fst {
+// Reads a VectorFst<StdArc> from "rxfilename" and returns it as a raw pointer.
+// An empty name is interpreted as "-" (stdin), for compatibility with OpenFst
+// conventions.  A header or body that cannot be read is reported through
+// KALDI_ERR.
+VectorFst<StdArc> *ReadFstKaldi(std::string rxfilename) {
+  if (rxfilename.empty()) rxfilename = "-";
+  kaldi::Input input(rxfilename);
+  FstHeader header;
+  const bool header_ok = header.Read(input.Stream(), rxfilename);
+  if (!header_ok) {
+    KALDI_ERR << "Reading FST: error reading FST header from "
+              << kaldi::PrintableRxfilename(rxfilename);
+  }
+  // The header has been consumed already, so hand it to the reader.
+  FstReadOptions read_opts("<unspecified>", &header);
+  VectorFst<StdArc> *result =
+      VectorFst<StdArc>::Read(input.Stream(), read_opts);
+  if (result == NULL) {
+    KALDI_ERR << "Could not read fst from "
+              << kaldi::PrintableRxfilename(rxfilename);
+  }
+  return result;
+}
+// Register the vector and const FST types (with StdArc arcs) so that they
+// can be loaded automatically. Other types like
+// olabel_lookahead or ngram or compact_fst should be registered
+// through OpenFst registration API.
+static fst::FstRegisterer<VectorFst<StdArc>> VectorFst_StdArc_registerer;
+static fst::FstRegisterer<ConstFst<StdArc>> ConstFst_StdArc_registerer;
+// Reads an FST with StdArc arcs from "rxfilename" ("" is interpreted as "-",
+// i.e. stdin) and returns it as a raw Fst<StdArc> pointer.
+// Three failures are handled here: the header cannot be read, the header's
+// arc type is not StdArc, or the FST body cannot be read.  For each of them,
+// if throw_on_err is true the error is raised with KALDI_ERR; otherwise a
+// warning is logged and NULL is returned.
+// NOTE(review): kaldi::Input is defined outside this file; if its constructor
+// raises when the file cannot be opened, that happens regardless of
+// throw_on_err -- confirm.
+Fst<StdArc> *ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err) {
+  if (rxfilename == "") rxfilename = "-";  // interpret "" as stdin,
+  // for compatibility with OpenFst conventions.
+  kaldi::Input ki(rxfilename);
+  fst::FstHeader hdr;
+  // Read FstHeader which contains the type of FST
+  if (!hdr.Read(ki.Stream(), rxfilename)) {
+    if (throw_on_err) {
+      KALDI_ERR << "Reading FST: error reading FST header from "
+                << kaldi::PrintableRxfilename(rxfilename);
+    } else {
+      KALDI_WARN << "We fail to read FST header from "
+                 << kaldi::PrintableRxfilename(rxfilename)
+                 << ". A NULL pointer is returned.";
+      return NULL;
+    }
+  }
+  // Check the type of Arc
+  if (hdr.ArcType() != fst::StdArc::Type()) {
+    if (throw_on_err) {
+      KALDI_ERR << "FST with arc type " << hdr.ArcType()
+                << " is not supported.";
+    } else {
+      // The trailing space after "type" keeps the arc type from being glued
+      // onto the preceding word in the log line.
+      KALDI_WARN << "Fst with arc type " << hdr.ArcType()
+                 << " is not supported. A NULL pointer is returned.";
+      return NULL;
+    }
+  }
+  // Read the FST; the header was consumed above, so pass it to the reader.
+  FstReadOptions ropts("<unspecified>", &hdr);
+  Fst<StdArc> *fst = Fst<StdArc>::Read(ki.Stream(), ropts);
+  if (!fst) {
+    if (throw_on_err) {
+      KALDI_ERR << "Could not read fst from "
+                << kaldi::PrintableRxfilename(rxfilename);
+    } else {
+      KALDI_WARN << "Could not read fst from "
+                 << kaldi::PrintableRxfilename(rxfilename)
+                 << ". A NULL pointer is returned.";
+      return NULL;
+    }
+  }
+  return fst;
+}
+// Turns "fst" into a VectorFst<StdArc>.  Only FSTs whose Type() is "vector"
+// or "const" are supported (asserted).  A "vector" FST is returned as the
+// same object via dynamic_cast; a "const" FST is copied into a newly
+// allocated VectorFst<StdArc> and the input object is deleted.
+VectorFst<StdArc> *CastOrConvertToVectorFst(Fst<StdArc> *fst) {
+  std::string real_type = fst->Type();
+  KALDI_ASSERT(real_type == "vector" || real_type == "const");
+  if (real_type == "vector") {
+    // Already the right dynamic type: no copy needed.
+    return dynamic_cast<VectorFst<StdArc> *>(fst);
+  }
+  // 'fst' can't be cast to VectorFst, so build a VectorFst<StdArc> from it
+  // and dispose of the original.
+  VectorFst<StdArc> *converted = new VectorFst<StdArc>(*fst);
+  delete fst;
+  return converted;
+}
+// Overload that reads the FST from "rxfilename" into the caller-provided
+// *ofst: it calls the pointer-returning ReadFstKaldi(), assigns the result
+// to *ofst, and deletes the temporary object.
+void ReadFstKaldi(std::string rxfilename, fst::StdVectorFst *ofst) {
+  fst::StdVectorFst *const loaded = ReadFstKaldi(rxfilename);
+  *ofst = *loaded;
+  delete loaded;
+}
+// Writes "fst" to "wxfilename".  An empty name is interpreted as "-"
+// (stdout), for compatibility with OpenFst conventions.  The kaldi::Output is
+// opened with write_binary = true and write_header = false.
+void WriteFstKaldi(const VectorFst<StdArc> &fst, std::string wxfilename) {
+  if (wxfilename.empty()) wxfilename = "-";
+  const bool binary_mode = true;
+  const bool kaldi_header = false;
+  kaldi::Output output(wxfilename, binary_mode, kaldi_header);
+  FstWriteOptions write_opts(kaldi::PrintableWxfilename(wxfilename));
+  fst.Write(output.Stream(), write_opts);
+}
+// Reads an LM FST from "rxfilename" and prepares it: if it is not an acceptor
+// it is projected on the output side, and if it is not sorted on input label
+// it is arc-sorted on ilabel.  Returns the prepared FST as a raw pointer.
+fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename) {
+  // No NULL check: ReadFstKaldi() dies with an exception on failure.
+  VectorFst<StdArc> *lm = ReadFstKaldi(rxfilename);
+
+  const bool is_acceptor = lm->Properties(kAcceptor, true) != 0;
+  if (!is_acceptor) {
+    // Project on the output, i.e. copy olabels to ilabels.  Generally the
+    // G.fst's on disk will have the disambiguation symbol #0 on the input
+    // symbols of the backoff arc; projection replaces them with the epsilons
+    // that are on the output symbols of those arcs.
+    Project(lm, PROJECT_OUTPUT);
+  }
+
+  // Checked after the projection above, so it reflects the final labels.
+  const bool ilabel_sorted = lm->Properties(kILabelSorted, true) != 0;
+  if (!ilabel_sorted) {
+    // Make sure the LM is sorted on ilabel.
+    ArcSort(lm, ILabelCompare<StdArc>());
+  }
+  return lm;
+}
+}  // end namespace fst
--- a/speechx/speechx/kaldi/fstext/kaldi-fst-io.h
+++ b/speechx/speechx/kaldi/fstext/kaldi-fst-io.h
--- a/speechx/speechx/kaldi/fstext/lattice-utils-inl.h
+++ b/speechx/speechx/kaldi/fstext/lattice-utils-inl.h
--- a/speechx/speechx/kaldi/fstext/lattice-utils.h
+++ b/speechx/speechx/kaldi/fstext/lattice-utils.h
--- a/speechx/speechx/kaldi/fstext/lattice-weight.h
+++ b/speechx/speechx/kaldi/fstext/lattice-weight.h
--- a/speechx/speechx/kaldi/fstext/pre-determinize-inl.h
+++ b/speechx/speechx/kaldi/fstext/pre-determinize-inl.h
--- a/speechx/speechx/kaldi/fstext/pre-determinize.h
+++ b/speechx/speechx/kaldi/fstext/pre-determinize.h
--- a/speechx/speechx/kaldi/fstext/remove-eps-local-inl.h
+++ b/speechx/speechx/kaldi/fstext/remove-eps-local-inl.h
--- a/speechx/speechx/kaldi/fstext/remove-eps-local.h
+++ b/speechx/speechx/kaldi/fstext/remove-eps-local.h
--- a/speechx/speechx/kaldi/fstext/table-matcher.h
+++ b/speechx/speechx/kaldi/fstext/table-matcher.h
--- a/speechx/speechx/kaldi/lat/CMakeLists.txt
+++ b/speechx/speechx/kaldi/lat/CMakeLists.txt
+# kaldi-lat: library built from the two lattice sources listed below.
+add_library(kaldi-lat
+determinize-lattice-pruned.cc
+lattice-functions.cc
+)
+# kaldi-util is linked PUBLIC, so it propagates to targets linking kaldi-lat.
+target_link_libraries(kaldi-lat PUBLIC kaldi-util)
\ No newline at end of file
--- a/speechx/speechx/kaldi/lat/determinize-lattice-pruned-test.cc
+++ b/speechx/speechx/kaldi/lat/determinize-lattice-pruned-test.cc
--- a/speechx/speechx/kaldi/lat/determinize-lattice-pruned.cc
+++ b/speechx/speechx/kaldi/lat/determinize-lattice-pruned.cc
--- a/speechx/speechx/kaldi/lat/determinize-lattice-pruned.h
+++ b/speechx/speechx/kaldi/lat/determinize-lattice-pruned.h
--- a/speechx/speechx/kaldi/lat/kaldi-lattice.h
+++ b/speechx/speechx/kaldi/lat/kaldi-lattice.h
@@ -23,7 +23,7 @@
 #include "fstext/fstext-lib.h"
 #include "base/kaldi-common.h"
-#include "util/common-utils.h"
+// #include "util/common-utils.h"
 namespace kaldi {
@@ -142,13 +142,13 @@ class LatticeHolder {
  T *t_;
 };
-typedef TableWriter<LatticeHolder> LatticeWriter;
+// typedef TableWriter<LatticeHolder> LatticeWriter;
-typedef SequentialTableReader<LatticeHolder> SequentialLatticeReader;
+// typedef SequentialTableReader<LatticeHolder> SequentialLatticeReader;
-typedef RandomAccessTableReader<LatticeHolder> RandomAccessLatticeReader;
+// typedef RandomAccessTableReader<LatticeHolder> RandomAccessLatticeReader;
+//
-typedef TableWriter<CompactLatticeHolder> CompactLatticeWriter;
+// typedef TableWriter<CompactLatticeHolder> CompactLatticeWriter;
-typedef SequentialTableReader<CompactLatticeHolder> SequentialCompactLatticeReader;
+// typedef SequentialTableReader<CompactLatticeHolder> SequentialCompactLatticeReader;
-typedef RandomAccessTableReader<CompactLatticeHolder> RandomAccessCompactLatticeReader;
+// typedef RandomAccessTableReader<CompactLatticeHolder> RandomAccessCompactLatticeReader;
 } // namespace kaldi

--- a/speechx/speechx/kaldi/lat/lattice-functions.cc
+++ b/speechx/speechx/kaldi/lat/lattice-functions.cc
--- a/speechx/speechx/kaldi/lat/lattice-functions.h
+++ b/speechx/speechx/kaldi/lat/lattice-functions.h
--- a/speechx/speechx/nnet/decodable.cc
+++ b/speechx/speechx/nnet/decodable.cc
--- a/speechx/speechx/nnet/decodable.h
+++ b/speechx/speechx/nnet/decodable.h