path_trie.cpp 3.9 KB
Newer Older
1 2
#include "path_trie.h"

3 4 5 6 7 8 9 10 11
#include <algorithm>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "decoder_utils.h"

// Builds a root node: every probability/score slot starts at -NUM_FLT_INF,
// the node carries the ROOT_ sentinel as its character, it has no parent,
// and no dictionary or matcher is attached yet.
PathTrie::PathTrie() {
  // Shared initial value for all log-probability slots and the score
  // (presumably stands in for log(0) — confirm against decoder_utils.h).
  const auto initial_log_prob = -NUM_FLT_INF;
  log_prob_b_prev = initial_log_prob;
  log_prob_nb_prev = initial_log_prob;
  log_prob_b_cur = initial_log_prob;
  log_prob_nb_cur = initial_log_prob;
  score = initial_log_prob;

  // ROOT_ must be assigned before it is copied into `character`.
  ROOT_ = -1;
  character = ROOT_;
  exists_ = true;
  parent = nullptr;

  // Dictionary constraint is off until set_dictionary()/set_matcher() run.
  dictionary_ = nullptr;
  dictionary_state_ = 0;
  has_dictionary_ = false;
  matcher_ = nullptr;
}

// Deletes every child node still registered in children_; each child's own
// destructor does the same, so the whole subtree is released.
PathTrie::~PathTrie() {
  for (const auto& entry : children_) {
    delete entry.second;
  }
}

// Returns the child of this node labelled `new_char`, creating it if needed.
//
// - If such a child already exists it is returned; a child that had been
//   marked as non-existing is revived first and its four log-probability
//   slots are reset to -NUM_FLT_INF.
// - Otherwise, without a dictionary, a fresh child is appended and returned.
// - Otherwise, with a dictionary, the matcher is positioned on this node's
//   dictionary state and asked for `new_char`:
//     * found: a fresh child is appended that inherits the dictionary and
//       matcher and takes the matched arc's next state;
//     * not found: nullptr is returned. If, in addition, the current
//       dictionary state is final and `reset` is true, this node's
//       dictionary state is rewound to the dictionary's start state.
PathTrie* PathTrie::get_path_trie(int new_char, bool reset) {
  // 1. Reuse an existing child when one carries this label.
  for (auto& entry : children_) {
    if (entry.first != new_char) {
      continue;
    }
    PathTrie* existing = entry.second;
    if (!existing->exists_) {
      existing->exists_ = true;
      existing->log_prob_b_prev = -NUM_FLT_INF;
      existing->log_prob_nb_prev = -NUM_FLT_INF;
      existing->log_prob_b_cur = -NUM_FLT_INF;
      existing->log_prob_nb_cur = -NUM_FLT_INF;
    }
    return existing;
  }

  // 2. No dictionary constraint: any label may extend the path.
  if (!has_dictionary_) {
    PathTrie* fresh = new PathTrie;
    fresh->character = new_char;
    fresh->parent = this;
    children_.push_back(std::make_pair(new_char, fresh));
    return fresh;
  }

  // 3. Dictionary constraint: only labels with an outgoing arc are allowed.
  matcher_->SetState(dictionary_state_);
  const bool arc_found = matcher_->Find(new_char);
  if (arc_found) {
    PathTrie* fresh = new PathTrie;
    fresh->character = new_char;
    fresh->parent = this;
    fresh->dictionary_ = dictionary_;
    fresh->dictionary_state_ = matcher_->Value().nextstate;
    fresh->has_dictionary_ = true;
    fresh->matcher_ = matcher_;
    children_.push_back(std::make_pair(new_char, fresh));
    return fresh;
  }

  // Adding this character would leave the dictionary.
  auto zero_weight = fst::TropicalWeight::Zero();
  auto state_weight = dictionary_->Final(dictionary_state_);
  const bool at_final_state = (state_weight != zero_weight);
  if (at_final_state && reset) {
    dictionary_state_ = dictionary_->Start();
  }
  return nullptr;
}

// Convenience overload: collects the labels from this node towards the
// root, using the ROOT_ sentinel as the stop label. Forwards to the
// (output, stop, max_steps) overload with that overload's default step limit.
PathTrie* PathTrie::get_path_vec(std::vector<int>& output) {
  const int stop_at_root = ROOT_;
  return get_path_vec(output, stop_at_root);
}

// Walks from this node towards the root, appending each visited node's
// label to `output`, until it reaches a node whose label equals `stop`, a
// node whose label is the ROOT_ sentinel, or until `output` holds
// `max_steps` entries. The whole of `output` is then reversed so the labels
// read in root-to-leaf order.
//
// Returns the node at which the walk stopped (its label is not appended).
PathTrie* PathTrie::get_path_vec(std::vector<int>& output,
                                 int stop,
                                 size_t max_steps) {
  PathTrie* node = this;
  while (node->character != stop && node->character != node->ROOT_ &&
         output.size() != max_steps) {
    output.push_back(node->character);
    node = node->parent;
  }
  std::reverse(output.begin(), output.end());
  return node;
}

Y
Yibing Liu 已提交
100
// Pre-order walk over this subtree. For every node flagged as existing:
//   - the current blank / non-blank log-probabilities become the previous ones,
//   - the current slots are reset to -NUM_FLT_INF,
//   - score is recomputed as log_sum_exp of the two previous values,
//   - the node is appended to `output`.
// Children are visited whether or not the node itself exists.
void PathTrie::iterate_to_vec(std::vector<PathTrie*>& output) {
  if (exists_) {
    // Roll the current step's probabilities over into the "previous" slots.
    log_prob_b_prev = log_prob_b_cur;
    log_prob_nb_prev = log_prob_nb_cur;

    // Clear the "current" slots.
    log_prob_b_cur = -NUM_FLT_INF;
    log_prob_nb_cur = -NUM_FLT_INF;

    score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev);
    output.push_back(this);
  }

  for (const auto& entry : children_) {
    entry.second->iterate_to_vec(output);
  }
}

void PathTrie::remove() {
117
  exists_ = false;
Y
Yibing Liu 已提交
118

119 120 121
  if (children_.size() == 0) {
    auto child = parent->children_.begin();
    for (child = parent->children_.begin(); child != parent->children_.end();
Y
Yibing Liu 已提交
122 123
         ++child) {
      if (child->first == character) {
124
        parent->children_.erase(child);
Y
Yibing Liu 已提交
125 126 127
        break;
      }
    }
128

129
    if (parent->children_.size() == 0 && !parent->exists_) {
Y
Yibing Liu 已提交
130
      parent->remove();
131
    }
Y
Yibing Liu 已提交
132 133 134

    delete this;
  }
135 136 137
}

// Attaches a dictionary FST to this node: stores the pointer, positions the
// node on the FST's start state and switches dictionary checking on.
// The pointer is stored as-is; `dictionary` is dereferenced here, so it
// must not be null.
void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) {
  dictionary_ = dictionary;
  const auto start_state = dictionary->Start();
  dictionary_state_ = start_state;
  has_dictionary_ = true;
}

using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
void PathTrie::set_matcher(std::shared_ptr<FSTMATCH> matcher) {
145
  matcher_ = matcher;
146
}