提交 c3c3c0b3 编写于 作者: J JiabinYang

polish code, test=develop

上级 7389597c
...@@ -488,12 +488,6 @@ class CPUVector : public std::vector<T, std::allocator<T>> { ...@@ -488,12 +488,6 @@ class CPUVector : public std::vector<T, std::allocator<T>> {
return os; return os;
} }
// Reports the element count of the std::vector base class as a size_t.
size_t size() const noexcept {
  return static_cast<size_t>(std::vector<T, std::allocator<T>>::size());
}
T &operator[](size_t id) { return this->at(id); } T &operator[](size_t id) { return this->at(id); }
const T &operator[](size_t id) const { return this->at(id); } const T &operator[](size_t id) const { return this->at(id); }
......
...@@ -140,6 +140,58 @@ bool SelectedRows::HasKey(int64_t key) const { ...@@ -140,6 +140,58 @@ bool SelectedRows::HasKey(int64_t key) const {
: true; : true;
} }
// Looks up the row index that `key` maps to in id_to_index_, optionally
// registering the key when it is not present yet.
//
// Parameters:
//   key        - id to look up.
//   auto_grown - when true, a missing key is appended to rows_ and recorded in
//                id_to_index_; when false, a missing key raises.
//   is_test    - when true, performs a plain lookup without touching rwlock_
//                and reports a missing key as -1 (never grows, never raises).
//
// Returns the index of `key`. -1 is returned only on the is_test path when
// the key is absent.
//
// Raises (via PADDLE_THROW) when the key is missing and auto_grown is false,
// when id_to_index_ and rows_ differ in size, or when rows_ already holds as
// many entries as the first dimension of value_.
//
// NOTE(review): the lock is released by hand on every exit path. If
// rows_.push_back or the map insertion throws, the write lock stays held --
// consider a scoped guard if the project provides one.
int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
                                     bool is_test) {
  if (is_test) {
    // Lookup without locking. Presumably nothing mutates the map on this
    // path -- TODO confirm against callers.
    auto iter = id_to_index_.find(key);
    return iter == id_to_index_.end() ? -1 : iter->second;
  }

  // Fast path: an existing key only needs the read lock.
  rwlock_->RDLock();
  auto iter = id_to_index_.find(key);
  if (iter != id_to_index_.end()) {
    auto index = iter->second;
    rwlock_->UNLock();
    return index;
  }
  rwlock_->UNLock();

  if (!auto_grown) {
    PADDLE_THROW("key %d not found", key);
  }

  // Slow path: growing the table requires the write lock.
  rwlock_->WRLock();
  auto map_size = id_to_index_.size();
  auto vector_size = rows_.size();
  if (map_size != vector_size) {
    rwlock_->UNLock();
    PADDLE_THROW(
        "id_to_index_ size %d should have the same size with rows_ %d",
        map_size, vector_size);
  }

  // The read lock was dropped before the write lock was taken, so the key may
  // have been registered in between; check again before growing.
  auto write_iter = id_to_index_.find(key);
  if (write_iter != id_to_index_.end()) {
    auto index = write_iter->second;
    rwlock_->UNLock();
    return index;
  }

  // Keep the count in 64 bits: it is compared with an int64_t dimension, and
  // storing it in an int would silently truncate a large size_t.
  const auto row_num = static_cast<int64_t>(rows_.size());
  if (row_num == value_->dims()[0]) {
    rwlock_->UNLock();
    PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
  }

  // key logic to put a key into id_to_index_
  rows_.push_back(key);
  auto index = static_cast<int64_t>(rows_.size() - 1);
  id_to_index_[key] = index;
  rwlock_->UNLock();
  return index;
}
void SelectedRows::SyncIndex() { void SelectedRows::SyncIndex() {
rwlock_->WRLock(); rwlock_->WRLock();
id_to_index_.clear(); id_to_index_.clear();
......
...@@ -118,54 +118,17 @@ class SelectedRows { ...@@ -118,54 +118,17 @@ class SelectedRows {
* *
* @return index of the key. * @return index of the key.
*/ */
inline int64_t AutoGrownIndex(int64_t key, bool auto_grown, int64_t AutoGrownIndex(int64_t key, bool auto_grown, bool is_test = false);
bool is_test = false) {
if (is_test) { /*
auto iter = id_to_index_.find(key); * @brief Get the index of the key from id_to_index_ map.
if (iter == id_to_index_.end()) { */
return -1; inline int64_t GetIndexFromId(int64_t key) {
} else {
return iter->second;
}
}
rwlock_->RDLock();
auto iter = id_to_index_.find(key); auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) { if (iter == id_to_index_.end()) {
rwlock_->UNLock(); return -1;
if (!auto_grown) {
PADDLE_THROW("key %d not found", key);
}
rwlock_->WRLock();
auto map_size = id_to_index_.size();
auto vector_size = rows_.size();
if (map_size != vector_size) {
rwlock_->UNLock();
PADDLE_THROW(
"id_to_index_ size %d should have the same size with rows_ %d",
map_size, vector_size);
}
auto write_iter = id_to_index_.find(key);
if (write_iter == id_to_index_.end()) {
int row_num = rows_.size();
if (row_num == value_->dims()[0]) {
rwlock_->UNLock();
PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
}
// key logic to put a key into id_to_index_
rows_.push_back(key);
auto index = static_cast<int64_t>(rows_.size() - 1);
id_to_index_[key] = index;
rwlock_->UNLock();
return index;
} else {
auto index = write_iter->second;
rwlock_->UNLock();
return index;
}
} else { } else {
auto index = iter->second; return iter->second;
rwlock_->UNLock();
return index;
} }
} }
...@@ -185,7 +148,7 @@ class SelectedRows { ...@@ -185,7 +148,7 @@ class SelectedRows {
// SelectedRows add a Tensor, will the duplicate rows be handled. // SelectedRows add a Tensor, will the duplicate rows be handled.
Vector<int64_t> rows_; Vector<int64_t> rows_;
std::unordered_map<int64_t, int64_t> std::unordered_map<int64_t, int64_t>
id_to_index_; // should not be used when ids has duplicate member id_to_index_; // should not be used when rows_ has duplicate member
std::unique_ptr<Tensor> value_{nullptr}; std::unique_ptr<Tensor> value_{nullptr};
int64_t height_; // height indicates the underlying tensor's height int64_t height_; // height indicates the underlying tensor's height
std::unique_ptr<RWLock> rwlock_{nullptr}; std::unique_ptr<RWLock> rwlock_{nullptr};
......
...@@ -101,7 +101,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -101,7 +101,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
"it should have shape like [N, L], L is the length of the Path") "it should have shape like [N, L], L is the length of the Path")
.AsDispensable(); .AsDispensable();
AddInput( AddInput(
"PCode", "PathCode",
"(LoDTensor, optional), The Code on each Node of the Path from root " "(LoDTensor, optional), The Code on each Node of the Path from root "
"to current word" "to current word"
"it should have shape like [N, L], L is the length of the Path") "it should have shape like [N, L], L is the length of the Path")
......
...@@ -19,9 +19,11 @@ limitations under the License. */ ...@@ -19,9 +19,11 @@ limitations under the License. */
#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/clip_op.h" #include "paddle/fluid/operators/clip_op.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/matrix_bit_code.h" #include "paddle/fluid/operators/math/matrix_bit_code.h"
#include "paddle/fluid/platform/transform.h" #include "paddle/fluid/platform/transform.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -30,31 +32,26 @@ template <typename T, int MajorType = Eigen::RowMajor, ...@@ -30,31 +32,26 @@ template <typename T, int MajorType = Eigen::RowMajor,
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>; using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
using platform::Transform; using platform::Transform;
std::vector<int64_t> cal_rows(const framework::LoDTensor& path) { static std::vector<int64_t> PathToRows(const framework::LoDTensor& path) {
std::set<int64_t> tmp; std::set<int64_t> rows;
std::vector<int64_t> rows; for (int64_t i = 0; i < path.numel(); ++i) {
for (size_t i = 0; i < static_cast<size_t>(path.dims()[0]); i++) { int64_t row = path.data<int64_t>()[i];
for (size_t j = 0; j < static_cast<size_t>(path.dims()[1]); j++) { if (row < 0) {
int64_t temp = continue;
path.data<int64_t>()[i * static_cast<size_t>(path.dims()[1]) + j];
if (temp >= 0) {
tmp.insert(temp);
}
} }
rows.emplace(row);
} }
rows.assign(tmp.begin(), tmp.end()); return std::vector<int64_t>(rows.begin(), rows.end());
return rows;
} }
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> { class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<framework::LoDTensor>("X"); auto in = detail::Ref(ctx.Input<framework::LoDTensor>("X"));
auto* w = ctx.Input<framework::LoDTensor>("W"); auto w = detail::Ref(ctx.Input<framework::LoDTensor>("W"));
auto* path = ctx.Input<framework::LoDTensor>("PTable"); auto* path = ctx.Input<framework::LoDTensor>("PTable");
auto* code = ctx.Input<framework::LoDTensor>("PCode"); auto* code = ctx.Input<framework::LoDTensor>("PathCode");
auto* label = ctx.Input<framework::LoDTensor>("Label"); auto label = detail::Ref(ctx.Input<framework::LoDTensor>("Label"));
auto* bias = ctx.Input<framework::LoDTensor>("Bias"); auto* bias = ctx.Input<framework::LoDTensor>("Bias");
auto* out = ctx.Output<framework::LoDTensor>("Out"); auto* out = ctx.Output<framework::LoDTensor>("Out");
auto* pre_out = ctx.Output<framework::LoDTensor>("PreOut"); auto* pre_out = ctx.Output<framework::LoDTensor>("PreOut");
...@@ -65,7 +62,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> { ...@@ -65,7 +62,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
} }
int64_t code_length = int64_t code_length =
path ? path->dims()[1] : math::FindLastSet(num_classes - 1); path ? path->dims()[1] : math::FindLastSet(num_classes - 1);
int64_t batch_size = in->dims()[0]; int64_t batch_size = in.dims()[0];
framework::LoDTensor sum; framework::LoDTensor sum;
auto& dev_ctx = ctx.template device_context<DeviceContext>(); auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto* pre_out_data = pre_out->mutable_data<T>( auto* pre_out_data = pre_out->mutable_data<T>(
...@@ -81,10 +78,10 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> { ...@@ -81,10 +78,10 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code; std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
if (!is_custom) { if (!is_custom) {
bit_code.reset(new math::MatrixBitCodeFunctor<T>(num_classes, bit_code.reset(new math::MatrixBitCodeFunctor<T>(num_classes,
label->data<int64_t>())); label.data<int64_t>()));
} else { } else {
bit_code.reset(new math::MatrixBitCodeFunctor<T>(path, code, bit_code.reset(new math::MatrixBitCodeFunctor<T>(*path, *code,
label->data<int64_t>())); label.data<int64_t>()));
} }
std::vector<int64_t> sum_dims({batch_size, 1UL}); std::vector<int64_t> sum_dims({batch_size, 1UL});
...@@ -95,7 +92,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> { ...@@ -95,7 +92,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
if (bias) { if (bias) {
bit_code->Add(*bias, pre_out); bit_code->Add(*bias, pre_out);
} }
bit_code->Mul(pre_out, *w, *in); bit_code->Mul(pre_out, w, in);
// clip to [-40, 40] // clip to [-40, 40]
Transform<DeviceContext> trans; Transform<DeviceContext> trans;
trans(ctx.template device_context<DeviceContext>(), pre_out_data, trans(ctx.template device_context<DeviceContext>(), pre_out_data,
...@@ -117,23 +114,23 @@ template <typename DeviceContext, typename T> ...@@ -117,23 +114,23 @@ template <typename DeviceContext, typename T>
class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> { class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<framework::LoDTensor>("X"); auto in = detail::Ref(ctx.Input<framework::LoDTensor>("X"));
auto* w = ctx.Input<framework::LoDTensor>("W"); auto w = detail::Ref(ctx.Input<framework::LoDTensor>("W"));
auto* path = ctx.Input<framework::LoDTensor>("PTable"); auto* path = ctx.Input<framework::LoDTensor>("PTable");
auto* code = ctx.Input<framework::LoDTensor>("PCode"); auto* code = ctx.Input<framework::LoDTensor>("PathCode");
auto* bias = ctx.Input<framework::LoDTensor>("Bias"); auto* bias = ctx.Input<framework::LoDTensor>("Bias");
auto* in_grad = auto* in_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X")); ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
bool is_sparse = ctx.Attr<bool>("is_sparse"); bool is_sparse = ctx.Attr<bool>("is_sparse");
auto& dev_ctx = ctx.template device_context<DeviceContext>(); auto& dev_ctx = ctx.template device_context<DeviceContext>();
math::SetConstant<DeviceContext, T> zero; math::SetConstant<DeviceContext, T> zero;
auto* label = ctx.Input<framework::LoDTensor>("Label"); auto label = detail::Ref(ctx.Input<framework::LoDTensor>("Label"));
auto* pre_out = ctx.Input<framework::LoDTensor>("PreOut"); auto pre_out = detail::Ref(ctx.Input<framework::LoDTensor>("PreOut"));
auto* out_grad = auto out_grad = detail::Ref(
ctx.Input<framework::LoDTensor>(framework::GradVarName("Out")); ctx.Input<framework::LoDTensor>(framework::GradVarName("Out")));
framework::LoDTensor pre_out_grad; framework::LoDTensor pre_out_grad;
pre_out_grad.mutable_data<T>(pre_out->dims(), ctx.GetPlace()); pre_out_grad.mutable_data<T>(pre_out.dims(), ctx.GetPlace());
in_grad->mutable_data<T>(ctx.GetPlace()); in_grad->mutable_data<T>(ctx.GetPlace());
zero(dev_ctx, in_grad, static_cast<T>(0.0)); zero(dev_ctx, in_grad, static_cast<T>(0.0));
...@@ -147,16 +144,16 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> { ...@@ -147,16 +144,16 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code; std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
if (!is_custom) { if (!is_custom) {
bit_code.reset(new math::MatrixBitCodeFunctor<T>(num_classes, bit_code.reset(new math::MatrixBitCodeFunctor<T>(num_classes,
label->data<int64_t>())); label.data<int64_t>()));
} else { } else {
bit_code.reset(new math::MatrixBitCodeFunctor<T>(path, code, bit_code.reset(new math::MatrixBitCodeFunctor<T>(*path, *code,
label->data<int64_t>())); label.data<int64_t>()));
} }
auto& place = *ctx.template device_context<DeviceContext>().eigen_device(); auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
auto pre_out_mat = EigenMatrix<T>::From(*pre_out); auto pre_out_mat = EigenMatrix<T>::From(pre_out);
auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad); auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad);
auto out_grad_mat = EigenMatrix<T>::From(*out_grad); auto out_grad_mat = EigenMatrix<T>::From(out_grad);
Eigen::array<int, 2> bcast{1, static_cast<int>(pre_out_grad.dims()[1])}; Eigen::array<int, 2> bcast{1, static_cast<int>(pre_out_grad.dims()[1])};
...@@ -181,17 +178,17 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> { ...@@ -181,17 +178,17 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
ctx.Output<framework::LoDTensor>(framework::GradVarName("W")); ctx.Output<framework::LoDTensor>(framework::GradVarName("W"));
w_grad->mutable_data<T>(ctx.GetPlace()); w_grad->mutable_data<T>(ctx.GetPlace());
zero(dev_ctx, w_grad, static_cast<T>(0.0)); zero(dev_ctx, w_grad, static_cast<T>(0.0));
bit_code->MulGradWeight(pre_out_grad, w_grad, *in); bit_code->MulGradWeight(pre_out_grad, w_grad, in);
} else { } else {
framework::Vector<int64_t> real_rows = cal_rows(*path); framework::Vector<int64_t> real_rows = PathToRows(*path);
auto* w_grad = auto* w_grad =
ctx.Output<framework::SelectedRows>(framework::GradVarName("W")); ctx.Output<framework::SelectedRows>(framework::GradVarName("W"));
w_grad->set_rows(real_rows); w_grad->set_rows(real_rows);
// Build a map of id -> row_index to speed up finding the index of one id // Build a map of id -> row_index to speed up finding the index of one id
w_grad->SyncIndex(); w_grad->SyncIndex();
w_grad->set_height(w->dims()[0]); w_grad->set_height(w.dims()[0]);
auto* w_grad_value = w_grad->mutable_value(); auto* w_grad_value = w_grad->mutable_value();
framework::DDim temp_dim(w->dims()); framework::DDim temp_dim(w.dims());
set(temp_dim, 0, real_rows.size()); set(temp_dim, 0, real_rows.size());
w_grad_value->mutable_data<T>(temp_dim, ctx.GetPlace()); w_grad_value->mutable_data<T>(temp_dim, ctx.GetPlace());
...@@ -211,9 +208,9 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> { ...@@ -211,9 +208,9 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
zero(dev_ctx, bias_grad_value, static_cast<T>(0.0)); zero(dev_ctx, bias_grad_value, static_cast<T>(0.0));
bit_code->AddGrad(pre_out_grad, bias_grad); bit_code->AddGrad(pre_out_grad, bias_grad);
} }
bit_code->MulGradWeight(pre_out_grad, w_grad, *in); bit_code->MulGradWeight(pre_out_grad, w_grad, in);
} }
bit_code->MulGradError(pre_out_grad, *w, in_grad); bit_code->MulGradError(pre_out_grad, w, in_grad);
} }
}; };
......
...@@ -19,12 +19,12 @@ namespace operators { ...@@ -19,12 +19,12 @@ namespace operators {
namespace math { namespace math {
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::Add(const framework::LoDTensor& vec, void MatrixBitCodeFunctor<T>::Add(const framework::Tensor& vec,
framework::LoDTensor* tmat) { framework::Tensor* tmat) {
size_t batch_size = tmat->dims()[0]; size_t batch_size = tmat->dims()[0];
size_t width = tmat->dims()[1]; size_t width = tmat->dims()[1];
for (size_t i = 0; i < batch_size; ++i) { for (size_t i = 0; i < batch_size; ++i) {
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
size_t index = code->calc_index(j); size_t index = code->calc_index(j);
...@@ -34,12 +34,12 @@ void MatrixBitCodeFunctor<T>::Add(const framework::LoDTensor& vec, ...@@ -34,12 +34,12 @@ void MatrixBitCodeFunctor<T>::Add(const framework::LoDTensor& vec,
} }
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat, void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
framework::LoDTensor* vec) { framework::Tensor* vec) {
size_t batch_size = tmat.dims()[0]; size_t batch_size = tmat.dims()[0];
size_t width = tmat.dims()[1]; size_t width = tmat.dims()[1];
for (size_t i = 0; i < batch_size; ++i) { for (size_t i = 0; i < batch_size; ++i) {
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
size_t index = code->calc_index(j); size_t index = code->calc_index(j);
...@@ -49,17 +49,16 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat, ...@@ -49,17 +49,16 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
} }
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat, void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
framework::SelectedRows* vec) { framework::SelectedRows* vec) {
size_t batch_size = tmat.dims()[0]; size_t batch_size = tmat.dims()[0];
size_t width = tmat.dims()[1]; size_t width = tmat.dims()[1];
for (size_t i = 0; i < batch_size; ++i) { for (size_t i = 0; i < batch_size; ++i) {
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
size_t index = code->calc_index(j); size_t index = code->calc_index(j);
int64_t row_index = int64_t row_index = vec->GetIndexFromId(static_cast<int64_t>(index));
vec->AutoGrownIndex(static_cast<int64_t>(index), false, true);
vec->mutable_value()->data<T>()[row_index] += vec->mutable_value()->data<T>()[row_index] +=
tmat.data<T>()[i * width + j]; tmat.data<T>()[i * width + j];
} }
...@@ -67,13 +66,13 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat, ...@@ -67,13 +66,13 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
} }
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::Sum(const framework::LoDTensor& tmat, void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor& tmat,
framework::LoDTensor* sum, T scale_sum) { framework::Tensor* sum, T scale_sum) {
size_t num_samples = tmat.dims()[0]; size_t num_samples = tmat.dims()[0];
size_t o_width = tmat.dims()[1]; size_t o_width = tmat.dims()[1];
for (size_t i = 0; i < num_samples; ++i) { for (size_t i = 0; i < num_samples; ++i) {
T sm = static_cast<T>(0.0); T sm = static_cast<T>(0.0);
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
if (code->calc_bit(j)) { if (code->calc_bit(j)) {
...@@ -87,9 +86,9 @@ void MatrixBitCodeFunctor<T>::Sum(const framework::LoDTensor& tmat, ...@@ -87,9 +86,9 @@ void MatrixBitCodeFunctor<T>::Sum(const framework::LoDTensor& tmat,
} }
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat, void MatrixBitCodeFunctor<T>::Mul(framework::Tensor* tmat,
const framework::LoDTensor& weight, const framework::Tensor& weight,
const framework::LoDTensor& input) { const framework::Tensor& input) {
size_t num_samples = tmat->dims()[0]; size_t num_samples = tmat->dims()[0];
size_t tmat_width = tmat->dims()[1]; size_t tmat_width = tmat->dims()[1];
size_t input_width = input.dims()[1]; size_t input_width = input.dims()[1];
...@@ -98,7 +97,7 @@ void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat, ...@@ -98,7 +97,7 @@ void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat,
auto weight_value = weight.data<T>(); auto weight_value = weight.data<T>();
auto input_value = input.data<T>(); auto input_value = input.data<T>();
for (size_t i = 0; i < num_samples; ++i) { for (size_t i = 0; i < num_samples; ++i) {
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
size_t index = code->calc_index(j); size_t index = code->calc_index(j);
...@@ -113,9 +112,9 @@ void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat, ...@@ -113,9 +112,9 @@ void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat,
} }
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat, void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
framework::LoDTensor* weight, framework::Tensor* weight,
const framework::LoDTensor& input) { const framework::Tensor& input) {
size_t num_samples = tmat.dims()[0]; size_t num_samples = tmat.dims()[0];
size_t input_width = input.dims()[1]; size_t input_width = input.dims()[1];
size_t tmat_width = tmat.dims()[1]; size_t tmat_width = tmat.dims()[1];
...@@ -124,7 +123,7 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat, ...@@ -124,7 +123,7 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
auto weight_value = weight->data<T>(); auto weight_value = weight->data<T>();
auto input_value = input.data<T>(); auto input_value = input.data<T>();
for (size_t i = 0; i < num_samples; ++i) { for (size_t i = 0; i < num_samples; ++i) {
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
size_t index = code->calc_index(j); size_t index = code->calc_index(j);
...@@ -138,9 +137,9 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat, ...@@ -138,9 +137,9 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
} }
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat, void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
framework::SelectedRows* weight, framework::SelectedRows* weight,
const framework::LoDTensor& input) { const framework::Tensor& input) {
size_t num_samples = tmat.dims()[0]; size_t num_samples = tmat.dims()[0];
size_t input_width = input.dims()[1]; size_t input_width = input.dims()[1];
size_t tmat_width = tmat.dims()[1]; size_t tmat_width = tmat.dims()[1];
...@@ -149,13 +148,12 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat, ...@@ -149,13 +148,12 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
auto weight_value = weight->mutable_value()->data<T>(); auto weight_value = weight->mutable_value()->data<T>();
auto input_value = input.data<T>(); auto input_value = input.data<T>();
for (size_t i = 0; i < num_samples; ++i) { for (size_t i = 0; i < num_samples; ++i) {
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
size_t index = code->calc_index(j); size_t index = code->calc_index(j);
for (size_t k = 0; k < input_width; ++k) { for (size_t k = 0; k < input_width; ++k) {
int64_t row_index = int64_t row_index = weight->GetIndexFromId(static_cast<int64_t>(index));
weight->AutoGrownIndex(static_cast<int64_t>(index), false, true);
weight_value[row_index * weight_width + k] += weight_value[row_index * weight_width + k] +=
tmat_value[i * tmat_width + j] * input_value[input_width * i + k]; tmat_value[i * tmat_width + j] * input_value[input_width * i + k];
} }
...@@ -164,9 +162,9 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat, ...@@ -164,9 +162,9 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
} }
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat, void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor& tmat,
const framework::LoDTensor& weight, const framework::Tensor& weight,
framework::LoDTensor* input) { framework::Tensor* input) {
size_t num_samples = tmat.dims()[0]; size_t num_samples = tmat.dims()[0];
size_t tmat_width = tmat.dims()[1]; size_t tmat_width = tmat.dims()[1];
size_t input_width = input->dims()[1]; size_t input_width = input->dims()[1];
...@@ -176,7 +174,7 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat, ...@@ -176,7 +174,7 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat,
auto input_value = input->data<T>(); auto input_value = input->data<T>();
for (size_t i = 0; i < num_samples; ++i) { for (size_t i = 0; i < num_samples; ++i) {
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
size_t index = code->calc_index(j); size_t index = code->calc_index(j);
...@@ -191,11 +189,11 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat, ...@@ -191,11 +189,11 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat,
} }
template <typename T> template <typename T>
void MatrixBitCodeFunctor<T>::Sub(framework::LoDTensor* tmat) { void MatrixBitCodeFunctor<T>::Sub(framework::Tensor* tmat) {
size_t num_samples = tmat->dims()[0]; size_t num_samples = tmat->dims()[0];
size_t o_width = tmat->dims()[1]; size_t o_width = tmat->dims()[1];
for (size_t i = 0; i < num_samples; ++i) { for (size_t i = 0; i < num_samples; ++i) {
auto code = code_table->get_code(i); auto code = code_table_->get_code(i);
int code_length = code->get_length(); int code_length = code->get_length();
for (int j = 0; j < code_length; ++j) { for (int j = 0; j < code_length; ++j) {
if (code->calc_bit(j)) { if (code->calc_bit(j)) {
......
...@@ -132,13 +132,15 @@ class SimpleCode : public Code { ...@@ -132,13 +132,15 @@ class SimpleCode : public Code {
size_t c_; size_t c_;
}; };
template <typename R> template <typename T>
class CustomCode : public Code { class CustomCode : public Code {
public: public:
CustomCode(const framework::LoDTensor* ptable, CustomCode(const framework::Tensor& ptable, const framework::Tensor& pcode,
const framework::LoDTensor* pcode, const int64_t* ids, const int64_t* ids, int index)
const int index) : ids_(ids), index_(index) {
: ptable_(ptable), pcode_(pcode), ids_(ids), index_(index) {} ptable_ = ptable.Slice(index, index + 1);
pcode_ = pcode.Slice(index, index + 1);
}
/** /**
* Here the id of root should be 1 rather than 0, thus the encoding of class c * Here the id of root should be 1 rather than 0, thus the encoding of class c
* is `c + num_classes` and all siblings can get the same weight index using * is `c + num_classes` and all siblings can get the same weight index using
...@@ -148,20 +150,13 @@ class CustomCode : public Code { ...@@ -148,20 +150,13 @@ class CustomCode : public Code {
* Binary classification path is the suffixes of encoding, thus leave out the * Binary classification path is the suffixes of encoding, thus leave out the
* left most bit in calc_bit. * left most bit in calc_bit.
*/ */
size_t calc_index(int bit) const { size_t calc_index(int bit) const { return ptable_.data<T>()[bit]; }
return ptable_ bool calc_bit(int bit) const { return pcode_.data<T>()[bit]; }
->data<R>()[index_ * static_cast<int>(ptable_->dims()[1]) + bit];
}
bool calc_bit(int bit) const {
return pcode_
->data<R>()[index_ * static_cast<int>(ptable_->dims()[1]) + bit];
}
int get_length() const { int get_length() const {
int length = 0; int length = 0;
for (int i = 0; i < static_cast<int>(ptable_->dims()[1]); i++) { for (int i = 0; i < static_cast<int>(ptable_.dims()[1]); i++) {
if (ptable_->data<R>()[index_ * static_cast<int>(ptable_->dims()[1]) + if (ptable_.data<T>()[i] >= 0) {
i] >= 0) {
length++; length++;
} else { } else {
return length; return length;
...@@ -171,15 +166,15 @@ class CustomCode : public Code { ...@@ -171,15 +166,15 @@ class CustomCode : public Code {
} }
private: private:
const framework::LoDTensor* ptable_; framework::Tensor ptable_;
const framework::LoDTensor* pcode_; framework::Tensor pcode_;
const int64_t* ids_; const int64_t* ids_;
const int index_; const int index_;
}; };
class SimpleCodeTable : public CodeTable { class SimpleCodeTable : public CodeTable {
public: public:
explicit SimpleCodeTable(size_t num_classes, const int64_t* ids) SimpleCodeTable(size_t num_classes, const int64_t* ids)
: num_classes_(num_classes), ids_(ids) {} : num_classes_(num_classes), ids_(ids) {}
std::unique_ptr<Code> get_code(int64_t code) const { std::unique_ptr<Code> get_code(int64_t code) const {
std::unique_ptr<Code> coder(new SimpleCode(code, num_classes_, ids_)); std::unique_ptr<Code> coder(new SimpleCode(code, num_classes_, ids_));
...@@ -193,97 +188,92 @@ class SimpleCodeTable : public CodeTable { ...@@ -193,97 +188,92 @@ class SimpleCodeTable : public CodeTable {
const int64_t* ids_; const int64_t* ids_;
}; };
template <typename R> template <typename T>
class CustomCodeTable : public CodeTable { class CustomCodeTable : public CodeTable {
public: public:
explicit CustomCodeTable(const framework::LoDTensor* ptable, CustomCodeTable(const framework::Tensor& ptable,
const framework::LoDTensor* pcode, const framework::Tensor& pcode, const int64_t* ids)
const int64_t* ids)
: ptable_(ptable), pcode_(pcode), ids_(ids) {} : ptable_(ptable), pcode_(pcode), ids_(ids) {}
std::unique_ptr<Code> get_code(int64_t code) const { std::unique_ptr<Code> get_code(int64_t code) const {
std::unique_ptr<Code> coder(new CustomCode<R>(ptable_, pcode_, ids_, code)); std::unique_ptr<Code> coder(new CustomCode<T>(ptable_, pcode_, ids_, code));
return coder; return coder;
} }
size_t size() const { return static_cast<size_t>(ptable_->dims()[1]); } size_t size() const { return static_cast<size_t>(ptable_.dims()[1]); }
int get_max_code_length() const { int get_max_code_length() const {
return static_cast<size_t>(ptable_->dims()[1]); return static_cast<size_t>(ptable_.dims()[1]);
} }
private: private:
const framework::LoDTensor* ptable_; const framework::Tensor& ptable_;
const framework::LoDTensor* pcode_; const framework::Tensor& pcode_;
const int64_t* ids_; const int64_t* ids_;
}; };
template <typename T> template <typename T>
class MatrixBitCodeFunctor { class MatrixBitCodeFunctor {
public: public:
explicit MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids) MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids)
: num_classes_(num_classes), : num_classes_(num_classes),
ids_(ids), ids_(ids),
code_table(new SimpleCodeTable(num_classes, ids)) {} code_table_(new SimpleCodeTable(num_classes, ids)) {}
explicit MatrixBitCodeFunctor(const framework::LoDTensor* ptable, MatrixBitCodeFunctor(const framework::Tensor& ptable,
const framework::LoDTensor* pcode, const framework::Tensor& pcode, const int64_t* ids)
const int64_t* ids) : num_classes_(static_cast<size_t>(ptable.dims()[1])),
: num_classes_(static_cast<size_t>(ptable->dims()[1])),
ids_(ids), ids_(ids),
code_table(new CustomCodeTable<int64_t>(ptable, pcode, ids)) {} code_table_(new CustomCodeTable<int64_t>(ptable, pcode, ids)) {}
/* For j < code_length /* For j < code_length
tmat(i, j) += vec(0, index(i, j)) tmat(i, j) += vec(0, index(i, j))
*/ */
void Add(const framework::LoDTensor& vec, framework::LoDTensor* tmat); void Add(const framework::Tensor& vec, framework::Tensor* tmat);
/* For j < code_length /* For j < code_length
vec(0, index(i, j)) += tmat(i, j) vec(0, index(i, j)) += tmat(i, j)
*/ */
void AddGrad(const framework::LoDTensor& tmat, framework::LoDTensor* vec); void AddGrad(const framework::Tensor& tmat, framework::Tensor* vec);
/* For selected rows For j < code_length /* For selected rows For j < code_length
vec(0, index(i, j)) += tmat(i, j) vec(0, index(i, j)) += tmat(i, j)
*/ */
void AddGrad(const framework::LoDTensor& tmat, framework::SelectedRows* vec); void AddGrad(const framework::Tensor& tmat, framework::SelectedRows* vec);
/* For j < code_length /* For j < code_length
sum(i, 0) = \sum_j bit(i, j) * tmat(i, j) sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
*/ */
void Sum(const framework::LoDTensor& tmat, framework::LoDTensor* sum, void Sum(const framework::Tensor& tmat, framework::Tensor* sum, T scale_sum);
T scale_sum);
/* For j < code_length /* For j < code_length
tmat(i, j) -= bit(i, j) tmat(i, j) -= bit(i, j)
*/ */
void Sub(framework::LoDTensor* tmat); void Sub(framework::Tensor* tmat);
/* For j < code_length /* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j)) input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/ */
void Mul(framework::LoDTensor* tmat, const framework::LoDTensor& weight, void Mul(framework::Tensor* tmat, const framework::Tensor& weight,
const framework::LoDTensor& input); const framework::Tensor& input);
/* For index(i, j) >= 0: /* For index(i, j) >= 0:
weight.row(index(i, j)) += tmat(i, j) * input.row(i) weight.row(index(i, j)) += tmat(i, j) * input.row(i)
*/ */
void MulGradWeight(const framework::LoDTensor& tmat, void MulGradWeight(const framework::Tensor& tmat, framework::Tensor* weight,
framework::LoDTensor* weight, const framework::Tensor& input);
const framework::LoDTensor& input);
/* For SelectedRows Weight, For index(i, j) >= 0: /* For SelectedRows Weight, For index(i, j) >= 0:
weight.row(index(i, j)) += tmat(i, j) * input.row(i) weight.row(index(i, j)) += tmat(i, j) * input.row(i)
*/ */
void MulGradWeight(const framework::LoDTensor& tmat, void MulGradWeight(const framework::Tensor& tmat,
framework::SelectedRows* weight, framework::SelectedRows* weight,
const framework::LoDTensor& input); const framework::Tensor& input);
/* For j < code_length /* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j)) input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/ */
void MulGradError(const framework::LoDTensor& tmat, void MulGradError(const framework::Tensor& tmat,
const framework::LoDTensor& weight, const framework::Tensor& weight, framework::Tensor* input);
framework::LoDTensor* input);
size_t num_classes_; size_t num_classes_;
const int64_t* ids_; const int64_t* ids_;
std::unique_ptr<CodeTable> code_table; std::unique_ptr<CodeTable> code_table_;
}; };
} // namespace math } // namespace math
} // namespace operators } // namespace operators
......
...@@ -4639,7 +4639,7 @@ def hsigmoid(input, ...@@ -4639,7 +4639,7 @@ def hsigmoid(input,
"X": input, "X": input,
"W": weights, "W": weights,
"PTable": ptable, "PTable": ptable,
"PCode": pcode, "PathCode": pcode,
"Label": label "Label": label
} }
if helper.bias_attr: if helper.bias_attr:
......
...@@ -185,7 +185,7 @@ class TestHSigmoidOpSparse(OpTest): ...@@ -185,7 +185,7 @@ class TestHSigmoidOpSparse(OpTest):
'X': x, 'X': x,
'W': w, 'W': w,
'PTable': ptable, 'PTable': ptable,
'PCode': pcode, 'PathCode': pcode,
'Label': label, 'Label': label,
'Bias': bias 'Bias': bias
} }
...@@ -285,7 +285,7 @@ class TestHSigmoidOpWithCostumTree(OpTest): ...@@ -285,7 +285,7 @@ class TestHSigmoidOpWithCostumTree(OpTest):
'X': x, 'X': x,
'W': w, 'W': w,
'PTable': ptable, 'PTable': ptable,
'PCode': pcode, 'PathCode': pcode,
'Label': label, 'Label': label,
'Bias': bias 'Bias': bias
} }
...@@ -322,7 +322,7 @@ class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest): ...@@ -322,7 +322,7 @@ class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest):
'X': x, 'X': x,
'W': w, 'W': w,
'PTable': ptable, 'PTable': ptable,
'PCode': pcode, 'PathCode': pcode,
'Label': label, 'Label': label,
} }
pre_output, out = hsigmoidWithCustomTree( pre_output, out = hsigmoidWithCustomTree(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册