Commit 80ce7edb authored by Yancey1989

make forward correct

Parent 74f519ff
@@ -70,7 +70,7 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
   }
 
  protected:
-  framework::OpKernelType GetKernelType(
+  framework::OpKernelType GetActualKernelType(
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<framework::Tensor>("X")->type()),
@@ -96,7 +96,7 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
   }
 
  protected:
-  framework::OpKernelType GetKernelType(
+  framework::OpKernelType GetActualKernelType(
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<framework::Tensor>("X")->type()),
...
@@ -49,34 +49,31 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
     auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
     auto& device_ctx = ctx.template device_context<DeviceContext>();
     math::RowwiseSum<DeviceContext, T> row_sum;
-    math::MatrixBitCodeFunctor<T> bit_code;
+    math::MatrixBitCodeFunctor<T> bit_code(num_classes, ids->data<int64_t>());
     std::vector<int64_t> sum_dims({batch_size, 1UL});
     sum.mutable_data<T>(framework::make_ddim(sum_dims), ctx.GetPlace());
     auto sum_mat = EigenMatrix<T>::From(sum);
     out->mutable_data<T>(ctx.GetPlace());
     auto out_mat = framework::EigenVector<T>::Flatten(*out);
     if (bias) {
-      bit_code.Add(num_classes, ids->data<int64_t>(), pre_out, *bias);
+      bit_code.Add(pre_out, *bias);
     }
-    for (int i = 0; i < in->dims()[0]; ++i) {
-      bit_code.Mul(num_classes, ids->data<int64_t>(), pre_out,
-                   w->Slice(i, i + 1), in->Slice(i, i + 1));
+    for (int64_t i = 0; i < batch_size; ++i) {
+      auto w_i = w->Slice(i, i + 1);
+      bit_code.Mul(pre_out, w_i, *in);
     }
     // clip the matrix with (-40, 40)
     Transform<DeviceContext> trans;
     trans(ctx.template device_context<DeviceContext>(), pre_out_data,
           pre_out_data + pre_out.numel(), pre_out_data,
           ClipFunctor<T>(static_cast<T>(-40.0), static_cast<T>(40.0)));
-    bit_code.Sum(num_classes, ids->data<int64_t>(), pre_out, *out,
-                 static_cast<T>(-1));
+    bit_code.Sum(pre_out, *out, static_cast<T>(-1));
     // softrelu with threshold is 40.0
     trans(ctx.template device_context<DeviceContext>(), pre_out_data,
           pre_out_data + pre_out.numel(), pre_out_data,
           ClipFunctor<T>(static_cast<T>(-40.0), static_cast<T>(40.0)));
     pre_out_mat.device(place) = (static_cast<T>(1.0) + pre_out_mat.exp()).log();
     row_sum(device_ctx, pre_out, &sum);
     out_mat.device(place) = sum_mat + out_mat;
   }
@@ -103,28 +100,26 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
     auto pre_out_mat = EigenMatrix<T>::From(pre_out);
     // init pre_out matrix with {1.0}
     math::SetConstant<DeviceContext, T> one;
-    math::MatrixBitCodeFunctor<T> bit_code;
+    math::MatrixBitCodeFunctor<T> bit_code(num_classes, ids->data<int64_t>());
     one(device_ctx, &pre_out, static_cast<T>(1.0));
     // softrelu derivative
     pre_out_mat.device(place) =
         pre_out_mat * (static_cast<T>(1.0) - static_cast<T>(1.0) / pre_out_mat);
-    bit_code.Sub(num_classes, ids->data<int64_t>(), pre_out);
+    bit_code.Sub(pre_out);
     if (bias) {
       bias->mutable_data<T>(ctx.GetPlace());
-      bit_code.AddGrad(num_classes, ids->data<int64_t>(), pre_out, *bias);
+      bit_code.AddGrad(pre_out, *bias);
     }
     in_grad->mutable_data<T>(ctx.GetPlace());
     w->mutable_data<T>(ctx.GetPlace());
-    for (int i = 0; i < in_grad->dims()[0]; ++i) {
-      auto p_sliced = w->Slice(i, i + 1);
-      auto in_sliced = in->Slice(i, i + 1);
-      auto in_grad_sliced = in_grad->Slice(i, i + 1);
-      bit_code.MulGradWeight(num_classes, ids->data<int64_t>(), pre_out,
-                             p_sliced, in_sliced);
-      bit_code.MulGradError(num_classes, ids->data<int64_t>(), pre_out,
-                            p_sliced, in_grad_sliced);
+    for (int i = 0; i < batch_size; ++i) {
+      auto w_i = w->Slice(i, i + 1);
+      // auto in_i = in->Slice(i, i + 1);
+      // auto in_grad_i = in_grad->Slice(i, i + 1);
+      bit_code.MulGradWeight(pre_out, w_i, *in);
+      bit_code.MulGradError(pre_out, w_i, *in_grad);
     }
   }
 };
...
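Read as a whole, the forward kernel above amounts to the following per-sample computation, written in the same notation as the functor's doc comments (j runs over the sample's code length, pre_out is clipped to (-40, 40) before both uses, and the per-sample weight slice w_i = w->Slice(i, i + 1) is glossed over here):

    out(i) = \sum_j log(1 + exp(pre_out(i, j))) - \sum_j bit(i, j) * pre_out(i, j)
    pre_out(i, j) = bias(0, index(i, j)) + \sum_k W(index(i, j), k) * x(i, k)

This matches the reference hsigmoid() added to the Python test further down.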
@@ -62,13 +62,13 @@ void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
 template <typename DeviceContext, typename T>
 void RowwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
                                               const framework::Tensor& input,
-                                              framework::Tensor* vector) {
+                                              framework::Tensor* out) {
   auto in_dims = input.dims();
   auto size = input.numel() / in_dims[1];
-  PADDLE_ENFORCE_EQ(vector->numel(), size);
-  auto in = framework::EigenMatrix<T, Eigen::ColMajor>::From(input);
-  auto vec = framework::EigenMatrix<T, Eigen::ColMajor>::From(*vector);
+  PADDLE_ENFORCE_EQ(out->numel(), size);
+  auto in = framework::EigenMatrix<T>::From(input);
+  auto vec = framework::EigenVector<T>::Flatten(*out);
   vec.device(*context.eigen_device()) = in.sum(Eigen::array<int, 1>({{1}}));
 }
...
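The rewritten RowwiseSum flattens the output and sums along dimension 1, so the output must hold one element per input row. A minimal NumPy sketch of that contract, with illustrative array names only:

```python
import numpy as np

pre_out = np.random.rand(4, 3)             # e.g. (batch_size, code_length)
row_sum = pre_out.sum(axis=1)              # one entry per row, shape (4,)
assert row_sum.size == pre_out.shape[0]    # mirrors PADDLE_ENFORCE_EQ(out->numel(), size)
```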
@@ -22,7 +22,7 @@ namespace math {
  * CodeTable class should support 3 functions:
  *
  * size_t size()
- *   return the number of codes
+ *   return the number of ids
  *
  * int getMaxCodeLength()
  *   return the maximal code length
@@ -45,56 +45,47 @@ namespace math {
  *
  */
-/*
-  for i:
-   for j < codeLength:
-     op(a(i, j), b(0, index(i, j)))
-*/
-template <typename T, class CodeTable, class Op>
-static void AddByBitCodeT(Op op, CodeTable code_table, const int64_t* codes,
-                          const framework::Tensor& tmat,
-                          const framework::Tensor& vec) {
-  size_t num_sample = tmat.dims()[0];
-  size_t width = vec.dims()[1];
-  for (size_t i = 0; i < num_sample; ++i) {
-    auto code = code_table(static_cast<size_t>(codes[i]));
+template <typename T>
+void MatrixBitCodeFunctor<T>::Add(framework::Tensor& tmat,
+                                  const framework::Tensor& vec) {
+  SimpleCodeTable code_table(num_classes_);
+  size_t batch_size = tmat.dims()[0];
+  size_t width = tmat.dims()[1];
+  for (size_t i = 0; i < batch_size; ++i) {
+    auto code = code_table(static_cast<size_t>(ids_[i]));
     int code_length = code.get_length();
     for (int j = 0; j < code_length; ++j) {
       size_t index = code.calc_index(j);
-      auto t = tmat.data<T>()[i * width + j];
-      auto v = vec.data<T>()[index];
-      op(t, v);
+      tmat.data<T>()[i * width + j] += vec.data<T>()[index];
     }
   }
 }
-template <typename T, class CodeTable>
-void SubByBitCodeT(CodeTable code_table, const int64_t* codes,
-                   framework::Tensor& tmat) {
-  // size_t max_code_length = code_table.get_max_code_length();
-  size_t num_samples = tmat.dims()[0];
-  size_t o_width = tmat.dims()[1];
-  for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table(static_cast<size_t>(codes[i]));
+template <typename T>
+void MatrixBitCodeFunctor<T>::AddGrad(framework::Tensor& tmat,
+                                      framework::Tensor& vec) {
+  SimpleCodeTable code_table(num_classes_);
+  size_t batch_size = tmat.dims()[0];
+  size_t width = tmat.dims()[1];
+  for (size_t i = 0; i < batch_size; ++i) {
+    auto code = code_table(static_cast<size_t>(ids_[i]));
     int code_length = code.get_length();
     for (int j = 0; j < code_length; ++j) {
-      if (code.calc_bit(j)) {
-        tmat.data<T>()[i * o_width + j] -= 1;
-      }
+      size_t index = code.calc_index(j);
+      vec.data<T>()[index] += tmat.data<T>()[i * width + j];
     }
   }
 }
-template <typename T, class CodeTable>
-void SumByBitCodeT(CodeTable code_table, const int64_t* codes,
-                   framework::Tensor& tmat, framework::Tensor& sum,
-                   const T& scale_sum) {
-  // size_t max_code_length = code_table.get_max_code_length();
+template <typename T>
+void MatrixBitCodeFunctor<T>::Sum(framework::Tensor& tmat,
+                                  framework::Tensor& sum, T scale_sum) {
+  SimpleCodeTable code_table(num_classes_);
   size_t num_samples = tmat.dims()[0];
   size_t o_width = tmat.dims()[1];
   for (size_t i = 0; i < num_samples; ++i) {
     T sm = static_cast<T>(0.0);
-    auto code = code_table(static_cast<size_t>(codes[i]));
+    auto code = code_table(static_cast<size_t>(ids_[i]));
     int code_length = code.get_length();
     for (int j = 0; j < code_length; ++j) {
       if (code.calc_bit(j)) {
@@ -106,116 +97,99 @@ void SumByBitCodeT(CodeTable code_table, const int64_t* codes,
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::Add(size_t num_classes, const int64_t* codes,
-                                  framework::Tensor& tmat,
-                                  const framework::Tensor& vec) {
-  auto op = [](T& t, const T& v) { t += v; };
-  AddByBitCodeT<T>(op, SimpleCodeTable(num_classes), codes, tmat, vec);
-}
-template <typename T>
-void MatrixBitCodeFunctor<T>::AddGrad(size_t num_classes, const int64_t* codes,
-                                      framework::Tensor& tmat,
-                                      framework::Tensor& vec) {
-  auto op = [](T& t, T& v) { v += t; };
-  AddByBitCodeT<T>(op, SimpleCodeTable(num_classes), codes, tmat, vec);
-}
-template <typename T>
-void MatrixBitCodeFunctor<T>::Sum(size_t num_classes, const int64_t* codes,
-                                  framework::Tensor& tmat,
-                                  framework::Tensor& sum, T scale_sum) {
-  SumByBitCodeT<T>(SimpleCodeTable(num_classes), codes, tmat, sum, scale_sum);
-}
-template <typename T>
-void MatrixBitCodeFunctor<T>::Mul(size_t num_classes, const int64_t* codes,
-                                  framework::Tensor& tmat,
+void MatrixBitCodeFunctor<T>::Mul(framework::Tensor& tmat,
                                   const framework::Tensor& weight,
                                   const framework::Tensor& input) {
+  SimpleCodeTable code_table(num_classes_);
   size_t num_samples = tmat.dims()[0];
   size_t tmat_width = tmat.dims()[1];
   size_t input_width = input.dims()[1];
-  size_t weight_width = weight.dims()[1];
-  auto tmat_p = tmat.data<T>();
-  auto weight_p = weight.data<T>();
-  auto input_p = input.data<T>();
-  auto code_table = SimpleCodeTable(num_classes);
+  size_t weight_width = weight.dims()[2];
+  auto tmat_value = tmat.data<T>();
+  auto weight_value = weight.data<T>();
+  auto input_value = input.data<T>();
   for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table(static_cast<size_t>(codes[i]));
+    auto code = code_table(static_cast<size_t>(ids_[i]));
     int code_length = code.get_length();
     for (int j = 0; j < code_length; ++j) {
       size_t index = code.calc_index(j);
       T sum = static_cast<T>(0.0);
       for (size_t k = 0; k < input_width; ++k) {
-        sum +=
-            weight_p[weight_width * index + k] * input_p[input_width * i + k];
+        sum += weight_value[weight_width * index + k] *
+               input_value[input_width * i + k];
       }
-      tmat_p[i * tmat_width + j] += sum;
+      tmat_value[i * tmat_width + j] += sum;
     }
   }
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::MulGradWeight(size_t num_classes,
-                                            const int64_t* codes,
-                                            const framework::Tensor& tmat,
+void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
                                             framework::Tensor& weight,
                                             const framework::Tensor& input) {
+  SimpleCodeTable code_table(num_classes_);
   size_t num_samples = tmat.dims()[0];
   size_t input_width = input.dims()[1];
   size_t weight_width = weight.dims()[1];
-  auto tmat_p = tmat.data<T>();
-  auto weight_p = weight.data<T>();
-  auto input_p = input.data<T>();
-  auto code_table = SimpleCodeTable(num_classes);
+  auto tmat_value = tmat.data<T>();
+  auto weight_value = weight.data<T>();
+  auto input_value = input.data<T>();
   for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table(static_cast<size_t>(codes[i]));
+    auto code = code_table(static_cast<size_t>(ids_[i]));
     int code_length = code.get_length();
     for (int j = 0; j < code_length; ++j) {
       size_t index = code.calc_index(j);
       for (size_t k = 0; k < input_width; ++k) {
-        weight_p[weight_width * index * k] +=
-            tmat_p[i * weight_width * j] * input_p[input_width * i + k];
+        weight_value[weight_width * index * k] +=
+            tmat_value[i * weight_width * j] * input_value[input_width * i + k];
      }
    }
  }
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::MulGradError(size_t num_classes,
-                                           const int64_t* codes,
-                                           const framework::Tensor& tmat,
+void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor& tmat,
                                            const framework::Tensor& weight,
                                            framework::Tensor& input) {
+  SimpleCodeTable code_table(num_classes_);
   size_t num_samples = tmat.dims()[0];
   size_t input_width = input.dims()[1];
   size_t weight_width = weight.dims()[1];
-  auto tmat_p = tmat.data<T>();
-  auto weight_p = weight.data<T>();
-  auto input_p = input.data<T>();
-  auto code_table = SimpleCodeTable(num_classes);
+  auto tmat_value = tmat.data<T>();
+  auto weight_value = weight.data<T>();
+  auto input_value = input.data<T>();
   for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table(static_cast<size_t>(codes[i]));
+    auto code = code_table(static_cast<size_t>(ids_[i]));
     int code_length = code.get_length();
     for (int j = 0; j < code_length; ++j) {
       size_t index = code.calc_index(j);
       for (size_t k = 0; k < input_width; ++k) {
-        input_p[weight_width * index * k] +=
-            tmat_p[i * weight_width * j] * weight_p[weight_width * i + k];
+        input_value[weight_width * index * k] +=
+            tmat_value[i * weight_width * j] *
+            weight_value[weight_width * i + k];
       }
     }
   }
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::Sub(size_t num_classes, const int64_t* codes,
-                                  framework::Tensor& tmat) {
-  SubByBitCodeT<T>(SimpleCodeTable(num_classes), codes, tmat);
+void MatrixBitCodeFunctor<T>::Sub(framework::Tensor& tmat) {
+  SimpleCodeTable code_table(num_classes_);
+  size_t num_samples = tmat.dims()[0];
+  size_t o_width = tmat.dims()[1];
+  for (size_t i = 0; i < num_samples; ++i) {
+    auto code = code_table(static_cast<size_t>(ids_[i]));
+    int code_length = code.get_length();
+    for (int j = 0; j < code_length; ++j) {
+      if (code.calc_bit(j)) {
+        tmat.data<T>()[i * o_width + j] -= 1;
+      }
+    }
+  }
 }
 template class MatrixBitCodeFunctor<float>;
...
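The functor now owns num_classes_ and ids_ and constructs a SimpleCodeTable itself. SimpleCodeTable is not part of this diff, but the CodeTable class added to the Python test below mirrors the encoding it is used with: a label's path code is c = num_classes + label, calc_index(j) gives the parameter row for the j-th node on the path, and calc_bit(j) gives the branch taken there. A small sketch under that assumption:

```python
import math

def find_latest_set(num):
    # position of the highest set bit, as in the test's reference code
    return 1 + int(math.floor(math.log(num, 2)))

num_classes, label = 6, 4
c = num_classes + label                                 # path code for this label
length = find_latest_set(c) - 1                         # internal nodes on the path
indices = [(c >> (j + 1)) - 1 for j in range(length)]   # rows of W / bias entries touched
bits = [bool(c & (1 << j)) for j in range(length)]      # branch taken at each node
print(length, indices, bits)                            # 3 [4, 1, 0] [False, True, False]
```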
@@ -63,46 +63,45 @@ struct SimpleCodeTable {
 template <typename T>
 class MatrixBitCodeFunctor {
  public:
+  explicit MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids)
+      : num_classes_(num_classes), ids_(ids) {}
   /* For j < code_length
        tmat(i, j) += vec(0, index(i, j))
   */
-  void Add(size_t num_classes, const int64_t* codes, framework::Tensor& tmat,
-           const framework::Tensor& vec);
+  void Add(framework::Tensor& tmat, const framework::Tensor& vec);
   /* For j < code_length
        vec(0, index(i, j)) += tmat(i, j)
   */
-  void AddGrad(size_t num_classes, const int64_t* codes,
-               framework::Tensor& tmat, framework::Tensor& vec);
+  void AddGrad(framework::Tensor& tmat, framework::Tensor& vec);
   /* For j < code_length
       sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
   */
-  void Sum(size_t num_classes, const int64_t* codes, framework::Tensor& tmat,
-           framework::Tensor& sum, T scale_sum);
+  void Sum(framework::Tensor& tmat, framework::Tensor& sum, T scale_sum);
   /* For j < code_length
       tmat(i, j) -= bit(i, j)
   */
-  void Sub(size_t num_classes, const int64_t* codes, framework::Tensor& tmat);
+  void Sub(framework::Tensor& tmat);
   /* For j < code_length
       input.row(i) += tmat(i, j) * weight.row(index(i, j))
   */
-  void Mul(size_t num_classes, const int64_t* codes, framework::Tensor& tmat,
-           const framework::Tensor& weight, const framework::Tensor& input);
+  void Mul(framework::Tensor& tmat, const framework::Tensor& weight,
+           const framework::Tensor& input);
   /* For index(i, j) >= 0:
       weight.row(index(i, j)) += tmat(i, j) * input.row(i)
   */
-  void MulGradWeight(size_t num_classes, const int64_t* codes,
-                     const framework::Tensor& tmat, framework::Tensor& weight,
+  void MulGradWeight(const framework::Tensor& tmat, framework::Tensor& weight,
                      const framework::Tensor& input);
   /* For j < code_length
       input.row(i) += tmat(i, j) * weight.row(index(i, j))
   */
-  void MulGradError(size_t num_classes, const int64_t* codes,
-                    const framework::Tensor& tmat,
-                    const framework::Tensor& weight, framework::Tensor& input);
+  void MulGradError(const framework::Tensor& tmat,
                     const framework::Tensor& weight, framework::Tensor& input);
+
+  size_t num_classes_;
+  const int64_t* ids_;
 };
 }  // namespace math
 }  // namespace operators
...
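The doc comments above state the per-element contracts. A rough NumPy rendering of Add and Sum under the same per-sample code assumption; the indices/bits dictionary layout is invented here for illustration, and scale_sum is taken to multiply the bit-weighted sum, which is how the kernel calls Sum with -1:

```python
import numpy as np

def add_by_bit_code(tmat, vec, codes):
    # tmat(i, j) += vec(0, index(i, j)) for j < code_length of sample i
    for i, code in enumerate(codes):
        for j, idx in enumerate(code["indices"]):
            tmat[i, j] += vec[0, idx]

def sum_by_bit_code(tmat, codes, scale_sum):
    # sum(i, 0) = scale_sum * \sum_j bit(i, j) * tmat(i, j)
    out = np.zeros((tmat.shape[0], 1))
    for i, code in enumerate(codes):
        out[i, 0] = scale_sum * sum(
            t for t, b in zip(tmat[i], code["bits"]) if b)
    return out

codes = [{"indices": [4, 1, 0], "bits": [False, True, False]}]  # one sample
tmat = np.zeros((1, 3))
vec = np.random.rand(1, 5)
add_by_bit_code(tmat, vec, codes)
print(sum_by_bit_code(tmat, codes, -1.0))  # equals -vec[0, 1]
```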
@@ -49,6 +49,7 @@ def create_op(scope, op_type, inputs, outputs, attrs):
     for attr_name in Operator.get_op_attr_names(op_type):
         if attr_name in attrs:
             kwargs[attr_name] = attrs[attr_name]
+
     return Operator(op_type, **kwargs)
@@ -104,8 +105,6 @@ def get_numeric_gradient(scope,
         tensor_to_check_dtype = np.float32
     elif tensor_to_check_dtype == core.DataType.FP64:
         tensor_to_check_dtype = np.float64
-    elif tensor_to_check_dtype == core.DataType.INT64:
-        tensor_to_check_dtype = np.int64
     else:
         raise ValueError("Not supported data type " + str(
             tensor_to_check_dtype))
@@ -115,8 +114,6 @@ def get_numeric_gradient(scope,
     def __get_elem__(tensor, i):
         if tensor_to_check_dtype == np.float32:
             return tensor.get_float_element(i)
-        elif tensor_to_check_dtype == np.int64:
-            return tensor.get_int64_element(i)
         else:
             return tensor.get_double_element(i)
@@ -356,11 +353,13 @@ class OpTest(unittest.TestCase):
         op_attrs = self.attrs if hasattr(self, "attrs") else dict()
         self.op = create_op(self.scope, self.op_type, op_inputs, op_outputs,
                             op_attrs)
+
         if no_grad_set is None:
             no_grad_set = set()
         if not type(output_names) is list:
             output_names = [output_names]
+
         numeric_grads = user_defined_grads or [
             get_numeric_gradient(
                 self.scope,
@@ -456,7 +455,9 @@ class OpTest(unittest.TestCase):
         # infer variable type and infer shape in compile-time
         op.desc.infer_var_type(block.desc)
         op.desc.infer_shape(block.desc)
+
         mean_inputs = map(block.var, output_names)
+
         if len(mean_inputs) == 1:
             loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1])
             op = block.append_op(
...
 import unittest
 import numpy as np
 from op_test import OpTest
+import math
+
+
+def find_latest_set(num):
+    return 1 + int(math.floor(math.log(num, 2)))
+
+
+class CodeTable(object):
+    def __init__(self, num_classes, code):
+        self.c = num_classes + code
+
+    def cal_index(self, bit):
+        return (self.c >> (bit + 1)) - 1
+
+    def get_length(self):
+        return find_latest_set(self.c) - 1
+
+    def cal_bit(self, bit):
+        return self.c & (1 << bit)
+
+
+def hsigmoid(x, w, ids, bias, num_classes):
+    # code length =
+    # initialize pre out with dims={batch_size, code_length}
+    batch_size = x.shape[0]
+    code_length = find_latest_set(num_classes - 1)
+    code_table = [0 for _ in range(code_length)]
+    pre_output = np.zeros((batch_size, code_length))
+    pre_sum = np.zeros((batch_size, 1))
+    out = np.zeros((batch_size, 1)).astype("float32")
+    # pre_out += code(bias)
+    for i in xrange(batch_size):
+        code_table = CodeTable(num_classes, ids[i])
+        length = code_table.get_length()
+        for j in xrange(length):
+            idx = code_table.cal_index(j)
+            pre_output[i][j] += bias[0][idx]
+    # pre_out += code(w) * x
+    for i in xrange(batch_size):
+        for j in xrange(batch_size):
+            code_table = CodeTable(num_classes, ids[j])
+            length = code_table.get_length()
+            for k in xrange(length):
+                idx = code_table.cal_index(k)
+                sum = 0.0
+                for l in xrange(x.shape[1]):
+                    sum += w[i][idx][l] * x[j][l]
+                pre_output[j][k] += sum
+    # clip[-40.0, 40.0]
+    np.clip(pre_output, -40.0, 40.0)
+    # out(i, 0) = \sum_j bit(i, j) * preout(i, j)
+    for i in xrange(batch_size):
+        code_table = CodeTable(num_classes, ids[i])
+        length = code_table.get_length()
+        sum = 0.0
+        for j in xrange(length):
+            if code_table.cal_bit(j):
+                sum += pre_output[i][j]
+        out[i] = -1.0 * sum
+    # soft relu
+    np.clip(pre_output, -40.0, 40.0)
+    pre_output = np.log(1 + np.exp(pre_output))
+    pre_sum = pre_output.sum(1).reshape((batch_size, 1))
+    out += pre_sum
+    return out
 class TestHSigmoidOp(OpTest):
@@ -16,9 +81,8 @@ class TestHSigmoidOp(OpTest):
         bias = np.random.random((1, num_classes - 1)).astype("float32")
         self.inputs = {'X': x, 'W': w, 'Ids': ids, 'Bias': bias}
         self.attrs = {'num_classes': num_classes}
-        self.outputs = {
-            'Out': np.random.random((batch_size, 1)).astype("float32")
-        }
+        out = hsigmoid(x, w, ids, bias, num_classes)
+        self.outputs = {'Out': out}
 
     def test_check_output(self):
         self.check_output()
...
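Outside the OpTest harness, the new hsigmoid reference can be exercised directly (under Python 2, matching the xrange usage above). The shapes below are assumptions inferred from how hsigmoid indexes its arguments; the test's own x/w/ids setup is not shown in this hunk:

```python
import numpy as np

batch_size, num_classes, feature_size = 4, 6, 8
x = np.random.random((batch_size, feature_size)).astype("float32")
w = np.random.random((batch_size, num_classes - 1, feature_size)).astype("float32")
ids = np.random.randint(0, num_classes, batch_size)
bias = np.random.random((1, num_classes - 1)).astype("float32")

out = hsigmoid(x, w, ids, bias, num_classes)
print(out.shape)  # (batch_size, 1)
```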