提交 5be10872 编写于 作者: Q qijun

add selected_rows add cpu functor

上级 9165235a
...@@ -91,7 +91,8 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel<T> { ...@@ -91,7 +91,8 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel<T> {
.stream()>>>(dx_data, dy_data, x_data, label_data, .stream()>>>(dx_data, dy_data, x_data, label_data,
batch_size, class_num); batch_size, class_num);
} else { } else {
math::SetConstant<platform::GPUPlace, T>(ctx.device_context(), dx, 0); math::SetConstant<platform::GPUPlace, T> functor;
functor(ctx.device_context(), dx, 0);
auto* label_data = label->data<int>(); auto* label_data = label->data<int>();
grid = (batch_size + block - 1) / block; grid = (batch_size + block - 1) / block;
CrossEntropyGradientKernel<T><<< CrossEntropyGradientKernel<T><<<
......
...@@ -70,7 +70,8 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> { ...@@ -70,7 +70,8 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
const T* x_data = x->data<T>(); const T* x_data = x->data<T>();
const int* label_data = label->data<int>(); const int* label_data = label->data<int>();
math::SetConstant<platform::CPUPlace, T>(ctx.device_context(), dx, 0); math::SetConstant<platform::CPUPlace, T> functor;
functor(ctx.device_context(), dx, 0);
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
PADDLE_ASSERT(label_data[i] >= 0 || label_data[i] < class_num); PADDLE_ASSERT(label_data[i] >= 0 || label_data[i] < class_num);
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include <set>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -130,6 +131,65 @@ void matmul<platform::CPUPlace, double>( ...@@ -130,6 +131,65 @@ void matmul<platform::CPUPlace, double>(
matrix_b.data<double>(), beta, matrix_out->data<double>()); matrix_b.data<double>(), beta, matrix_out->data<double>());
} }
template struct SetConstant<platform::CPUPlace, float>;
namespace detail {
// Returns the index of the first element of `rows` equal to `value`.
//
// When `value` does not occur in `rows` (including when `rows` is empty) the
// result is 0, so a return value of 0 does not by itself tell the caller
// whether the value was found at index 0 or not found at all.
size_t FindPos(const std::vector<int64_t>& rows, int64_t value) {
  const size_t count = rows.size();
  size_t pos = 0;
  // Advance until the first match or the end of the vector.
  while (pos < count && rows[pos] != value) {
    ++pos;
  }
  // Running off the end means "no match", which is reported as 0.
  return pos < count ? pos : 0;
}
} // namespace detail
// CPU specialization: accumulates the rows of two SelectedRows operands into
// an output whose height, rows and value tensor the caller has already set up.
//
// The functor does not allocate or resize anything. It zero-fills the output
// value tensor and then, for every row of `input1` and `input2`, adds that row
// element-wise into the output row that carries the same row id.
//
// Enforced preconditions:
//   - input1, input2 and output have the same height;
//   - input1, input2 and output have the same number of elements per row
//     (value.numel() / rows.size());
//   - every row id of input1 and input2 is present in output->rows().
// Each rows().size() is used as a divisor, so none of the three row lists may
// be empty.
template <typename T>
struct SelectedRowsAdd<platform::CPUPlace, T> {
  void operator()(const platform::DeviceContext& context,
                  const framework::SelectedRows& input1,
                  const framework::SelectedRows& input2,
                  framework::SelectedRows* output) {
    auto in1_height = input1.height();
    PADDLE_ENFORCE_EQ(in1_height, input2.height());
    PADDLE_ENFORCE_EQ(in1_height, output->height());

    auto& in1_rows = input1.rows();
    auto& in2_rows = input2.rows();
    auto& out_rows = output->rows();

    auto* out_value = output->mutable_value();
    auto& in1_value = input1.value();
    auto& in2_value = input2.value();

    // Elements per row; all three operands must agree on it.
    auto in1_row_numel = in1_value.numel() / in1_rows.size();
    PADDLE_ENFORCE_EQ(in1_row_numel, in2_value.numel() / in2_rows.size());
    PADDLE_ENFORCE_EQ(in1_row_numel, out_value->numel() / out_rows.size());

    // Start from zero so that the two passes below are pure accumulations.
    SetConstant<platform::CPUPlace, T> functor;
    functor(context, out_value, 0.0);
    auto* out_data = out_value->data<T>();

    auto* in1_data = in1_value.data<T>();
    for (size_t i = 0; i < in1_rows.size(); i++) {
      auto row = detail::FindPos(out_rows, in1_rows[i]);
      // FindPos yields 0 both for "found at index 0" and for "not found".
      // Confirm the located output row really has this row id; otherwise the
      // data would be silently added into output row 0.
      PADDLE_ENFORCE_EQ(out_rows[row], in1_rows[i]);
      for (size_t j = 0; j < in1_row_numel; j++) {
        out_data[row * in1_row_numel + j] += in1_data[i * in1_row_numel + j];
      }
    }

    auto* in2_data = in2_value.data<T>();
    for (size_t i = 0; i < in2_rows.size(); i++) {
      auto row = detail::FindPos(out_rows, in2_rows[i]);
      // Same membership check as for input1.
      PADDLE_ENFORCE_EQ(out_rows[row], in2_rows[i]);
      for (size_t j = 0; j < in1_row_numel; j++) {
        out_data[row * in1_row_numel + j] += in2_data[i * in1_row_numel + j];
      }
    }
  }
};
template struct SelectedRowsAdd<platform::CPUPlace, float>;
} // namespace math } // namespace math
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -53,6 +53,7 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, ...@@ -53,6 +53,7 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
#include <cmath> #include <cmath>
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/framework/tensor.h" #include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h" #include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
...@@ -86,11 +87,22 @@ void matmul(const platform::DeviceContext& context, ...@@ -86,11 +87,22 @@ void matmul(const platform::DeviceContext& context,
framework::Tensor* matrix_out, T beta); framework::Tensor* matrix_out, T beta);
template <typename Place, typename T> template <typename Place, typename T>
void SetConstant(const platform::DeviceContext& context, struct SetConstant {
void operator()(const platform::DeviceContext& context,
framework::Tensor* tensor, T num) { framework::Tensor* tensor, T num) {
auto t = framework::EigenVector<T>::Flatten(*tensor); auto t = framework::EigenVector<T>::Flatten(*tensor);
t.device(*context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(num)); t.device(*context.GetEigenDevice<Place>()) =
} t.constant(static_cast<T>(num));
}
};
// Functor combining two SelectedRows operands into `output`, parameterized on
// the device Place and the element type T.
//
// Only the call operator's signature is declared here; no body is given in
// this primary template, so a definition has to be supplied elsewhere for each
// Place/T combination that is used.
// NOTE(review): judging by the name this is presumably a row-wise sum of
// input1 and input2 -- confirm against the per-Place implementation.
template <typename Place, typename T>
struct SelectedRowsAdd {
// context: device context passed by const reference.
// input1, input2: the two operands, taken by const reference.
// output: destination, passed by pointer so the callee can write to it.
void operator()(const platform::DeviceContext& context,
const framework::SelectedRows& input1,
const framework::SelectedRows& input2,
framework::SelectedRows* output);
};
} // namespace math } // namespace math
} // namespace operators } // namespace operators
......
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include "glog/logging.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -253,18 +254,69 @@ TEST(math_function, zero) { ...@@ -253,18 +254,69 @@ TEST(math_function, zero) {
auto* cpu_place = new paddle::platform::CPUPlace(); auto* cpu_place = new paddle::platform::CPUPlace();
float* t = tensor.mutable_data<float>({2, 2}, *cpu_place); float* t = tensor.mutable_data<float>({2, 2}, *cpu_place);
paddle::platform::CPUDeviceContext context(*cpu_place); paddle::platform::CPUDeviceContext context(*cpu_place);
paddle::operators::math::SetConstant<paddle::platform::CPUPlace, float>( paddle::operators::math::SetConstant<paddle::platform::CPUPlace, float>
context, &tensor, 0); functor;
functor(context, &tensor, 0);
EXPECT_EQ(t[0], 0); EXPECT_EQ(t[0], 0);
EXPECT_EQ(t[1], 0); EXPECT_EQ(t[1], 0);
EXPECT_EQ(t[2], 0); EXPECT_EQ(t[2], 0);
EXPECT_EQ(t[3], 0); EXPECT_EQ(t[3], 0);
paddle::operators::math::SetConstant<paddle::platform::CPUPlace, float>( functor(context, &tensor, 1);
context, &tensor, 1);
EXPECT_EQ(t[0], 1); EXPECT_EQ(t[0], 1);
EXPECT_EQ(t[1], 1); EXPECT_EQ(t[1], 1);
EXPECT_EQ(t[2], 1); EXPECT_EQ(t[2], 1);
EXPECT_EQ(t[3], 1); EXPECT_EQ(t[3], 1);
} }
// Exercises SelectedRowsAdd on the CPU.
//
// The first operand holds rows {0, 4, 7} filled with 2.0, the second holds rows
// {0, 5, 7, 9} filled with 1.0, and the output is prepared with rows
// {0, 4, 5, 7, 9}. Rows present in both operands must end up as 3.0, rows
// present in only one operand keep that operand's fill value.
TEST(math_function, selected_rows_add) {
  using namespace paddle::framework;
  using namespace paddle::platform;
  using namespace paddle::operators::math;

  CPUPlace place;
  CPUDeviceContext device_ctx(place);
  SetConstant<CPUPlace, float> fill;

  int64_t height = 10;
  int64_t row_numel = 10;

  // First operand: three rows, every element 2.0.
  std::vector<int64_t> lhs_rows{0, 4, 7};
  std::unique_ptr<SelectedRows> lhs{new SelectedRows(lhs_rows, height)};
  auto* lhs_value = lhs->mutable_value();
  lhs_value->mutable_data<float>(
      make_ddim({static_cast<int64_t>(lhs_rows.size()), row_numel}), place);
  fill(device_ctx, lhs_value, 2.0);

  // Second operand: four rows, every element 1.0.
  std::vector<int64_t> rhs_rows{0, 5, 7, 9};
  std::unique_ptr<SelectedRows> rhs{new SelectedRows(rhs_rows, height)};
  auto* rhs_value = rhs->mutable_value();
  rhs_value->mutable_data<float>(
      make_ddim({static_cast<int64_t>(rhs_rows.size()), row_numel}), place);
  fill(device_ctx, rhs_value, 1.0);

  // Output: rows and storage are prepared up front (5 rows of 10 elements).
  std::vector<int64_t> sum_rows = {0, 4, 5, 7, 9};
  std::unique_ptr<SelectedRows> sum{new SelectedRows()};
  sum->set_height(height);
  sum->set_rows(sum_rows);
  auto* sum_value = sum->mutable_value();
  sum_value->mutable_data<float>(
      make_ddim({static_cast<int64_t>(sum_rows.size()), row_numel}), place);

  SelectedRowsAdd<CPUPlace, float> add_rows;
  add_rows(device_ctx, *lhs, *rhs, sum.get());

  auto* result = sum->value().data<float>();

  // sum_rows[0] = 0: present in both operands.
  EXPECT_EQ(result[0 * row_numel + 0], 3.0);
  EXPECT_EQ(result[0 * row_numel + 8], 3.0);
  // sum_rows[1] = 4: only in the first operand.
  EXPECT_EQ(result[1 * row_numel + 1], 2.0);
  // sum_rows[2] = 5: only in the second operand.
  EXPECT_EQ(result[2 * row_numel + 6], 1.0);
  // sum_rows[3] = 7: present in both operands.
  EXPECT_EQ(result[3 * row_numel + 3], 3.0);
  EXPECT_EQ(result[3 * row_numel + 8], 3.0);
  // sum_rows[4] = 9: only in the second operand.
  EXPECT_EQ(result[4 * row_numel + 4], 1.0);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册