Commit 799b00bc authored by lijianshe02, committed by GitHub

add elementwise op function and add elementwise add/sub kernels test=develop (#2020)

Parent fb40c748
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "lite/core/context.h"
namespace paddle {
namespace lite {
namespace fluid {
template <lite::TargetType Target>
struct ForRange {
  ForRange(const lite::Context<Target>& dev_ctx, size_t limit);

  template <typename Function>
  void operator()(Function func) const;
};

template <>
struct ForRange<lite::TargetType::kX86> {
  ForRange(const lite::X86Context& dev_ctx, size_t limit) : limit_(limit) {}

  template <typename Function>
  void operator()(Function func) const {
    for (size_t i = 0; i < limit_; ++i) {
      func(i);
    }
  }

  size_t limit_;
};
} // namespace fluid
} // namespace lite
} // namespace paddle
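A minimal usage sketch of ForRange on the X86 target (the helper name and buffer are hypothetical; assumes a live X86Context from an enclosing kernel):

// Hypothetical sketch: apply a functor once per index in [0, n).
void scale_inplace(const paddle::lite::X86Context& ctx,
                   float* data, size_t n, float k) {
  paddle::lite::fluid::ForRange<paddle::lite::TargetType::kX86> for_range(ctx, n);
  for_range([data, k](size_t i) { data[i] *= k; });
}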
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
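// NOTE: in this CPU-only port the qualifiers below expand to nothing.
// Upstream Paddle's CUDA builds define the analogous macros as
// __host__ __device__, __device__, and __host__ under __CUDACC__
// (assumption based on the upstream hostdevice.h).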
#define HOSTDEVICE
#define DEVICE
#define HOST
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <type_traits>
#include "lite/core/op_lite.h"
#include "lite/fluid/hostdevice.h"
namespace paddle {
namespace lite {
namespace fluid {
// Transform applies a unary or a binary functor to each element in a
// range defined by a pair of iterators.
//
// - The specialization for CPU calls std::transform.
// - The specialization for CUDA calls thrust::transform.
//
// NOTE: InputIter and OutputIter are defined as different types because
// InputIter points to the op's inputs while OutputIter points to the
// op's outputs.
//
// NOTE: We don't assume InputIter to be const InputType* and OutputIter
// to be OutputType*, because we might use an iterator class such as
// paddle::fluid::operators::RowwiseTransformIterator.
template <lite::TargetType Target>
struct Transform {
  // The unary version.
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
  void operator()(const lite::Context<Target>& context,
                  InputIter first,
                  InputIter last,
                  OutputIter result,
                  UnaryOperation op);

  // The binary version.
  template <typename InputIter1,
            typename InputIter2,
            typename OutputIter,
            typename BinaryOperation>
  void operator()(const lite::Context<Target>& context,
                  InputIter1 first1,
                  InputIter1 last1,
                  InputIter2 first2,
                  OutputIter result,
                  BinaryOperation op);
};

template <>
struct Transform<lite::TargetType::kX86> {
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
  void operator()(const lite::X86Context& context,
                  InputIter first,
                  InputIter last,
                  OutputIter result,
                  UnaryOperation op) {
    std::transform(first, last, result, op);
  }

  template <typename InputIter1,
            typename InputIter2,
            typename OutputIter,
            typename BinaryOperation>
  void operator()(const lite::X86Context& context,
                  InputIter1 first1,
                  InputIter1 last1,
                  InputIter2 first2,
                  OutputIter result,
                  BinaryOperation op) {
    std::transform(first1, last1, first2, result, op);
  }
};
} // namespace fluid
} // namespace lite
} // namespace paddle
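A hedged sketch of the binary Transform overload on X86 (buffer names illustrative; assumes a constructed X86Context):

// Hypothetical sketch: elementwise sum of two float buffers; the X86
// specialization simply forwards to std::transform.
void add_buffers(const paddle::lite::X86Context& ctx,
                 const float* x, const float* y, float* out, size_t n) {
  paddle::lite::fluid::Transform<paddle::lite::TargetType::kX86> trans;
  trans(ctx, x, x + n, y, out, [](float a, float b) { return a + b; });
}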
......@@ -33,6 +33,7 @@ add_kernel(concat_compute_x86 X86 basic SRCS concat_compute.cc DEPS ${lite_kerne
add_kernel(shape_compute_x86 X86 basic SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(sequence_pool_compute_x86 X86 basic SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} sequence_pooling)
add_kernel(softmax_compute_x86 X86 basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax)
+add_kernel(elementwise_compute_x86 X86 basic SRCS elementwise_compute.cc DEPS ${lite_kernel_deps})
if(NOT LITE_WITH_X86)
return()
......@@ -46,3 +47,4 @@ lite_cc_test(test_concat_compute_x86 SRCS concat_compute_test.cc DEPS concat_com
lite_cc_test(test_sequence_pool_compute_x86 SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_x86)
lite_cc_test(test_shape_compute_x86 SRCS shape_compute_test.cc DEPS shape_compute_x86)
lite_cc_test(test_softmax_compute_x86 SRCS softmax_compute_test.cc DEPS softmax_compute_x86)
+lite_cc_test(test_elementwise_compute_x86 SRCS elementwise_compute_test.cc DEPS elementwise_compute_x86)
......@@ -35,21 +35,3 @@ REGISTER_LITE_KERNEL(elementwise_add,
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
-#ifdef LITE_WITH_X86
-REGISTER_LITE_KERNEL(
-    elementwise_sub_grad,
-    kX86,
-    kFloat,
-    kNCHW,
-    paddle::lite::kernels::x86::ElementwiseSubGradCompute<float>,
-    def)
-    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
-    .BindInput(paddle::framework::GradVarName("Out"),
-               {LiteType::GetTensorTy(TARGET(kX86))})
-    .BindOutput(paddle::framework::GradVarName("X"),
-                {LiteType::GetTensorTy(TARGET(kX86))})
-    .BindOutput(paddle::framework::GradVarName("Y"),
-                {LiteType::GetTensorTy(TARGET(kX86))})
-    .Finalize();
-#endif
......@@ -15,11 +15,8 @@
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "lite/fluid/eigen.h"
#include "lite/kernels/x86/elementwise_op_function.h"
namespace paddle {
namespace lite {
......@@ -45,74 +42,17 @@ class ElementwiseSubCompute
  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    auto& context = ctx_->As<X86Context>();
-   CHECK(context.x86_device_context());
    param.Out->template mutable_data<T>();
-   paddle::operators::ElementwiseComputeEx<SubFunctor<T>,
-                                           platform::CPUDeviceContext,
-                                           T>(*context.x86_execution_context(),
-                                              &param.X->raw_tensor(),
-                                              &param.Y->raw_tensor(),
-                                              param.axis,
-                                              SubFunctor<T>(),
-                                              &param.Out->raw_tensor());
+   paddle::lite::kernels::x86::ElementwiseComputeEx<SubFunctor<T>,
+                                                    lite::TargetType::kX86,
+                                                    T>(
+       context, param.X, param.Y, param.axis, SubFunctor<T>(), param.Out);
  }

  virtual ~ElementwiseSubCompute() = default;
};
-template <typename T>
-struct SubGradDX {
-  T operator()(T x, T y, T out, T dout) const { return dout; }
-};
-
-template <typename T>
-struct SubGradDY {
-  T operator()(T x, T y, T out, T dout) const { return -dout; }
-};
-
-#ifdef LITE_WITH_X86
-template <typename T>
-class ElementwiseSubGradCompute
-    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
- public:
-  using param_t = operators::ElementwiseGradParam;
-
-  void Run() override {
-    auto& param = *param_.get_mutable<param_t>();
-    auto& context = ctx_->As<X86Context>();
-    CHECK(context.x86_device_context());
-    param.X_grad->template mutable_data<T>();
-    // skip out, x, y
-    auto dout = param.Out_grad->raw_tensor();
-    auto dx = param.X_grad->raw_tensor();
-    framework::Tensor* dy = nullptr;
-    if (param.Y_grad) {
-      param.Y_grad->template mutable_data<T>();
-      dy = &param.Y_grad->raw_tensor();
-    }
-    auto& skip = dout;
-    paddle::operators::ElemwiseExplicitGradCompute<platform::CPUDeviceContext,
-                                                   T,
-                                                   SubGradDX<T>,
-                                                   SubGradDY<T>>(
-        *context.x86_execution_context(),
-        skip,
-        skip,
-        skip,
-        dout,
-        param.axis,
-        &dx,
-        dy,
-        SubGradDX<T>(),
-        SubGradDY<T>());
-  }
-
-  virtual ~ElementwiseSubGradCompute() = default;
-};
-#endif

template <typename T>
class ElementwiseAddCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
......@@ -121,16 +61,11 @@ class ElementwiseAddCompute
  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    auto& context = ctx_->As<X86Context>();
-   CHECK(context.x86_device_context());
    param.Out->template mutable_data<T>();
-   paddle::operators::ElementwiseComputeEx<AddFunctor<T>,
-                                           platform::CPUDeviceContext,
-                                           T>(*context.x86_execution_context(),
-                                              &param.X->raw_tensor(),
-                                              &param.Y->raw_tensor(),
-                                              param.axis,
-                                              AddFunctor<T>(),
-                                              &param.Out->raw_tensor());
+   paddle::lite::kernels::x86::ElementwiseComputeEx<AddFunctor<T>,
+                                                    lite::TargetType::kX86,
+                                                    T>(
+       context, param.X, param.Y, param.axis, AddFunctor<T>(), param.Out);
  }

  virtual ~ElementwiseAddCompute() = default;
......
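For context, the AddFunctor/SubFunctor objects passed to the new lite-internal ElementwiseComputeEx are plain binary functors. A sketch consistent with the calls above (assumption; the actual definitions are not shown in this diff):

// Assumed functor shape expected by ElementwiseComputeEx; illustrative only.
template <typename T>
struct AddFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const { return a + b; }
};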
......@@ -74,9 +74,9 @@ TEST(elementwise_add_x86, run_test) {
  elementwise_add.SetContext(std::move(ctx));
  elementwise_add.Run();

  LOG(INFO) << "output: ";
+ std::vector<float> ref_results = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
  for (int i = 0; i < out.dims().production(); i++) {
    LOG(INFO) << out_data[i];
+   EXPECT_NEAR(out_data[i], ref_results[i], 1e-3);
  }
}
......
This diff is collapsed.
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// Because Boost 1.41.0's variadic templates have a bug on nvcc, Boost
// disables variadic template support in NVCC mode. Define
// BOOST_NO_CXX11_VARIADIC_TEMPLATES on gcc/clang to generate the same
// function symbols. For details, see
// https://github.com/PaddlePaddle/Paddle/issues/3386

// Some platform-independent definitions.
#if defined(_WIN32)
#define UNUSED
#define __builtin_expect(EXP, C) (EXP)
#else
#define UNUSED __attribute__((unused))
#endif
#if !defined(_WIN32)
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#else
// There is no equivalent intrinsic in MSVC.
#define UNLIKELY(condition) (condition)
#endif
#if !defined(_WIN32)
#define LIKELY(condition) __builtin_expect(static_cast<bool>(condition), 1)
#else
// There is no equivalent intrinsic in MSVC.
#define LIKELY(condition) (condition)
#endif
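A small illustrative use of the branch-prediction hints (function name hypothetical; on MSVC these compile to plain conditions per the fallbacks above):

// Hypothetical example: mark the divide-by-zero branch as cold so the
// compiler can lay out the hot path first.
int checked_div(int a, int b) {
  if (UNLIKELY(b == 0)) {
    return 0;  // rare error path
  }
  return a / b;
}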