Commit b5c410a2 authored by lijianshe02, committed by GitHub

add ops and kernels for mul, scale, fc, relu, softmax, dropout, elem… (#17711)

* fix conflicts

* fix kernel registry related bugs test=develop
Parent 89b0466c
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

syntax = "proto2";
-option optimize_for = LITE_RUNTIME;
+// option optimize_for = LITE_RUNTIME;
package paddle.framework.proto;

// Any incompatible changes to ProgramDesc and its dependencies should
......
@@ -25,10 +25,14 @@ set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inferenc
set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
    "A path setting inference demo download directories.")
-# lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
-#    DEPS cxx_api_lite model_parser_lite target_wrapper_host
-#    ${ops_lite} ${host_kernels} ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
-#    --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
+if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+    lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
+        DEPS cxx_api_lite model_parser_lite target_wrapper_host
+        ${ops_lite} ${host_kernels} ${x86_kernels}
+        ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
+        --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
+    add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
+endif(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)

if(WITH_TESTING)
    lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz")
......
@@ -32,7 +32,8 @@ namespace lite {
TEST(CXXApi, test) {
  lite::ExecutorLite predictor;
#ifndef LITE_WITH_CUDA
-  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}});
+  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
+                                   Place{TARGET(kX86), PRECISION(kFloat)}});
#else
  std::vector<Place> valid_places({
      Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
@@ -44,7 +45,8 @@ TEST(CXXApi, test) {
  });
#endif

-  predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
+  predictor.Build(FLAGS_model_dir,
+                  Place{TARGET(kX86), PRECISION(kFloat)},  // origin cuda
                  valid_places);

  auto* input_tensor = predictor.GetInput(0);
@@ -69,7 +71,8 @@ TEST(CXXApi, test) {
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST(CXXApi, save_model) {
  lite::ExecutorLite predictor;
-  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}});
+  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
+                                   Place{TARGET(kX86), PRECISION(kFloat)}});
  predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
                  valid_places);
@@ -78,7 +81,7 @@ TEST(CXXApi, save_model) {
#endif  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-TEST(CXXTrainer, train) {
+/*TEST(CXXTrainer, train) {
  Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
  std::vector<Place> valid_places({prefer_place});
  auto scope = std::make_shared<lite::Scope>();
@@ -108,7 +111,7 @@ TEST(CXXTrainer, train) {
  data0[0] = 0;

  exe.Run();
-}
+}*/
#endif  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK

}  // namespace lite
@@ -116,13 +119,31 @@ TEST(CXXTrainer, train) {
USE_LITE_OP(mul);
USE_LITE_OP(fc);
+USE_LITE_OP(relu);
USE_LITE_OP(scale);
USE_LITE_OP(feed);
USE_LITE_OP(fetch);
USE_LITE_OP(io_copy);
+USE_LITE_OP(elementwise_add);
+USE_LITE_OP(elementwise_sub);
+USE_LITE_OP(square);
+USE_LITE_OP(softmax);
+USE_LITE_OP(dropout);
USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);

+#ifdef LITE_WITH_X86
+USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
+#endif

#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
......
@@ -95,7 +95,11 @@ struct CUDAContext {
#ifdef LITE_WITH_X86
struct X86Context {
  // overall information
+  X86Context() {
+    x86_device_context.reset(new ::paddle::platform::CPUDeviceContext);
+    x86_execution_context.reset(
+        new ::paddle::framework::ExecutionContext(*x86_device_context));
+  }

  // kernel information

  // legacy info.
......
@@ -34,9 +34,13 @@ class RuntimeContextAssignPass : public StmtPass {
      auto& inst = node.AsStmt();
      switch (inst.picked_kernel().target()) {
        case TARGET(kHost):
-        case TARGET(kX86):
          inst.picked_kernel().SetContext(NewHostContext());
          break;
+#ifdef LITE_WITH_X86
+        case TARGET(kX86):
+          inst.picked_kernel().SetContext(NewX86Context());
+          break;
+#endif
#ifdef LITE_WITH_CUDA
        case TARGET(kCUDA):
          inst.picked_kernel().SetContext(NewCudaContext());
@@ -61,6 +65,13 @@ class RuntimeContextAssignPass : public StmtPass {
    return ctx;
  }

+#ifdef LITE_WITH_X86
+  std::unique_ptr<KernelContext> NewX86Context() {
+    std::unique_ptr<KernelContext> ctx(new KernelContext);
+    ctx->As<X86Context>();
+    return ctx;
+  }
+#endif
#ifdef LITE_WITH_ARM
  std::unique_ptr<KernelContext> NewARMContext() {
......
@@ -91,6 +91,10 @@ KernelRegistry::KernelRegistry()
  INIT_FOR(kHost, kAny, kNCHW);
  INIT_FOR(kHost, kAny, kAny);

+  INIT_FOR(kX86, kFloat, kNCHW);
+  INIT_FOR(kX86, kAny, kNCHW);
+  INIT_FOR(kX86, kAny, kAny);
+
  INIT_FOR(kARM, kFloat, kNCHW);
  INIT_FOR(kARM, kAny, kNCHW);
  INIT_FOR(kARM, kAny, kAny);
......
@@ -3,18 +3,29 @@ if(NOT LITE_WITH_X86)
endif()

cc_library(activation_compute_x86 SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
-cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kernel_deps})
cc_library(mean_compute_x86 SRCS mean_compute.cc DEPS ${lite_kernel_deps})
cc_library(fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps})
-cc_library(mul_compute_x86 SRCS mul_compute.cc DEPS ${lite_kernel_deps})
cc_library(sgd_compute_x86 SRCS sgd_compute.cc DEPS ${lite_kernel_deps})
+cc_library(fc_compute_x86 SRCS fc_compute.cc DEPS ${lite_kernel_deps})
+cc_library(mul_compute_x86 SRCS mul_compute.cc DEPS ${lite_kernel_deps})
+cc_library(relu_compute_x86 SRCS relu_compute.cc DEPS ${lite_kernel_deps})
+cc_library(scale_compute_x86 SRCS scale_compute.cc DEPS ${lite_kernel_deps})
+cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op elementwise_add_op)
+cc_library(softmax_compute_x86 SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax)
+cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps})

set(x86_kernels
    activation_compute_x86
    elementwise_compute_x86
    mean_compute_x86
    fill_constant_compute_x86
    mul_compute_x86
+    relu_compute_x86
+    fc_compute_x86
+    scale_compute_x86
+    softmax_compute_x86
+    dropout_compute_x86
)

set(x86_kernels "${x86_kernels}" CACHE INTERNAL "x86 kernels")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <random>
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename T>
class DropoutCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::DropoutParam;
void Run() override {
auto& param = *param_.get_mutable<operators::DropoutParam>();
const auto* x_data = param.x->data<T>();
auto* out_data = param.output->template mutable_data<T>();
if (!param.is_test) {
auto* mask_data = param.mask->template mutable_data<T>();
std::random_device rnd;
std::minstd_rand engine;
int seed = param.fix_seed ? param.seed : rnd();
engine.seed(seed);
std::uniform_real_distribution<float> dist(0, 1);
size_t size = framework::product(param.mask->dims().data());
for (size_t i = 0; i < size; ++i) {
if (dist(engine) < param.dropout_prob) {
mask_data[i] = 0;
out_data[i] = 0;
} else {
if (param.dropout_implementation == "upscale_in_train") {
mask_data[i] = 1.0f / static_cast<T>(1.0f - param.dropout_prob);
out_data[i] = x_data[i] / static_cast<T>(1.0f - param.dropout_prob);
} else {
mask_data[i] = 1;
out_data[i] = x_data[i];
}
}
}
} else {
auto X = EigenMatrix<T>::Reshape(param.x->raw_tensor(), 1);
auto Y = EigenMatrix<T>::Reshape(param.output->raw_tensor(), 1);
auto& place = *platform::CPUDeviceContext().eigen_device();
if (param.dropout_implementation == "upscale_in_train") {
Y.device(place) = X;
} else {
Y.device(place) = X * static_cast<T>(1.0f - param.dropout_prob);
}
}
}
virtual ~DropoutCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(dropout, kX86, kFloat, kNCHW,
paddle::lite::kernels::x86::DropoutCompute<float>, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Mask", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
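For reference (not part of the commit; notation mine), the two dropout_implementation modes handled above compute, for drop probability p:

\[
y_i = \begin{cases}
0, & \text{dropped (prob. } p\text{)} \\
x_i, & \text{kept, \texttt{downgrade\_in\_infer}} \\
x_i/(1-p), & \text{kept, \texttt{upscale\_in\_train}}
\end{cases}
\qquad
y^{\text{test}} = \begin{cases}
(1-p)\,x, & \texttt{downgrade\_in\_infer} \\
x, & \texttt{upscale\_in\_train}
\end{cases}
\]

Either way the expected activation matches between training and inference, which is why the else-branch in Run() rescales only in the upscale_in_train case.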
@@ -30,6 +30,11 @@ struct SubFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const { return a - b; }
};

+template <typename T>
+struct AddFunctor {
+  inline HOSTDEVICE T operator()(T a, T b) const { return a + b; }
+};
+
template <typename T>
class ElementwiseSubCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
@@ -67,10 +72,9 @@ class ElementwiseSubGradCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseGradParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
-    auto& context = context_->As<X86Context>();
+    auto& context = ctx_->As<X86Context>();
    CHECK(context.x86_device_context);

    param.X_grad->template mutable_data<T>();
@@ -89,6 +93,26 @@ class ElementwiseSubGradCompute
  virtual ~ElementwiseSubGradCompute() = default;
};
template <typename T>
class ElementwiseAddCompute
: public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ElementwiseParam;
void Run() override {
auto& param = *param_.get_mutable<param_t>();
auto& context = ctx_->As<X86Context>();
CHECK(context.x86_device_context);
param.Out->template mutable_data<T>();
paddle::operators::ElementwiseComputeEx<AddFunctor<T>,
platform::CPUDeviceContext, T>(
*context.x86_execution_context, &param.X->raw_tensor(),
&param.Y->raw_tensor(), param.axis, AddFunctor<T>(),
&param.Out->raw_tensor());
}
virtual ~ElementwiseAddCompute() = default;
};
}  // namespace x86
}  // namespace kernels
}  // namespace lite

@@ -113,3 +137,11 @@ REGISTER_LITE_KERNEL(elementwise_sub_grad, kX86, kFloat, kNCHW,
    .BindOutput(paddle::framework::GradVarName("Y"),
                {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
REGISTER_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW,
paddle::lite::kernels::x86::ElementwiseAddCompute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
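The AddFunctor introduced above is the whole per-element computation; ElementwiseComputeEx supplies the broadcasting and iteration around it. A minimal standalone sketch of the no-broadcast case (illustration only, plain arrays instead of lite tensors):

#include <cstdio>

// Apply a binary functor over two equal-length arrays, mirroring what
// AddFunctor does inside ElementwiseComputeEx when shapes already match.
template <typename T, typename Functor>
void elementwise(const T* x, const T* y, T* out, int n, Functor f) {
  for (int i = 0; i < n; ++i) out[i] = f(x[i], y[i]);
}

template <typename T>
struct AddFunctor {
  T operator()(T a, T b) const { return a + b; }
};

int main() {
  float x[] = {1, 2, 3}, y[] = {10, 20, 30}, out[3];
  elementwise(x, y, out, 3, AddFunctor<float>());
  for (float v : out) printf("%g ", v);  // prints: 11 22 33
  return 0;
}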
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/fc_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename T>
void fc_compute_eigen(const T* x, int x_w, int x_h, //
const T* w, int w_w, int w_h, //
const T* b, //
T* out) {
using matrix_t =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
Eigen::Map<const matrix_t> X(x, x_h, x_w);
Eigen::Map<const matrix_t> W(w, w_h, w_w);
Eigen::Map<matrix_t> Out(out, x_h, w_h);
Out = X * W.transpose();
if (b) {
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> B(b, w_h);
Out = Out.array().rowwise() + B.transpose().array();
}
}
template <typename T>
__attribute__((optimize("unroll-loops"))) //
T dot(const T* x, const T* y, int dim) {
T out{};
for (int i = 0; i < dim; i++) {
out += x[i] * y[i];
}
return out;
}
template <typename T>
void fc_compute_naive(const T* x, int x_w, int x_h, //
const T* w, int w_w, int w_h, //
const T* b, //
T* out) {
CHECK_EQ(x_w, w_w);
// out shape: (x_h, w_h)
memset(out, 0, x_h * w_h * sizeof(T));
for (int r = 0; r < x_h; r++) {
for (int c = 0; c < w_h; c++) {
out[r * w_h + c] = dot(&x[r * x_w], &w[c * w_w], w_w) + b[c];
}
}
}
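A quick hand-check of the conventions used by fc_compute_naive (standalone sketch with made-up values; x is x_h rows of x_w features, w stores one output neuron per row, out is x_h by w_h):

#include <cstdio>

int main() {
  const int x_h = 2, x_w = 3, w_h = 2, w_w = 3;
  float x[x_h * x_w] = {1, 2, 3, 4, 5, 6};
  float w[w_h * w_w] = {1, 0, 0, 0, 1, 0};  // row r selects feature r
  float b[w_h] = {10, 20};
  float out[x_h * w_h];
  // Same loop structure as fc_compute_naive above.
  for (int r = 0; r < x_h; r++)
    for (int c = 0; c < w_h; c++) {
      float acc = b[c];
      for (int k = 0; k < x_w; k++) acc += x[r * x_w + k] * w[c * w_w + k];
      out[r * w_h + c] = acc;
    }
  for (float v : out) printf("%g ", v);  // prints: 11 22 14 25
  return 0;
}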
template <typename T>
class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::FcParam;
void Run() override {
auto& param = *param_.get_mutable<param_t>();
CHECK_GE(param.input->dims().size(), 2UL);
CHECK_EQ(param.output->dims().size(), 2UL);
fc_compute_eigen(
    param.input->data<T>(),  // x
    param.input->dims().Slice(0, param.in_num_col_dims).production(),  // x_w
    param.input->dims()
        .Slice(param.in_num_col_dims, param.input->dims().size())
        .production(),  // x_h
param.w->data<T>(), // w
param.w->dims()[1], // w_w
param.w->dims()[0], // w_h
param.bias->data<T>(), // b
param.output->mutable_data<T>());
}
virtual ~FcCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(fc, kX86, kFloat, kNCHW,
paddle::lite::kernels::x86::FcCompute<float>, def)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("Bias", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("W", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
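In matrix form, the eigen path above computes (shapes as used in fc_compute_eigen):

\[
\mathrm{Out} = X W^{\top} + \mathbf{1}\, b^{\top},
\qquad X \in \mathbb{R}^{x_h \times x_w},\;
W \in \mathbb{R}^{w_h \times w_w},\;
b \in \mathbb{R}^{w_h},\;
\mathrm{Out} \in \mathbb{R}^{x_h \times w_h},
\]

which requires x_w = w_w (one weight row per output unit), matching the CHECK_EQ in the naive version.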
@@ -31,7 +31,7 @@ class FillConstantCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
-    auto& context = context_->As<X86Context>();
+    auto& context = ctx_->As<X86Context>();
    CHECK(context.x86_device_context);

    param.Out->template mutable_data<T>();
......
@@ -37,7 +37,7 @@ class MeanCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
-    auto& context = context_->As<X86Context>();
+    auto& context = ctx_->As<X86Context>();
    CHECK(context.x86_device_context);

    param.Out->template mutable_data<T>();
@@ -59,7 +59,7 @@ class MeanGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
-    auto& context = context_->As<X86Context>();
+    auto& context = ctx_->As<X86Context>();
    CHECK_EQ(param.Out_grad->raw_tensor().numel(), 1);
    CHECK(context.x86_device_context);
......
@@ -30,7 +30,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
  using param_t = operators::MulParam;

  void Run() override {
-    auto& context = context_->As<X86Context>();
+    auto& context = ctx_->As<X86Context>();
    auto& param = *param_.get_mutable<operators::MulParam>();
    CHECK(context.x86_device_context);
@@ -68,7 +68,7 @@ template <typename T>
class MulGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  void Run() override {
-    auto& context = context_->As<X86Context>();
+    auto& context = ctx_->As<X86Context>();
    auto& param = *param_.get_mutable<operators::MulGradParam>();
    CHECK(context.x86_device_context);
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename T>
class ReluCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ReluParam;
void Run() override {
auto& param = *param_.get_mutable<param_t>();
auto n = param.input->dims().production();
const float* input = param.input->data<float>();
float* output = param.output->mutable_data<float>();
for (int i = 0; i < n; i++) {
output[i] = std::max(0.f, input[i]);
}
}
virtual ~ReluCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(relu, kX86, kFloat, kNCHW,
paddle::lite::kernels::x86::ReluCompute<float>, def)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename T>
void scale_compute(const T* x, T* out, int size, float scale, float bias,
                   bool bias_after) {
  // If the bias is applied before the scale, it gets scaled as well:
  // out = (x + bias) * scale = x * scale + bias * scale.
  if (!bias_after) bias *= scale;
  for (int i = 0; i < size; i++) {
    out[i] = x[i] * scale + bias;
  }
}
template <typename T>
class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ScaleParam;
void Run() override {
auto& param = *param_.get_mutable<param_t>();
scale_compute(param.x->data<T>(), param.output->mutable_data<T>(),
param.x->dims().production(), param.scale, param.bias,
param.bias_after_scale);
}
virtual ~ScaleCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(scale, kX86, kFloat, kNCHW,
paddle::lite::kernels::x86::ScaleCompute<float>, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
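The bias_after_scale flag selects between the two standard forms of the scale op (matching the fluid scale op semantics):

\[
\mathrm{out}_i = \begin{cases}
\mathrm{scale} \cdot x_i + \mathrm{bias}, & \texttt{bias\_after\_scale} = \text{true} \\
\mathrm{scale} \cdot (x_i + \mathrm{bias}), & \texttt{bias\_after\_scale} = \text{false}
\end{cases}
\]

Folding bias *= scale when the bias comes first lets both cases share the single fused loop above.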
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
static inline int CanonicalAxis(const int axis, const int rank) {
if (axis < 0) {
return axis + rank;
}
return axis;
}
static inline int SizeToAxis(const int axis, lite::DDim dims) {
int size = 1;
for (int i = 0; i < axis; i++) {
size *= dims[i];
}
return size;
}
static inline int SizeFromAxis(const int axis, lite::DDim dims) {
int size = 1;
for (int i = axis; i < dims.size(); i++) {
size *= dims[i];
}
return size;
}
template <typename T>
class SoftmaxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::SoftmaxParam;
void Run() override {
auto& param = *param_.get_mutable<operators::SoftmaxParam>();
// auto& context = context_->As<X86Context>();
CHECK(param.output);
CHECK(param.x);
const int rank = param.x->dims().size();
const int axis = CanonicalAxis(param.axis, rank);
int axis_dim = param.x->dims()[axis];
const int n = SizeToAxis(axis, param.x->dims());
const int d = SizeFromAxis(axis, param.x->dims());
std::vector<int64_t> shape{n, d};
lite::Tensor input_2d, out_2d;
input_2d.ShareDataWith(*param.x);
input_2d.Resize(lite::DDim(shape));
out_2d.ShareDataWith(*param.output);
out_2d.Resize(lite::DDim(shape));
paddle::operators::math::SoftmaxFunctor<platform::CPUDeviceContext, T,
true>()(
platform::CPUDeviceContext(), axis_dim, &input_2d.raw_tensor(),
&out_2d.raw_tensor());
}
virtual ~SoftmaxCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(softmax, kX86, kFloat, kNCHW,
paddle::lite::kernels::x86::SoftmaxCompute<float>, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
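To see what the axis helpers do, here is a standalone re-implementation over a plain shape vector (sketch, values mine): for a tensor of shape {2, 3, 4} and axis = -1, the kernel views the data as an n x d = 6 x 4 matrix and applies softmax along the last dimension.

#include <cstdio>
#include <vector>

int CanonicalAxis(int axis, int rank) { return axis < 0 ? axis + rank : axis; }

int SizeToAxis(int axis, const std::vector<int>& dims) {
  int size = 1;
  for (int i = 0; i < axis; i++) size *= dims[i];
  return size;
}

int SizeFromAxis(int axis, const std::vector<int>& dims) {
  int size = 1;
  for (int i = axis; i < static_cast<int>(dims.size()); i++) size *= dims[i];
  return size;
}

int main() {
  std::vector<int> dims = {2, 3, 4};
  int axis = CanonicalAxis(-1, static_cast<int>(dims.size()));  // -1 -> 2
  printf("axis=%d n=%d d=%d axis_dim=%d\n", axis,
         SizeToAxis(axis, dims),    // n = 2 * 3 = 6
         SizeFromAxis(axis, dims),  // d = 4
         dims[axis]);               // axis_dim = 4
  return 0;
}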
@@ -13,8 +13,9 @@ cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS})
cc_library(mean_op_lite SRCS mean_op.cc DEPS ${op_DEPS})
cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS})
#cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS})
cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite)
+cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS})

set(ops_lite
    fc_op_lite
    relu_op_lite
@@ -27,7 +28,9 @@ set(ops_lite
    elementwise_ops_lite
    mean_op_lite
    fill_constant_op_lite
+    activation_ops_lite
+    dropout_op_lite
    PARENT_SCOPE)

-lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite memory_lite)
+lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite memory_lite fc_compute_x86)
lite_cc_test(test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite memory_lite)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
class DropoutOpLite : public OpLite {
public:
explicit DropoutOpLite(const std::string& type) : OpLite(type) {}
bool CheckShape() const override {
CHECK_OR_FALSE(param_.x);
return true;
}
bool InferShape() const override {
const auto x_dims = param_.x->dims();
param_.output->Resize(x_dims);
if (param_.is_test == false) {
param_.mask->Resize(x_dims);
}
// share LoD
// param_.output->set_lod(param_.input->lod());
return true;
}
void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); }
// TODO(Superjomn) replace framework::OpDesc with a lite one.
bool AttachImpl(const OpDesc& op_desc, lite::Scope* scope) override {
auto input = op_desc.Input("X").front();
auto out = op_desc.Output("Out").front();
auto Mask = op_desc.Output("Mask").front();
param_.x = GetVar<lite::Tensor>(scope, input);
param_.output = GetMutableVar<lite::Tensor>(scope, out);
param_.mask = GetMutableVar<lite::Tensor>(scope, Mask);
param_.dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
if (op_desc.HasAttr("is_test")) {
  param_.is_test = boost::get<bool>(op_desc.GetAttr("is_test"));
}
param_.fix_seed = boost::get<bool>(op_desc.GetAttr("fix_seed"));
param_.seed = boost::get<int>(op_desc.GetAttr("seed"));
param_.dropout_implementation =
    boost::get<std::string>(op_desc.GetAttr("dropout_implementation"));
return true;
}
std::string DebugString() const override { return "dropout"; }
private:
mutable DropoutParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(dropout, paddle::lite::operators::DropoutOpLite);
@@ -43,9 +43,8 @@ class ElementwiseOp : public OpLite {
    param_.X = GetVar<lite::Tensor>(scope, X_name);
    param_.Y = GetVar<lite::Tensor>(scope, Y_name);
-    param_.Out = GetMutableVar<Tensor>(scope, Out_name);
-    param_.axis = GetAttr<int>(opdesc.GetAttr("axis"));
+    param_.Out = GetMutableVar<lite::Tensor>(scope, Out_name);
+    param_.axis = boost::get<int>(opdesc.GetAttr("axis"));
    return true;
  }
@@ -110,3 +109,4 @@ REGISTER_LITE_OP(elementwise_sub, paddle::lite::operators::ElementwiseOp);
REGISTER_LITE_OP(elementwise_sub_grad,
                 paddle::lite::operators::ElementwiseGradExplicitOp);
#endif
REGISTER_LITE_OP(elementwise_add, paddle::lite::operators::ElementwiseOp);
@@ -57,10 +57,16 @@ TEST(fc_op_lite, test) {
  FcOpLite fc("fc");

-  fc.SetValidPlaces({Place{TARGET(kHost), PRECISION(kFloat)}});
+  fc.SetValidPlaces({Place{TARGET(kX86), PRECISION(kFloat)}});
  fc.Attach(desc, &scope);
+  auto kernels = fc.CreateKernels({Place{TARGET(kX86), PRECISION(kFloat)}});
+  ASSERT_FALSE(kernels.empty());
}

}  // namespace operators
}  // namespace lite
}  // namespace paddle

+#ifdef LITE_WITH_X86
+USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
+#endif
@@ -13,6 +13,7 @@
// limitations under the License.

#pragma once
+#include <string>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/framework.pb.h"
@@ -94,14 +95,67 @@ struct ScaleParam {
  bool bias_after_scale{true};
};

-// For Softmax Op
+// For Softmax op
struct SoftmaxParam {
  lite::Tensor* x{};
  lite::Tensor* output{};
  int axis{-1};
};
// For Convolution op
struct ConvParam {
lite::Tensor* x{};
lite::Tensor* filter{};
lite::Tensor* bias{};
lite::Tensor* residualData{};
lite::Tensor* output{};
std::vector<int> strides{1, 1};
std::vector<int> paddings{0, 0};
int groups{1};
std::vector<int> dilations{1, 1};
bool fuse_relu_before_depthwise_conv{false};
bool use_mkldnn{false};
bool fuse_relu{false}; // only used in mkldnn kernel
bool use_quantizer{
false}; // set true for op that should be quantized, only used for cpu
bool fuse_residual_connection{false};
float scale_in{1.0f}; // only used with mkl-dnn int8
float scale_out{1.0f}; // only used with mkl-dnn int8
float scale_in_eltwise{1.0f}; // only used with mkl-dnn int8
float scale_weights{1.0f}; // only used with mkl-dnn int8
bool force_fp32_output{false}; // only used in mkl-dnn int8
std::string data_format{"Anylayout"};
};
// For Pooling op
struct PoolParam {
lite::Tensor* x{};
lite::Tensor* output{};
std::string pooling_type{""};
std::vector<int> ksize{};
bool global_pooling{
    false};  // if true, kernel size and paddings will be ignored
std::vector<int> strides{1, 1};
std::vector<int> paddings{0, 0};
bool exclusive{true};
bool adaptive{false};
bool ceil_mode{false};
bool use_quantizer{false};
std::string data_format{"AnyLayout"};
};
// For Dropout op
struct DropoutParam {
const lite::Tensor* x{};
lite::Tensor* output{};
lite::Tensor* mask{};
float dropout_prob{.5f};
bool is_test{false};
bool fix_seed{false};
int seed{0};
std::string dropout_implementation{"downgrade_in_infer"};
};
/// ----------------------- element wise operators ----------------------
struct ElementwiseParam {
  const lite::Tensor* X{};
......
@@ -25,7 +25,6 @@ bool ReluOp::InferShape() const {
  CHECK_OR_FALSE(param_.output);
  // TODO(Superjomn) Enable data sharing.
  param_.output->Resize(param_.input->dims());
-  // param_.output->ShareDataWith(*param_.input);
  // share lod
  // param_.output->set_lod(param_.input->lod());
  return true;
@@ -42,8 +41,8 @@ bool ReluOp::AttachImpl(const OpDesc &opdesc, lite::Scope *scope) {
  return true;
}

-REGISTER_LITE_OP(relu, ReluOp);
}  // namespace operators
}  // namespace lite
}  // namespace paddle

+REGISTER_LITE_OP(relu, paddle::lite::operators::ReluOp);
@@ -35,7 +35,7 @@ class ReluOp : public OpLite {
  bool AttachImpl(const OpDesc &opdesc, lite::Scope *scope) override;
  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }

-  std::string DebugString() const override { return "tanh"; }
+  std::string DebugString() const override { return "relu"; }

 private:
  mutable ReluParam param_;
......
@@ -128,8 +128,9 @@ struct variant {
    if (type_id == typeid(T).hash_code())
      return reinterpret_cast<T*>(&data);
    else
-      LOG(FATAL) << "unmatched type get, should be " << type_id << " but get "
-                 << typeid(T).name();
+      LOG(ERROR) << "unmatched type get, should be " << type_id << " but get "
+                 << typeid(T).name();
+    throw std::invalid_argument("unmatched type");
  }

  ~variant() { helper_t::destroy(type_id, &data); }
};
......
@@ -6,7 +6,8 @@ cc_library(memcpy SRCS memcpy.cc DEPS place)
cc_library(memory
    DEPS
    malloc
-    memcpy)
+    memcpy
+    )

#if (WITH_GPU)
#  nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()