Unverified commit 59122809, authored by Yan Chunwei, committed by GitHub

code clean - refine ARM compile (#17590)

* code clean - refine ARM

cmake enhancement:

- add lite_cc_library and lite_cc_test

code clean:

- remove ARM feed and fetch kernels, reuse the Host's

remove unnecessary comments
Parent commit: 310fd514
@@ -121,7 +121,8 @@ endif()
# for lite, both server and mobile framework.
option(WITH_LITE "Enable lite framework" OFF)
option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
......
@@ -172,6 +172,10 @@ if (LITE_WITH_X86)
add_definitions("-DLITE_WITH_X86")
endif()
if (LITE_WITH_ARM)
add_definitions("-DLITE_WITH_ARM")
endif()
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
add_definitions("-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK")
endif()
@@ -427,7 +427,7 @@ function(raw_cc_test TARGET_NAME)
endif()
endfunction(raw_cc_test)
function(lite_cc_test args)
function(_lite_cc_test args)
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
message(STATUS "building lite raw test: ${args}")
raw_cc_test(${args} ${ARGN})
......
@@ -39,6 +39,10 @@ DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
namespace paddle {
namespace framework {
OpDuppy op_duppy;
Scope scope_duppy;
RuntimeContext runtime_context_duppy({}, {});
std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN),
std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain),
......
@@ -239,9 +239,10 @@ class OpDuppy : public OperatorBase {
void RunImpl(const Scope& scope,
const platform::Place& place) const override {}
};
OpDuppy op_duppy;
Scope scope_duppy;
RuntimeContext runtime_context_duppy({}, {});
extern OpDuppy op_duppy;
extern Scope scope_duppy;
extern RuntimeContext runtime_context_duppy;
class ExecutionContext {
public:
@@ -255,7 +256,7 @@ class ExecutionContext {
ctx_(ctx),
kernel_configs_(configs) {}
ExecutionContext(const platform::DeviceContext& device_context)
explicit ExecutionContext(const platform::DeviceContext& device_context)
: op_(op_duppy),
scope_(scope_duppy),
device_context_(device_context),
......
@@ -3,9 +3,10 @@ if (NOT WITH_LITE)
endif()
message(WARNING "Lite enabled!")
message(STATUS "LIGHT_FRAMEWORK: ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
message(STATUS "LITE_WITH_CUDA: ${LITE_WITH_CUDA}")
message(STATUS "LITE_WITH_X86: ${LITE_WITH_X86}")
message(STATUS "LIGHT_FRAMEWORK:\t${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}")
message(STATUS "LITE_WITH_X86:\t${LITE_WITH_X86}")
message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")
set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")
@@ -29,6 +30,65 @@ function(lite_download_and_uncompress INSTALL_DIR URL FILENAME)
)
endfunction()
function (lite_deps DEPS)
set(options "")
set(oneValueArgs "")
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS)
cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(${DEPS} ${lite_deps_DEPS} PARENT_SCOPE)
if(LITE_WITH_X86)
foreach(var ${lite_deps_X86_DEPS})
set(${DEPS} ${${DEPS}} ${var} PARENT_SCOPE)
endforeach()
endif()
if(LITE_WITH_CUDA)
foreach(var ${lite_deps_CUDA_DEPS})
set(${DEPS} ${${DEPS}} ${var} PARENT_SCOPE)
endforeach()
endif()
if(LITE_WITH_ARM)
foreach(var ${lite_deps_ARM_DEPS})
set(${DEPS} ${${DEPS}} ${var} PARENT_SCOPE)
endforeach()
endif()
endfunction()
function(lite_cc_library TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps "")
lite_deps(deps
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
ARM_DEPS ${args_ARM_DEPS}
)
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
endfunction()
function(lite_cc_test TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps "")
lite_deps(deps
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
ARM_DEPS ${args_ARM_DEPS}
)
_lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
endfunction()
add_subdirectory(core)
add_subdirectory(x86)
@@ -39,4 +99,3 @@ add_subdirectory(kernels)
add_subdirectory(model_parser)
add_subdirectory(utils)
add_subdirectory(api)
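As a usage sketch of the new lite_cc_library / lite_cc_test helpers defined above (the example_* target and file names are made up for illustration, not part of this commit): platform-specific dependencies go into the X86_DEPS / CUDA_DEPS / ARM_DEPS lists and are folded into the final dependency set only when the corresponding LITE_WITH_* option is ON.
# Hypothetical example: the ARM and CUDA deps are linked only when
# LITE_WITH_ARM / LITE_WITH_CUDA are enabled at configure time.
lite_cc_library(example_lite SRCS example.cc
                DEPS memory_lite
                X86_DEPS target_wrapper_x86
                CUDA_DEPS target_wrapper_cuda
                ARM_DEPS example_arm_math)
# The test helper resolves deps the same way, then calls _lite_cc_test.
lite_cc_test(test_example_lite SRCS example_test.cc DEPS example_lite)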
@@ -25,22 +25,8 @@ namespace lite {
void Run(const char* model_dir) {
lite::ExecutorLite predictor;
// #ifndef LITE_WITH_CUDA
// std::vector<Place> valid_places({Place{TARGET(kHost),
// PRECISION(kFloat)}});
// #elif defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
// #else
// std::vector<Place> valid_places({
// Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
// Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
// Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
// });
// #endif
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
@@ -52,8 +38,6 @@ void Run(const char* model_dir) {
data[i] = i;
}
LOG(INFO) << "input " << *input_tensor;
predictor.Run();
auto* out = predictor.GetOutput(0);
@@ -61,7 +45,7 @@ void Run(const char* model_dir) {
LOG(INFO) << "out " << out->data<float>()[0];
LOG(INFO) << "out " << out->data<float>()[1];
LOG(INFO) << "dims " << out->dims();
LOG(INFO) << "out " << *out;
LOG(INFO) << "out data size: " << out->data_size();
}
} // namespace lite
@@ -79,12 +63,18 @@ USE_LITE_OP(fc);
USE_LITE_OP(scale);
USE_LITE_OP(feed);
USE_LITE_OP(fetch);
// USE_LITE_OP(io_copy);
USE_LITE_OP(io_copy);
USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
// USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
// USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
#endif // LITE_WITH_ARM
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
......
cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest)
cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite target_wrapper_host)
cc_library(target_wrapper_lite SRCS target_wrapper.cc)
if (WITH_TESTING)
cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest)
endif()
cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite)
lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc DEPS target_wrapper_host X86_DEPS target_wrapper_x86 CUDA_DEPS target_wrapper_cuda)
cc_library(lite_tensor SRCS lite_tensor.cc DEPS memory_lite target_wrapper_lite)
if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor)
@@ -40,10 +42,10 @@ cc_library(program_fake_utils SRCS program_fake_utils.cc DEPS mir_ssa_graph
)
lite_cc_test(test_scope_lite SRCS scope_test.cc DEPS scope_lite)
lite_cc_test(test_kernel_lite SRCS kernel_test.cc DEPS kernel_lite target_wrapper_x86)
lite_cc_test(test_kernel_lite SRCS kernel_test.cc DEPS kernel_lite target_wrapper_lite)
lite_cc_test(test_op_lite SRCS op_lite_test.cc DEPS op_lite)
lite_cc_test(test_tensor_lite SRCS lite_tensor_test.cc DEPS lite_tensor)
lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_lite)
#lite_cc_test(test_optimizer_lite SRCS optimizer_test.cc DEPS mir_pass_manager program_fake_utils mir_passes optimizer_lite fc_op_lite)
lite_cc_test(test_types_lite SRCS types_test.cc DEPS types_lite)
lite_cc_test(test_memory_lite SRCS memory_test.cc DEPS memory_lite)
@@ -15,5 +15,65 @@
#include "paddle/fluid/lite/core/memory.h"
namespace paddle {
namespace lite {} // namespace lite
namespace lite {
void* TargetMalloc(TargetType target, size_t size) {
void* data{nullptr};
switch (target) {
case TargetType::kHost:
case TargetType::kX86:
case TargetType::kARM:
data = TargetWrapper<TARGET(kHost)>::Malloc(size);
break;
#ifdef LITE_WITH_CUDA
case TargetType::kCUDA:
data =
TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>::Malloc(size);
break;
#endif // LITE_WITH_CUDA
default:
LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
}
return data;
}
void TargetFree(TargetType target, void* data) {
switch (target) {
case TargetType::kHost:
case TargetType::kX86:
case TargetType::kARM:
TargetWrapper<TARGET(kHost)>::Free(data);
break;
#ifdef LITE_WITH_CUDA
case TargetType::kCUDA:
TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>::Free(data);
break;
#endif // LITE_WITH_CUDA
default:
LOG(FATAL) << "Unknown type";
}
}
void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
switch (target) {
case TargetType::kHost:
case TargetType::kX86:
case TargetType::kARM:
TargetWrapper<TARGET(kHost)>::MemcpySync(dst, src, size,
IoDirection::DtoD);
break;
#ifdef LITE_WITH_CUDA
case TargetType::kCUDA:
TargetWrapper<TARGET(kCUDA)>::MemcpySync(dst, src, size,
IoDirection::DtoD);
break;
#endif
default:
LOG(FATAL) << "unsupported type";
}
}
} // namespace lite
} // namespace paddle
@@ -18,57 +18,16 @@
namespace paddle {
namespace lite {
static void* TargetMalloc(TargetType target, size_t size) {
void* data{nullptr};
switch (target) {
case TargetType::kHost:
#ifdef LITE_WITH_X86
case TargetType::kX86:
#endif
data = TargetWrapper<TARGET(kHost)>::Malloc(size);
break;
#ifdef LITE_WITH_CUDA
case TargetType::kCUDA:
data =
TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>::Malloc(size);
break;
#endif // LITE_WITH_CUDA
default:
LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
}
return data;
}
static void TargetFree(TargetType target, void* data) {
switch (static_cast<int>(target)) {
case static_cast<int>(TargetType::kX86):
TargetWrapper<TARGET(kX86)>::Free(data);
break;
case static_cast<int>(TargetType::kCUDA):
TargetWrapper<TARGET(kX86)>::Free(data);
break;
default:
LOG(FATAL) << "Unknown type";
}
}
// Malloc memory for a specific Target. All the targets should be an element in
// the `switch` here.
void* TargetMalloc(TargetType target, size_t size);
static void TargetCopy(TargetType target, void* dst, const void* src,
size_t size) {
switch (target) {
case TargetType::kX86:
case TargetType::kHost:
TargetWrapper<TARGET(kHost)>::MemcpySync(dst, src, size,
IoDirection::DtoD);
break;
// Free memory for a specific Target. All the targets should be an element in
// the `switch` here.
void TargetFree(TargetType target, void* data);
case TargetType::kCUDA:
TargetWrapper<TARGET(kCUDA)>::MemcpySync(dst, src, size,
IoDirection::DtoD);
break;
default:
LOG(FATAL) << "unsupported type";
}
}
// Copy a buffer from host to another target.
void TargetCopy(TargetType target, void* dst, const void* src, size_t size);
// Memory buffer manager.
class Buffer {
......
@@ -12,4 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/host/relu_compute.h"
#include "paddle/fluid/lite/core/memory.h"
#include <gtest/gtest.h>
namespace paddle {
namespace lite {
TEST(memory, test) {
auto* buf = TargetMalloc(TARGET(kX86), 10);
ASSERT_TRUE(buf);
TargetFree(TARGET(kX86), buf);
#ifdef LITE_WITH_CUDA
auto* buf_cuda = TargetMalloc(TARGET(kCUDA), 10);
ASSERT_TRUE(buf_cuda);
TargetFree(TARGET(kCUDA), buf_cuda);
#endif
}
} // namespace lite
} // namespace paddle
@@ -4,4 +4,3 @@ endif()
nv_library(target_wrapper_cuda SRCS target_wrapper.cc)
nv_library(cuda_blas_lite SRCS blas.cc)
cc_library(target_wrapper_host SRCS target_wrapper.cc DEPS target_wrapper_lite)
cc_library(target_wrapper_host SRCS target_wrapper.cc)
if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
if(NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))
return()
endif()
@@ -9,14 +9,7 @@ cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps})
cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3)
cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} eigen3)
cc_library(feed_compute_arm SRCS feed_compute.cc DEPS ${lite_kernel_deps})
cc_library(fetch_compute_arm SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS ${lite_kernel_deps} fc_compute_arm)
set(arm_kernels
feed_compute_arm
fetch_compute_arm
fc_compute_arm
relu_compute_arm
mul_compute_arm
@@ -24,4 +17,3 @@ set(arm_kernels
)
set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
class FeedCompute
: public KernelLite<TARGET(kARM), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
using param_t = operators::FeedParam;
void Run() override {
auto &param = Param<operators::FeedParam>();
LOG(INFO) << "feed_list.size: " << param.feed_list->size();
LOG(INFO) << "col " << param.col;
const lite::Tensor &feed_item = (*param.feed_list)[0];
param.out->ShareDataWith(feed_item);
LOG(INFO) << "FEED input " << feed_item << " col " << param.col;
LOG(INFO) << "FEED output " << *param.out;
}
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(feed, kARM, kAny, kAny,
paddle::lite::kernels::arm::FeedCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
class FetchCompute
: public KernelLite<TARGET(kARM), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
using param_t = operators::FeedParam;
void Run() override {
auto& param = Param<operators::FetchParam>();
auto* fetch_list = param.fetch_list;
if (fetch_list->size() <= static_cast<size_t>(param.col)) {
fetch_list->resize(param.col + 1);
}
auto& dst = fetch_list->at(param.col);
dst.ShareDataWith(*param.input);
}
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(fetch, kARM, kAny, kAny,
paddle::lite::kernels::arm::FetchCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny),
DATALAYOUT(kAny), -1)})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny),
DATALAYOUT(kAny), -1)})
.Finalize();
@@ -59,9 +59,6 @@ class MulCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
mul_compute_eigen(param.x->data<float>(), x_shape.x, x_shape.y, //
param.y->data<float>(), y_shape.x, y_shape.y, //
param.output->mutable_data<float>());
LOG(INFO) << "MUL x " << *param.x;
LOG(INFO) << "MUL W " << *param.y;
LOG(INFO) << "MUL out " << *param.output;
}
virtual ~MulCompute() = default;
......
message(STATUS "compile with lite host kernels")
cc_library(fc_compute_host SRCS fc_compute.cc DEPS ${lite_kernel_deps} eigen3)
cc_library(relu_compute_host SRCS relu_compute.cc DEPS ${lite_kernel_deps})
cc_library(mul_compute_host SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3)
cc_library(scale_compute_host SRCS scale_compute.cc DEPS ${lite_kernel_deps} eigen3)
cc_library(feed_compute_host SRCS feed_compute.cc DEPS ${lite_kernel_deps})
cc_library(fetch_compute_host SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
@@ -10,11 +6,6 @@ cc_library(fetch_compute_host SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
set(host_kernels
feed_compute_host
fetch_compute_host
fc_compute_host
relu_compute_host
mul_compute_host
scale_compute_host
)
set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/host/fc_compute.h"
#include <Eigen/Core>
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
// NOTE should use pure std C++ implementation.
void FcCompute::Run() {
auto& param = this->Param<operators::FcParam>();
CHECK_GE(param.input->dims().size(), 2UL);
CHECK_EQ(param.output->dims().size(), 2UL);
fc_compute_eigen(
param.input->data<float>(), // x
param.input->dims().Slice(0, param.in_num_col_dims).production(),
param.input->dims()
.Slice(param.in_num_col_dims, param.input->dims().size())
.production(),
param.w->data<float>(), // w
param.w->dims()[1], // w_w
param.w->dims()[0], // w_h
param.bias->data<float>(), // b
param.output->mutable_data<float>());
}
// TargetType FcCompute::target() const { return TARGET(kHost); }
// PrecisionType FcCompute::precision() const { return PRECISION(kFloat); }
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(fc, kHost, kFloat, kNCHW,
paddle::lite::kernels::host::FcCompute, def)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Bias", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("W", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/operators/fc_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
class FcCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::FcParam;
void Run() override;
// TargetType target() const override;
// PrecisionType precision() const override;
virtual ~FcCompute() = default;
};
template <typename T>
void fc_compute_eigen(const T* x, int x_w, int x_h, //
const T* w, int w_w, int w_h, //
const T* b, //
T* out) {
using matrix_t =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
Eigen::Map<const matrix_t> X(x, x_h, x_w);
Eigen::Map<const matrix_t> W(w, w_h, w_w);
Eigen::Map<matrix_t> Out(out, x_h, w_h);
Out = X * W.transpose();
if (b) {
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> B(b, w_h);
Out = Out.array().rowwise() + B.transpose().array();
}
}
template <typename T>
__attribute__((optimize("unroll-loops"))) //
T dot(const T* x, const T* y, int dim) {
T out{};
for (int i = 0; i < dim; i++) {
out += x[i] * y[i];
}
return out;
}
template <typename T>
void fc_compute_naive(const T* x, int x_w, int x_h, //
const T* w, int w_w, int w_h, //
const T* b, //
T* out) {
CHECK_EQ(x_w, w_w);
// out shape: (x_h, w_w)
memset(out, 0, x_h * w_h * sizeof(T));
for (int r = 0; r < x_h; r++) {
for (int c = 0; c < w_h; c++) {
out[r * w_h + c] = dot(&x[r * x_w], &w[c * w_w], w_w) + b[c];
}
}
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/host/fc_compute.h"
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
TEST(fc_compute_naive, test) {
lite::Tensor x, w, b, out, out1;
const int batch_size = 2;
x.Resize({batch_size, 3});
w.Resize({4, 3});
b.Resize({1, 4});
out.Resize({batch_size, 4});
out1.Resize({batch_size, 4});
auto x_data = x.mutable_data<float>();
auto w_data = w.mutable_data<float>();
auto b_data = b.mutable_data<float>();
auto out_data = out.mutable_data<float>();
auto out_data1 = out1.mutable_data<float>();
for (int i = 0; i < product(x.dims()); i++) x_data[i] = i;
for (int i = 0; i < product(w.dims()); i++) w_data[i] = i;
for (int i = 0; i < product(b.dims()); i++) b_data[i] = i;
fc_compute_naive(x_data, 3, batch_size, //
w_data, 3, 4, //
b_data, out_data);
fc_compute_eigen(x_data, 3, batch_size, //
w_data, 3, 4, //
b_data, out_data1);
for (int i = 0; i < product(out.dims()); i++) {
EXPECT_NEAR(out_data[0], out_data1[0], 1e-6);
}
}
TEST(fc_host, init) {
FcCompute fc;
ASSERT_EQ(fc.precision(), PRECISION(kFloat));
ASSERT_EQ(fc.target(), TARGET(kHost));
}
TEST(fc_host, algorithm) {
using matrix_t = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>;
using matrix_map_t = Eigen::Map<matrix_t>;
// dim 10, 20
std::vector<float> input(10 * 20);
std::vector<float> w(20 * 20);
std::vector<float> output(10 * 20);
Eigen::Map<const matrix_t> input_mat(input.data(), 10, 20);
Eigen::Map<const matrix_t> weight_mat(w.data(), 20, 20);
matrix_map_t output_mat(output.data(), 10, 20);
output_mat = weight_mat.transpose() * input_mat;
}
TEST(fc_host, compute) {
FcCompute fc;
operators::FcParam param;
lite::Tensor x;
lite::Tensor w;
lite::Tensor bias;
lite::Tensor output;
x.Resize(DDim(std::vector<int64_t>({1, 10, 20})));
w.Resize(DDim(std::vector<int64_t>({20, 20})));
bias.Resize(DDim(std::vector<int64_t>({1, 10})));
output.Resize(DDim(std::vector<int64_t>({10, 20})));
auto* x_data = x.mutable_data<float>();
auto* w_data = w.mutable_data<float>();
auto* bias_data = bias.mutable_data<float>();
auto* output_data = output.mutable_data<float>();
for (int i = 0; i < 10 * 20; i++) x_data[i] = i;
for (int i = 0; i < 20 * 20; i++) w_data[i] = i;
for (int i = 0; i < 10; i++) bias_data[i] = i;
for (int i = 0; i < 10 * 20; i++) output_data[i] = 0;
param.in_num_col_dims = 2;
param.input = &x;
param.w = &w;
param.bias = &bias;
param.output = &output;
param.in_mat_dims = x.dims();
fc.SetParam(param);
fc.Run();
LOG(INFO) << "x";
for (int i = 0; i < 10 * 20; i++) LOG(INFO) << x_data[i];
LOG(INFO) << "output:";
for (int i = 0; i < 10 * 20; i++) LOG(INFO) << output.data<float>()[i];
}
TEST(fc, retrive_op) {
auto fc =
KernelRegistry::Global().Create<TARGET(kHost), PRECISION(kFloat)>("fc");
ASSERT_TRUE(fc);
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(fc, kHost, kFloat, kNCHW, def);
@@ -27,12 +27,12 @@ class FeedCompute
void Run() override {
auto &param = Param<operators::FeedParam>();
LOG(INFO) << "feed_list.size: " << param.feed_list->size();
LOG(INFO) << "col " << param.col;
VLOG(4) << "feed_list.size: " << param.feed_list->size();
VLOG(4) << "col " << param.col;
const lite::Tensor &feed_item = (*param.feed_list)[0];
param.out->ShareDataWith(feed_item);
LOG(INFO) << "FEED input " << feed_item << " col " << param.col;
LOG(INFO) << "FEED output " << *param.out;
VLOG(4) << "FEED input " << feed_item << " col " << param.col;
VLOG(4) << "FEED output " << *param.out;
}
};
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
template <typename T>
void mul_compute_eigen(const T* x, int x_h, int x_w, const T* y, int y_h,
int y_w, T* out) {
using matrix_t =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
Eigen::Map<const matrix_t> X(x, x_h, x_w);
Eigen::Map<const matrix_t> Y(y, y_h, y_w);
Eigen::Map<matrix_t> Out(out, x_h, y_w);
Out = X * Y;
}
class MulCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::MulParam;
void Run() override {
auto& param = Param<operators::MulParam>();
core::dim2 x_shape(
{static_cast<int>(
param.x->dims().Slice(0, param.x_num_col_dims).production()),
static_cast<int>(
param.x->dims()
.Slice(param.x_num_col_dims, param.x->dims().size())
.production())});
core::dim2 y_shape(
{static_cast<int>(
param.y->dims().Slice(0, param.y_num_col_dims).production()),
static_cast<int>(
param.y->dims()
.Slice(param.y_num_col_dims, param.y->dims().size())
.production())});
mul_compute_eigen(param.x->data<float>(), x_shape.x, x_shape.y, //
param.y->data<float>(), y_shape.x, y_shape.y, //
param.output->mutable_data<float>());
LOG(INFO) << "MUL x " << *param.x;
LOG(INFO) << "MUL W " << *param.y;
LOG(INFO) << "MUL out " << *param.output;
}
virtual ~MulCompute() = default;
};
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(mul, kHost, kFloat, kNCHW,
paddle::lite::kernels::host::MulCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
class ReluCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
void Run() override {
auto& param = Param<operators::ReluParam>();
auto n = param.input->dims().production();
const float* input = param.input->data<float>();
float* output = param.output->mutable_data<float>();
for (int i = 0; i < n; i++) {
output[i] = std::max(0.f, input[i]);
}
}
TargetType target() const override { return TARGET(kHost); }
PrecisionType precision() const override { return PRECISION(kFloat); }
};
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(relu, kHost, kFloat, kNCHW,
paddle::lite::kernels::host::ReluCompute, def)
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
template <typename T>
void scale_compute(const T* x, T* out, int size, float scale, float bias,
bool bias_before) {
if (bias_before) bias *= scale;
for (int i = 0; i < size; i++) {
out[i] = x[i] * scale + bias;
}
}
class ScaleCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
using param_t = operators::MulParam;
void Run() override {
auto& param = Param<operators::ScaleParam>();
scale_compute(param.x->data<float>(), param.output->mutable_data<float>(),
param.x->dims().production(), param.scale, param.bias,
param.bias_after_scale);
}
virtual ~ScaleCompute() = default;
};
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(scale, kHost, kFloat, kNCHW,
paddle::lite::kernels::host::ScaleCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
@@ -15,8 +15,5 @@
#pragma once
#include "paddle/fluid/lite/core/op_registry.h"
USE_LITE_KERNEL(fc, kHost, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kHost, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kHost, kFloat, kNCHW, def);
USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
@@ -3,5 +3,4 @@ if(NOT LITE_WITH_X86)
endif()
cc_library(activation_compute SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_op)
cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op)
@@ -21,5 +21,5 @@ set(ops_lite
io_copy_op_lite
PARENT_SCOPE)
lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite fc_compute_host)
lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite)