diff --git a/CMakeLists.txt b/CMakeLists.txt index 25ef4531915e6f24582eb923afa694c497ded619..0e044525d84f65f365ba2ea4148b37fe21f41589 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,7 +121,8 @@ endif() # for lite, both server and mobile framework. option(WITH_LITE "Enable lite framework" OFF) option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF) -option(LITE_WITH_X86 "Enable X86 in lite mode" ON) +option(LITE_WITH_X86 "Enable X86 in lite mode" ON) +option(LITE_WITH_ARM "Enable ARM in lite mode" OFF) option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 6c9c3fd488901f01a47cd3e97fd4cdaa713b1996..a0966d7005d76078ea9c922906d78dd6f00a90fc 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -172,6 +172,10 @@ if (LITE_WITH_X86) add_definitions("-DLITE_WITH_X86") endif() +if (LITE_WITH_ARM) + add_definitions("-DLITE_WITH_ARM") +endif() + if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) add_definitions("-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK") endif() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index cccff7f0b42593d176ae4af4b7d41bebaa70b92a..a028dcbd6be80dd94cf33c333a1bd823b9c13298 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -427,7 +427,7 @@ function(raw_cc_test TARGET_NAME) endif() endfunction(raw_cc_test) -function(lite_cc_test args) +function(_lite_cc_test args) if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) message(STATUS "building lite raw test: ${args}") raw_cc_test(${args} ${ARGN}) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index fae33f55b054b1dde8ac7bf7cd931d5de911c5aa..dab35bae4d524182c6534a9deb83076d69009bdd 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -39,6 +39,10 @@ DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op"); namespace paddle { namespace framework { +OpDuppy op_duppy; +Scope scope_duppy; +RuntimeContext runtime_context_duppy({}, 
{}); + std::vector> kKernelPriority = { std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN), std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain), diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 5b4bfc1eb473391c950cd6e4def13840489f21ca..8f301c6ebce124aea69532fadc6dc2189c395d72 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -239,9 +239,10 @@ class OpDuppy : public OperatorBase { void RunImpl(const Scope& scope, const platform::Place& place) const override {} }; -OpDuppy op_duppy; -Scope scope_duppy; -RuntimeContext runtime_context_duppy({}, {}); + +extern OpDuppy op_duppy; +extern Scope scope_duppy; +extern RuntimeContext runtime_context_duppy; class ExecutionContext { public: @@ -255,7 +256,7 @@ class ExecutionContext { ctx_(ctx), kernel_configs_(configs) {} - ExecutionContext(const platform::DeviceContext& device_context) + explicit ExecutionContext(const platform::DeviceContext& device_context) : op_(op_duppy), scope_(scope_duppy), device_context_(device_context), diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt index d465c5898184e132ceecd795b6ff8f0d7bc0a814..d97321954b45767f32dae0afa1d9f5f3099fa40f 100644 --- a/paddle/fluid/lite/CMakeLists.txt +++ b/paddle/fluid/lite/CMakeLists.txt @@ -3,9 +3,10 @@ if (NOT WITH_LITE) endif() message(WARNING "Lite enabled!") -message(STATUS "LIGHT_FRAMEWORK: ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}") -message(STATUS "LITE_WITH_CUDA: ${LITE_WITH_CUDA}") -message(STATUS "LITE_WITH_X86: ${LITE_WITH_X86}") +message(STATUS "LIGHT_FRAMEWORK:\t${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}") +message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}") +message(STATUS "LITE_WITH_X86:\t${LITE_WITH_X86}") +message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}") set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install") @@ -29,6 +30,65 @@ function(lite_download_and_uncompress INSTALL_DIR URL FILENAME) ) endfunction() +function (lite_deps DEPS) + 
set(options "") + set(oneValueArgs "") + set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS) + cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + # Accumulate in a local list; set(... PARENT_SCOPE) never updates the current scope. + set(lite_deps_all ${lite_deps_DEPS}) + if(LITE_WITH_X86) + foreach(var ${lite_deps_X86_DEPS}) + set(lite_deps_all ${lite_deps_all} ${var}) + endforeach() + endif() + + if(LITE_WITH_CUDA) + foreach(var ${lite_deps_CUDA_DEPS}) + set(lite_deps_all ${lite_deps_all} ${var}) + endforeach() + endif() + + if(LITE_WITH_ARM) + foreach(var ${lite_deps_ARM_DEPS}) + set(lite_deps_all ${lite_deps_all} ${var}) + endforeach() + endif() + set(${DEPS} ${lite_deps_all} PARENT_SCOPE) +endfunction() + +function(lite_cc_library TARGET) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS) + cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(deps "") + lite_deps(deps + DEPS ${args_DEPS} + X86_DEPS ${args_X86_DEPS} + CUDA_DEPS ${args_CUDA_DEPS} + ARM_DEPS ${args_ARM_DEPS} + ) + + cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps}) +endfunction() + +function(lite_cc_test TARGET) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS) + cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(deps "") + lite_deps(deps + DEPS ${args_DEPS} + X86_DEPS ${args_X86_DEPS} + CUDA_DEPS ${args_CUDA_DEPS} + ARM_DEPS ${args_ARM_DEPS} + ) + _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps}) +endfunction() add_subdirectory(core) add_subdirectory(x86) @@ -39,4 +99,3 @@ add_subdirectory(kernels) add_subdirectory(model_parser) add_subdirectory(utils) add_subdirectory(api) - diff --git a/paddle/fluid/lite/api/cxx_api_bin.cc b/paddle/fluid/lite/api/cxx_api_bin.cc index 7b38f072e2327145b38dc3c3a0a85301597ab68f..4e111097e380ab79e33faf12374d69c56809964c 100644 --- a/paddle/fluid/lite/api/cxx_api_bin.cc +++ b/paddle/fluid/lite/api/cxx_api_bin.cc @@ -25,22 +25,8 @@ namespace lite { 
void Run(const char* model_dir) { lite::ExecutorLite predictor; - // #ifndef LITE_WITH_CUDA - // std::vector valid_places({Place{TARGET(kHost), - // PRECISION(kFloat)}}); - // #elif defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) - // #else - // std::vector valid_places({ - // Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, - // Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, - // Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, - // Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, - // Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, - // Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, - // }); - // #endif - - std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}}); predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kFloat)}, valid_places); @@ -52,8 +38,6 @@ void Run(const char* model_dir) { data[i] = i; } - LOG(INFO) << "input " << *input_tensor; - predictor.Run(); auto* out = predictor.GetOutput(0); @@ -61,7 +45,7 @@ void Run(const char* model_dir) { LOG(INFO) << "out " << out->data()[0]; LOG(INFO) << "out " << out->data()[1]; LOG(INFO) << "dims " << out->dims(); - LOG(INFO) << "out " << *out; + LOG(INFO) << "out data size: " << out->data_size(); } } // namespace lite @@ -79,12 +63,18 @@ USE_LITE_OP(fc); USE_LITE_OP(scale); USE_LITE_OP(feed); USE_LITE_OP(fetch); -// USE_LITE_OP(io_copy); +USE_LITE_OP(io_copy); + +USE_LITE_KERNEL(feed, kHost, kAny, kAny, def); +USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def); + +#ifdef LITE_WITH_ARM USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(feed, kARM, kAny, kAny, def); -USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def); +// USE_LITE_KERNEL(feed, kARM, kAny, kAny, def); +// USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def); +#endif // 
LITE_WITH_ARM #ifdef LITE_WITH_CUDA USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index 8545fa0f40a9920e4b3541c71ef851904bf7583f..195d8dc0acb6a1d03092764ff9ce30ef9ccfe863 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -1,6 +1,8 @@ -cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest) -cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite target_wrapper_host) -cc_library(target_wrapper_lite SRCS target_wrapper.cc) +if (WITH_TESTING) + cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest) +endif() +cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite) +lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc DEPS target_wrapper_host X86_DEPS target_wrapper_x86 CUDA_DEPS target_wrapper_cuda) cc_library(lite_tensor SRCS lite_tensor.cc DEPS memory_lite target_wrapper_lite) if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor) @@ -40,10 +42,10 @@ cc_library(program_fake_utils SRCS program_fake_utils.cc DEPS mir_ssa_graph ) lite_cc_test(test_scope_lite SRCS scope_test.cc DEPS scope_lite) -lite_cc_test(test_kernel_lite SRCS kernel_test.cc DEPS kernel_lite target_wrapper_x86) +lite_cc_test(test_kernel_lite SRCS kernel_test.cc DEPS kernel_lite target_wrapper_lite) lite_cc_test(test_op_lite SRCS op_lite_test.cc DEPS op_lite) lite_cc_test(test_tensor_lite SRCS lite_tensor_test.cc DEPS lite_tensor) lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_lite) #lite_cc_test(test_optimizer_lite SRCS optimizer_test.cc DEPS mir_pass_manager program_fake_utils mir_passes optimizer_lite fc_op_lite) lite_cc_test(test_types_lite SRCS types_test.cc DEPS types_lite) - +lite_cc_test(test_memory_lite SRCS memory_test.cc DEPS memory_lite) diff --git a/paddle/fluid/lite/core/memory.cc b/paddle/fluid/lite/core/memory.cc index 
39f312be8d4bf3963c885bbcc501e181f03b6e36..86c18858e273259941b49f91177b5bbcd65973b5 100644 --- a/paddle/fluid/lite/core/memory.cc +++ b/paddle/fluid/lite/core/memory.cc @@ -15,5 +15,65 @@ #include "paddle/fluid/lite/core/memory.h" namespace paddle { -namespace lite {} // namespace lite +namespace lite { + +void* TargetMalloc(TargetType target, size_t size) { + void* data{nullptr}; + switch (target) { + case TargetType::kHost: + case TargetType::kX86: + case TargetType::kARM: + data = TargetWrapper::Malloc(size); + break; +#ifdef LITE_WITH_CUDA + case TargetType::kCUDA: + data = + TargetWrapper::Malloc(size); + break; +#endif // LITE_WITH_CUDA + default: + LOG(FATAL) << "Unknown supported target " << TargetToStr(target); + } + return data; +} + +void TargetFree(TargetType target, void* data) { + switch (target) { + case TargetType::kHost: + case TargetType::kX86: + case TargetType::kARM: + TargetWrapper::Free(data); + break; + +#ifdef LITE_WITH_CUDA + case TargetType::kCUDA: + TargetWrapper::Free(data); + break; +#endif // LITE_WITH_CUDA + default: + LOG(FATAL) << "Unknown type"; + } +} + +void TargetCopy(TargetType target, void* dst, const void* src, size_t size) { + switch (target) { + case TargetType::kHost: + case TargetType::kX86: + case TargetType::kARM: + TargetWrapper::MemcpySync(dst, src, size, + IoDirection::DtoD); + break; + +#ifdef LITE_WITH_CUDA + case TargetType::kCUDA: + TargetWrapper::MemcpySync(dst, src, size, + IoDirection::DtoD); + break; +#endif + default: + LOG(FATAL) << "unsupported type"; + } +} + +} // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/core/memory.h b/paddle/fluid/lite/core/memory.h index 5b332f7e3ac14f19fc46e4d19cb3041761927c37..5948f6c4a854d9f678c316f351c017788c44c4a2 100644 --- a/paddle/fluid/lite/core/memory.h +++ b/paddle/fluid/lite/core/memory.h @@ -18,57 +18,16 @@ namespace paddle { namespace lite { -static void* TargetMalloc(TargetType target, size_t size) { - void* data{nullptr}; - switch (target) { - 
case TargetType::kHost: -#ifdef LITE_WITH_X86 - case TargetType::kX86: -#endif - data = TargetWrapper::Malloc(size); - break; -#ifdef LITE_WITH_CUDA - case TargetType::kCUDA: - data = - TargetWrapper::Malloc(size); - break; -#endif // LITE_WITH_CUDA - default: - LOG(FATAL) << "Unknown supported target " << TargetToStr(target); - } - return data; -} - -static void TargetFree(TargetType target, void* data) { - switch (static_cast(target)) { - case static_cast(TargetType::kX86): - TargetWrapper::Free(data); - break; - case static_cast(TargetType::kCUDA): - TargetWrapper::Free(data); - break; - default: - LOG(FATAL) << "Unknown type"; - } -} +// Malloc memory for a specific Target. All the targets should be an element in +// the `switch` here. +void* TargetMalloc(TargetType target, size_t size); -static void TargetCopy(TargetType target, void* dst, const void* src, - size_t size) { - switch (target) { - case TargetType::kX86: - case TargetType::kHost: - TargetWrapper::MemcpySync(dst, src, size, - IoDirection::DtoD); - break; +// Free memory for a specific Target. All the targets should be an element in +// the `switch` here. +void TargetFree(TargetType target, void* data); - case TargetType::kCUDA: - TargetWrapper::MemcpySync(dst, src, size, - IoDirection::DtoD); - break; - default: - LOG(FATAL) << "unsupported type"; - } -} +// Copy `size` bytes from `src` to `dst` on a specific Target (device-to-device). +void TargetCopy(TargetType target, void* dst, const void* src, size_t size); // Memory buffer manager. 
class Buffer { diff --git a/paddle/fluid/lite/kernels/host/relu_compute.cc b/paddle/fluid/lite/core/memory_test.cc similarity index 59% rename from paddle/fluid/lite/kernels/host/relu_compute.cc rename to paddle/fluid/lite/core/memory_test.cc index 59b9ccd836410fb3c7cfc15df924222aa767cf7c..191fb3931c177d39bb578a97b0fea202a4ba120b 100644 --- a/paddle/fluid/lite/kernels/host/relu_compute.cc +++ b/paddle/fluid/lite/core/memory_test.cc @@ -12,4 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/lite/kernels/host/relu_compute.h" +#include "paddle/fluid/lite/core/memory.h" +#include + +namespace paddle { +namespace lite { + +TEST(memory, test) { + auto* buf = TargetMalloc(TARGET(kX86), 10); + ASSERT_TRUE(buf); + TargetFree(TARGET(kX86), buf); + +#ifdef LITE_WITH_CUDA + auto* buf_cuda = TargetMalloc(TARGET(kCUDA), 10); + ASSERT_TRUE(buf_cuda); + TargetFree(TARGET(kCUDA), buf_cuda); +#endif // LITE_WITH_CUDA +} + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/cuda/CMakeLists.txt b/paddle/fluid/lite/cuda/CMakeLists.txt index 9889b8b1aa02b9f886bf45aaf9b997f0043c3278..505759c7d4afef95423ce3815912794ae28255b0 100644 --- a/paddle/fluid/lite/cuda/CMakeLists.txt +++ b/paddle/fluid/lite/cuda/CMakeLists.txt @@ -4,4 +4,3 @@ endif() nv_library(target_wrapper_cuda SRCS target_wrapper.cc) nv_library(cuda_blas_lite SRCS blas.cc) - diff --git a/paddle/fluid/lite/host/CMakeLists.txt b/paddle/fluid/lite/host/CMakeLists.txt index 576c6e76c142c8c753181334cf0d9c767221744b..90812f3f3cd712571eb7f11261e23c8dcb78b0fe 100644 --- a/paddle/fluid/lite/host/CMakeLists.txt +++ b/paddle/fluid/lite/host/CMakeLists.txt @@ -1,2 +1 @@ -cc_library(target_wrapper_host SRCS target_wrapper.cc DEPS target_wrapper_lite) - +cc_library(target_wrapper_host SRCS target_wrapper.cc) diff --git a/paddle/fluid/lite/kernels/arm/CMakeLists.txt b/paddle/fluid/lite/kernels/arm/CMakeLists.txt index 
116db446a0252557d0c4346ba346cd1a7e77a291..b4001a003c37ad8117773aceb8327f232d905bd3 100644 --- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) +if(NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)) return() endif() @@ -9,14 +9,7 @@ cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps}) cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3) cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} eigen3) -cc_library(feed_compute_arm SRCS feed_compute.cc DEPS ${lite_kernel_deps}) -cc_library(fetch_compute_arm SRCS fetch_compute.cc DEPS ${lite_kernel_deps}) - -# lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS ${lite_kernel_deps} fc_compute_arm) - set(arm_kernels - feed_compute_arm - fetch_compute_arm fc_compute_arm relu_compute_arm mul_compute_arm @@ -24,4 +17,3 @@ set(arm_kernels ) set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels") - diff --git a/paddle/fluid/lite/kernels/arm/feed_compute.cc b/paddle/fluid/lite/kernels/arm/feed_compute.cc deleted file mode 100644 index 5e51dd5eeb48a90ae90f8b35dbeb9049fd5921c6..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/arm/feed_compute.cc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/type_system.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace arm { - -class FeedCompute - : public KernelLite { - public: - using param_t = operators::FeedParam; - - void Run() override { - auto &param = Param(); - LOG(INFO) << "feed_list.size: " << param.feed_list->size(); - LOG(INFO) << "col " << param.col; - const lite::Tensor &feed_item = (*param.feed_list)[0]; - param.out->ShareDataWith(feed_item); - LOG(INFO) << "FEED input " << feed_item << " col " << param.col; - LOG(INFO) << "FEED output " << *param.out; - } -}; - -} // namespace arm -} // namespace kernels -} // namespace lite -} // namespace paddle - -REGISTER_LITE_KERNEL(feed, kARM, kAny, kAny, - paddle::lite::kernels::arm::FeedCompute, def) - .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) - .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) - .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/fetch_compute.cc b/paddle/fluid/lite/kernels/arm/fetch_compute.cc deleted file mode 100644 index ca491ba92dcb53ad50f0bfc2873d5b82bd4a422a..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/arm/fetch_compute.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/type_system.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace arm { - -class FetchCompute - : public KernelLite { - public: - using param_t = operators::FeedParam; - - void Run() override { - auto& param = Param(); - auto* fetch_list = param.fetch_list; - if (fetch_list->size() <= static_cast(param.col)) { - fetch_list->resize(param.col + 1); - } - - auto& dst = fetch_list->at(param.col); - dst.ShareDataWith(*param.input); - } -}; - -} // namespace arm -} // namespace kernels -} // namespace lite -} // namespace paddle - -REGISTER_LITE_KERNEL(fetch, kARM, kAny, kAny, - paddle::lite::kernels::arm::FetchCompute, def) - .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny), - DATALAYOUT(kAny), -1)}) - .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny), - DATALAYOUT(kAny), -1)}) - .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/mul_compute.cc b/paddle/fluid/lite/kernels/arm/mul_compute.cc index 5e867f2dc3ed448fc251a1a1a645f9ed9bc651cb..ff12b236031896cfd8503903327ab1141b5171ae 100644 --- a/paddle/fluid/lite/kernels/arm/mul_compute.cc +++ b/paddle/fluid/lite/kernels/arm/mul_compute.cc @@ -59,9 +59,6 @@ class MulCompute : public KernelLite { mul_compute_eigen(param.x->data(), x_shape.x, x_shape.y, // param.y->data(), y_shape.x, y_shape.y, // param.output->mutable_data()); - LOG(INFO) << "MUL x " << *param.x; - LOG(INFO) << "MUL W " << *param.y; - LOG(INFO) << "MUL out " << *param.output; } virtual ~MulCompute() = default; diff --git a/paddle/fluid/lite/kernels/host/CMakeLists.txt b/paddle/fluid/lite/kernels/host/CMakeLists.txt index 81c82abbf6b4b37b4741733698c73bc158494d65..5642d4d9d07e5248fe329b3db95015a4d8efd74d 100644 --- a/paddle/fluid/lite/kernels/host/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/host/CMakeLists.txt @@ -1,8 +1,4 @@ message(STATUS "compile with lite host kernels") -cc_library(fc_compute_host SRCS 
fc_compute.cc DEPS ${lite_kernel_deps} eigen3) -cc_library(relu_compute_host SRCS relu_compute.cc DEPS ${lite_kernel_deps}) -cc_library(mul_compute_host SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3) -cc_library(scale_compute_host SRCS scale_compute.cc DEPS ${lite_kernel_deps} eigen3) cc_library(feed_compute_host SRCS feed_compute.cc DEPS ${lite_kernel_deps}) cc_library(fetch_compute_host SRCS fetch_compute.cc DEPS ${lite_kernel_deps}) @@ -10,11 +6,6 @@ cc_library(fetch_compute_host SRCS fetch_compute.cc DEPS ${lite_kernel_deps}) set(host_kernels feed_compute_host fetch_compute_host - fc_compute_host - relu_compute_host - mul_compute_host - scale_compute_host ) set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels") - diff --git a/paddle/fluid/lite/kernels/host/fc_compute.cc b/paddle/fluid/lite/kernels/host/fc_compute.cc deleted file mode 100644 index ae5b23ce3ece54b238ff55d24191224417d5acf2..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/host/fc_compute.cc +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/lite/kernels/host/fc_compute.h" -#include -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/type_system.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace host { - -// NOTE should use pure std C++ implementation. 
-void FcCompute::Run() { - auto& param = this->Param(); - - CHECK_GE(param.input->dims().size(), 2UL); - CHECK_EQ(param.output->dims().size(), 2UL); - - fc_compute_eigen( - param.input->data(), // x - param.input->dims().Slice(0, param.in_num_col_dims).production(), - param.input->dims() - .Slice(param.in_num_col_dims, param.input->dims().size()) - .production(), - param.w->data(), // w - param.w->dims()[1], // w_w - param.w->dims()[0], // w_h - param.bias->data(), // b - param.output->mutable_data()); -} - -// TargetType FcCompute::target() const { return TARGET(kHost); } - -// PrecisionType FcCompute::precision() const { return PRECISION(kFloat); } - -} // namespace host -} // namespace kernels -} // namespace lite -} // namespace paddle - -REGISTER_LITE_KERNEL(fc, kHost, kFloat, kNCHW, - paddle::lite::kernels::host::FcCompute, def) - .BindInput("Input", {LiteType::GetTensorTy(TARGET(kHost))}) - .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kHost))}) - .BindInput("W", {LiteType::GetTensorTy(TARGET(kHost))}) - .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))}) - .Finalize(); diff --git a/paddle/fluid/lite/kernels/host/fc_compute.h b/paddle/fluid/lite/kernels/host/fc_compute.h deleted file mode 100644 index 1a6c4eb4c0fbe1398bf6e2356eb902a112c578b9..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/host/fc_compute.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/operators/fc_op.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace host { - -class FcCompute : public KernelLite { - public: - using param_t = operators::FcParam; - - void Run() override; - - // TargetType target() const override; - // PrecisionType precision() const override; - - virtual ~FcCompute() = default; -}; - -template -void fc_compute_eigen(const T* x, int x_w, int x_h, // - const T* w, int w_w, int w_h, // - const T* b, // - T* out) { - using matrix_t = - Eigen::Matrix; - - Eigen::Map X(x, x_h, x_w); - Eigen::Map W(w, w_h, w_w); - Eigen::Map Out(out, x_h, w_h); - - Out = X * W.transpose(); - - if (b) { - Eigen::Map> B(b, w_h); - Out = Out.array().rowwise() + B.transpose().array(); - } -} - -template -__attribute__((optimize("unroll-loops"))) // -T dot(const T* x, const T* y, int dim) { - T out{}; - for (int i = 0; i < dim; i++) { - out += x[i] * y[i]; - } - return out; -} - -template -void fc_compute_naive(const T* x, int x_w, int x_h, // - const T* w, int w_w, int w_h, // - const T* b, // - T* out) { - CHECK_EQ(x_w, w_w); - // out shape: (x_h, w_w) - memset(out, 0, x_h * w_h * sizeof(T)); - - for (int r = 0; r < x_h; r++) { - for (int c = 0; c < w_h; c++) { - out[r * w_h + c] = dot(&x[r * x_w], &w[c * w_w], w_w) + b[c]; - } - } -} - -} // namespace host -} // namespace kernels -} // namespace lite -} // namespace paddle diff --git a/paddle/fluid/lite/kernels/host/fc_compute_test.cc b/paddle/fluid/lite/kernels/host/fc_compute_test.cc deleted file mode 100644 index 69b0450900e34ee7d6e98054682c77d89c945ec4..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/host/fc_compute_test.cc +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/lite/kernels/host/fc_compute.h" -#include -#include -#include "paddle/fluid/lite/core/op_registry.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace host { - -TEST(fc_compute_naive, test) { - lite::Tensor x, w, b, out, out1; - const int batch_size = 2; - x.Resize({batch_size, 3}); - w.Resize({4, 3}); - b.Resize({1, 4}); - out.Resize({batch_size, 4}); - out1.Resize({batch_size, 4}); - - auto x_data = x.mutable_data(); - auto w_data = w.mutable_data(); - auto b_data = b.mutable_data(); - auto out_data = out.mutable_data(); - auto out_data1 = out1.mutable_data(); - - for (int i = 0; i < product(x.dims()); i++) x_data[i] = i; - for (int i = 0; i < product(w.dims()); i++) w_data[i] = i; - for (int i = 0; i < product(b.dims()); i++) b_data[i] = i; - - fc_compute_naive(x_data, 3, batch_size, // - w_data, 3, 4, // - b_data, out_data); - fc_compute_eigen(x_data, 3, batch_size, // - w_data, 3, 4, // - b_data, out_data1); - - for (int i = 0; i < product(out.dims()); i++) { - EXPECT_NEAR(out_data[0], out_data1[0], 1e-6); - } -} - -TEST(fc_host, init) { - FcCompute fc; - ASSERT_EQ(fc.precision(), PRECISION(kFloat)); - ASSERT_EQ(fc.target(), TARGET(kHost)); -} - -TEST(fc_host, algorithm) { - using matrix_t = Eigen::Matrix; - using matrix_map_t = Eigen::Map; - - // dim 10, 20 - std::vector input(10 * 20); - std::vector w(20 * 20); - std::vector output(10 * 20); - - Eigen::Map 
input_mat(input.data(), 10, 20); - Eigen::Map weight_mat(w.data(), 20, 20); - matrix_map_t output_mat(output.data(), 10, 20); - - output_mat = weight_mat.transpose() * input_mat; -} - -TEST(fc_host, compute) { - FcCompute fc; - operators::FcParam param; - - lite::Tensor x; - lite::Tensor w; - lite::Tensor bias; - lite::Tensor output; - - x.Resize(DDim(std::vector({1, 10, 20}))); - w.Resize(DDim(std::vector({20, 20}))); - bias.Resize(DDim(std::vector({1, 10}))); - output.Resize(DDim(std::vector({10, 20}))); - - auto* x_data = x.mutable_data(); - auto* w_data = w.mutable_data(); - auto* bias_data = bias.mutable_data(); - auto* output_data = output.mutable_data(); - - for (int i = 0; i < 10 * 20; i++) x_data[i] = i; - for (int i = 0; i < 20 * 20; i++) w_data[i] = i; - for (int i = 0; i < 10; i++) bias_data[i] = i; - for (int i = 0; i < 10 * 20; i++) output_data[i] = 0; - - param.in_num_col_dims = 2; - param.input = &x; - param.w = &w; - param.bias = &bias; - param.output = &output; - param.in_mat_dims = x.dims(); - - fc.SetParam(param); - fc.Run(); - - LOG(INFO) << "x"; - for (int i = 0; i < 10 * 20; i++) LOG(INFO) << x_data[i]; - - LOG(INFO) << "output:"; - for (int i = 0; i < 10 * 20; i++) LOG(INFO) << output.data()[i]; -} - -TEST(fc, retrive_op) { - auto fc = - KernelRegistry::Global().Create("fc"); - ASSERT_TRUE(fc); -} - -} // namespace host -} // namespace kernels -} // namespace lite -} // namespace paddle - -USE_LITE_KERNEL(fc, kHost, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/host/feed_compute.cc b/paddle/fluid/lite/kernels/host/feed_compute.cc index ba503c577f409991c9b3e01f4019aad0fcccc43e..7bbd648c20d3f783a1167bc2c9d5e8f7ba79d29d 100644 --- a/paddle/fluid/lite/kernels/host/feed_compute.cc +++ b/paddle/fluid/lite/kernels/host/feed_compute.cc @@ -27,12 +27,12 @@ class FeedCompute void Run() override { auto &param = Param(); - LOG(INFO) << "feed_list.size: " << param.feed_list->size(); - LOG(INFO) << "col " << param.col; + VLOG(4) << 
"feed_list.size: " << param.feed_list->size(); + VLOG(4) << "col " << param.col; const lite::Tensor &feed_item = (*param.feed_list)[0]; param.out->ShareDataWith(feed_item); - LOG(INFO) << "FEED input " << feed_item << " col " << param.col; - LOG(INFO) << "FEED output " << *param.out; + VLOG(4) << "FEED input " << feed_item << " col " << param.col; + VLOG(4) << "FEED output " << *param.out; } }; diff --git a/paddle/fluid/lite/kernels/host/mul_compute.cc b/paddle/fluid/lite/kernels/host/mul_compute.cc deleted file mode 100644 index 2bb509c86ac8363f9b027a27f2938c3b353d9e62..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/host/mul_compute.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/types.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace host { - -template -void mul_compute_eigen(const T* x, int x_h, int x_w, const T* y, int y_h, - int y_w, T* out) { - using matrix_t = - Eigen::Matrix; - - Eigen::Map X(x, x_h, x_w); - Eigen::Map Y(y, y_h, y_w); - Eigen::Map Out(out, x_h, y_w); - - Out = X * Y; -} - -class MulCompute : public KernelLite { - public: - using param_t = operators::MulParam; - - void Run() override { - auto& param = Param(); - core::dim2 x_shape( - {static_cast( - param.x->dims().Slice(0, param.x_num_col_dims).production()), - static_cast( - param.x->dims() - .Slice(param.x_num_col_dims, param.x->dims().size()) - .production())}); - core::dim2 y_shape( - {static_cast( - param.y->dims().Slice(0, param.y_num_col_dims).production()), - static_cast( - param.y->dims() - .Slice(param.y_num_col_dims, param.y->dims().size()) - .production())}); - - mul_compute_eigen(param.x->data(), x_shape.x, x_shape.y, // - param.y->data(), y_shape.x, y_shape.y, // - param.output->mutable_data()); - LOG(INFO) << "MUL x " << *param.x; - LOG(INFO) << "MUL W " << *param.y; - LOG(INFO) << "MUL out " << *param.output; - } - - virtual ~MulCompute() = default; -}; - -} // namespace host -} // namespace kernels -} // namespace lite -} // namespace paddle - -REGISTER_LITE_KERNEL(mul, kHost, kFloat, kNCHW, - paddle::lite::kernels::host::MulCompute, def) - .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))}) - .BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost))}) - .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))}) - .Finalize(); diff --git a/paddle/fluid/lite/kernels/host/relu_compute.h b/paddle/fluid/lite/kernels/host/relu_compute.h deleted file mode 100644 index aae9e161aabe30c22c7a12953b98d094a9c6637f..0000000000000000000000000000000000000000 --- 
a/paddle/fluid/lite/kernels/host/relu_compute.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace host { - -class ReluCompute : public KernelLite { - public: - void Run() override { - auto& param = Param(); - auto n = param.input->dims().production(); - const float* input = param.input->data(); - float* output = param.output->mutable_data(); - for (int i = 0; i < n; i++) { - output[i] = std::max(0.f, input[i]); - } - } - - TargetType target() const override { return TARGET(kHost); } - PrecisionType precision() const override { return PRECISION(kFloat); } -}; - -} // namespace host -} // namespace kernels -} // namespace lite -} // namespace paddle - -REGISTER_LITE_KERNEL(relu, kHost, kFloat, kNCHW, - paddle::lite::kernels::host::ReluCompute, def) - .Finalize(); diff --git a/paddle/fluid/lite/kernels/host/scale_compute.cc b/paddle/fluid/lite/kernels/host/scale_compute.cc deleted file mode 100644 index 3fc542646ba7aef6f38c6b2e6e6ee35b3a774c32..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/host/scale_compute.cc +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/types.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace host { - -template -void scale_compute(const T* x, T* out, int size, float scale, float bias, - bool bias_before) { - if (bias_before) bias *= scale; - for (int i = 0; i < size; i++) { - out[i] = x[i] * scale + bias; - } -} - -class ScaleCompute : public KernelLite { - public: - using param_t = operators::MulParam; - - void Run() override { - auto& param = Param(); - scale_compute(param.x->data(), param.output->mutable_data(), - param.x->dims().production(), param.scale, param.bias, - param.bias_after_scale); - } - - virtual ~ScaleCompute() = default; -}; - -} // namespace host -} // namespace kernels -} // namespace lite -} // namespace paddle - -REGISTER_LITE_KERNEL(scale, kHost, kFloat, kNCHW, - paddle::lite::kernels::host::ScaleCompute, def) - .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))}) - .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))}) - .Finalize(); diff --git a/paddle/fluid/lite/kernels/host/use_kernels.h b/paddle/fluid/lite/kernels/host/use_kernels.h index e9e9c88c624d803d1eea2679c2dfe688ca46f514..52e087cdfa0b938841b93c0d8aabf16ff68419ef 100644 --- a/paddle/fluid/lite/kernels/host/use_kernels.h +++ b/paddle/fluid/lite/kernels/host/use_kernels.h @@ -15,8 +15,5 @@ 
#pragma once #include "paddle/fluid/lite/core/op_registry.h" -USE_LITE_KERNEL(fc, kHost, kFloat, kNCHW, def); -USE_LITE_KERNEL(mul, kHost, kFloat, kNCHW, def); -USE_LITE_KERNEL(scale, kHost, kFloat, kNCHW, def); USE_LITE_KERNEL(feed, kHost, kAny, kAny, def); USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def); diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt index b4ac7c7f790eb893a4e6f7375f62de5a0046b333..75133f19f448bedfdafc303ff7f33ca3ced15ab4 100644 --- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt @@ -3,5 +3,4 @@ if(NOT LITE_WITH_X86) endif() cc_library(activation_compute SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op) -cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_op) - +cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op) diff --git a/paddle/fluid/lite/operators/CMakeLists.txt b/paddle/fluid/lite/operators/CMakeLists.txt index 2a0f000cb63302e7cc8ec0de61f74540cc14fec1..1703686e4a110ba65ec17d276b05669edcc05eab 100644 --- a/paddle/fluid/lite/operators/CMakeLists.txt +++ b/paddle/fluid/lite/operators/CMakeLists.txt @@ -21,5 +21,5 @@ set(ops_lite io_copy_op_lite PARENT_SCOPE) -lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite fc_compute_host) +lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite)