From 033ef5e94f01530027edbc666717b62814229f56 Mon Sep 17 00:00:00 2001 From: ronnywang Date: Wed, 13 Jul 2022 11:33:20 +0800 Subject: [PATCH] [CustomKernel] capi add eager mode support (#44164) * [CustomKernel] add capi eager mode support * add ut * add capi test --- paddle/phi/backends/custom/CMakeLists.txt | 4 + paddle/phi/backends/custom/capi_test.cc | 78 +++++ paddle/phi/capi/include/c_kernel_context.h | 20 ++ paddle/phi/capi/include/c_tensor.h | 4 + paddle/phi/capi/include/kernel_registry.h | 124 ++++++- paddle/phi/capi/include/kernel_utils.h | 329 ++++++++++++------ paddle/phi/capi/lib/c_kernel_context.cc | 85 +++++ paddle/phi/capi/lib/c_tensor.cc | 15 + .../fluid/tests/custom_runtime/CMakeLists.txt | 3 +- .../custom_runtime/custom_cpu_runtime.cc | 215 ------------ .../tests/custom_runtime/custom_cpu_setup.py | 82 ----- .../custom_runtime/test_custom_cpu_plugin.py | 131 +++++++ .../test_custom_device_data_loader.py | 66 ---- 13 files changed, 675 insertions(+), 481 deletions(-) create mode 100644 paddle/phi/backends/custom/capi_test.cc delete mode 100644 python/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc delete mode 100644 python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py create mode 100644 python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py delete mode 100644 python/paddle/fluid/tests/custom_runtime/test_custom_device_data_loader.py diff --git a/paddle/phi/backends/custom/CMakeLists.txt b/paddle/phi/backends/custom/CMakeLists.txt index d8ed6706eb..ceff429f8e 100644 --- a/paddle/phi/backends/custom/CMakeLists.txt +++ b/paddle/phi/backends/custom/CMakeLists.txt @@ -11,4 +11,8 @@ if(WITH_CUSTOM_DEVICE) custom_device_test SRCS custom_device_test.cc DEPS device_manager device_context) + cc_test( + capi_test + SRCS capi_test.cc + DEPS phi_capi) endif() diff --git a/paddle/phi/backends/custom/capi_test.cc b/paddle/phi/backends/custom/capi_test.cc new file mode 100644 index 0000000000..90b01d0e36 --- /dev/null +++ 
b/paddle/phi/backends/custom/capi_test.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include + +#include "paddle/phi/capi/all.h" + +#ifndef UNUSED +#define UNUSED __attribute__((unused)) +#endif + +#include "paddle/phi/capi/capi.h" + +TEST(CustomKernel, CAPI) { + std::string str = "capi"; + EXPECT_EQ(str.data(), PD_StringAttr(&str)); + + std::vector int32_vec({1, 2, 3}); + auto int32_list = PD_ListInt32Attr(&int32_vec); + EXPECT_EQ(int32_list.data, int32_vec.data()); + EXPECT_EQ(int32_list.size, int32_vec.size()); + + std::vector int64_vec({1, 2, 3}); + auto int64_list = PD_ListInt64Attr(&int64_vec); + EXPECT_EQ(int64_list.data, int64_vec.data()); + EXPECT_EQ(int64_list.size, int64_vec.size()); + + std::vector float_vec({1, 2, 3}); + auto float_list = PD_ListFloatAttr(&float_vec); + EXPECT_EQ(float_list.data, float_vec.data()); + EXPECT_EQ(float_list.size, float_vec.size()); + + std::vector double_vec({1, 2, 3}); + auto double_list = PD_ListDoubleAttr(&double_vec); + EXPECT_EQ(double_list.data, double_vec.data()); + EXPECT_EQ(double_list.size, double_vec.size()); + + std::vector string_vec{"capi", "api"}; + auto string_list = PD_ListStringAttr(&string_vec); + auto string_data = reinterpret_cast(string_list.data); + for (size_t i = 0; i < string_vec.size(); ++i) { + EXPECT_EQ(string_data[i], string_vec[i].data()); + } + + std::vector 
bool_vec{true, false, true}; + auto bool_list = PD_ListBoolAttr(&bool_vec); + auto bool_data = reinterpret_cast(bool_list.data); + for (size_t i = 0; i < bool_vec.size(); ++i) { + EXPECT_EQ(bool_data[i], static_cast(bool_vec[i])); + } + + std::vector ptr_vec; + for (size_t i = 0; i < float_vec.size(); ++i) { + ptr_vec.push_back(&float_vec[i]); + } + auto ptr_list = PD_TensorVectorToList(reinterpret_cast(&ptr_vec)); + EXPECT_EQ(ptr_list.data, ptr_vec.data()); + EXPECT_EQ(ptr_list.size, ptr_vec.size()); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/phi/capi/include/c_kernel_context.h b/paddle/phi/capi/include/c_kernel_context.h index c06cb3cd30..a5524e3aee 100644 --- a/paddle/phi/capi/include/c_kernel_context.h +++ b/paddle/phi/capi/include/c_kernel_context.h @@ -87,6 +87,26 @@ PD_List PD_KernelContextListScalarAttrAt(PD_KernelContext *ctx, size_t index); PD_Place *PD_KernelContextPlaceAttrAt(PD_KernelContext *ctx, size_t index); +const char *PD_StringAttr(void *attr); + +PD_DataType PD_DatatTypeAttr(void *attr); + +PD_DataLayout PD_DatatLayoutAttr(void *attr); + +PD_List PD_ListInt32Attr(void *attr); + +PD_List PD_ListInt64Attr(void *attr); + +PD_List PD_ListFloatAttr(void *attr); + +PD_List PD_ListDoubleAttr(void *attr); + +PD_List PD_ListScalarAttr(void *attr); + +PD_List PD_ListStringAttr(void *attr); + +PD_List PD_ListBoolAttr(void *attr); + #ifdef __cplusplus } // extern "C" #endif diff --git a/paddle/phi/capi/include/c_tensor.h b/paddle/phi/capi/include/c_tensor.h index 494346713c..35ac7dda39 100644 --- a/paddle/phi/capi/include/c_tensor.h +++ b/paddle/phi/capi/include/c_tensor.h @@ -82,6 +82,10 @@ void PD_TensorShareLoDWith(PD_Tensor *dst, const PD_Tensor *src, PD_Status *status); +PD_Tensor *PD_OptionalTensorGetPointer(PD_Tensor *tensor); + +PD_List PD_TensorVectorToList(PD_Tensor *tensor); + #ifdef __cplusplus } // extern "C" #endif diff --git 
a/paddle/phi/capi/include/kernel_registry.h b/paddle/phi/capi/include/kernel_registry.h index 37b045a606..47ddc0bf5b 100644 --- a/paddle/phi/capi/include/kernel_registry.h +++ b/paddle/phi/capi/include/kernel_registry.h @@ -19,7 +19,129 @@ namespace phi { namespace capi { +// eager mode +inline std::vector PD_TensorVector(PD_Tensor *tensor) { + std::vector ret; + auto list = PD_TensorVectorToList(tensor); + auto data = reinterpret_cast(list.data); + for (size_t i = 0; i < list.size; ++i) { + ret.emplace_back(data[i]); + } + return ret; +} + +inline paddle::optional PD_OptionalTensor( + PD_Tensor *tensor) { + auto ptr = PD_OptionalTensorGetPointer(tensor); + return ptr ? paddle::optional( + phi::capi::DenseTensor(ptr)) + : paddle::optional(paddle::none); +} + +template +inline T PD_Attr(void *attr) { + return *reinterpret_cast(attr); +} + +template <> +inline std::string PD_Attr(void *attr) { + return PD_StringAttr(attr); +} + +template <> +inline PD_DataType PD_Attr(void *attr) { + return PD_DatatTypeAttr(attr); +} + +template <> +inline PD_DataLayout PD_Attr(void *attr) { + return PD_DatatLayoutAttr(attr); +} + +template <> +inline std::vector PD_Attr>(void *attr) { + auto list = PD_ListInt32Attr(attr); + auto data = reinterpret_cast(list.data); + std::vector cc_list(data, data + list.size); + return cc_list; +} + +template <> +inline std::vector PD_Attr>(void *attr) { + auto list = PD_ListInt64Attr(attr); + auto data = reinterpret_cast(list.data); + std::vector cc_list(data, data + list.size); + return cc_list; +} + +template <> +inline std::vector PD_Attr>(void *attr) { + auto list = PD_ListFloatAttr(attr); + auto data = reinterpret_cast(list.data); + std::vector cc_list(data, data + list.size); + return cc_list; +} + +template <> +inline std::vector PD_Attr>(void *attr) { + auto list = PD_ListDoubleAttr(attr); + auto data = reinterpret_cast(list.data); + std::vector cc_list(data, data + list.size); + return cc_list; +} + +template <> +inline phi::capi::Scalar 
PD_Attr(void *attr) { + return phi::capi::Scalar(reinterpret_cast(attr)); +} + +template <> +inline phi::capi::IntArray PD_Attr(void *attr) { + return phi::capi::IntArray(reinterpret_cast(attr)); +} + +template <> +inline phi::capi::Place PD_Attr(void *attr) { + return phi::capi::Place(reinterpret_cast(attr)); +} + +template <> +inline std::vector PD_Attr>( + void *attr) { + auto c_list = PD_ListScalarAttr(attr); + auto data = reinterpret_cast(c_list.data); + std::vector list; + for (size_t i = 0; i < c_list.size; ++i) { + list.emplace_back(data[i]); + } + PD_DeletePointerList(c_list); + return list; +} +template <> +inline std::vector PD_Attr>(void *attr) { + auto c_list = PD_ListStringAttr(attr); + auto data = reinterpret_cast(c_list.data); + std::vector list; + for (size_t i = 0; i < c_list.size; ++i) { + list.emplace_back(data[i]); + } + PD_DeletePointerList(c_list); + return list; +} + +template <> +inline std::vector PD_Attr>(void *attr) { + auto c_list = PD_ListBoolAttr(attr); + std::vector list; + auto data = reinterpret_cast(c_list.data); + for (size_t i = 0; i < c_list.size; ++i) { + list.push_back(static_cast(data[i])); // list starts empty: append + } + PD_DeleteUInt8List(c_list); + return list; +} +// inline phi::capi::DeviceContext PD_GetDeviceContext(PD_KernelContext *ctx) { return phi::capi::DeviceContext(PD_KernelContextGetDeviceContext(ctx)); } @@ -189,7 +311,7 @@ inline std::vector PD_AttrAt>( template <> inline std::vector PD_AttrAt>( PD_KernelContext *ctx, size_t index) { - auto c_list = PD_KernelContextListScalarAttrAt(ctx, index); + auto c_list = PD_KernelContextListStringAttrAt(ctx, index); auto data = reinterpret_cast(c_list.data); std::vector list; for (size_t i = 0; i < c_list.size; ++i) { diff --git a/paddle/phi/capi/include/kernel_utils.h b/paddle/phi/capi/include/kernel_utils.h index 7302e6f467..246bc9e3c5 100644 --- a/paddle/phi/capi/include/kernel_utils.h +++ b/paddle/phi/capi/include/kernel_utils.h @@ -454,47 +454,67 @@ namespace capi { meta_kernel_fn, \ 
__VA_ARGS__)) -#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_DEVICE_CONTEXT(dev_ctx) \ +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_DEVICE_CONTEXT(dev_ctx) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(in_idx == 0, \ + "Kernel's DeviceContext should appear before Inputs."); \ + static_assert( \ + attr_idx == 0, \ + "Kernel's DeviceContext should appear before Attributes."); \ + static_assert(out_idx == 0, \ + "Kernel's DeviceContext should appear before Outputs."); \ + dev_ctx arg = PD_GetDeviceContext(ctx); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg); \ + } \ + template \ + static void VariadicCompute(const std::tuple &ctx, \ + PreviousArgs &...pargs) { \ + const dev_ctx &arg = std::get(ctx); \ + auto dev_ctx_wrapper = phi::capi::DeviceContext( \ + reinterpret_cast(const_cast(&arg))); \ + return CustomKernelCallHelper::template VariadicCompute( \ + ctx, pargs..., dev_ctx_wrapper); \ + } \ + } + +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_INPUT(tensor_type) \ template \ - struct CustomKernelCallHelper { \ + struct CustomKernelCallHelper { \ template \ static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ - static_assert(in_idx == 0, \ - "Kernel's DeviceContext should appear before Inputs."); \ - static_assert( \ - attr_idx == 0, \ - "Kernel's DeviceContext should appear before Attributes."); \ + static_assert(attr_idx == 0, \ + "Kernel's Input should appear before Attributes."); \ static_assert(out_idx == 0, \ - "Kernel's DeviceContext should appear before Outputs."); \ - dev_ctx arg = PD_GetDeviceContext(ctx); \ + "Kernel's Input should appear before Outputs."); \ + const tensor_type arg = PD_InputAt(ctx, in_idx); \ CustomKernelCallHelper:: \ - template Compute( \ + template Compute( \ ctx, pargs..., arg); \ } \ - } - -#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_INPUT(tensor_type) \ - template 
\ - struct CustomKernelCallHelper { \ - template \ - static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ - static_assert(attr_idx == 0, \ - "Kernel's Input should appear before Attributes."); \ - static_assert(out_idx == 0, \ - "Kernel's Input should appear before Outputs."); \ - const tensor_type arg = PD_InputAt(ctx, in_idx); \ - CustomKernelCallHelper:: \ - template Compute( \ - ctx, pargs..., arg); \ - } \ + template \ + static void VariadicCompute(const std::tuple &ctx, \ + PreviousArgs &...pargs) { \ + const tensor_type &arg = std::get(ctx); \ + auto tensor = phi::capi::DenseTensor( \ + reinterpret_cast(const_cast(&arg))); \ + return CustomKernelCallHelper::template VariadicCompute( \ + ctx, pargs..., tensor); \ + } \ } #define PD_SPECIALIZE_CustomKernelCallHelper_FOR_OPTIONAL_INPUT(tensor_type) \ @@ -516,99 +536,168 @@ namespace capi { template Compute( \ ctx, pargs..., arg); \ } \ + template \ + static void VariadicCompute(const std::tuple &ctx, \ + PreviousArgs &...pargs) { \ + auto &arg = std::get(ctx); \ + paddle::optional tensor = \ + PD_OptionalTensor(reinterpret_cast( \ + const_cast *>(&arg))); \ + return CustomKernelCallHelper::template VariadicCompute( \ + ctx, pargs..., tensor); \ + } \ } -#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_MULTI_INPUT(tensor_type) \ - template \ - struct CustomKernelCallHelper &, \ - Tail...> { \ - template \ - static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ - static_assert(attr_idx == 0, \ - "Kernel's Input should appear before Attributes."); \ - static_assert(out_idx == 0, \ - "Kernel's Input should appear before Outputs."); \ - auto arg = PD_MultiInputAt(ctx, in_idx); \ - auto arg_wrapper = PD_GetPointerVector(&arg); \ - CustomKernelCallHelper:: \ - template Compute( \ - ctx, pargs..., arg_wrapper); \ - } \ +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_MULTI_INPUT(tensor_type) \ + template \ + struct CustomKernelCallHelper &, \ + Tail...> { \ + template \ + static void 
Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(attr_idx == 0, \ + "Kernel's Input should appear before Attributes."); \ + static_assert(out_idx == 0, \ + "Kernel's Input should appear before Outputs."); \ + auto arg = PD_MultiInputAt(ctx, in_idx); \ + auto arg_wrapper = PD_GetPointerVector(&arg); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg_wrapper); \ + } \ + template \ + static void VariadicCompute(const std::tuple &ctx, \ + PreviousArgs &...pargs) { \ + auto &arg = std::get(ctx); \ + auto tensor = PD_TensorVector(reinterpret_cast( \ + const_cast *>(&arg))); \ + auto tensor_ptr_vec = PD_GetPointerVector(&tensor); /* capi wrappers */ \ + return CustomKernelCallHelper::template VariadicCompute( \ + ctx, pargs..., tensor_ptr_vec); \ + } \ } -#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(attr_type) \ - template \ - struct CustomKernelCallHelper { \ - template \ - static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ - static_assert(out_idx == 0, \ - "Kernel's Attributes should appear before Outputs."); \ - attr_type arg = PD_AttrAt(ctx, attr_idx); \ - CustomKernelCallHelper:: \ - template Compute( \ - ctx, pargs..., arg); \ - } \ +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_ATTRIBUTE(attr_type) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(out_idx == 0, \ + "Kernel's Attributes should appear before Outputs."); \ + attr_type arg = PD_AttrAt(ctx, attr_idx); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg); \ + } \ + template \ + static void VariadicCompute(const std::tuple &ctx, \ + PreviousArgs &...pargs) { \ + auto &arg = std::get(ctx); \ + auto attr = PD_Attr(reinterpret_cast(&arg)); \ + return CustomKernelCallHelper::template VariadicCompute( \ + ctx, pargs..., attr); \ + } \ } -#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( \ - attr_type) \ - template \ - struct CustomKernelCallHelper { \ - template \ - static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ - static_assert(out_idx == 0, \ - "Kernel's Attributes should appear before Outputs."); \ - attr_type arg = PD_AttrAt(ctx, attr_idx); \ - CustomKernelCallHelper:: \ - template Compute( \ - ctx, pargs..., arg); \ - } \ +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( \ + attr_type) \ + 
template \ - struct CustomKernelCallHelper { \ - template \ - static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ - static_assert(out_idx == 0, \ - "Kernel's Attributes should appear before Outputs."); \ - attr_type arg = PD_AttrAt(ctx, attr_idx); \ - CustomKernelCallHelper:: \ - template Compute( \ - ctx, pargs..., arg); \ - } \ +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_CONST_ATTRIBUTE_REF( \ + attr_type) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + static_assert(out_idx == 0, \ + "Kernel's Attributes should appear before Outputs."); \ + attr_type arg = PD_AttrAt(ctx, attr_idx); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., arg); \ + } \ + template \ + static void VariadicCompute(const std::tuple &ctx, \ + PreviousArgs &...pargs) { \ + const attr_type &arg = std::get(ctx); \ + auto attr = PD_Attr( \ + reinterpret_cast(const_cast(&arg))); \ + return CustomKernelCallHelper::template VariadicCompute( \ + ctx, pargs..., attr); \ + } \ } -#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_OUTPUT(tensor_type) \ - template \ - struct CustomKernelCallHelper { \ - template \ - static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ - auto arg = PD_OutputAt(ctx, out_idx); \ - tensor_type *ptr = (arg.raw_data() ? &arg : nullptr); \ - CustomKernelCallHelper:: \ - template Compute( \ - ctx, pargs..., ptr); \ - } \ +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_OUTPUT(tensor_type) \ + template \ + struct CustomKernelCallHelper { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + auto arg = PD_OutputAt(ctx, out_idx); \ + tensor_type *ptr = (arg.raw_data() ? 
&arg : nullptr); \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., ptr); \ + } \ + template \ + static void VariadicCompute(const std::tuple &ctx, \ + PreviousArgs &...pargs) { \ + tensor_type *arg = std::get(ctx); \ + auto tensor = \ + phi::capi::DenseTensor(reinterpret_cast(arg)); \ + auto tensor_ptr = tensor.raw_data() ? &tensor : nullptr; \ + return CustomKernelCallHelper::template VariadicCompute( \ + ctx, pargs..., tensor_ptr); \ + } \ } -#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_MULTI_OUTPUT(tensor_type) \ - template \ - struct CustomKernelCallHelper, Tail...> { \ - template \ - static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ - auto arg = PD_MultiOutputAt(ctx, out_idx); \ - auto arg_wrapper = PD_GetPointerVector(&arg); \ - CustomKernelCallHelper:: \ - template Compute( \ - ctx, pargs..., arg_wrapper); \ - } \ +#define PD_SPECIALIZE_CustomKernelCallHelper_FOR_MULTI_OUTPUT(tensor_type) \ + template \ + struct CustomKernelCallHelper, Tail...> { \ + template \ + static void Compute(PD_KernelContext *ctx, PreviousArgs &...pargs) { \ + auto arg = PD_MultiOutputAt(ctx, out_idx); \ + std::vector tensor_ptr_vec; \ + for (auto &tensor : arg) { \ + tensor_ptr_vec.push_back(tensor.raw_data() ? &tensor : nullptr); \ + } \ + CustomKernelCallHelper:: \ + template Compute( \ + ctx, pargs..., tensor_ptr_vec); \ + } \ + template \ + static void VariadicCompute(const std::tuple &ctx, \ + PreviousArgs &...pargs) { \ + std::vector &arg = std::get(ctx); \ + auto tensor_vec = PD_TensorVector(reinterpret_cast( \ + const_cast *>(&arg))); \ + std::vector tensor_ptr_vec; \ + for (auto &tensor : tensor_vec) { \ + tensor_ptr_vec.push_back(tensor.raw_data() ? 
&tensor : nullptr); \ + } \ + return CustomKernelCallHelper::template VariadicCompute( \ + ctx, pargs..., tensor_ptr_vec); \ + } \ } template @@ -627,9 +716,10 @@ struct CustomKernelImpl { template Compute<0, 0, 0, 0>(ctx); } - static void VariadicCompute(const phi::capi::DeviceContext &dev_ctx, - Args... args) { - return kernel_fn(static_cast(dev_ctx), std::forward(args)...); + static void VariadicCompute(DevCtx dev_ctx, Args... args) { + const std::tuple args_tuple(dev_ctx, args...); + return CustomKernelCallHelper>:: + template VariadicCompute<0>(args_tuple); } private: @@ -693,6 +783,13 @@ struct CustomKernelImpl { static_assert(out_idx > 0, "Kernel should have output argument."); return kernel_fn(dev_ctx, args...); } + + template + static void VariadicCompute(const std::tuple &ctx, + DevCtx dev_ctx, + Args... args) { + return kernel_fn(dev_ctx, args...); + } }; }; diff --git a/paddle/phi/capi/lib/c_kernel_context.cc b/paddle/phi/capi/lib/c_kernel_context.cc index 2e14b019c1..d38a19038e 100644 --- a/paddle/phi/capi/lib/c_kernel_context.cc +++ b/paddle/phi/capi/lib/c_kernel_context.cc @@ -220,4 +220,89 @@ PD_DataLayout PD_KernelContextDataLayoutAttrAt(PD_KernelContext* ctx, kernel_context->AttrAt(index)); } +// eager +const char* PD_StringAttr(void* attr) { + auto* str = reinterpret_cast(attr); + return str->c_str(); +} + +PD_DataType PD_DatatTypeAttr(void* attr) { + auto* dtype = reinterpret_cast(attr); + return phi::capi::ToPDDataType(*dtype); +} + +PD_DataLayout PD_DatatLayoutAttr(void* attr) { + auto* layout = reinterpret_cast(attr); + return phi::capi::ToPDDataLayout(*layout); +} + +PD_List PD_ListInt32Attr(void* attr) { + PD_List list; + const auto& cc_list = *reinterpret_cast*>(attr); + list.size = cc_list.size(); + list.data = const_cast(cc_list.data()); + return list; +} + +PD_List PD_ListInt64Attr(void* attr) { + PD_List list; + const auto& cc_list = *reinterpret_cast*>(attr); + list.size = cc_list.size(); + list.data = const_cast(cc_list.data()); + 
return list; +} + +PD_List PD_ListFloatAttr(void* attr) { + PD_List list; + const auto& cc_list = *reinterpret_cast*>(attr); + list.size = cc_list.size(); + list.data = const_cast(cc_list.data()); + return list; +} + +PD_List PD_ListDoubleAttr(void* attr) { + PD_List list; + const auto& cc_list = *reinterpret_cast*>(attr); + list.size = cc_list.size(); + list.data = const_cast(cc_list.data()); + return list; +} + +PD_List PD_ListScalarAttr(void* attr) { + PD_List list; + const auto& cc_list = *reinterpret_cast*>(attr); + list.size = cc_list.size(); + auto data = new PD_Scalar*[list.size]; + for (size_t i = 0; i < list.size; ++i) { + data[i] = + const_cast(reinterpret_cast(&cc_list[i])); + } + list.data = data; + return list; +} + +PD_List PD_ListStringAttr(void* attr) { + PD_List list; + const auto& cc_list = *reinterpret_cast*>(attr); + list.size = cc_list.size(); + auto data = new char*[list.size]; + for (size_t i = 0; i < list.size; ++i) { + data[i] = const_cast(cc_list[i].data()); + } + list.data = reinterpret_cast(data); + return list; +} + +PD_List PD_ListBoolAttr(void* attr) { + PD_List list; + const auto& cc_list = *reinterpret_cast*>(attr); + list.size = cc_list.size(); + auto data = reinterpret_cast(new uint8_t[cc_list.size()]); + for (size_t i = 0; i < cc_list.size(); ++i) { + data[i] = static_cast(cc_list[i]); + } + list.data = data; + return list; +} + PD_REGISTER_CAPI(kernel_context); diff --git a/paddle/phi/capi/lib/c_tensor.cc b/paddle/phi/capi/lib/c_tensor.cc index cd0bbd62d8..c81eefe22f 100644 --- a/paddle/phi/capi/lib/c_tensor.cc +++ b/paddle/phi/capi/lib/c_tensor.cc @@ -299,4 +299,19 @@ void PD_TensorShareLoDWith(PD_Tensor* dst, meta_dst.share_lod(meta_src); } +PD_Tensor* PD_OptionalTensorGetPointer(PD_Tensor* tensor) { + auto cc_tensor = + reinterpret_cast*>(tensor); + return reinterpret_cast(cc_tensor->get_ptr()); +} + +PD_List PD_TensorVectorToList(PD_Tensor* tensor) { + auto cc_tensor = + reinterpret_cast*>(tensor); + PD_List list; + 
list.size = cc_tensor->size(); + list.data = cc_tensor->data(); + return list; +} + PD_REGISTER_CAPI(tensor); diff --git a/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt b/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt index acd441c867..fa2ea2726c 100644 --- a/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt +++ b/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt @@ -1,3 +1,4 @@ if(WITH_CUSTOM_DEVICE) - py_test(test_custom_device_data_loader SRCS test_custom_device_data_loader.py) + py_test(test_custom_cpu_plugin SRCS test_custom_cpu_plugin.py) + set_tests_properties(test_custom_cpu_plugin PROPERTIES TIMEOUT 120) endif() diff --git a/python/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc b/python/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc deleted file mode 100644 index 18762625c0..0000000000 --- a/python/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include -#include - -#include "paddle/phi/backends/device_ext.h" - -#define MEMORY_FRACTION 0.5f - -C_Status Init() { return C_SUCCESS; } - -C_Status InitDevice(const C_Device device) { return C_SUCCESS; } - -C_Status SetDevice(const C_Device device) { return C_SUCCESS; } - -C_Status GetDevice(const C_Device device) { - device->id = 0; - return C_SUCCESS; -} - -C_Status DestroyDevice(const C_Device device) { return C_SUCCESS; } - -C_Status Finalize() { return C_SUCCESS; } - -C_Status GetDevicesCount(size_t *count) { - *count = 1; - return C_SUCCESS; -} - -C_Status GetDevicesList(size_t *devices) { - devices[0] = 0; - return C_SUCCESS; -} - -C_Status MemCpy(const C_Device device, - void *dst, - const void *src, - size_t size) { - memcpy(dst, src, size); - return C_SUCCESS; -} - -C_Status AsyncMemCpy(const C_Device device, - C_Stream stream, - void *dst, - const void *src, - size_t size) { - memcpy(dst, src, size); - return C_SUCCESS; -} - -C_Status MemCpyP2P(const C_Device dst_device, - const C_Device src_device, - void *dst, - const void *src, - size_t size) { - memcpy(dst, src, size); - return C_SUCCESS; -} - -C_Status AsyncMemCpyP2P(const C_Device dst_device, - const C_Device src_device, - C_Stream stream, - void *dst, - const void *src, - size_t size) { - memcpy(dst, src, size); - return C_SUCCESS; -} - -C_Status Allocate(const C_Device device, void **ptr, size_t size) { - auto data = malloc(size); - if (data) { - *ptr = data; - return C_SUCCESS; - } else { - *ptr = nullptr; - } - return C_FAILED; -} - -C_Status Deallocate(const C_Device device, void *ptr, size_t size) { - free(ptr); - return C_SUCCESS; -} - -C_Status CreateStream(const C_Device device, C_Stream *stream) { - stream = nullptr; - return C_SUCCESS; -} - -C_Status DestroyStream(const C_Device device, C_Stream stream) { - return C_SUCCESS; -} - -C_Status CreateEvent(const C_Device device, C_Event *event) { - return C_SUCCESS; -} - -C_Status RecordEvent(const C_Device device, 
C_Stream stream, C_Event event) { - return C_SUCCESS; -} - -C_Status DestroyEvent(const C_Device device, C_Event event) { - return C_SUCCESS; -} - -C_Status SyncDevice(const C_Device device) { return C_SUCCESS; } - -C_Status SyncStream(const C_Device device, C_Stream stream) { - return C_SUCCESS; -} - -C_Status SyncEvent(const C_Device device, C_Event event) { return C_SUCCESS; } - -C_Status StreamWaitEvent(const C_Device device, - C_Stream stream, - C_Event event) { - return C_SUCCESS; -} - -C_Status VisibleDevices(size_t *devices) { return C_SUCCESS; } - -C_Status DeviceMemStats(const C_Device device, - size_t *total_memory, - size_t *free_memory) { - float memusage; - FILE *fp; - char buffer[1024]; - size_t byte_read; - char *pos; - - fp = fopen("/proc/meminfo", "r"); - byte_read = fread(buffer, 1, sizeof(buffer), fp); - fclose(fp); - buffer[byte_read] = '\0'; - pos = strstr(buffer, "MemTotal:"); - sscanf(pos, "MemTotal: %lu kB", total_memory); - pos = strstr(pos, "MemFree:"); - sscanf(pos, "MemFree: %lu kB", free_memory); - *total_memory = *total_memory * 1024; - *free_memory = *free_memory * 1024; - *free_memory = *free_memory * MEMORY_FRACTION; - - return C_SUCCESS; -} - -C_Status DeviceMinChunkSize(const C_Device device, size_t *size) { - *size = 512; - return C_SUCCESS; -} - -void InitPlugin(CustomRuntimeParams *params) { - PADDLE_CUSTOM_RUNTIME_CHECK_VERSION(params); - params->device_type = "custom_cpu"; - params->sub_device_type = "v0.1"; - - memset(reinterpret_cast(params->interface), - 0, - sizeof(C_DeviceInterface)); - - params->interface->initialize = Init; - params->interface->finalize = Finalize; - - params->interface->init_device = InitDevice; - params->interface->set_device = SetDevice; - params->interface->get_device = GetDevice; - params->interface->deinit_device = DestroyDevice; - - params->interface->create_stream = CreateStream; - params->interface->destroy_stream = DestroyStream; - - params->interface->create_event = CreateEvent; - 
params->interface->destroy_event = DestroyEvent; - params->interface->record_event = RecordEvent; - - params->interface->synchronize_device = SyncDevice; - params->interface->synchronize_stream = SyncStream; - params->interface->synchronize_event = SyncEvent; - params->interface->stream_wait_event = StreamWaitEvent; - - params->interface->memory_copy_h2d = MemCpy; - params->interface->memory_copy_d2d = MemCpy; - params->interface->memory_copy_d2h = MemCpy; - params->interface->memory_copy_p2p = MemCpyP2P; - params->interface->async_memory_copy_h2d = AsyncMemCpy; - params->interface->async_memory_copy_d2d = AsyncMemCpy; - params->interface->async_memory_copy_d2h = AsyncMemCpy; - params->interface->async_memory_copy_p2p = AsyncMemCpyP2P; - params->interface->device_memory_allocate = Allocate; - params->interface->host_memory_allocate = Allocate; - params->interface->unified_memory_allocate = Allocate; - params->interface->device_memory_deallocate = Deallocate; - params->interface->host_memory_deallocate = Deallocate; - params->interface->unified_memory_deallocate = Deallocate; - - params->interface->get_device_count = GetDevicesCount; - params->interface->get_device_list = GetDevicesList; - params->interface->device_memory_stats = DeviceMemStats; - params->interface->device_min_chunk_size = DeviceMinChunkSize; -} diff --git a/python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py b/python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py deleted file mode 100644 index 82accb2ad0..0000000000 --- a/python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import site -from paddle.fluid import core -from distutils.sysconfig import get_python_lib -from distutils.core import setup, Extension -from setuptools.command.build_ext import build_ext - - -# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes -# Avoid a gcc warning below: -# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid -# for C/ObjC but not for C++ -class BuildExt(build_ext): - - def build_extensions(self): - if '-Wstrict-prototypes' in self.compiler.compiler_so: - self.compiler.compiler_so.remove('-Wstrict-prototypes') - super(BuildExt, self).build_extensions() - - -# cc flags -paddle_extra_compile_args = [ - '-std=c++14', - '-shared', - '-fPIC', - '-Wno-parentheses', - '-DPADDLE_WITH_CUSTOM_KERNEL', - '-DPADDLE_WITH_CUSTOM_DEVICE', -] -if core.is_compiled_with_npu(): - paddle_extra_compile_args += ['-D_GLIBCXX_USE_CXX11_ABI=0'] - -# include path -site_packages_path = site.getsitepackages() -include_dirs = list( - map(lambda path: os.path.join(path, 'paddle', 'include'), - site_packages_path)) - -# include path third_party -compile_third_party_path = os.path.join(os.environ['PADDLE_ROOT'], - 'build/third_party') -include_dirs += [ - os.path.join(compile_third_party_path, 'boost/src/extern_boost'), # boost - os.path.join(compile_third_party_path, 'install/gflags/include'), # gflags - os.path.join(compile_third_party_path, 'install/glog/include'), # glog -] - -# libs path -library_dirs = list( - map(lambda path: os.path.join(path, 'paddle', 'fluid'), site_packages_path)) - -# libs -libs = 
[':core_avx.so'] -if not core.has_avx_core and core.has_noavx_core: - libs = [':core_noavx.so'] - -custom_cpu_plugin_so = Extension('custom_cpu_runtime', - sources=['custom_cpu_runtime.cc'], - include_dirs=include_dirs, - library_dirs=library_dirs, - libraries=libs, - extra_compile_args=paddle_extra_compile_args) - -setup(name='custom_kernel_dot', - version='1.0', - description='custom kernel fot compiling', - cmdclass={'build_ext': BuildExt}, - ext_modules=[custom_cpu_plugin_so]) diff --git a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py new file mode 100644 index 0000000000..7da4f38a83 --- /dev/null +++ b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py @@ -0,0 +1,131 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import site +import unittest +import numpy as np + + +class TestCustomCPUPlugin(unittest.TestCase): + + def setUp(self): + # compile so and set to current path + cur_dir = os.path.dirname(os.path.abspath(__file__)) + cmd = 'rm -rf PaddleCustomDevice && git clone https://github.com/PaddlePaddle/PaddleCustomDevice.git && cd PaddleCustomDevice/backends/custom_cpu && mkdir build && cd build && cmake .. 
&& make -j8' + os.system(cmd) + + # set environment for loading and registering compiled custom kernels + # only valid in current process + os.environ['CUSTOM_DEVICE_ROOT'] = os.path.join( + cur_dir, 'PaddleCustomDevice/backends/custom_cpu/build') + + def test_custom_device_dataloader(self): + import paddle + + with paddle.fluid.framework._test_eager_guard(): + self._test_custom_device_dataloader() + self._test_custom_device_dataloader() + + def _test_custom_device_dataloader(self): + import paddle + + paddle.set_device('custom_cpu') + dataset = paddle.vision.datasets.MNIST( + mode='test', + transform=paddle.vision.transforms.Compose([ + paddle.vision.transforms.CenterCrop(20), + paddle.vision.transforms.RandomResizedCrop(14), + paddle.vision.transforms.Normalize(), + paddle.vision.transforms.ToTensor() + ])) + loader = paddle.io.DataLoader(dataset, + batch_size=32, + num_workers=1, + shuffle=True) + for image, label in loader: + self.assertTrue(image.place.is_custom_place()) + self.assertTrue(label.place.is_custom_place()) + break + + def test_custom_device_mnist(self): + import paddle + + with paddle.fluid.framework._test_eager_guard(): + self._test_custom_device_mnist() + self._test_custom_device_mnist() + + def _test_custom_device_mnist(self): + import paddle + + class MNIST(paddle.nn.Layer): + + def __init__(self): + super(MNIST, self).__init__() + self.shape = 1 * 28 * 28 + self.size = 10 + self.output_weight = self.create_parameter( + [self.shape, self.size]) + self.accuracy = paddle.metric.Accuracy() + + def forward(self, inputs, label=None): + x = paddle.reshape(inputs, shape=[-1, self.shape]) + x = paddle.matmul(x, self.output_weight) + x = paddle.nn.functional.softmax(x) + if label is not None: + self.accuracy.reset() + correct = self.accuracy.compute(x, label) + self.accuracy.update(correct) + acc = self.accuracy.accumulate() + return x, acc + else: + return x + + paddle.set_device('custom_cpu') + dataset = paddle.vision.datasets.MNIST( + mode='train', 
+ transform=paddle.vision.transforms.Compose( + [paddle.vision.transforms.ToTensor()])) + loader = paddle.io.DataLoader(dataset, + batch_size=64, + num_workers=1, + shuffle=True) + + mnist = MNIST() + sgd = paddle.optimizer.SGD(learning_rate=0.01, + parameters=mnist.parameters()) + + data = next(loader()) + img = data[0] + label = data[1] + label_int32 = paddle.cast(label, 'int32') + + pred, acc = mnist(img, label_int32) + avg_loss = paddle.nn.functional.cross_entropy(pred, label_int32) + avg_loss.backward() + sgd.step() + sgd.clear_grad() + + self.assertTrue(pred.place.is_custom_place()) + + def tearDown(self): + del os.environ['CUSTOM_DEVICE_ROOT'] + + +if __name__ == '__main__': + if os.name == 'nt' or sys.platform.startswith('darwin'): + # only support Linux now + exit() + unittest.main() diff --git a/python/paddle/fluid/tests/custom_runtime/test_custom_device_data_loader.py b/python/paddle/fluid/tests/custom_runtime/test_custom_device_data_loader.py deleted file mode 100644 index 775c3f487d..0000000000 --- a/python/paddle/fluid/tests/custom_runtime/test_custom_device_data_loader.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import site -import unittest -import numpy as np - - -class TestCustomDeviceDataLoader(unittest.TestCase): - - def setUp(self): - # compile so and set to current path - cur_dir = os.path.dirname(os.path.abspath(__file__)) - - # --inplace to place output so file to current dir - cmd = 'cd {} && {} custom_cpu_setup.py build_ext --inplace'.format( - cur_dir, sys.executable) - os.system(cmd) - - # set environment for loading and registering compiled custom kernels - # only valid in current process - os.environ['CUSTOM_DEVICE_ROOT'] = cur_dir - - def test_custom_device_dataloader(self): - import paddle - - paddle.set_device('custom_cpu') - dataset = paddle.vision.datasets.MNIST( - mode='test', - transform=paddle.vision.transforms.Compose([ - paddle.vision.transforms.CenterCrop(20), - paddle.vision.transforms.RandomResizedCrop(14), - paddle.vision.transforms.Normalize(), - paddle.vision.transforms.ToTensor() - ])) - loader = paddle.io.DataLoader(dataset, - batch_size=32, - num_workers=1, - shuffle=True) - for image, label in loader: - self.assertTrue(image.place.is_custom_place()) - self.assertTrue(label.place.is_custom_place()) - break - - def tearDown(self): - del os.environ['CUSTOM_DEVICE_ROOT'] - - -if __name__ == '__main__': - if os.name == 'nt' or sys.platform.startswith('darwin'): - # only support Linux now - exit() - unittest.main() -- GitLab