Unverified commit c58c4ede authored by zyfncg, committed by GitHub

【PTen】C++ API Code-Generation (#37668)

* add code-gen file

* add api-gen in cmake

* adjust the code format

* temp save the code

* add genen-api module into pten

* merge the develop code

* fix merge conflict

* fix code conflict with develop

* support reduce_mean/sum

* fix the CI requirement

* fix requirement problem of CI

* remove original api code

* fix bug caused by removing original api
Parent f1c48f85
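Before the diff itself, a minimal usage sketch of what this change delivers: every C++ function API that previously lived in creation.h, linalg.h, manipulation.h and math.h is now declared in the single generated header api.h. The snippet is illustrative only; it assumes Scalar and ScalarArray accept braced initializers, as their use elsewhere in this diff suggests.

#include "paddle/pten/api/include/api.h"  // the generated header

void demo() {
  namespace exp = paddle::experimental;
  // full(shape, value) with the generated defaults (FLOAT32, CPU, NCHW).
  auto x = exp::full({3, 4}, 1.0f);
  // ones_like picks up dtype/backend/layout from x via the UNDEFINED defaults.
  auto y = exp::ones_like(x);
  // matmul, reshape and sum dispatch through the same generated wrappers.
  auto z = exp::matmul(x, exp::reshape(y, {4, 3}));
  auto s = exp::sum(z, {0, 1}, /*keep_dim=*/false);
}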
...@@ -4,6 +4,7 @@ paddle/fluid/API_DEV.spec
paddle/fluid/API_PR.spec
paddle/fluid/op_use_default_grad_maker_DEV.spec
paddle/fluid/op_use_default_grad_maker_PR.spec
+paddle/pten/api/*/api*
*.DS_Store
*.vs
......
add_subdirectory(lib)
-cc_library(pten_api SRCS all.cc DEPS linalg_api math_api creation_api manipulation_api utils_api)
+cc_library(pten_api SRCS all.cc DEPS pten_function_api utils_api)
...@@ -25,10 +25,7 @@ limitations under the License. */
#endif
// new pten apis
-#include "paddle/pten/api/include/creation.h"
-#include "paddle/pten/api/include/linalg.h"
-#include "paddle/pten/api/include/manipulation.h"
-#include "paddle/pten/api/include/math.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/api/include/utils.h"
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/common/backend.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"
namespace paddle {
namespace experimental {
PD_DLL_DECL Tensor full(const ScalarArray& shape,
const Scalar& value,
DataType dtype = DataType::FLOAT32,
Backend backend = Backend::CPU,
DataLayout layout = DataLayout::NCHW);
PD_DLL_DECL Tensor full_like(const Tensor& x,
const Scalar& value,
DataType dtype = DataType::UNDEFINED,
Backend backend = Backend::UNDEFINED,
DataLayout layout = DataLayout::UNDEFINED);
PD_DLL_DECL Tensor ones_like(const Tensor& x,
DataType dtype = DataType::UNDEFINED,
Backend backend = Backend::UNDEFINED,
DataLayout layout = DataLayout::UNDEFINED);
PD_DLL_DECL Tensor zeros_like(const Tensor& x,
DataType dtype = DataType::UNDEFINED,
Backend backend = Backend::UNDEFINED,
DataLayout layout = DataLayout::UNDEFINED);
} // namespace experimental
} // namespace paddle
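A short sketch of how the UNDEFINED defaults above are meant to be used: any of dtype/backend/layout left unset is filled in from x by the ParseDataTypeWithInputOrder / ParseBackendWithInputOrder / ParseLayoutWithInputOrder helpers that appear later in this diff. Values and namespace qualification here are illustrative assumptions.

void creation_demo() {
  namespace exp = paddle::experimental;
  auto x = exp::full({2, 2}, 0.5, exp::DataType::FLOAT64);
  // dtype/backend/layout all stay UNDEFINED, so y inherits
  // FLOAT64 / CPU / NCHW from x.
  auto y = exp::full_like(x, 3.0);
  // An explicit dtype overrides the one inherited from x.
  auto z = exp::zeros_like(x, exp::DataType::INT32);
}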
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/pten/api/include/tensor.h"
namespace paddle {
namespace experimental {
PD_DLL_DECL Tensor dot(const Tensor& x, const Tensor& y);
PD_DLL_DECL Tensor matmul(const Tensor& x,
const Tensor& y,
bool transpose_x = false,
bool transpose_y = false);
} // namespace experimental
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/api/include/tensor.h"
namespace paddle {
namespace experimental {
PD_DLL_DECL Tensor flatten(const Tensor& x, int start_axis, int stop_axis);
PD_DLL_DECL Tensor cast(const Tensor& x, DataType out_dtype);
PD_DLL_DECL Tensor reshape(const Tensor& x, const std::vector<int64_t>& shape);
} // namespace experimental
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/common/scalar.h"
namespace paddle {
namespace experimental {
PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y);
PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y);
PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y);
PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y);
// TODO(chenweihang): move mean API into stat.h/cc
PD_DLL_DECL Tensor mean(const Tensor& x,
const std::vector<int64_t>& axis,
bool keep_dim);
PD_DLL_DECL Tensor sum(const Tensor& x,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim);
// TODO(chenweihang): Follow-up discussion on the handling of `act` argument
PD_DLL_DECL Tensor scale(const Tensor& x,
const Scalar& scale,
float bias,
bool bias_after_scale);
} // namespace experimental
} // namespace paddle
...@@ -14,8 +14,25 @@ cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS pten_tensor device_conte
cc_library(op_meta_info SRCS op_meta_info.cc DEPS pten_tensor)
-cc_library(math_api SRCS math.cc DEPS pten_tensor pten kernel_dispatch)
-cc_library(linalg_api SRCS linalg.cc DEPS pten_tensor pten kernel_dispatch)
-cc_library(creation_api SRCS creation.cc DEPS pten_tensor pten kernel_dispatch)
-cc_library(manipulation_api SRCS manipulation.cc DEPS pten_tensor pten kernel_dispatch)
+set(api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api_gen.py)
+set(api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml)
+set(api_header_file ${CMAKE_SOURCE_DIR}/paddle/pten/api/include/api.h)
+set(api_source_file ${CMAKE_SOURCE_DIR}/paddle/pten/api/lib/api.cc)
+set(api_header_file_tmp ${api_header_file}.tmp)
+set(api_source_file_tmp ${api_source_file}.tmp)
+add_custom_command(
+  OUTPUT ${api_header_file} ${api_source_file}
+  COMMAND python ${api_gen_file}
+    --api_yaml_path ${api_yaml_file}
+    --api_header_path ${api_header_file_tmp}
+    --api_source_path ${api_source_file_tmp}
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} ${api_header_file}
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} ${api_source_file}
+  COMMENT "copy_if_different ${api_header_file} ${api_source_file}"
+  DEPENDS ${api_yaml_file}
+  VERBATIM)
cc_library(utils_api SRCS utils.cc DEPS pten_tensor pten kernel_dispatch)
+cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/api/include/creation.h"
#include <memory>
#include "glog/logging.h"
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
PT_DECLARE_MODULE(CreationCPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_MODULE(CreationCUDA);
#endif
namespace paddle {
namespace experimental {
PD_DLL_DECL Tensor full(const ScalarArray& shape,
const Scalar& value,
DataType dtype,
Backend backend,
DataLayout layout) {
// 1. Get kernel signature and kernel
pten::KernelKey kernel_key{backend, layout, dtype};
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"fill_constant", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
kernel_context.EmplaceBackAttr(pten::ScalarArray(shape));
kernel_context.EmplaceBackAttr(pten::Scalar(value));
// 4. InferMeta
auto out_meta = pten::FullInferMeta(shape, dtype, layout);
// 5. Prepare outputs
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
Tensor out;
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor full_like(const Tensor& x,
const Scalar& value,
DataType dtype,
Backend backend,
DataLayout layout) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
DataType kernel_data_type =
dtype == DataType::UNDEFINED ? kernel_key.dtype() : dtype;
Backend kernel_backend =
backend == Backend::UNDEFINED ? kernel_key.backend() : backend;
DataLayout kernel_layout =
layout == DataLayout::UNDEFINED ? kernel_key.layout() : layout;
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"fill_any_like", {kernel_backend, kernel_layout, kernel_data_type});
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackAttr(pten::Scalar(value));
// 4. InferMeta
auto out_meta = FullLikeInferMeta(dense_x->meta(), dtype, layout);
// 5. Prepare outputs
Tensor out;
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_backend));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor ones_like(const Tensor& x,
DataType dtype,
Backend backend,
DataLayout layout) {
return full_like(x, 1, dtype, backend, layout);
}
PD_DLL_DECL Tensor zeros_like(const Tensor& x,
DataType dtype,
Backend backend,
DataLayout layout) {
return full_like(x, 0, dtype, backend, layout);
}
} // namespace experimental
} // namespace paddle
PT_REGISTER_API(Creation);
...@@ -57,5 +57,46 @@ paddle::platform::DeviceContext* GetDeviceContextByBackend(
return pool.Get(pten::TransToFluidPlace(backend));
}
DataType ParseDataType(DataType dtype) { return dtype; }
DataType ParseDataType(const Tensor& tensor) { return tensor.type(); }
DataType ParseDataType(const std::vector<Tensor>& tensors) {
if (tensors.empty()) {
return DataType::UNDEFINED;
}
DataType dtype = tensors[0].type();
auto n = tensors.size();
for (size_t i = 1; i < n; ++i) {
if (tensors[i].type() != dtype) {
PADDLE_THROW(platform::errors::InvalidArgument(
"The data_type of input tensor in list isn't consistent, "
"the first tensor is %s, but %dth tensor is %s.",
dtype,
i,
tensors[i].type()));
}
}
return dtype;
}
DataType ParseDataTypeWithInputOrder(DataType dtype, const Tensor& tensor) {
return dtype != DataType::UNDEFINED ? dtype : ParseDataType(tensor);
}
Backend ParseBackend(Backend backend) { return backend; }
Backend ParseBackend(const Tensor& tensor) {
return pten::TransToPtenBackend(tensor.inner_place());
}
Backend ParseBackendWithInputOrder(Backend backend, const Tensor& tensor) {
return backend != Backend::UNDEFINED ? backend : ParseBackend(tensor);
}
DataLayout ParseLayout(DataLayout layout) { return layout; }
DataLayout ParseLayout(const Tensor& tensor) { return tensor.layout(); }
DataLayout ParseLayoutWithInputOrder(DataLayout layout, const Tensor& tensor) {
return layout != DataLayout::UNDEFINED ? layout : ParseLayout(tensor);
}
} // namespace experimental
} // namespace paddle
...@@ -129,5 +129,25 @@ KernelKeySet ParseKernelKeyByInputArgs(const Args&... args) {
return detail::KernelKeyParser().apply(args...).key_set;
}
DataType ParseDataType(DataType dtype);
DataType ParseDataType(const Tensor& tensor);
DataType ParseDataType(const std::vector<Tensor>& tensors);
DataType ParseDataTypeWithInputOrder(DataType dtype, const Tensor& tensor);
Backend ParseBackend(Backend backend);
Backend ParseBackend(const Tensor& tensor);
template <typename T, typename... Args>
Backend ParseBackend(T t, Args... args) {
auto backend_set =
BackendSet(ParseBackend(t)) | BackendSet(ParseBackend(args...));
return static_cast<Backend>(64 -
detail::CountLeadingZeros(backend_set.bitset()));
}
Backend ParseBackendWithInputOrder(Backend backend, const Tensor& tensor);
DataLayout ParseLayout(DataLayout layout);
DataLayout ParseLayout(const Tensor& tensor);
DataLayout ParseLayoutWithInputOrder(DataLayout layout, const Tensor& tensor);
} // namespace experimental
} // namespace paddle
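The variadic ParseBackend above folds every argument's backend into a BackendSet bitset and then takes 64 - CountLeadingZeros(bitset), so the highest-priority (highest-valued) backend wins. A self-contained sketch of that trick, assuming BackendSet maps a backend b to bit (b - 1); all names below are illustrative stand-ins, not Paddle's.

#include <cstdint>
#include <iostream>

enum class Backend : uint8_t { UNDEFINED = 0, CPU = 1, CUDA = 2 };

// Portable stand-in for detail::CountLeadingZeros.
int CountLeadingZeros(uint64_t v) {
  if (v == 0) return 64;
  int n = 0;
  while (!(v & (1ull << 63))) { v <<= 1; ++n; }
  return n;
}

int main() {
  auto bit = [](Backend b) { return 1ull << (static_cast<int>(b) - 1); };
  // One input on CPU and one on CUDA: bits 0 and 1 are both set.
  uint64_t bitset = bit(Backend::CPU) | bit(Backend::CUDA);
  // The highest set bit wins: 64 - clz(0b11) = 2, i.e. Backend::CUDA.
  auto picked = static_cast<Backend>(64 - CountLeadingZeros(bitset));
  std::cout << static_cast<int>(picked) << "\n";  // prints 2
}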
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/api/include/linalg.h"
#include <memory>
#include "glog/logging.h"
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
PT_DECLARE_MODULE(LinalgCPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_MODULE(LinalgCUDA);
#endif
namespace paddle {
namespace experimental {
PD_DLL_DECL Tensor dot(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"dot", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_y);
// TODO(chenweihang): add transform impl
// 4. InferMeta
auto out_meta = DotInferMeta(dense_x->meta(), dense_y->meta());
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor matmul(const Tensor& x,
const Tensor& y,
bool transpose_x,
bool transpose_y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"matmul_v2", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_x);
kernel_context.EmplaceBackInput(dense_y);
kernel_context.EmplaceBackAttr(transpose_x);
kernel_context.EmplaceBackAttr(transpose_y);
// TODO(chenweihang): add transform impl
// 4. InferMeta
auto out_meta = MatmulInferMeta(
dense_x->meta(), dense_y->meta(), transpose_x, transpose_y);
// 5. Prepare outputs
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
Tensor out;
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
} // namespace experimental
} // namespace paddle
PT_REGISTER_API(Linalg);
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/api/include/manipulation.h"
#include <memory>
#include "glog/logging.h"
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/infermeta/unary.h"
PT_DECLARE_MODULE(ManipulationCPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_MODULE(ManipulationCUDA);
#endif
namespace paddle {
namespace experimental {
PD_DLL_DECL Tensor flatten(const Tensor& x, int start_axis, int stop_axis) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"flatten_contiguous_range", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
kernel_context.EmplaceBackAttr(start_axis);
kernel_context.EmplaceBackAttr(stop_axis);
// 4. InferMeta
auto out_meta = FlattenInferMeta(dense_x->meta(), start_axis, stop_axis);
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor cast(const Tensor& x, DataType out_dtype) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"cast", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
kernel_context.EmplaceBackAttr(out_dtype);
kernel_context.EmplaceBackAttr(dense_x->meta().dtype);
// 4. InferMeta
auto out_meta = CastInferMeta(dense_x->meta(), out_dtype);
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor reshape(const Tensor& x, const std::vector<int64_t>& shape) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"reshape2", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
kernel_context.EmplaceBackAttr(shape);
// 4. InferMeta
auto out_meta = InferMetaFromVecValue(dense_x->meta(), shape);
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
} // namespace experimental
} // namespace paddle
PT_REGISTER_API(Manipulation);
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/api/include/math.h"
#include <memory>
#include "glog/logging.h"
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/infermeta/unary.h"
PT_DECLARE_MODULE(MathCPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_MODULE(MathCUDA);
#endif
namespace paddle {
namespace experimental {
PD_DLL_DECL Tensor mean(const Tensor& x,
const std::vector<int64_t>& axis,
bool keep_dim) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"reduce_mean", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
// The real value of reduce_all is determined in the kernel,
// so using the default value (false) here is OK.
bool reduce_all = false;
DataType out_dtype = DataType::UNDEFINED;
kernel_context.EmplaceBackAttr(axis);
kernel_context.EmplaceBackAttr(keep_dim);
kernel_context.EmplaceBackAttr(reduce_all);
kernel_context.EmplaceBackAttr(dense_x->dtype());
kernel_context.EmplaceBackAttr(out_dtype);
// 4. InferMeta
auto out_meta = ReduceInferMeta(dense_x->meta(), axis, keep_dim);
// 5. Prepare outputs
Tensor out;
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor sum(const Tensor& x,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"reduce_sum", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
// The real value of reduce_all is determined in the kernel,
// so using the default value (false) here is OK.
bool reduce_all = false;
DataType out_dtype = DataType::UNDEFINED;
if (dense_x->dtype() == DataType::BOOL ||
dense_x->dtype() == DataType::INT32 ||
dense_x->dtype() == DataType::INT64) {
out_dtype = DataType::INT64;
}
kernel_context.EmplaceBackAttr(axis);
kernel_context.EmplaceBackAttr(keep_dim);
kernel_context.EmplaceBackAttr(reduce_all);
kernel_context.EmplaceBackAttr(dense_x->dtype());
kernel_context.EmplaceBackAttr(out_dtype);
// 4. InferMeta
auto out_meta = ReduceInferMeta(dense_x->meta(), axis, keep_dim);
// 5. Prepare outputs
Tensor out;
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_add", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_y);
kernel_context.EmplaceBackAttr(-1);
// 4. InferMeta
auto out_meta = ElementwiseInferMeta(dense_x->meta(), dense_y->meta(), -1);
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_sub", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_y);
kernel_context.EmplaceBackAttr(-1);
// 4. InferMeta
auto out_meta = ElementwiseInferMeta(dense_x->meta(), dense_y->meta(), -1);
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_div", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_y);
kernel_context.EmplaceBackAttr(-1);
// 4. InferMeta
auto out_meta = ElementwiseInferMeta(dense_x->meta(), dense_y->meta(), -1);
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_mul", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_y);
kernel_context.EmplaceBackAttr(-1);
// 4. InferMeta
auto out_meta = ElementwiseInferMeta(dense_x->meta(), dense_y->meta(), -1);
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
PD_DLL_DECL Tensor scale(const Tensor& x,
const Scalar& scale,
float bias,
bool bias_after_scale) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"scale", kernel_key);
// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
kernel_context.EmplaceBackAttr(pten::Scalar(scale));
kernel_context.EmplaceBackAttr(bias);
kernel_context.EmplaceBackAttr(bias_after_scale);
// 4. InferMeta
auto out_meta = UnchangedInferMeta(dense_x->meta());
// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);
// 6. Call kernel
kernel(&kernel_context);
return out;
}
} // namespace experimental
} // namespace paddle
PT_REGISTER_API(Math);
...@@ -19,7 +19,6 @@ limitations under the License. */
#include <vector>
#include "glog/logging.h"
-#include "paddle/pten/api/include/manipulation.h"
#include "paddle/pten/api/include/utils.h"
#include "paddle/pten/api/lib/ext_compat_utils.h"
#include "paddle/pten/api/lib/utils/allocator.h"
...@@ -67,6 +66,9 @@ inline bool IsDenseTensor(
} // namespace detail
// declare cast api
Tensor cast(const Tensor &x, DataType out_dtype);
/////// Tensor Methods ////////
/* Part 1: Construction and destruction methods */
......
if(WITH_ROCM)
-hip_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor utils_api manipulation_api glog)
+hip_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor pten_function_api utils_api glog)
else()
-cc_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor utils_api manipulation_api glog)
+cc_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor pten_function_api utils_api glog)
endif()
cc_test(test_pten_exception SRCS test_pten_exception.cc DEPS gtest)
......
...@@ -15,8 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/creation.h"
-#include "paddle/pten/api/include/manipulation.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/linalg.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/math.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/creation.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/manipulation.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/linalg.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/math.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/manipulation.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,8 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/creation.h"
-#include "paddle/pten/api/include/math.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/creation.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/core/kernel_registry.h"
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
-#include "paddle/pten/api/include/math.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
...@@ -50,7 +50,7 @@ TEST(API, sum) {
std::vector<int64_t> axis = {0, 1};
// 2. test API
-auto out = paddle::experimental::sum(x, axis, DataType::UNDEFINED, false);
+auto out = paddle::experimental::sum(x, axis, false);
// 3. check result
ASSERT_EQ(out.dims().size(), 1);
ASSERT_EQ(out.dims()[0], 1);
......
...@@ -51,6 +51,7 @@ if [ "$pip_index" ]; then
fi
if [ "$WITH_REQUIREMENT" ]; then
+echo "pyyaml" >> $WITH_REQUIREMENT
echo ">>> install python requirement: $WITH_REQUIREMENT";
pip install $PIP_ARGS -r "$WITH_REQUIREMENT";
fi
......
- api : add
args : (const Tensor& x, const Tensor& y)
output : Tensor
infer_meta :
func : ElementwiseInferMeta
param : [x, y, -1]
kernel :
func : elementwise_add
param : [x, y, -1]
- api : cast
args : (const Tensor& x, DataType out_dtype)
output : Tensor
infer_meta :
func : CastInferMeta
kernel :
func : cast
param : [x, out_dtype, x.dtype()]
data_type : x
- api : divide
args : (const Tensor& x, const Tensor& y)
output : Tensor
infer_meta :
func : ElementwiseInferMeta
param : [x, y, -1]
kernel :
func : elementwise_div
param : [x, y, -1]
- api : dot
args : (const Tensor& x, const Tensor& y)
output : Tensor
infer_meta :
func : DotInferMeta
kernel :
func : dot
- api : flatten
args : (const Tensor& x, int start_axis, int stop_axis)
output : Tensor
infer_meta :
func : FlattenInferMeta
kernel :
func : flatten_contiguous_range
- api : full
args : (const ScalarArray& shape, const Scalar& value, DataType dtype=DataType::FLOAT32, Backend place=Backend::CPU, DataLayout layout=DataLayout::NCHW)
output: Tensor
infer_meta :
func : FullInferMeta
param : [shape, dtype, layout]
kernel :
func : fill_constant
param : [shape, value]
data_type : dtype
backend : place
layout : layout
- api : full_like
args : (const Tensor& x, const Scalar& value, DataType dtype = DataType::UNDEFINED, Backend place = Backend::UNDEFINED, DataLayout layout = DataLayout::UNDEFINED)
output: Tensor
infer_meta :
func : FullLikeInferMeta
param : [x, dtype, layout]
kernel :
func : fill_any_like
param : [x, value]
data_type : dtype > x
backend : place > x
layout : layout > x
- api : matmul
args : (const Tensor& x, const Tensor& y, bool transpose_x = false, bool transpose_y = false)
output : Tensor
infer_meta :
func : MatmulInferMeta
kernel :
func : matmul_v2
- api : mean
args : (const Tensor& x, const std::vector<int64_t>& axis, bool keep_dim)
output : Tensor
infer_meta :
func : ReduceInferMeta
kernel :
func : reduce_mean
param : [x, axis, keep_dim, false, x.dtype(), DataType::UNDEFINED]
- api : multiply
args : (const Tensor& x, const Tensor& y)
output : Tensor
infer_meta :
func : ElementwiseInferMeta
param : [x, y, -1]
kernel :
func : elementwise_mul
param : [x, y, -1]
- api : ones_like
args : (const Tensor& x, DataType dtype=DataType::UNDEFINED, Backend place=Backend::UNDEFINED, DataLayout layout=DataLayout::UNDEFINED)
output : Tensor
invoke : full_like(x, 1, dtype, place, layout)
- api : reshape
args : (const Tensor& x, const std::vector<int64_t>& shape)
output : Tensor
infer_meta :
func : InferMetaFromVecValue
kernel :
func : reshape2
- api : scale
args : (const Tensor& x, const Scalar& scale, float bias, bool bias_after_scale)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : scale
- api : subtract
args : (const Tensor& x, const Tensor& y)
output : Tensor
infer_meta :
func : ElementwiseInferMeta
param : [x, y, -1]
kernel :
func : elementwise_sub
param : [x, y, -1]
- api : sum
args : (const Tensor& x, const std::vector<int64_t>& axis, bool keep_dim)
output : Tensor
infer_meta :
func : ReduceInferMeta
kernel :
func : reduce_sum
param : [x, axis, keep_dim, false, x.dtype(), DataType::UNDEFINED]
- api : zeros_like
args : (const Tensor& x, DataType dtype=DataType::UNDEFINED, Backend place=Backend::UNDEFINED, DataLayout layout=DataLayout::UNDEFINED)
output : Tensor
invoke : full_like(x, 0, dtype, place, layout)
# - api : full_like
# args : (const Tensor& x, const Scalar& value, DataType dtype, Backend place)->Tensor
# output: {Tensor : dtype}
# kernel : fill_any_like
# T : [dtype, x]
# backend : [place, x]
# layout : []
# InferMeta : UnchangedInferMeta(x)
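To connect the yaml above with the generator below: for the dot entry, api_gen.py emits approximately the following wrapper into api.cc. This is a hand-reformatted sketch; the authoritative text comes from the f-string templates in gene_kernel_select and gene_kernel_context.

PD_DLL_DECL Tensor dot(const Tensor& x, const Tensor& y) {
  Backend kernel_backend = Backend::UNDEFINED;
  DataLayout kernel_layout = DataLayout::UNDEFINED;
  DataType kernel_data_type = DataType::UNDEFINED;
  // The dot entry sets no backend/layout/data_type overrides, so everything
  // is parsed from the input tensors.
  if (kernel_backend == Backend::UNDEFINED
      || kernel_layout == DataLayout::UNDEFINED
      || kernel_data_type == DataType::UNDEFINED) {
    auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
    auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
    if (kernel_backend == Backend::UNDEFINED) kernel_backend = kernel_key.backend();
    if (kernel_layout == DataLayout::UNDEFINED) kernel_layout = kernel_key.layout();
    if (kernel_data_type == DataType::UNDEFINED) kernel_data_type = kernel_key.dtype();
  }
  auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
      "dot", {kernel_backend, kernel_layout, kernel_data_type});

  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
  auto kernel_context = pten::KernelContext(dev_ctx);
  auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
  kernel_context.EmplaceBackInput(dense_x);
  auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
  kernel_context.EmplaceBackInput(dense_y);

  auto out_meta = pten::DotInferMeta(dense_x->meta(), dense_y->meta());
  const auto allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          pten::TransToFluidPlace(kernel_backend));
  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
  kernel_context.EmplaceBackOutput(dense_out);
  Tensor out;
  out.set_impl(dense_out);

  kernel(&kernel_context);
  return out;
}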
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import argparse
class API:
prefix_tensor_name = 'dense_'
def __init__(self, api_item_yaml):
self.api = api_item_yaml['api']
# args:
# inputs:
# names : [], list of input names
# attrs:
# names : [], list of attribute names
# attr_info : { attr_name : (type, default_values)}
self.args = self.parse_args(api_item_yaml['args'])
self.output = api_item_yaml['output']
self.is_base_api = True
if 'invoke' in api_item_yaml:
self.is_base_api = False
self.invoke = api_item_yaml['invoke']
else:
self.kernel = api_item_yaml['kernel']
if 'backend' not in self.kernel or len(self.kernel['backend']) == 0:
self.kernel['backend'] = None
if 'layout' not in self.kernel or len(self.kernel['layout']) == 0:
self.kernel['layout'] = None
if 'data_type' not in self.kernel or len(self.kernel[
'data_type']) == 0:
self.kernel['data_type'] = None
if 'param' not in self.kernel or len(self.kernel['param']) == 0:
self.kernel['param'] = None
self.infer_meta = api_item_yaml['infer_meta']
if 'param' not in self.infer_meta or len(self.infer_meta[
'param']) == 0:
self.infer_meta['param'] = None
def parse_args(self, args_str):
inputs = {'names': []}
attrs = {'names': [], 'attr_info': {}}
args_str = args_str.strip()
assert args_str.startswith('(') and args_str.endswith(')'), \
f"Args declaration should start with '(' and end with ')', please check the args of {self.api} in api.yaml."
args_str = args_str[1:-1]
args_list = args_str.split(',')
input_types = ['const Tensor&', 'const Tensor &']
attr_types = ['const Scalar&', 'const Scalar &', 'const ScalarArray&', 'const ScalarArray &', \
'int', 'int32_t', 'int64_t', 'size_t', 'float', 'double', 'bool', \
'const std::vector<int64_t>&', 'Backend', 'DataLayout', 'DataType']
args_declare_str = ""
args_define_str = ""
for item in args_list:
item = item.strip()
# match the input tensor
has_input = False
for in_type in input_types:
if item.startswith(in_type):
input_name = item[len(in_type):].strip()
assert len(input_name) > 0, \
f"The input tensor name should not be empty. Please check the args of {self.api} in api.yaml."
inputs['names'].append(input_name)
args_declare_str = args_declare_str + in_type + ' ' + input_name + ', '
args_define_str = args_define_str + in_type + ' ' + input_name + ', '
has_input = True
break
if has_input:
continue
# match the attribute
for attr_type in attr_types:
if item.startswith(attr_type):
attr_name = item[len(attr_type):].strip()
assert len(attr_name) > 0, \
f"The attribute name should not be empty. Please check the args of {self.api} in api.yaml."
default_value = None
if '=' in attr_name:
attr_infos = attr_name.split('=')
attr_name = attr_infos[0].strip()
default_value = attr_infos[1].strip()
default_value_str = "" if default_value is None else '=' + default_value
args_declare_str = args_declare_str + attr_type + ' ' + attr_name + default_value_str + ', '
args_define_str = args_define_str + attr_type + ' ' + attr_name + ', '
attrs['names'].append(attr_name)
attrs['attr_info'][attr_name] = (attr_type, default_value)
break
args = {
'inputs': inputs,
'attrs': attrs,
'args_declare': args_declare_str[:-2],
'args_define': args_define_str[:-2]
}
return args
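# For example (matching the matmul entry in api.yaml), calling
#   parse_args("(const Tensor& x, const Tensor& y, "
#              "bool transpose_x = false, bool transpose_y = false)")
# yields inputs = {'names': ['x', 'y']} and
# attrs = {'names': ['transpose_x', 'transpose_y'],
#          'attr_info': {'transpose_x': ('bool', 'false'),
#                        'transpose_y': ('bool', 'false')}},
# plus the assembled args_declare / args_define strings.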
def gene_api_declaration(self):
return f"""
PD_DLL_DECL {self.output} {self.api}({self.args['args_declare']});
"""
def gene_kernel_select(self, input_names, attrs, kernel):
kernel_key_item_init = """
Backend kernel_backend = Backend::UNDEFINED;
DataLayout kernel_layout = DataLayout::UNDEFINED;
DataType kernel_data_type = DataType::UNDEFINED;
"""
# Check the tensor options
attr_backend_count = 0
attr_layout_count = 0
attr_data_type_count = 0
for attr_name in attrs['names']:
if attrs['attr_info'][attr_name][0] == 'Backend':
assert kernel['backend'] is not None, \
f"{self.api} api: When there is a parameter with 'Backend' type in attributes, you must set backend of kernel manually."
attr_backend_count = attr_backend_count + 1
if attrs['attr_info'][attr_name][0] == 'DataLayout':
assert kernel['layout'] is not None, \
f"{self.api} api: When there is a parameter with 'DataLayout' type in attributes, you must set layout of kernel manually."
attr_layout_count = attr_layout_count + 1
if attrs['attr_info'][attr_name][0] == 'DataType':
assert kernel['data_type'] is not None, \
f"{self.api} api: When there is a parameter with 'DataType' type in attributes, you must set data_type of kernel manually."
attr_data_type_count = attr_data_type_count + 1
# preprocess kernel configures
kernel_select_code = ""
if kernel['backend'] is not None:
if '>' in kernel['backend']:
vars_list = kernel['backend'].split('>')
assert len(
vars_list
) == 2, f"{self.api} api: The number of params to set backend with '>' must be 2, but received {len(vars_list)}."
assert (vars_list[0].strip() in attrs['names']) and (attrs['attr_info'][vars_list[0].strip()][0] == 'Backend'), \
f"{self.api} api: When use '>' to set kernel backend, the first param should be a attribute with Backend type."
kernel_select_code = kernel_select_code + f"""
kernel_backend = ParseBackendWithInputOrder({vars_list[0].strip()}, {vars_list[1].strip()});
"""
else:
args_str = ""
for ele in kernel['backend'].split(','):
args_str = args_str + ele.strip() + ', '
kernel_select_code = kernel_select_code + f"""
kernel_backend = ParseBackend({args_str[:-2]});
"""
if kernel['layout'] is not None:
if '>' in kernel['layout']:
vars_list = kernel['layout'].split('>')
assert len(
vars_list
) == 2, f"{self.api} api: The number of params to set layout with '>' must be 2, but received {len(vars_list)}."
assert vars_list[0].strip() in attrs['names'] and attrs['attr_info'][vars_list[0].strip()][0] == 'DataLayout', \
f"{self.api} api: When use '>' to set kernel layout, the first param should be a attribute with DataLayout type."
kernel_select_code = kernel_select_code + f"""
kernel_layout = ParseLayoutWithInputOrder({vars_list[0].strip()}, {vars_list[1].strip()});
"""
else:
vars_list = kernel['layout'].split(',')
assert len(
vars_list
) == 1, f"{self.api} api: The number of params to set layout must be 1, but received {len(vars_list)}."
kernel_select_code = kernel_select_code + f"""
kernel_layout = ParseLayout({vars_list[0].strip()});
"""
if kernel['data_type'] is not None:
if '>' in kernel['data_type']:
vars_list = kernel['data_type'].split('>')
assert len(
vars_list
) == 2, f"{self.api} api: The number of params to set data_type with '>' must be 2, but received {len(vars_list)}."
assert vars_list[0].strip() in attrs['names'] and attrs['attr_info'][vars_list[0].strip()][0] == 'DataType', \
f"{self.api} api: When use '>' to set kernel data_type, the first param should be a attribute with DataType type."
kernel_select_code = kernel_select_code + f"""
kernel_data_type = ParseDataTypeWithInputOrder({vars_list[0].strip()}, {vars_list[1].strip()});
"""
else:
vars_list = kernel['data_type'].split(',')
assert len(
vars_list
) == 1, f"{self.api} api: The number of params to set data_type must be 1, but received {len(vars_list)}."
kernel_select_code = kernel_select_code + f"""
kernel_data_type = ParseDataType({vars_list[0].strip()});
"""
if len(input_names) == 0:
assert attr_backend_count > 0 and attr_layout_count > 0 and attr_data_type_count > 0, \
f"{self.api} api: When there is no input tensor, the args must have 'Backend', 'DataLayout' and 'DataType'."
kernel_select_args = ""
for input_name in input_names:
kernel_select_args = kernel_select_args + input_name + ", "
if len(kernel_select_args) > 2:
kernel_select_args = kernel_select_args[:-2]
kernel_select_code = kernel_key_item_init + kernel_select_code
if len(input_names) > 0:
kernel_select_code = kernel_select_code + f"""
if (kernel_backend == Backend::UNDEFINED
|| kernel_layout == DataLayout::UNDEFINED
|| kernel_data_type == DataType::UNDEFINED ) {{
auto kernel_key_set = ParseKernelKeyByInputArgs({kernel_select_args});
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
if (kernel_backend == Backend::UNDEFINED) {{
kernel_backend = kernel_key.backend();
}}
if (kernel_layout == DataLayout::UNDEFINED) {{
kernel_layout = kernel_key.layout();
}}
if (kernel_data_type == DataType::UNDEFINED) {{
kernel_data_type = kernel_key.dtype();
}}
}}"""
kernel_select_code = kernel_select_code + f"""
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"{kernel['func']}", {{kernel_backend, kernel_layout, kernel_data_type}});
VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
VLOG(6) << "{self.api} API kernel: " << kernel;"""
return kernel_select_code
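# Example: for the full_like entry in api.yaml (backend : place > x,
# layout : layout > x, data_type : dtype > x) this method returns
# roughly the following C++ (a sketch, whitespace tidied):
#
#   Backend kernel_backend = Backend::UNDEFINED;
#   DataLayout kernel_layout = DataLayout::UNDEFINED;
#   DataType kernel_data_type = DataType::UNDEFINED;
#   kernel_backend = ParseBackendWithInputOrder(place, x);
#   kernel_layout = ParseLayoutWithInputOrder(layout, x);
#   kernel_data_type = ParseDataTypeWithInputOrder(dtype, x);
#   if (kernel_backend == Backend::UNDEFINED || ...) {
#     auto kernel_key_set = ParseKernelKeyByInputArgs(x);
#     auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
#     // fill in whichever of the three is still UNDEFINED
#   }
#   auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
#       "fill_any_like", {kernel_backend, kernel_layout, kernel_data_type});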
def gene_infer_meta(self, input_names, attr_names, infer_meta) -> str:
infer_meta_params = infer_meta['param'] if infer_meta[
'param'] is not None else input_names + attr_names
param_code = ""
for param in infer_meta_params:
if param in input_names:
param_code = param_code + self.prefix_tensor_name + param + "->meta(), "
elif param in attr_names:
param_code = param_code + param + ", "
elif isinstance(param, str):
param_code = param_code + "\"" + param + "\", "
elif isinstance(param, bool):
param_code = param_code + str(param).lower() + ", "
else:
param_code = param_code + str(param) + ", "
param_code = param_code[:-2]
return f"""
auto out_meta = pten::{infer_meta['func']}({param_code});
"""
def gene_kernel_context(self, input_names, attrs, infer_meta, kernel_param):
attr_names = attrs['names']
if kernel_param is None:
kernel_param = input_names + attr_names
input_code_str = ""
attr_code_str = ""
for param in kernel_param:
if param in input_names:
# set input for kernel_context
input_code_str = input_code_str + f"""
auto {self.prefix_tensor_name}{param} = std::dynamic_pointer_cast<pten::DenseTensor>({param}.impl());
kernel_context.EmplaceBackInput({self.prefix_tensor_name}{param});"""
elif param in attr_names:
# set attr for kernel_context
if 'ScalarArray' in attrs['attr_info'][param][0]:
param = 'pten::ScalarArray(' + param + ')'
elif 'Scalar' in attrs['attr_info'][param][0]:
param = 'pten::Scalar(' + param + ')'
attr_code_str = attr_code_str + f"""
kernel_context.EmplaceBackAttr({param});"""
elif isinstance(param, bool):
attr_code_str = attr_code_str + f"""
kernel_context.EmplaceBackAttr({str(param).lower()});"""
else:
attr_code_str = attr_code_str + f"""
kernel_context.EmplaceBackAttr({param});"""
return f"""
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
auto kernel_context = pten::KernelContext(dev_ctx);
{input_code_str}
{attr_code_str}
{self.gene_infer_meta(input_names, attr_names, infer_meta)}
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_backend));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
Tensor out;
out.set_impl(dense_out);"""
def gene_api_code(self):
if self.is_base_api:
return f"""
PD_DLL_DECL {self.output} {self.api}({self.args["args_define"]}) {{
{self.gene_kernel_select(self.args['inputs']['names'], self.args['attrs'], self.kernel)}
{self.gene_kernel_context(self.args['inputs']['names'], self.args['attrs'], self.infer_meta, self.kernel['param'])}
kernel(&kernel_context);
return out;
}}
"""
else:
return f"""
PD_DLL_DECL {self.output} {self.api}({self.args["args_define"]}) {{
return {self.invoke};
}}
"""
def header_include():
return """
#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"
"""
def source_include(header_file_path):
return f"""
#include "{header_file_path}"
#include <memory>
#include "glog/logging.h"
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
"""
def module_declare():
return """
PT_DECLARE_MODULE(CreationCPU);
PT_DECLARE_MODULE(LinalgCPU);
PT_DECLARE_MODULE(ManipulationCPU);
PT_DECLARE_MODULE(MathCPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_MODULE(CreationCUDA);
PT_DECLARE_MODULE(LinalgCUDA);
PT_DECLARE_MODULE(ManipulationCUDA);
PT_DECLARE_MODULE(MathCUDA);
#endif
"""
def api_register():
return """
PT_REGISTER_API(Creation);
PT_REGISTER_API(Linalg);
PT_REGISTER_API(Manipulation);
PT_REGISTER_API(Math);
"""
def api_namespace():
return ("""
namespace paddle {
namespace experimental {
""", """
} // namespace experimental
} // namespace paddle
""")
def generate_api(api_yaml_path, header_file_path, source_file_path):
with open(api_yaml_path, 'r') as f:
apis = yaml.load(f, Loader=yaml.FullLoader)
header_file = open(header_file_path, 'w')
source_file = open(source_file_path, 'w')
namespace = api_namespace()
header_file.write("#pragma once\n")
header_file.write(header_include())
header_file.write(namespace[0])
include_header_file = "paddle/pten/api/include/api.h"
source_file.write(source_include(include_header_file))
source_file.write(module_declare())
source_file.write(namespace[0])
for api in apis:
api_code = API(api)
print(api_code.gene_api_declaration())
header_file.write(api_code.gene_api_declaration())
source_file.write(api_code.gene_api_code())
header_file.write(namespace[1])
source_file.write(namespace[1])
source_file.write(api_register())
header_file.close()
source_file.close()
def main():
parser = argparse.ArgumentParser(
description='Generate PaddlePaddle C++ API files')
parser.add_argument(
'--api_yaml_path',
help='path to the api yaml file',
default='python/paddle/utils/code_gen/api.yaml')
parser.add_argument(
'--api_header_path',
help='output of generated api header code file',
default='paddle/pten/api/include/api.h')
parser.add_argument(
'--api_source_path',
help='output of generated api source code file',
default='paddle/pten/api/lib/api.cc')
options = parser.parse_args()
api_yaml_path = options.api_yaml_path
header_file_path = options.api_header_path
source_file_path = options.api_source_path
generate_api(api_yaml_path, header_file_path, source_file_path)
if __name__ == '__main__':
main()