Unverified commit 06128b9f authored by zyfncg, committed by GitHub

move the directory of fill kernels in pten (#38219)

Parent 327e5050
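Summary of the diff below: the per-backend fill kernels in paddle/pten/kernels/cpu/creation.* and paddle/pten/kernels/cuda/creation.* are removed, and a single context-templated implementation of Full / FullLike is added under paddle/pten/kernels/eigen/, with the public declarations in paddle/pten/kernels/full_kernel.h. Build dependencies switch from creation_cpu / creation_cuda to full_kernel_eigen accordingly.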
@@ -275,7 +275,7 @@ if(WITH_PYTHON)
if(NOT ON_INFER)
cc_library(paddle_eager
SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc
-    DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python)
+    DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python)
add_dependencies(paddle_eager eager_codegen)
add_dependencies(paddle_eager eager_op_function_generator_cmd)
list(APPEND PYBIND_DEPS paddle_eager)
@@ -24,11 +24,11 @@ add_subdirectory(tests)
# make an unity target for compile deps
set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
-set(PTEN_DEPS ${PTEN_DEPS} scale_kernel_eigen)
-set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu creation_cpu manipulation_cpu)
+set(PTEN_DEPS ${PTEN_DEPS} scale_kernel_eigen full_kernel_eigen)
+set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu)
set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
if(WITH_GPU OR WITH_ROCM)
-  set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda creation_cuda manipulation_cuda)
+  set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda)
endif()
if(WITH_XPU)
set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
@@ -16,8 +16,7 @@
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/creation.h"
#include "paddle/pten/kernels/cuda/creation.h"
#include "paddle/pten/kernels/full_kernel.h"
namespace pten {
@@ -36,7 +35,7 @@ DenseTensor FullLike(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
-  FullLike<T>(dev_ctx, val, &dense_out);
+  FullLike<T, ContextT>(dev_ctx, val, &dense_out);
return dense_out;
}
cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory eigen_function blas pten_transpose_cpu)
cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
-cc_library(creation_cpu SRCS creation.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "paddle/pten/kernels/cpu/creation.h"
-#include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/eigen/fill.h"
-namespace pten {
-template <typename T>
-void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out) {
-  auto value = val.to<float>();
-  using CommonType = typename std::common_type<
-      float,
-      typename std::conditional<
-          std::is_same<T, paddle::platform::float16>::value,
-          float,
-          T>::type>::type;
-  auto common_type_value = static_cast<CommonType>(value);
-  PADDLE_ENFORCE_EQ(
-      (common_type_value >=
-       static_cast<CommonType>(std::numeric_limits<T>::lowest())) &&
-          (common_type_value <=
-           static_cast<CommonType>(std::numeric_limits<T>::max())),
-      true,
-      paddle::platform::errors::InvalidArgument(
-          "The filled value is out of range for target type, "
-          "current kernel type is %s, the range should between %f "
-          "and %f, but now value is %f.",
-          typeid(T).name(),
-          static_cast<CommonType>(std::numeric_limits<T>::lowest()),
-          static_cast<CommonType>(std::numeric_limits<T>::max()),
-          static_cast<float>(value)));
-  eigen::fill<CPUContext, T>(dev_ctx, out, value);
-}
-template <typename T>
-void Full(const CPUContext& dev_ctx,
-          const ScalarArray& shape,
-          const Scalar& val,
-          DenseTensor* out) {
-  out->Resize(paddle::framework::make_ddim(shape.GetData()));
-  eigen::fill<CPUContext, T>(dev_ctx, out, val.to<T>());
-}
-}  // namespace pten
-PT_REGISTER_KERNEL(full_like,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::FullLike,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   bool,
-                   paddle::platform::float16) {}
-PT_REGISTER_KERNEL(full,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Full,
-                   float,
-                   double,
-                   uint8_t,
-                   int16_t,
-                   int,
-                   int64_t,
-                   bool,
-                   paddle::platform::float16,
-                   paddle::platform::bfloat16,
-                   paddle::platform::complex<float>,
-                   paddle::platform::complex<double>) {}
if(WITH_GPU)
nv_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
nv_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
-nv_library(creation_cuda SRCS creation.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
elseif(WITH_ROCM)
hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
-hip_library(creation_cuda SRCS creation.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
endif()
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "paddle/pten/kernels/cuda/creation.h"
-#include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/eigen/fill.h"
-namespace pten {
-template <typename T>
-void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out) {
-  auto value = val.to<float>();
-  using CommonType = typename std::common_type<
-      float,
-      typename std::conditional<
-          std::is_same<T, paddle::platform::float16>::value,
-          float,
-          T>::type>::type;
-  auto common_type_value = static_cast<CommonType>(value);
-  PADDLE_ENFORCE_EQ(
-      (common_type_value >=
-       static_cast<CommonType>(std::numeric_limits<T>::lowest())) &&
-          (common_type_value <=
-           static_cast<CommonType>(std::numeric_limits<T>::max())),
-      true,
-      paddle::platform::errors::InvalidArgument(
-          "The filled value is out of range for target type, "
-          "current kernel type is %s, the range should between %f "
-          "and %f, but now value is %f.",
-          typeid(T).name(),
-          static_cast<CommonType>(std::numeric_limits<T>::lowest()),
-          static_cast<CommonType>(std::numeric_limits<T>::max()),
-          static_cast<float>(value)));
-  eigen::fill<CUDAContext, T>(dev_ctx, out, val.to<float>());
-}
-template <typename T>
-void Full(const CUDAContext& dev_ctx,
-          const ScalarArray& shape,
-          const Scalar& val,
-          DenseTensor* out) {
-  out->Resize(paddle::framework::make_ddim(shape.GetData()));
-  eigen::fill<CUDAContext, T>(dev_ctx, out, val.to<T>());
-}
-}  // namespace pten
-PT_REGISTER_KERNEL(full_like,
-                   CUDA,
-                   ALL_LAYOUT,
-                   pten::FullLike,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   bool,
-                   paddle::platform::float16) {}
-PT_REGISTER_KERNEL(full,
-                   CUDA,
-                   ALL_LAYOUT,
-                   pten::Full,
-                   float,
-                   double,
-                   uint8_t,
-                   int16_t,
-                   int,
-                   int64_t,
-                   bool,
-                   paddle::platform::float16,
-                   paddle::platform::complex<float>,
-                   paddle::platform::complex<double>) {}
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-// CUDA and HIP use same api
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/pten/backends/cuda/cuda_context.h"
-#include "paddle/pten/common/scalar.h"
-#include "paddle/pten/common/scalar_array.h"
-#include "paddle/pten/core/dense_tensor.h"
-namespace pten {
-template <typename T>
-void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out);
-template <typename T>
-void Full(const CUDAContext& dev_ctx,
-          const ScalarArray& shape,
-          const Scalar& val,
-          DenseTensor* out);
-}  // namespace pten
-#endif
if(WITH_GPU)
nv_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
+nv_library(full_kernel_eigen SRCS full_kernel.cc full_kernel.cu DEPS kernel_context kernel_factory dense_tensor eigen_function)
elseif(WITH_ROCM)
hip_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
+hip_library(full_kernel_eigen SRCS full_kernel.cc full_kernel.cu DEPS kernel_context kernel_factory dense_tensor eigen_function)
else()
cc_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
+cc_library(full_kernel_eigen SRCS full_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
endif()
@@ -14,21 +14,60 @@ limitations under the License. */
#pragma once
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace pten {
-namespace eigen {
template <typename DeviceContext, typename T, typename VType>
-void fill(const DeviceContext& context, DenseTensor* tensor, VType val) {
+void fill_(const DeviceContext& context, DenseTensor* tensor, VType val) {
  tensor->mutable_data<T>();
  auto t = pten::EigenVector<T>::Flatten(*tensor);
  t.device(*context.eigen_device()) = t.constant(static_cast<T>(val));
}
-}  // namespace eigen
+template <typename T, typename ContextT>
+void Full(const ContextT& dev_ctx,
+          const ScalarArray& shape,
+          const Scalar& val,
+          DenseTensor* out) {
+  out->Resize(paddle::framework::make_ddim(shape.GetData()));
+  fill_<ContextT, T>(dev_ctx, out, val.to<T>());
+}
+template <typename T, typename ContextT>
+void FullLike(const ContextT& dev_ctx, const Scalar& val, DenseTensor* out) {
+  auto value = val.to<float>();
+  using CommonType = typename std::common_type<
+      float,
+      typename std::conditional<
+          std::is_same<T, paddle::platform::float16>::value,
+          float,
+          T>::type>::type;
+  auto common_type_value = static_cast<CommonType>(value);
+  PADDLE_ENFORCE_EQ(
+      (common_type_value >=
+       static_cast<CommonType>(std::numeric_limits<T>::lowest())) &&
+          (common_type_value <=
+           static_cast<CommonType>(std::numeric_limits<T>::max())),
+      true,
+      paddle::platform::errors::InvalidArgument(
+          "The filled value is out of range for target type, "
+          "current kernel type is %s, the range should between %f "
+          "and %f, but now value is %f.",
+          typeid(T).name(),
+          static_cast<CommonType>(std::numeric_limits<T>::lowest()),
+          static_cast<CommonType>(std::numeric_limits<T>::max()),
+          static_cast<float>(value)));
+  fill_<ContextT, T>(dev_ctx, out, value);
+}
} // namespace pten
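Every FullLike variant in this diff (the removed CPU/CUDA ones and the shared one above) guards the fill value with the same promoted-type range check before casting: the value and the target dtype's limits are compared in a common type, so a value that would overflow the target type raises InvalidArgument instead of being silently truncated. A minimal self-contained sketch of that check, with the float16 special case omitted and illustrative names:

```cpp
// Sketch of the FullLike range guard: compare the fill value against the
// target dtype's limits in std::common_type<float, T>::type, so overflow
// is detected before the value is truncated by the cast.
#include <cstdint>
#include <cstdio>
#include <limits>
#include <type_traits>

template <typename T>
bool FillValueInRange(float value) {  // illustrative helper, not in pten
  using CommonType = typename std::common_type<float, T>::type;
  auto v = static_cast<CommonType>(value);
  return v >= static_cast<CommonType>(std::numeric_limits<T>::lowest()) &&
         v <= static_cast<CommonType>(std::numeric_limits<T>::max());
}

int main() {
  std::printf("%d\n", FillValueInRange<int8_t>(300.0f));  // 0: overflows int8_t
  std::printf("%d\n", FillValueInRange<int8_t>(100.0f));  // 1: fits
  return 0;
}
```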
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/pten/kernels/full_kernel.h"
+#include "paddle/pten/kernels/eigen/full.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/backends/all_context.h"
+PT_REGISTER_CTX_KERNEL(full,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::Full,
+                       float,
+                       double,
+                       uint8_t,
+                       int16_t,
+                       int,
+                       int64_t,
+                       bool,
+                       paddle::platform::float16,
+                       paddle::platform::bfloat16,
+                       paddle::platform::complex<float>,
+                       paddle::platform::complex<double>) {}
+PT_REGISTER_CTX_KERNEL(full_like,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::FullLike,
+                       float,
+                       double,
+                       int,
+                       int64_t,
+                       bool,
+                       paddle::platform::float16) {}
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/pten/kernels/eigen/full.h"
+#include "paddle/pten/kernels/full_kernel.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/backends/all_context.h"
+PT_REGISTER_CTX_KERNEL(full,
+                       CUDA,
+                       ALL_LAYOUT,
+                       pten::Full,
+                       float,
+                       double,
+                       uint8_t,
+                       int16_t,
+                       int,
+                       int64_t,
+                       bool,
+                       paddle::platform::float16,
+                       paddle::platform::complex<float>,
+                       paddle::platform::complex<double>) {}
+PT_REGISTER_CTX_KERNEL(full_like,
+                       CUDA,
+                       ALL_LAYOUT,
+                       pten::FullLike,
+                       float,
+                       double,
+                       int,
+                       int64_t,
+                       bool,
+                       paddle::platform::float16) {}
@@ -21,13 +21,13 @@
namespace pten {
-template <typename T>
-void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out);
-template <typename T>
-void Full(const CPUContext& dev_ctx,
+template <typename T, typename ContextT>
+void Full(const ContextT& dev_ctx,
          const ScalarArray& shape,
          const Scalar& val,
          DenseTensor* out);
+template <typename T, typename ContextT>
+void FullLike(const ContextT& dev_ctx, const Scalar& val, DenseTensor* out);
}  // namespace pten
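Since Full and FullLike are now templated on the device context, one definition serves every backend, which is also why the new files register them via PT_REGISTER_CTX_KERNEL rather than the per-backend PT_REGISTER_KERNEL. A usage sketch under stated assumptions (dev_ctx and out are prepared by the caller; the ScalarArray constructor shown is an assumption for illustration):

```cpp
#include <vector>

#include "paddle/pten/kernels/full_kernel.h"

// Sketch only: invoking the relocated, context-templated kernels.
void FillExamples(const pten::CPUContext& dev_ctx, pten::DenseTensor* out) {
  pten::ScalarArray shape(std::vector<int64_t>{2, 3});  // assumed ctor
  pten::Scalar value(1.0f);
  // Resizes *out to {2, 3} and fills it with 1.0f.
  pten::Full<float, pten::CPUContext>(dev_ctx, shape, value, out);
  // Fills *out with 1.0f, keeping its existing shape.
  pten::FullLike<float, pten::CPUContext>(dev_ctx, value, out);
}
```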