未验证 提交 9ffdb2b7 编写于 作者: R RedContritio 提交者: GitHub

【Hackathon No.67】remove reference to operator.h in phi [part 1] (#50624)

* add visit_place to phi/core/utils

* remove reference to operator.h in phi/kernels/funcs/math_function.h

* update data type from framework.proto to phi

* fix enforce error in fluid
上级 7d138402
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
......
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/imperative/reducer.h" #include "paddle/fluid/imperative/reducer.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/core/utils/data_type.h"
namespace paddle { namespace paddle {
namespace imperative { namespace imperative {
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/common/transform.h" #include "paddle/phi/common/transform.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle { namespace paddle {
......
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/common/transform.h" #include "paddle/phi/common/transform.h"
#include "paddle/phi/core/visit_type.h" #include "paddle/phi/core/visit_type.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle { namespace paddle {
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle { namespace paddle {
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/hostdevice.h" #include "paddle/phi/core/hostdevice.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle { namespace paddle {
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle { namespace paddle {
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include <string> #include <string>
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h" #include "paddle/phi/kernels/funcs/jit/kernels.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <stack> #include <stack>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/tree2col.h" #include "paddle/fluid/operators/math/tree2col.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/kernels/gpu/reduce.h" #include "paddle/phi/kernels/gpu/reduce.h"
namespace paddle { namespace paddle {
...@@ -20,7 +21,7 @@ namespace operators { ...@@ -20,7 +21,7 @@ namespace operators {
namespace details { namespace details {
TEST(test_reduce_rank_check, all) { TEST(test_reduce_rank_check, all) {
using EnforceNotMet = paddle::platform::EnforceNotMet; using EnforceNotMet = phi::EnforceNotMet;
constexpr int kMaxRank = framework::DDim::kMaxRank; constexpr int kMaxRank = framework::DDim::kMaxRank;
for (int rank = 0; rank < kMaxRank; rank++) { for (int rank = 0; rank < kMaxRank; rank++) {
...@@ -42,7 +43,7 @@ TEST(test_reduce_rank_check, all) { ...@@ -42,7 +43,7 @@ TEST(test_reduce_rank_check, all) {
phi::funcs::details::CheckReduceRank(reduce_rank, rank); phi::funcs::details::CheckReduceRank(reduce_rank, rank);
} else { } else {
ASSERT_THROW(phi::funcs::details::CheckReduceRank(reduce_rank, rank), ASSERT_THROW(phi::funcs::details::CheckReduceRank(reduce_rank, rank),
paddle::platform::EnforceNotMet); EnforceNotMet);
} }
} }
} }
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle { namespace paddle {
......
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
namespace phi {
// Build note: targets that use phi::VisitPlace need to add a dependency on
// phi_place.
//
// Dispatches on the allocation type of `place`, builds the matching concrete
// place object and invokes `visitor` with it.
//
// Template parameter:
//   Visitor - a callable type exposing a nested `result_type`. It is invoked
//             through a const reference, and must accept every concrete place
//             type that is enabled in the current build.
//
// Parameters:
//   place   - the generic place to inspect; its device id (and device type for
//             custom devices) is forwarded to the concrete place where the
//             concrete place takes one.
//   visitor - the callable to invoke with the concrete place.
//
// Returns:
//   Whatever `visitor` returns for the concrete place.
//
// Errors:
//   When `place` refers to a backend that was not compiled in, an Unavailable
//   error is raised through PADDLE_THROW. Any allocation type without an
//   explicit case below is visited as a phi::CPUPlace.
template <typename Visitor>
typename Visitor::result_type VisitPlace(const phi::Place& place,
                                         const Visitor& visitor) {
  switch (place.GetType()) {
    case phi::AllocationType::GPU: {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      phi::GPUPlace p(place.GetDeviceId());
      return visitor(p);
#else
      PADDLE_THROW(phi::errors::Unavailable(
          ("Paddle is not compiled with CUDA. Cannot visit gpu device")));
      // PADDLE_THROW is expected not to return; the explicit return keeps
      // every path of this non-void function returning a value.
      return typename Visitor::result_type();
#endif
    }
    case phi::AllocationType::GPUPINNED: {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      phi::GPUPinnedPlace p;
      return visitor(p);
#else
      PADDLE_THROW(phi::errors::Unavailable(
          ("Paddle is not compiled with CUDA. Cannot visit cuda_pinned")));
      return typename Visitor::result_type();
#endif
    }
    case phi::AllocationType::XPU: {
#ifdef PADDLE_WITH_XPU
      phi::XPUPlace p(place.GetDeviceId());
      return visitor(p);
#else
      PADDLE_THROW(phi::errors::Unavailable(
          ("Paddle is not compiled with XPU. Cannot visit xpu device")));
      return typename Visitor::result_type();
#endif
    }
    case phi::AllocationType::NPU: {
#ifdef PADDLE_WITH_ASCEND_CL
      phi::NPUPlace p(place.GetDeviceId());
      return visitor(p);
#else
      PADDLE_THROW(phi::errors::Unavailable(
          ("Paddle is not compiled with NPU. Cannot visit npu device")));
      return typename Visitor::result_type();
#endif
    }
    case phi::AllocationType::NPUPINNED: {
#ifdef PADDLE_WITH_ASCEND_CL
      phi::NPUPinnedPlace p;
      return visitor(p);
#else
      PADDLE_THROW(phi::errors::Unavailable(
          ("Paddle is not compiled with NPU. Cannot visit npu_pinned")));
      return typename Visitor::result_type();
#endif
    }
    case phi::AllocationType::IPU: {
#ifdef PADDLE_WITH_IPU
      phi::IPUPlace p(place.GetDeviceId());
      return visitor(p);
#else
      PADDLE_THROW(phi::errors::Unavailable(
          ("Paddle is not compiled with IPU. Cannot visit ipu device")));
      return typename Visitor::result_type();
#endif
    }
    case phi::AllocationType::MLU: {
#ifdef PADDLE_WITH_MLU
      phi::MLUPlace p(place.GetDeviceId());
      return visitor(p);
#else
      PADDLE_THROW(phi::errors::Unavailable(
          ("Paddle is not compiled with MLU. Cannot visit mlu device")));
      // Same explicit return as the other fallback branches, so this case
      // never relies on falling through into the next one.
      return typename Visitor::result_type();
#endif
    }
    case phi::AllocationType::CUSTOM: {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
      phi::CustomPlace p(place.GetDeviceType(), place.GetDeviceId());
      return visitor(p);
#else
      PADDLE_THROW(phi::errors::Unavailable(
          ("Paddle is not compiled with CUSTOM. Cannot visit custom device")));
      return typename Visitor::result_type();
#endif
    }
    default: {
      // Every remaining allocation type is visited as a CPU place.
      phi::CPUPlace p;
      return visitor(p);
    }
  }
}
} // namespace phi
...@@ -45,29 +45,25 @@ void RepeatInterleaveWithTensorIndexGradKernel( ...@@ -45,29 +45,25 @@ void RepeatInterleaveWithTensorIndexGradKernel(
repeats_tensor.dims()[0], repeats_tensor.dims()[0],
x_grad->dims()[dim])); x_grad->dims()[dim]));
const auto& index_type = const auto& index_type = repeats_tensor.dtype();
paddle::framework::TransToProtoVarType(repeats_tensor.dtype());
bool index_type_match = bool index_type_match =
index_type == paddle::framework::proto::VarType::INT32 || index_type == phi::DataType::INT32 || index_type == phi::DataType::INT64;
index_type == paddle::framework::proto::VarType::INT64;
PADDLE_ENFORCE_EQ(index_type_match, PADDLE_ENFORCE_EQ(index_type_match,
true, true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input(Repeats) holds the wrong type, it holds %s, but " "Input(Repeats) holds the wrong type, it holds %s, but "
"desires to be %s or %s", "desires to be %s or %s",
paddle::framework::DataTypeToString(index_type), phi::DataTypeToString(index_type),
paddle::framework::DataTypeToString( phi::DataTypeToString(phi::DataType::INT32),
paddle::framework::proto::VarType::INT32), phi::DataTypeToString(phi::DataType::INT64)));
paddle::framework::DataTypeToString(
paddle::framework::proto::VarType::INT64)));
phi::DeviceContextPool::Instance().Get(repeats_tensor.place()); phi::DeviceContextPool::Instance().Get(repeats_tensor.place());
if (index_type == paddle::framework::proto::VarType::INT32) { if (index_type == phi::DataType::INT32) {
phi::funcs::RepeatsTensor2IndexTensor<Context, int>( phi::funcs::RepeatsTensor2IndexTensor<Context, int>(
ctx, repeats_tensor, &index); ctx, repeats_tensor, &index);
IndexSelectGradInner<Context, T, int>(ctx, out_grad, index, x_grad, dim); IndexSelectGradInner<Context, T, int>(ctx, out_grad, index, x_grad, dim);
} else if (index_type == paddle::framework::proto::VarType::INT64) { } else if (index_type == phi::DataType::INT64) {
phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>( phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>(
ctx, repeats_tensor, &index); ctx, repeats_tensor, &index);
IndexSelectGradInner<Context, T, int64_t>( IndexSelectGradInner<Context, T, int64_t>(
......
...@@ -45,8 +45,7 @@ struct EmbeddingCPUSparseFunctor { ...@@ -45,8 +45,7 @@ struct EmbeddingCPUSparseFunctor {
int64_t row_width = table_t.value().dims()[1]; int64_t row_width = table_t.value().dims()[1];
const auto* table = table_t.value().template data<T>(); const auto* table = table_t.value().template data<T>();
auto* output = dev_ctx_.template Alloc<T>(output_t); auto* output = dev_ctx_.template Alloc<T>(output_t);
auto input_data_type = auto input_data_type = table_t.value().dtype();
paddle::framework::TransToProtoVarType(table_t.value().dtype());
for (int64_t i = 0; i < ids_numel; ++i) { for (int64_t i = 0; i < ids_numel; ++i) {
if (padding_idx_ != kNoPadding && ids[i] == padding_idx_) { if (padding_idx_ != kNoPadding && ids[i] == padding_idx_) {
...@@ -66,7 +65,7 @@ struct EmbeddingCPUSparseFunctor { ...@@ -66,7 +65,7 @@ struct EmbeddingCPUSparseFunctor {
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"the input key should be exists. But received %d.", id_index)); "the input key should be exists. But received %d.", id_index));
if (input_data_type == paddle::framework::proto::VarType::BF16) { if (input_data_type == phi::DataType::BFLOAT16) {
memcpy(output + i * row_width, memcpy(output + i * row_width,
table + id_index * row_width, table + id_index * row_width,
row_width * sizeof(T)); row_width * sizeof(T));
......
...@@ -43,16 +43,15 @@ void TakeAlongAxisGradKernel(const Context& dev_ctx, ...@@ -43,16 +43,15 @@ void TakeAlongAxisGradKernel(const Context& dev_ctx,
phi::funcs::SetConstant<Context, T> functor; phi::funcs::SetConstant<Context, T> functor;
functor(dev_ctx, x_grad, static_cast<T>(0)); functor(dev_ctx, x_grad, static_cast<T>(0));
const auto& index_type = const auto& index_type = index.dtype();
paddle::framework::TransToProtoVarType(index.dtype()); if (index_type == phi::DataType::INT32) {
if (index_type == paddle::framework::proto::VarType::INT32) {
phi::funcs::cpu_scatter_add_kernel<T, int32_t>( phi::funcs::cpu_scatter_add_kernel<T, int32_t>(
*x_grad, *x_grad,
axis, axis,
index, index,
out_grad, out_grad,
dev_ctx); // the gradient of gather is scatter dev_ctx); // the gradient of gather is scatter
} else if (index_type == paddle::framework::proto::VarType::INT64) { } else if (index_type == phi::DataType::INT64) {
phi::funcs::cpu_scatter_add_kernel<T, int64_t>( phi::funcs::cpu_scatter_add_kernel<T, int64_t>(
*x_grad, axis, index, out_grad, dev_ctx); *x_grad, axis, index, out_grad, dev_ctx);
} }
......
...@@ -12,8 +12,10 @@ ...@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/phi/kernels/unique_consecutive_kernel.h" #include <climits>
#include "paddle/phi/kernels/cpu/unique_consecutive_functor.h" #include "paddle/phi/kernels/cpu/unique_consecutive_functor.h"
#include "paddle/phi/kernels/unique_consecutive_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/errors.h" #include "paddle/phi/core/errors.h"
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <climits>
#include "paddle/phi/kernels/unique_kernel.h" #include "paddle/phi/kernels/unique_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/core/utils/visit_place.h"
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
#include "paddle/phi/backends/dynload/mklml.h" #include "paddle/phi/backends/dynload/mklml.h"
...@@ -236,7 +237,7 @@ void set_constant(const phi::DeviceContext& context, ...@@ -236,7 +237,7 @@ void set_constant(const phi::DeviceContext& context,
#endif #endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// tensor->place().apply_visitor(func); // tensor->place().apply_visitor(func);
paddle::platform::VisitPlace(tensor->place(), func); phi::VisitPlace(tensor->place(), func);
#elif defined(PADDLE_WITH_XPU) #elif defined(PADDLE_WITH_XPU)
func(phi::XPUPlace()); func(phi::XPUPlace());
#else #else
......
...@@ -17,12 +17,10 @@ limitations under the License. */ ...@@ -17,12 +17,10 @@ limitations under the License. */
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/phi/backends/all_context.h" #include "paddle/phi/backends/all_context.h"
#include "paddle/phi/common/memory_utils.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
namespace phi { namespace phi {
namespace funcs { namespace funcs {
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace phi { namespace phi {
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/gather.cu.h" #include "paddle/phi/kernels/funcs/gather.cu.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/segment_pooling.h" #include "paddle/phi/kernels/funcs/segment_pooling.h"
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <set>
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h" #include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
......
...@@ -109,9 +109,6 @@ void BincountCUDAInner(const Context& dev_ctx, ...@@ -109,9 +109,6 @@ void BincountCUDAInner(const Context& dev_ctx,
<<<GET_BLOCKS(input_numel), PADDLE_CUDA_NUM_THREADS, 0, stream>>>( <<<GET_BLOCKS(input_numel), PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
input_data, input_numel, has_weights, weights_data, output_data); input_data, input_numel, has_weights, weights_data, output_data);
} else { } else {
const auto& weights_type =
paddle::framework::TransToProtoVarType(weights->dtype());
if (weights->dtype() == DataType::FLOAT32) { if (weights->dtype() == DataType::FLOAT32) {
float* output_data = dev_ctx.template Alloc<float>(output); float* output_data = dev_ctx.template Alloc<float>(output);
phi::funcs::SetConstant<Context, float>()( phi::funcs::SetConstant<Context, float>()(
......
...@@ -375,9 +375,7 @@ void ClassCenterSampleKernel(const Context& dev_ctx, ...@@ -375,9 +375,7 @@ void ClassCenterSampleKernel(const Context& dev_ctx,
num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device.numel(), num_classes_per_device.numel(),
paddle::platform::ToNCCLDataType( phi::ToNCCLDataType(num_classes_per_device.dtype()),
paddle::framework::TransToProtoVarType(
num_classes_per_device.dtype())),
ncclSum, ncclSum,
comm->comm(), comm->comm(),
calcu_stream)); calcu_stream));
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/memory_utils.h" #include "paddle/phi/common/memory_utils.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace phi { namespace phi {
......
...@@ -96,8 +96,7 @@ void GetClassInterval(const gpuStream_t& stream, ...@@ -96,8 +96,7 @@ void GetClassInterval(const gpuStream_t& stream,
num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device.numel(), num_classes_per_device.numel(),
paddle::platform::ToNCCLDataType(paddle::framework::TransToProtoVarType( phi::ToNCCLDataType(num_classes_per_device.dtype()),
num_classes_per_device.dtype())),
ncclSum, ncclSum,
comm->comm(), comm->comm(),
calcu_stream)); calcu_stream));
...@@ -188,8 +187,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx, ...@@ -188,8 +187,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx,
int blocks = NumBlocks(N * D); int blocks = NumBlocks(N * D);
int threads = kNumCUDAThreads; int threads = kNumCUDAThreads;
const auto& label_type = const auto& label_type = label.dtype();
paddle::framework::TransToProtoVarType(label.dtype());
DenseTensor class_interval; DenseTensor class_interval;
GetClassInterval<T, Context>(dev_ctx.stream(), GetClassInterval<T, Context>(dev_ctx.stream(),
...@@ -201,7 +199,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx, ...@@ -201,7 +199,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx,
D, D,
&class_interval); &class_interval);
if (label_type == paddle::framework::proto::VarType::INT32) { if (label_type == phi::DataType::INT32) {
typedef int32_t LabelT; typedef int32_t LabelT;
CalculateGrad<T, LabelT> CalculateGrad<T, LabelT>
<<<blocks, threads, 0, dev_ctx.stream()>>>(logits_grad->data<T>(), <<<blocks, threads, 0, dev_ctx.stream()>>>(logits_grad->data<T>(),
...@@ -215,7 +213,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx, ...@@ -215,7 +213,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx,
N, N,
D, D,
class_interval.data<int>()); class_interval.data<int>());
} else if (label_type == paddle::framework::proto::VarType::INT64) { } else if (label_type == phi::DataType::INT64) {
typedef int64_t LabelT; typedef int64_t LabelT;
CalculateGrad<T, LabelT> CalculateGrad<T, LabelT>
<<<blocks, threads, 0, dev_ctx.stream()>>>(logits_grad->data<T>(), <<<blocks, threads, 0, dev_ctx.stream()>>>(logits_grad->data<T>(),
......
...@@ -92,8 +92,7 @@ void GetClassInterval(const gpuStream_t& stream, ...@@ -92,8 +92,7 @@ void GetClassInterval(const gpuStream_t& stream,
num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device.numel(), num_classes_per_device.numel(),
paddle::platform::ToNCCLDataType(paddle::framework::TransToProtoVarType( phi::ToNCCLDataType(num_classes_per_device.dtype()),
num_classes_per_device.dtype())),
ncclSum, ncclSum,
comm->comm(), comm->comm(),
calcu_stream)); calcu_stream));
...@@ -265,8 +264,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, ...@@ -265,8 +264,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
int blocks = NumBlocks(N); int blocks = NumBlocks(N);
int threads = kNumCUDAThreads; int threads = kNumCUDAThreads;
const auto& label_type = const auto& label_type = labels.dtype();
paddle::framework::TransToProtoVarType(labels.dtype());
// copy logits to softmax variable since we can't modify logits, // copy logits to softmax variable since we can't modify logits,
// and it also be used when calculate grad // and it also be used when calculate grad
...@@ -291,7 +289,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, ...@@ -291,7 +289,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
// theta = acos(x_i) // theta = acos(x_i)
// (cos(m1 * theta + m2) - m3) // (cos(m1 * theta + m2) - m3)
// save match_logits, used for gradient computation. // save match_logits, used for gradient computation.
if (label_type == paddle::framework::proto::VarType::INT32) { if (label_type == phi::DataType::INT32) {
typedef int32_t LabelT; typedef int32_t LabelT;
AddMarginToPositiveLogitsKernel<T> AddMarginToPositiveLogitsKernel<T>
<<<NumBlocks(N), threads, 0, dev_ctx.stream()>>>( <<<NumBlocks(N), threads, 0, dev_ctx.stream()>>>(
...@@ -305,7 +303,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, ...@@ -305,7 +303,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
N, N,
D, D,
class_interval.data<int>()); class_interval.data<int>());
} else if (label_type == paddle::framework::proto::VarType::INT64) { } else if (label_type == phi::DataType::INT64) {
typedef int64_t LabelT; typedef int64_t LabelT;
AddMarginToPositiveLogitsKernel<T> AddMarginToPositiveLogitsKernel<T>
<<<NumBlocks(N), threads, 0, dev_ctx.stream()>>>( <<<NumBlocks(N), threads, 0, dev_ctx.stream()>>>(
...@@ -357,15 +355,14 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, ...@@ -357,15 +355,14 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
auto task = pg->AllReduce(in_tensor, out_tensor, opts); auto task = pg->AllReduce(in_tensor, out_tensor, opts);
task->Wait(); task->Wait();
} else { } else {
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclAllReduce( PADDLE_ENFORCE_GPU_SUCCESS(
logits_max_buff, phi::dynload::ncclAllReduce(logits_max_buff,
logits_max_buff, logits_max_buff,
logits_max.numel(), logits_max.numel(),
paddle::platform::ToNCCLDataType( phi::ToNCCLDataType(logits_max.dtype()),
paddle::framework::TransToProtoVarType(logits_max.dtype())), ncclMax,
ncclMax, comm->comm(),
comm->comm(), stream));
stream));
} }
} }
#endif #endif
...@@ -403,8 +400,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, ...@@ -403,8 +400,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
sum_exp_logits_buff, sum_exp_logits_buff,
sum_exp_logits_buff, sum_exp_logits_buff,
sum_exp_logits.numel(), sum_exp_logits.numel(),
paddle::platform::ToNCCLDataType( phi::ToNCCLDataType(sum_exp_logits.dtype()),
paddle::framework::TransToProtoVarType(sum_exp_logits.dtype())),
ncclSum, ncclSum,
comm->comm(), comm->comm(),
stream)); stream));
...@@ -423,7 +419,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, ...@@ -423,7 +419,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
phi::funcs::SetConstant<Context, T> functor; phi::funcs::SetConstant<Context, T> functor;
functor(dev_ctx, loss, static_cast<T>(0.0)); functor(dev_ctx, loss, static_cast<T>(0.0));
if (label_type == paddle::framework::proto::VarType::INT32) { if (label_type == phi::DataType::INT32) {
typedef int32_t LabelT; typedef int32_t LabelT;
HardLabelSoftmaxWithCrossEntropyKernel<T, LabelT> HardLabelSoftmaxWithCrossEntropyKernel<T, LabelT>
<<<blocks, threads, 0, dev_ctx.stream()>>>(loss_ptr, <<<blocks, threads, 0, dev_ctx.stream()>>>(loss_ptr,
...@@ -433,7 +429,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, ...@@ -433,7 +429,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
N, N,
D, D,
class_interval.data<int>()); class_interval.data<int>());
} else if (label_type == paddle::framework::proto::VarType::INT64) { } else if (label_type == phi::DataType::INT64) {
typedef int64_t LabelT; typedef int64_t LabelT;
HardLabelSoftmaxWithCrossEntropyKernel<T, LabelT> HardLabelSoftmaxWithCrossEntropyKernel<T, LabelT>
<<<blocks, threads, 0, dev_ctx.stream()>>>(loss_ptr, <<<blocks, threads, 0, dev_ctx.stream()>>>(loss_ptr,
...@@ -458,15 +454,14 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, ...@@ -458,15 +454,14 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
auto task = pg->AllReduce(in_tensor, out_tensor, opts); auto task = pg->AllReduce(in_tensor, out_tensor, opts);
task->Wait(); task->Wait();
} else { } else {
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclAllReduce( PADDLE_ENFORCE_GPU_SUCCESS(
loss_ptr, phi::dynload::ncclAllReduce(loss_ptr,
loss_ptr, loss_ptr,
loss->numel(), loss->numel(),
paddle::platform::ToNCCLDataType( phi::ToNCCLDataType(loss->dtype()),
paddle::framework::TransToProtoVarType(loss->dtype())), ncclSum,
ncclSum, comm->comm(),
comm->comm(), stream));
stream));
} }
} }
#endif #endif
......
...@@ -108,8 +108,7 @@ void SyncBatchNormKernel(const Context &ctx, ...@@ -108,8 +108,7 @@ void SyncBatchNormKernel(const Context &ctx,
} }
if (comm) { if (comm) {
int dtype = paddle::platform::ToNCCLDataType( int dtype = phi::ToNCCLDataType(mean_out->dtype());
paddle::framework::TransToProtoVarType(mean_out->dtype()));
// In-place operation // In-place operation
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::ncclAllReduce(stats, phi::dynload::ncclAllReduce(stats,
......
...@@ -131,32 +131,28 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx, ...@@ -131,32 +131,28 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx,
"But received: [%s], required: [%d].", "But received: [%s], required: [%d].",
repeats_tensor.dims()[0], repeats_tensor.dims()[0],
x.dims()[dim])); x.dims()[dim]));
const auto& index_type = const auto& index_type = repeats_tensor.dtype();
paddle::framework::TransToProtoVarType(repeats_tensor.dtype());
bool index_type_match = bool index_type_match =
index_type == paddle::framework::proto::VarType::INT32 || index_type == phi::DataType::INT32 || index_type == phi::DataType::INT64;
index_type == paddle::framework::proto::VarType::INT64;
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
index_type_match, index_type_match,
true, true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input(RepeatsTensor) holds the wrong type, it holds %s, but " "Input(RepeatsTensor) holds the wrong type, it holds %s, but "
"desires to be %s or %s", "desires to be %s or %s",
paddle::framework::DataTypeToString(index_type), phi::DataTypeToString(index_type),
paddle::framework::DataTypeToString( phi::DataTypeToString(phi::DataType::INT32),
paddle::framework::proto::VarType::INT32), phi::DataTypeToString(phi::DataType::INT64)));
paddle::framework::DataTypeToString(
paddle::framework::proto::VarType::INT64)));
if (place == cpu_place) { if (place == cpu_place) {
auto x_copy = x; auto x_copy = x;
if (index_type == paddle::framework::proto::VarType::INT32) { if (index_type == phi::DataType::INT32) {
phi::funcs::RepeatsTensor2IndexTensor<Context, int>( phi::funcs::RepeatsTensor2IndexTensor<Context, int>(
ctx, repeats_tensor, &index); ctx, repeats_tensor, &index);
auto output_dim = phi::vectorize(x.dims()); auto output_dim = phi::vectorize(x.dims());
output_dim[dim] = index.dims()[0]; output_dim[dim] = index.dims()[0];
out->Resize(phi::make_ddim(output_dim)); out->Resize(phi::make_ddim(output_dim));
IndexSelectInner<Context, T, int>(ctx, &x_copy, index, out, dim); IndexSelectInner<Context, T, int>(ctx, &x_copy, index, out, dim);
} else if (index_type == paddle::framework::proto::VarType::INT64) { } else if (index_type == phi::DataType::INT64) {
phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>( phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>(
ctx, repeats_tensor, &index); ctx, repeats_tensor, &index);
auto output_dim = phi::vectorize(x.dims()); auto output_dim = phi::vectorize(x.dims());
...@@ -170,7 +166,7 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx, ...@@ -170,7 +166,7 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx,
int64_t stride = stride_dim[dim]; int64_t stride = stride_dim[dim];
auto stream = ctx.stream(); auto stream = ctx.stream();
auto* in_data = x.data<T>(); auto* in_data = x.data<T>();
if (index_type == paddle::framework::proto::VarType::INT64) { if (index_type == phi::DataType::INT64) {
phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>( phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>(
ctx, repeats_tensor, &index); ctx, repeats_tensor, &index);
......
...@@ -188,21 +188,21 @@ void SetValueGradImpl(const Context& dev_ctx, ...@@ -188,21 +188,21 @@ void SetValueGradImpl(const Context& dev_ctx,
(value_grad_dims_size + decrease_axis_size - num_decrease)); (value_grad_dims_size + decrease_axis_size - num_decrease));
fake_value_grad_dims[i] = value_grad_dims[index_grad]; fake_value_grad_dims[i] = value_grad_dims[index_grad];
PADDLE_ENFORCE_EQ((out_dims[i] == value_grad_dims[index_grad]) || PADDLE_ENFORCE_EQ(
(value_grad_dims[index_grad] == 1), (out_dims[i] == value_grad_dims[index_grad]) ||
true, (value_grad_dims[index_grad] == 1),
errors::InvalidArgument( true,
"An error occurred while calculating %s: " errors::InvalidArgument("An error occurred while calculating %s: "
"[%s] can not be accumulated into [%s].", "[%s] can not be accumulated into [%s].",
paddle::framework::GradVarName("ValueTensor"), "ValueTensor@GRAD",
out_dims, out_dims,
value_grad_dims)); value_grad_dims));
} }
} }
VLOG(3) << "Dimensions of " VLOG(3) << "Dimensions of "
<< paddle::framework::GradVarName("ValueTensor") << "([" << "ValueTensor@GRAD"
<< value_grad_dims << "])is broadcasted into [" << "([" << value_grad_dims << "])is broadcasted into ["
<< fake_value_grad_dims << "]."; << fake_value_grad_dims << "].";
auto extent = Eigen::DSizes<Eigen::DenseIndex, RANK>(); auto extent = Eigen::DSizes<Eigen::DenseIndex, RANK>();
......
...@@ -32,11 +32,11 @@ static void Sort(const XPUContext& dev_ctx, ...@@ -32,11 +32,11 @@ static void Sort(const XPUContext& dev_ctx,
scores_slice_cpu.Resize({value.numel()}); scores_slice_cpu.Resize({value.numel()});
T* scores_slice_cpu_data = dev_ctx.template HostAlloc<T>(&scores_slice_cpu); T* scores_slice_cpu_data = dev_ctx.template HostAlloc<T>(&scores_slice_cpu);
paddle::memory::Copy(cpu_place, memory_utils::Copy(cpu_place,
scores_slice_cpu_data, scores_slice_cpu_data,
place, place,
value_data, value_data,
sizeof(T) * value.numel()); sizeof(T) * value.numel());
// Sort index // Sort index
DenseTensor index_t; DenseTensor index_t;
index_t.Resize({value.numel()}); index_t.Resize({value.numel()});
...@@ -52,7 +52,7 @@ static void Sort(const XPUContext& dev_ctx, ...@@ -52,7 +52,7 @@ static void Sort(const XPUContext& dev_ctx,
std::sort(index, index + value.numel(), compare); std::sort(index, index + value.numel(), compare);
index_out->Resize({index_t.numel()}); index_out->Resize({index_t.numel()});
int* idx_out = dev_ctx.template Alloc<int>(index_out); int* idx_out = dev_ctx.template Alloc<int>(index_out);
paddle::memory::Copy( memory_utils::Copy(
place, idx_out, cpu_place, index, sizeof(T) * index_t.numel()); place, idx_out, cpu_place, index, sizeof(T) * index_t.numel());
} }
......
...@@ -222,21 +222,21 @@ void SetValueGradImpl(const Context& dev_ctx, ...@@ -222,21 +222,21 @@ void SetValueGradImpl(const Context& dev_ctx,
(value_grad_dims_size + decrease_axis_size - num_decrease)); (value_grad_dims_size + decrease_axis_size - num_decrease));
fake_value_grad_dims[i] = value_grad_dims[index_grad]; fake_value_grad_dims[i] = value_grad_dims[index_grad];
PADDLE_ENFORCE_EQ((out_dims[i] == value_grad_dims[index_grad]) || PADDLE_ENFORCE_EQ(
(value_grad_dims[index_grad] == 1), (out_dims[i] == value_grad_dims[index_grad]) ||
true, (value_grad_dims[index_grad] == 1),
errors::InvalidArgument( true,
"An error occurred while calculating %s: " errors::InvalidArgument("An error occurred while calculating %s: "
"[%s] can not be accumulated into [%s].", "[%s] can not be accumulated into [%s].",
paddle::framework::GradVarName("ValueTensor"), "ValueTensor@GRAD",
out_dims, out_dims,
value_grad_dims)); value_grad_dims));
} }
} }
VLOG(3) << "Dimensions of " VLOG(3) << "Dimensions of "
<< paddle::framework::GradVarName("ValueTensor") << "([" << "ValueTensor@GRAD"
<< value_grad_dims << "])is broadcasted into [" << "([" << value_grad_dims << "])is broadcasted into ["
<< fake_value_grad_dims << "]."; << fake_value_grad_dims << "].";
std::vector<int64_t> slice_end(RANK, 0); std::vector<int64_t> slice_end(RANK, 0);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册